Loading [MathJax]/extensions/tex2jax.js
Go to the documentation of this file.
26 using VVTN = std::vector<VTN>;
27 using VVVTN = std::vector<VVTN>;
44 if (palevel < pblevel) {
46 }
else if (palevel == pblevel) {
67 for (
size_t i = 0;
i < level.size(); ++
i) {
68 level[
i]->treenode_order =
i;
75 for (
auto& level: levels) {
76 for (
auto* nd: level) {
77 nd->treenode_order = order++;
86 static size_t g32(TNode* nd) {
90 static bool is_parent_race(TNode* nd) {
92 for (
const auto& child: nd->children) {
93 if (pg == g32(child)) {
104 for (
const auto& child: nd->
children) {
113 static bool is_child_race(TNode* nd) {
114 if (nd->children.size() < 2) {
117 if (nd->children.size() == 2) {
118 return g32(nd->children[0]) == g32(nd->children[1]);
121 for (
const auto& child: nd->children) {
122 std::size_t gc = g32(child);
123 if (s.find(gc) != s.end()) {
137 size_t c0 = nd->
children[0]->nodevec_index;
138 size_t c1 = nd->
children[1]->nodevec_index;
139 c0 = (c0 < c1) ? (c1 - c0) : (c0 - c1);
142 size_t ic0 = nd->
children[0]->nodevec_index;
144 size_t ic = nd->
children[
i]->nodevec_index;
156 for (
const auto& child: nd->
children) {
157 std::size_t d1 = child->nodevec_index -
pi;
166 template <
typename T>
167 static void move_range(
size_t start,
size_t length,
size_t dst, std::vector<T>&
v) {
168 typename std::vector<T>::iterator first, middle, last;
170 first =
v.begin() + start;
171 middle = first + length;
172 last =
v.begin() + dst;
174 first =
v.begin() + dst;
175 middle =
v.begin() + start;
176 last = middle + length;
178 std::rotate(first, middle, last);
181 static void move_nodes(
size_t start,
size_t length,
size_t dst,
VTN& nodes) {
187 for (
size_t i = start;
i < dst - length; ++
i) {
190 for (
size_t i = dst - length;
i < dst; ++
i) {
191 nrn_assert(nodes[
i]->nodevec_index == start + (
i - (dst - length)));
195 for (
size_t i = start;
i < dst; ++
i) {
196 nodes[
i]->nodevec_index =
i;
202 static size_t need2move(TNode* nd) {
207 static void how_many_warpsize_groups_have_only_leaves(
VTN& nodes) {
209 for (
size_t i = 0;
i < nodes.size();
i +=
warpsize) {
211 for (
size_t j = 0; j <
warpsize; ++j) {
212 if (!nodes[
i + j]->children.empty()) {
218 printf(
"warpsize group %ld starting at level %ld\n",
i /
warpsize, nodes[
i]->level);
222 printf(
"number of warpsize groups with only leaves = %ld\n", n);
225 static void pr_race_situation(
VTN& nodes) {
229 for (
size_t i = nodes.size() - 1; nodes[
i]->level != 0; --
i) {
230 TNode* nd = nodes[
i];
234 if (is_parent_race(nd)) {
235 printf(
"level=%ld i=%ld d=%ld n=%ld",
240 for (
const auto& cnd: nd->children) {
241 printf(
" %ld %ld", cnd->level, cnd->nodevec_index);
246 if (is_child_race(nd)) {
250 printf(
"prace=%ld crace=%ld prace2=%ld\n", prace, crace, prace2);
257 if (nodes[
i]->children.empty()) {
284 while (nodes[
i - 1]->children.empty() && n < d) {
300 printf(
"could not eliminate prace for g=%ld c=%ld l=%ld o=%ld %ld\n",
310 size_t c0 = nd->
children[0]->nodevec_index;
311 size_t c1 = nd->
children[1]->nodevec_index;
312 size_t d =
warpsize - ((c0 > c1) ? (c0 - c1) : (c1 - c0));
316 printf(
"could not eliminate crace for g=%ld c=%ld l=%ld o=%ld %ld\n",
327 std::size_t nnode = std::accumulate(levels.begin(),
330 [](std::size_t s,
const VTN& l) { return s + l.size(); });
333 for (
const auto& level: levels) {
334 for (
const auto& l: level) {
338 for (
size_t i = 0;
i < nodes.size(); ++
i) {
339 nodes[
i]->nodevec_index =
i;
374 if (0 && nodes.size() %
warpsize != 0) {
375 size_t nnode = nodes.size() - levels[0].size();
376 printf(
"warp of %ld cells has %ld nodes in last cycle %ld\n",
386 for (
size_t i = nodes.size() - 1;
i >= levels[0].size(); --
i) {
398 for (
size_t i = 0;
i < nodes.size(); ++
i) {
399 nodes[
i]->treenode_order =
i;
412 for (
auto& level: levels) {
414 for (
const auto& nd: level) {
415 for (
size_t k = 0; k < nd->children.size(); ++k) {
416 nd->children[k]->treenode_order = k;
421 for (
auto& level: levels) {
431 for (
size_t i = 0;
i < groups[0].size(); ++
i) {
433 for (
const auto& group: groups) {
434 printf(
" %5ld", group[
i].size());
455 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
456 nodevec[
i]->nodevec_index =
i;
474 for (
auto& group: groups) {
475 group.resize(maxlevel + 1);
480 for (
const auto& nd: nodevec) {
481 groups[nd->groupindex][nd->level].push_back(nd);
487 for (
auto& group: groups) {
size_t dist2child(TNode *nd)
size_t hash
Hash algorithm that generates a hash based on the hash of the children and the number of compartments ...
size_t level_from_root(VecTNode &)
static void question2(VVTN &levels)
static void move_range(size_t start, size_t length, size_t dst, std::vector< T > &v)
static void sortlevel(VTN &level)
static bool is_parent_race2(TNode *nd)
void chklevel(VTN &level, size_t nident=8)
static size_t next_leaf(TNode *nd, VTN &nodes)
static void checkrace(TNode *nd, VTN &nodes)
THIS FILE IS AUTO-GENERATED; DON'T MODIFY IT.
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;}}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) if(nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams loop if(nt->compute_gpu)) for(int icore=0;icore< ncore;icore+=warpsize) { int iwarp=icore/warpsize;int ic=icore &(warpsize - 1);int ncycle=ncycles[iwarp];int *stride=strides+stridedispl[iwarp];int root=rootbegin[iwarp];int lastroot=rootbegin[iwarp+1];int firstnode=nodebegin[iwarp];int lastnode=nodebegin[iwarp+1];triang_interleaved2(nt, ic, ncycle, stride, lastnode);bksub_interleaved2(nt, root+ic, lastroot, ic, ncycle, stride, firstnode);} nrn_pragma_acc(wait(nt->stream_id))}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
TNode is the tree node that represents the tree of the compartments.
static void analyze(VVTN &levels)
std::vector< VVTN > VVVTN
static void set_nodeindex(VecTNode &nodevec)
void prgroupsize(VVVTN &groups)
size_t warp_balance(size_t ncell, VecTNode &nodevec)
Use of the LPT (Least Processing Time) algorithm to create balanced groups of cells.
size_t groupindex
Cell ID that this compartment belongs to.
static bool eliminate_race(TNode *nd, size_t d, VTN &nodes, TNode *look)
static bool is_child_race2(TNode *nd)
static bool final_nodevec_cmp(TNode *a, TNode *b)
void group_order2(VecTNode &, size_t groupsize, size_t ncell)
Implementation of the advanced interleaving strategy (interleave_permute_type == 2)
static bool sortlevel_cmp(TNode *a, TNode *b)
static void eliminate_prace(TNode *nd, VTN &nodes)
size_t cellindex
level of this compartment in the tree
size_t treenode_order
index in nodevec that is set in check(). In cell permute 2 this is set as Breadth First traversal
size_t level
For cell permute 1 (Interleaved):
static void move_nodes(size_t start, size_t length, size_t dst, VTN &nodes)
std::vector< TNode * > VecTNode
static void eliminate_crace(TNode *nd, VTN &nodes)
size_t nodevec_index
Total number of compartments from the current node and below.
#define nrn_assert(x)
assert()-like macro, independent of NDEBUG status
static void set_treenode_order(VVTN &levels)