CoreNEURON
cellorder1.cpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 
9 #include <cstdio>
10 #include <map>
11 #include <set>
12 #include <algorithm>
13 #include <cstring>
14 
18 
19 // just for interleave_permute_type
22 
23 
24 namespace coreneuron {
25 static size_t groupsize = 32;
26 
27 /**
28  * \brief Function to order trees by size, hash and nodeindex
29  */
30 static bool tnode_earlier(TNode* a, TNode* b) {
31  bool result = false;
32  if (a->treesize < b->treesize) { // treesize dominates
33  result = true;
34  } else if (a->treesize == b->treesize) {
35  if (a->hash < b->hash) { // if treesize same, keep identical trees together
36  result = true;
37  } else if (a->hash == b->hash) {
38  result = a->nodeindex < b->nodeindex; // identical trees ordered by nodeindex
39  }
40  }
41  return result;
42 }
43 
44 static bool ptr_tnode_earlier(TNode* a, TNode* b) {
45  return tnode_earlier(a, b);
46 }
47 
48 TNode::TNode(int ix) {
49  nodeindex = ix;
50  cellindex = 0;
51  groupindex = 0;
52  level = 0;
53  hash = 0;
54  treesize = 1;
55  nodevec_index = 0;
56  treenode_order = 0;
57  parent = nullptr;
58  children.reserve(2);
59 }
60 
62 
63 size_t TNode::mkhash() { // call on all nodes in leaf to root order
64  // concept from http://stackoverflow.com/questions/20511347/a-good-hash-function-for-a-vector
65  std::sort(children.begin(), children.end(), ptr_tnode_earlier);
66  hash = children.size();
67  treesize = 1;
68  for (size_t i = 0; i < children.size(); ++i) { // need sorted by child hash
69  hash ^= children[i]->hash + 0x9e3779b9 + (hash << 6) + (hash >> 2);
70  treesize += children[i]->treesize;
71  }
72  return hash; // hash of leaf nodes is 0
73 }
74 
75 static void tree_analysis(int* parent, int nnode, int ncell, VecTNode&);
76 static void node_interleave_order(int ncell, VecTNode&);
77 static void admin1(int ncell,
78  VecTNode& nodevec,
79  int& nwarp,
80  int& nstride,
81  int*& stride,
82  int*& firstnode,
83  int*& lastnode,
84  int*& cellsize);
85 static void admin2(int ncell,
86  VecTNode& nodevec,
87  int& nwarp,
88  int& nstride,
89  int*& stridedispl,
90  int*& strides,
91  int*& rootbegin,
92  int*& nodebegin,
93  int*& ncycles);
94 static void check(VecTNode&);
95 #if CORENRN_DEBUG
96 static void prtree(VecTNode&);
97 #endif
98 
99 using TNI = std::pair<TNode*, int>;
100 using HashCnt = std::map<size_t, std::pair<TNode*, int>>;
101 using TNIVec = std::vector<TNI>;
102 
103 /*
104 assess the quality of the ordering. The measure is the size of a contiguous
105 list of nodes whose parents have the same order. How many contiguous lists
106 have that same size. How many nodes participate in that size list.
107 Modify the quality measure from experience with performance. Start with
108 list of (nnode, size_participation)
109 */
110 static void quality(VecTNode& nodevec, size_t max = 32) {
111  size_t qcnt = 0; // how many contiguous nodes have contiguous parents
112 
113  // first ncell nodes are by definition in contiguous order
114  for (const auto& n: nodevec) {
115  if (n->parent != nullptr) {
116  break;
117  }
118  qcnt += 1;
119  }
120  size_t ncell = qcnt;
121 
122  // key is how many parents in contiguous order
123  // value is number of nodes that participate in that
124  std::map<size_t, size_t> qual;
125  size_t ip_last = 10000000000;
126  for (size_t i = ncell; i < nodevec.size(); ++i) {
127  size_t ip = nodevec[i]->parent->nodevec_index;
128  // i%max == 0 means that if we start a warp with 8 and then have 32
129  // the 32 is broken into 24 and 8. (modify if the arrangement during
130  // gaussian elimination becomes more sophisticated.(
131  if (ip == ip_last + 1 && i % max != 0) { // contiguous
132  qcnt += 1;
133  } else {
134  if (qcnt == 1) {
135  // printf("unique %ld p=%ld ix=%d\n", i, ip, nodevec[i]->nodeindex);
136  }
137  qual[max] += (qcnt / max) * max;
138  size_t x = qcnt % max;
139  if (x) {
140  qual[x] += x;
141  }
142  qcnt = 1;
143  }
144  ip_last = ip;
145  }
146  qual[max] += (qcnt / max) * max;
147  size_t x = qcnt % max;
148  if (x) {
149  qual[x] += x;
150  }
151 
152  // print result
153  qcnt = 0;
154 #if CORENRN_DEBUG
155  for (const auto& q: qual) {
156  qcnt += q.second;
157  printf("%6ld %6ld\n", q.first, q.second);
158  }
159 #endif
160 #if CORENRN_DEBUG
161  printf("qual.size=%ld qual total nodes=%ld nodevec.size=%ld\n",
162  qual.size(),
163  qcnt,
164  nodevec.size());
165 #endif
166 
167  // how many race conditions. ie refer to same parent on different core
168  // of warp (max cores) or parent in same group of max.
169  size_t maxip = ncell;
170  size_t nrace1 = 0;
171  size_t nrace2 = 0;
172  std::set<size_t> ipused;
173  for (size_t i = ncell; i < nodevec.size(); ++i) {
174  TNode* nd = nodevec[i];
175  size_t ip = nd->parent->nodevec_index;
176  if (i % max == 0) {
177  maxip = i;
178  ipused.clear();
179  }
180  if (ip >= maxip) {
181  nrace1 += 1;
182  } /*else*/
183  {
184  if (ipused.find(ip) != ipused.end()) {
185  nrace2 += 1;
186  if (ip >= maxip) {
187  // printf("race for parent %ld (parent in same group as multiple users))\n",
188  // ip);
189  }
190  } else {
191  ipused.insert(ip);
192  }
193  }
194  }
195  static_cast<void>(nrace1);
196  static_cast<void>(nrace2);
197 #if CORENRN_DEBUG
198  printf("nrace = %ld (parent in same group of %ld nodes)\n", nrace1, max);
199  printf("nrace = %ld (parent used more than once by same group of %ld nodes)\n", nrace2, max);
200 #endif
201 }
202 
203 size_t level_from_root(VecTNode& nodevec) {
204  size_t maxlevel = 0;
205  for (auto& nd: nodevec) {
206  if (nd->parent) {
207  nd->level = nd->parent->level + 1;
208  if (maxlevel < nd->level) {
209  maxlevel = nd->level;
210  }
211  } else {
212  nd->level = 0;
213  }
214  }
215  return maxlevel;
216 }
217 
218 size_t level_from_leaf(VecTNode& nodevec) {
219  size_t maxlevel = 0;
220  for (size_t i = nodevec.size() - 1; true; --i) {
221  TNode* nd = nodevec[i];
222  size_t lmax = 0;
223  for (auto& child: nd->children) {
224  if (lmax <= child->level) {
225  lmax = child->level + 1;
226  }
227  }
228  nd->level = lmax;
229  if (maxlevel < lmax) {
230  maxlevel = lmax;
231  }
232  if (i == 0) {
233  break;
234  }
235  }
236  return maxlevel;
237 }
238 
239 /**
240  * \brief Set the cellindex to distinguish the different cells.
241  */
242 static void set_cellindex(int ncell, VecTNode& nodevec) {
243  for (int i = 0; i < ncell; ++i) {
244  nodevec[i]->cellindex = i;
245  }
246  for (size_t i = 0; i < nodevec.size(); ++i) {
247  TNode& nd = *nodevec[i];
248  for (size_t j = 0; j < nd.children.size(); ++j) {
249  TNode* cnode = nd.children[j];
250  cnode->cellindex = nd.cellindex;
251  }
252  }
253 }
254 
255 /**
256  * \brief Initialization of the groupindex (groups)
257  *
258  * The cells are groupped at a later stage based on a load balancing algorithm.
259  * This is just an initialization function.
260  */
261 static void set_groupindex(VecTNode& nodevec) {
262  for (size_t i = 0; i < nodevec.size(); ++i) {
263  TNode* nd = nodevec[i];
264  if (nd->parent) {
265  nd->groupindex = nd->parent->groupindex;
266  } else {
267  nd->groupindex = i / groupsize;
268  }
269  }
270 }
271 
272 // how many identical trees and their levels
273 // print when more than one instance of a type
274 // reverse the sense of levels (all leaves are level 0) to get a good
275 // idea of the depth of identical subtrees.
276 static void ident_statistic(VecTNode& nodevec, size_t ncell) {
277  // reverse sense of levels
278  // size_t maxlevel = level_from_leaf(nodevec);
279  size_t maxlevel = level_from_root(nodevec);
280 
281  // # in each level
282  std::vector<std::vector<size_t>> n_in_level(maxlevel + 1);
283  for (auto& n: n_in_level) {
284  n.resize(ncell / groupsize);
285  }
286  for (const auto& n: nodevec) {
287  n_in_level[n->level][n->groupindex]++;
288  }
289  printf("n_in_level.size = %ld\n", n_in_level.size());
290  for (size_t i = 0; i < n_in_level.size(); ++i) {
291  printf("%5ld\n", i);
292  for (const auto& n: n_in_level[i]) {
293  printf(" %5ld", n);
294  }
295  printf("\n");
296  }
297 }
298 #undef MSS
299 
300 int* node_order(int ncell,
301  int nnode,
302  int* parent,
303  int& nwarp,
304  int& nstride,
305  int*& stride,
306  int*& firstnode,
307  int*& lastnode,
308  int*& cellsize,
309  int*& stridedispl) {
310  VecTNode nodevec;
311 
312  // nodevec[0:ncell] in increasing size, with identical trees together,
313  // and otherwise nodeindex order
314  // nodevec.size = nnode
315  tree_analysis(parent, nnode, ncell, nodevec);
316  check(nodevec);
317 
318  set_cellindex(ncell, nodevec);
319  set_groupindex(nodevec);
320  level_from_root(nodevec);
321 
322  // nodevec[ncell:nnode] cells are interleaved in nodevec[0:ncell] cell order
323  if (interleave_permute_type == 1) {
324  node_interleave_order(ncell, nodevec);
325  } else {
326  group_order2(nodevec, groupsize, ncell);
327  }
328  check(nodevec);
329 
330 #if CORENRN_DEBUG
331  for (int i = 0; i < ncell; ++i) {
332  TNode& nd = *nodevec[i];
333  printf("%d size=%ld hash=%ld ix=%d\n", i, nd.treesize, nd.hash, nd.nodeindex);
334  }
335 #endif
336 
337  if (0)
338  ident_statistic(nodevec, ncell);
339  quality(nodevec);
340 
341  // the permutation
342  int* nodeorder = new int[nnode];
343  for (int i = 0; i < nnode; ++i) {
344  TNode& nd = *nodevec[i];
345  nodeorder[nd.nodeindex] = i;
346  }
347 
348  // administrative statistics for gauss elimination
349  if (interleave_permute_type == 1) {
350  admin1(ncell, nodevec, nwarp, nstride, stride, firstnode, lastnode, cellsize);
351  } else {
352  // admin2(ncell, nodevec, nwarp, nstride, stridedispl, stride, rootbegin, nodebegin,
353  // ncycles);
354  admin2(ncell, nodevec, nwarp, nstride, stridedispl, stride, firstnode, lastnode, cellsize);
355  }
356 
357  int ntopol = 1;
358  for (int i = 1; i < ncell; ++i) {
359  if (nodevec[i - 1]->hash != nodevec[i]->hash) {
360  ntopol += 1;
361  }
362  }
363  static_cast<void>(ntopol);
364 #ifdef DEBUG
365  printf("%d distinct tree topologies\n", ntopol);
366 #endif
367 
368  for (size_t i = 0; i < nodevec.size(); ++i) {
369  delete nodevec[i];
370  }
371 
372  return nodeorder;
373 }
374 
375 void check(VecTNode& nodevec) {
376  // printf("check\n");
377  size_t nnode = nodevec.size();
378  size_t ncell = 0;
379  for (size_t i = 0; i < nnode; ++i) {
380  nodevec[i]->nodevec_index = i;
381  if (nodevec[i]->parent == nullptr) {
382  ncell++;
383  }
384  }
385  /// Check that the first compartments of nodevec are the root nodes (cells)
386  for (size_t i = 0; i < ncell; ++i) {
387  nrn_assert(nodevec[i]->parent == nullptr);
388  }
389  for (size_t i = ncell; i < nnode; ++i) {
390  TNode& nd = *nodevec[i];
391  if (nd.parent->nodevec_index >= nd.nodevec_index) {
392  printf("error i=%ld nodevec_index=%ld parent=%ld\n",
393  i,
394  nd.nodevec_index,
395  nd.parent->nodevec_index);
396  }
398  }
399 }
400 
401 #if CORENRN_DEBUG
402 void prtree(VecTNode& nodevec) {
403  size_t nnode = nodevec.size();
404  for (size_t i = 0; i < nnode; ++i) {
405  nodevec[i]->nodevec_index = i;
406  }
407  for (size_t i = 0; i < nnode; ++i) {
408  TNode& nd = *nodevec[i];
409  printf("%ld p=%d c=%ld l=%ld o=%ld ix=%d pix=%d\n",
410  i,
411  nd.parent ? int(nd.parent->nodevec_index) : -1,
412  nd.cellindex,
413  nd.level,
414  nd.treenode_order,
415  nd.nodeindex,
416  nd.parent ? int(nd.parent->nodeindex) : -1);
417  }
418 }
419 #endif
420 
421 /**
422  * \brief Perform tree preparation for interleaving strategies
423  *
424  * \param parent vector of parent indices
425  * \param nnode number of compartments in the cells
426  * \param ncell number of cells
427  */
428 void tree_analysis(int* parent, int nnode, int ncell, VecTNode& nodevec) {
429  // create empty TNodes (knowing only their index)
430  nodevec.reserve(nnode);
431  for (int i = 0; i < nnode; ++i) {
432  nodevec.push_back(new TNode(i));
433  }
434 
435  // determine the (sorted by hash) children of each node
436  for (int i = nnode - 1; i >= ncell; --i) {
437  nodevec[i]->parent = nodevec[parent[i]];
438  nodevec[i]->mkhash();
439  nodevec[parent[i]]->children.push_back(nodevec[i]);
440  }
441 
442  // determine hash of the cells
443  for (int i = 0; i < ncell; ++i) {
444  nodevec[i]->mkhash();
445  }
446 
447  // sort it by tree size (from smaller to larger)
448  std::sort(nodevec.begin(), nodevec.begin() + ncell, tnode_earlier);
449 }
450 
451 static bool interleave_comp(TNode* a, TNode* b) {
452  bool result = false;
453  if (a->treenode_order < b->treenode_order) {
454  result = true;
455  } else if (a->treenode_order == b->treenode_order) {
456  if (a->cellindex < b->cellindex) {
457  result = true;
458  }
459  }
460  return result;
461 }
462 
463 /**
464  * \brief Naive interleaving strategy (interleave_permute_type == 1)
465  *
466  * Sort so nodevec[ncell:nnode] cell instances are interleaved. Keep the
467  * secondary ordering with respect to treenode_order so each cell is still a tree.
468  *
469  * \param ncell number of cells (trees)
470  * \param nodevec vector that contains compartments (nodes of the trees)
471  */
472 void node_interleave_order(int ncell, VecTNode& nodevec) {
473  int* order = new int[ncell];
474  for (int i = 0; i < ncell; ++i) {
475  order[i] = 0;
476  nodevec[i]->treenode_order = order[i]++;
477  }
478  for (size_t i = 0; i < nodevec.size(); ++i) {
479  TNode& nd = *nodevec[i];
480  for (size_t j = 0; j < nd.children.size(); ++j) {
481  TNode* cnode = nd.children[j];
482  cnode->treenode_order = order[nd.cellindex]++;
483  }
484  }
485  delete[] order;
486 
487  // std::sort(nodevec.begin() + ncell, nodevec.end(), contig_comp);
488  // Traversal of nodevec: From root to leaves (this is why we compute the tree node order)
489  std::sort(nodevec.begin() + ncell, nodevec.end(), interleave_comp);
490 
491 #if CORENRN_DEBUG
492  for (size_t i = 0; i < nodevec.size(); ++i) {
493  TNode& nd = *nodevec[i];
494  printf("%ld cell=%ld ix=%d\n", i, nd.cellindex, nd.nodeindex);
495  }
496 #endif
497 }
498 
499 static void admin1(int ncell,
500  VecTNode& nodevec,
501  int& nwarp,
502  int& nstride,
503  int*& stride,
504  int*& firstnode,
505  int*& lastnode,
506  int*& cellsize) {
507  firstnode = (int*) ecalloc_align(ncell, sizeof(int));
508  lastnode = (int*) ecalloc_align(ncell, sizeof(int));
509  cellsize = (int*) ecalloc_align(ncell, sizeof(int));
510 
511  nwarp = (ncell % warpsize == 0) ? (ncell / warpsize) : (ncell / warpsize + 1);
512 
513  for (int i = 0; i < ncell; ++i) {
514  firstnode[i] = -1;
515  lastnode[i] = -1;
516  cellsize[i] = 0;
517  }
518 
519  nstride = 0;
520  for (size_t i = ncell; i < nodevec.size(); ++i) {
521  TNode& nd = *nodevec[i];
522  size_t ci = nd.cellindex;
523  if (firstnode[ci] == -1) {
524  firstnode[ci] = i;
525  }
526  lastnode[ci] = i;
527  cellsize[ci] += 1;
528  if (nstride < cellsize[ci]) {
529  nstride = cellsize[ci];
530  }
531  }
532 
533  // this vector is used to move from one compartment to the other (per cell)
534  // its length is equal to the cell with the highest number of compartments
535  stride = static_cast<int*>(ecalloc_align(nstride + 1, sizeof(int)));
536  for (size_t i = ncell; i < nodevec.size(); ++i) {
537  TNode& nd = *nodevec[i];
538  // compute how many compartments with the same order
539  // treenode_order : defined in breadth first fashion (for each cell separately)
540  stride[nd.treenode_order - 1] += 1; // -1 because treenode order includes root
541  }
542 }
543 
544 // for admin2 we allow the node organisation in warps of (say 4 cores per warp)
545 // ............... ideal warp but unbalanced relative to warp with max cycles
546 // ............... ncycle = 15, icore [0:4), all strides are 4.
547 // ...............
548 // ...............
549 //
550 // .......... unbalanced relative to warp with max cycles
551 // .......... ncycle = 10, not all strides the same because
552 // .......... of need to avoid occasional race conditions.
553 // . . .. icore [4:8) only 4 strides of 4
554 //
555 // .................... ncycle = 20, uses only one core in the warp (cable)
556 // icore 8, all ncycle strides are 1
557 
558 // One thing to be unhappy about is the large stride vector of size about
559 // number of compartments/warpsize. There are a lot of models where the
560 // stride for a warp is constant except for one cycle in the warp and that
561 // is easy to obtain when there are more than warpsize cells per warp.
562 
563 static size_t stride_length(size_t begin, size_t end, VecTNode& nodevec) {
564  // return stride length starting at i. Do not go past j.
565  // max stride is warpsize.
566  // At this time, only assume vicious parent race conditions matter.
567  if (end - begin > warpsize) {
568  end = begin + warpsize;
569  }
570  for (size_t i = begin; i < end; ++i) {
571  TNode* nd = nodevec[i];
572  nrn_assert(nd->nodevec_index == i);
573  size_t diff = dist2child(nd);
574  if (i + diff < end) {
575  end = i + diff;
576  }
577  }
578  return end - begin;
579 }
580 
581 /**
582  * \brief Prepare for solve_interleaved2
583  *
584  * One group of cells per warp.
585  *
586  * warp[i] has a number of compute cycles (ncycle[i])
587  * the index of its first root (rootbegin[i], last rootbegin[nwarp] = ncell)
588  * the index of its first node (nodebegin[i], last nodebegin[nwarp] = nnode)
589  *
590  * Each compute cycle has a stride
591  * A stride is how many nodes are processed by a warp in one compute cycle
592  * There are nstride strides. nstride is the sum of ncycles of all warps.
593  * warp[i] has ncycle[i] strides
594  * same as sum of ncycle
595  * warp[i] has a stridedispl[i] which is stridedispl[i-1] + ncycle[i].
596  * ie. The zeroth cycle of warp[j] works on stride[stridedispl[j]]
597  * The value of a stride beginning at node i (node i is computed by core 0 of
598  * some warp for some cycle) is determined by stride_length(i, j, nodevec)
599  *
600  */
601 static void admin2(int ncell,
602  VecTNode& nodevec,
603  int& nwarp,
604  int& nstride,
605  int*& stridedispl,
606  int*& strides,
607  int*& rootbegin,
608  int*& nodebegin,
609  int*& ncycles) {
610  // the number of groups is the number of warps needed
611  // ncore is the number of warps * warpsize
612  nwarp = nodevec[ncell - 1]->groupindex + 1;
613 
614  ncycles = (int*) ecalloc_align(nwarp, sizeof(int));
615  stridedispl = (int*) ecalloc_align(nwarp + 1,
616  sizeof(int)); // running sum of ncycles (start at 0)
617  rootbegin = (int*) ecalloc_align(nwarp + 1, sizeof(int)); // index (+1) of first root in warp.
618  nodebegin = (int*) ecalloc_align(nwarp + 1, sizeof(int)); // index (+1) of first node in warp.
619 
620  // rootbegin and nodebegin are the root index values + 1 of the last of
621  // the sequence of constant groupindex
622  rootbegin[0] = 0;
623  for (size_t i = 0; i < size_t(ncell); ++i) {
624  rootbegin[nodevec[i]->groupindex + 1] = i + 1;
625  }
626  nodebegin[0] = ncell;
627  // We start from the leaves and go backwards towards the root
628  for (size_t i = size_t(ncell); i < nodevec.size(); ++i) {
629  nodebegin[nodevec[i]->groupindex + 1] = i + 1;
630  }
631 
632  // ncycles, stridedispl, and nstride
633  nstride = 0;
634  stridedispl[0] = 0;
635  for (size_t iwarp = 0; iwarp < (size_t) nwarp; ++iwarp) {
636  size_t j = size_t(nodebegin[iwarp + 1]);
637  int nc = 0;
638  size_t i = nodebegin[iwarp];
639  // in this loop we traverse all the children of all the cells in the current warp (iwarp)
640  while (i < j) {
641  i += stride_length(i, j, nodevec);
642  ++nc; // how many times the warp should loop in order to finish with all the tree
643  // depths (for all the trees of the warp/group)
644  }
645  ncycles[iwarp] = nc;
646  stridedispl[iwarp + 1] = stridedispl[iwarp] + nc;
647  nstride += nc;
648  }
649 
650  // strides
651  strides = (int*) ecalloc_align(nstride, sizeof(int));
652  nstride = 0;
653  for (size_t iwarp = 0; iwarp < (size_t) nwarp; ++iwarp) {
654  size_t j = size_t(nodebegin[iwarp + 1]);
655  size_t i = nodebegin[iwarp];
656  while (i < j) {
657  int k = stride_length(i, j, nodevec);
658  i += k;
659  strides[nstride++] = k;
660  }
661  }
662 
663 #if CORENRN_DEBUG
664  printf("warp rootbegin nodebegin stridedispl\n");
665  for (int i = 0; i <= nwarp; ++i) {
666  printf("%4d %4d %4d %4d\n", i, rootbegin[i], nodebegin[i], stridedispl[i]);
667  }
668 #endif
669 }
670 } // namespace coreneuron
coreneuron::set_groupindex
static void set_groupindex(VecTNode &nodevec)
Initialization of the groupindex (groups)
Definition: cellorder1.cpp:261
coreneuron::dist2child
size_t dist2child(TNode *nd)
Definition: cellorder2.cpp:153
coreneuron::TNode::hash
size_t hash
Hash algorith that generates a hash based on the hash of the children and the number of compartments ...
Definition: tnode.hpp:31
coreneuron::level_from_root
size_t level_from_root(VecTNode &)
Definition: cellorder1.cpp:203
coreneuron::tree_analysis
static void tree_analysis(int *parent, int nnode, int ncell, VecTNode &)
Perform tree preparation for interleaving strategies.
Definition: cellorder1.cpp:428
coreneuron::node_order
int * node_order(int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
Function that returns a permutation of length nnode.
Definition: cellorder1.cpp:300
coreneuron::lastnode
int int int int lastnode
Definition: cellorder.cpp:482
coreneuron::TNode::nodeindex
int nodeindex
Initialized index / groupsize.
Definition: tnode.hpp:55
warpsize
#define warpsize
Definition: tnode.hpp:85
coreneuron::TNode::~TNode
virtual ~TNode()
Definition: cellorder1.cpp:61
coreneuron::stride_length
static size_t stride_length(size_t begin, size_t end, VecTNode &nodevec)
Definition: cellorder1.cpp:563
coreneuron::interleave_permute_type
int interleave_permute_type
Definition: cellorder.cpp:28
coreneuron::interleave_comp
static bool interleave_comp(TNode *a, TNode *b)
Definition: cellorder1.cpp:451
coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12
coreneuron::TNode::mkhash
size_t mkhash()
Definition: cellorder1.cpp:63
coreneuron::admin1
static void admin1(int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize)
Definition: cellorder1.cpp:499
coreneuron::TNIVec
std::vector< TNI > TNIVec
Definition: cellorder1.cpp:101
coreneuron::ncell
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;}}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) if(nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams loop if(nt->compute_gpu)) for(int icore=0;icore< ncore;icore+=warpsize) { int iwarp=icore/warpsize;int ic=icore &(warpsize - 1);int ncycle=ncycles[iwarp];int *stride=strides+stridedispl[iwarp];int root=rootbegin[iwarp];int lastroot=rootbegin[iwarp+1];int firstnode=nodebegin[iwarp];int lastnode=nodebegin[iwarp+1];triang_interleaved2(nt, ic, ncycle, stride, lastnode);bksub_interleaved2(nt, root+ic, lastroot, ic, ncycle, stride, firstnode);} nrn_pragma_acc(wait(nt->stream_id))}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
Definition: cellorder.cpp:636
coreneuron::TNode
TNode is the tree node that represents the tree of the compartments.
Definition: tnode.hpp:23
coreneuron::i
int i
Definition: cellorder.cpp:485
nrniv_decl.h
coreneuron::set_cellindex
static void set_cellindex(int ncell, VecTNode &nodevec)
Set the cellindex to distinguish the different cells.
Definition: cellorder1.cpp:242
coreneuron::check
static void check(VecTNode &)
Definition: cellorder1.cpp:375
coreneuron::HashCnt
std::map< size_t, std::pair< TNode *, int > > HashCnt
Definition: cellorder1.cpp:100
tnode.hpp
coreneuron::TNode::groupindex
size_t groupindex
Cell ID that this compartment belongs to.
Definition: tnode.hpp:54
coreneuron::group_order2
void group_order2(VecTNode &, size_t groupsize, size_t ncell)
Implementation of the advanced interleaving strategy (interleave_permute_type == 2)
Definition: cellorder2.cpp:460
coreneuron::TNode::treesize
size_t treesize
Hash value generated by mkhash.
Definition: tnode.hpp:32
coreneuron::TNode::cellindex
size_t cellindex
level of of this compartment in the tree
Definition: tnode.hpp:53
coreneuron::admin2
static void admin2(int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stridedispl, int *&strides, int *&rootbegin, int *&nodebegin, int *&ncycles)
Prepare for solve_interleaved2.
Definition: cellorder1.cpp:601
coreneuron::tnode_earlier
static bool tnode_earlier(TNode *a, TNode *b)
Function to order trees by size, hash and nodeindex.
Definition: cellorder1.cpp:30
coreneuron::TNode::treenode_order
size_t treenode_order
index in nodevec that is set in check() In cell permute 2 this is set as Breadth First traversal
Definition: tnode.hpp:35
coreneuron::groupsize
static size_t groupsize
Definition: cellorder1.cpp:25
coreneuron::TNode::children
VecTNode children
Definition: tnode.hpp:28
coreneuron::TNode::level
size_t level
For cell permute 1 (Interleaved):
Definition: tnode.hpp:52
coreneuron::quality
static void quality(VecTNode &nodevec, size_t max=32)
Definition: cellorder1.cpp:110
cellorder.hpp
coreneuron::node_interleave_order
static void node_interleave_order(int ncell, VecTNode &)
Naive interleaving strategy (interleave_permute_type == 1)
Definition: cellorder1.cpp:472
coreneuron::TNode::parent
TNode * parent
Definition: tnode.hpp:27
coreneuron::ptr_tnode_earlier
static bool ptr_tnode_earlier(TNode *a, TNode *b)
Definition: cellorder1.cpp:44
coreneuron::nstride
int nstride
Definition: cellorder.cpp:641
coreneuron::VecTNode
std::vector< TNode * > VecTNode
Definition: tnode.hpp:17
coreneuron::stride
int int int * stride
Definition: cellorder.cpp:482
coreneuron::TNode::nodevec_index
size_t nodevec_index
Total number of compartments from the current node and below.
Definition: tnode.hpp:33
coreneuron::level_from_leaf
size_t level_from_leaf(VecTNode &)
Definition: cellorder1.cpp:218
coreneuron::NrnThread::end
int end
Definition: multicore.hpp:98
coreneuron::TNI
std::pair< TNode *, int > TNI
Definition: cellorder1.cpp:99
coreneuron::ecalloc_align
void * ecalloc_align(size_t n, size_t size, size_t alignment)
coreneuron::firstnode
int int int int int int firstnode
Definition: cellorder.cpp:532
nrn_assert
#define nrn_assert(x)
assert()-like macro, independent of NDEBUG status
Definition: nrn_assert.h:33
coreneuron::cellsize
int * cellsize
Definition: cellorder.cpp:645
nrn_assert.h
memory.h
coreneuron::ident_statistic
static void ident_statistic(VecTNode &nodevec, size_t ncell)
Definition: cellorder1.cpp:276
coreneuron::TNode::TNode
TNode(int ix)
Definition: cellorder1.cpp:48