CoreNEURON
balance.cpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 
9 // use LPT algorithm to balance cells so all warps have similar number
10 // of compartments.
11 // NB: Ideally we'd balance so that warps have similar ncycle. But we do not
12 // know how to predict warp quality without an a priori set of cells to
13 // fill the warp. For large numbers of cells in a warp,
14 // it is a justifiable speculation to presume that there will be very
15 // few holes in warp filling. I.e., ncycle = ncompart/warpsize
16 
17 #include <algorithm>
18 
19 #include "coreneuron/nrnconf.h"
21 #include "coreneuron/utils/lpt.hpp"
22 
23 namespace coreneuron {
24 int cellorder_nwarp = 0; // requested number of warps for warp_balance; 0 means do not balance
25 
26 // ordering by warp, then old order
27 bool warpcmp(const TNode* a, const TNode* b) {
28  if (a->groupindex < b->groupindex) {
29  return true;
30  } else if (a->groupindex == b->groupindex && a->nodevec_index < b->nodevec_index) {
31  return true;
32  }
33  return false;
34 }
35 
36 // order the ncell nodevec roots for balance and return a displacement
37 // vector specifying the contiguous roots for a warp.
38 // The return vector should be freed by the caller.
39 // On entry, nodevec is ordered so that each cell type is together and
40 // largest cells first. On exit, nodevec is ordered so that warp i
41 // should contain roots nodevec[displ[i]:displ[i+1]]
42 
43 size_t warp_balance(size_t ncell, VecTNode& nodevec) {
44  if (ncell == 0) {
45  return 0;
46  }
47 
48  if (cellorder_nwarp == 0) {
49  return 0;
50  }
51  size_t nwarp = size_t(cellorder_nwarp);
52  // cannot be more warps than cells
53  nwarp = std::min(ncell, nwarp);
54 
55  // cellsize vector and location of types.
56  std::vector<size_t> cellsize(ncell);
57  std::vector<size_t> typedispl;
58  size_t total_compart = 0;
59  typedispl.push_back(0); // types are already in order
60  for (size_t i = 0; i < ncell; ++i) {
61  cellsize[i] = nodevec[i]->treesize;
62  total_compart += cellsize[i];
63  if (i == 0 || nodevec[i]->hash != nodevec[i - 1]->hash) {
64  typedispl.push_back(typedispl.back() + 1);
65  } else {
66  typedispl.back() += 1;
67  }
68  }
69 
70  size_t ideal_compart_per_warp = total_compart / nwarp;
71 
72  size_t min_cells_per_warp = 0;
73  for (size_t i = 0, sz = 0; sz < ideal_compart_per_warp; ++i) {
74  ++min_cells_per_warp;
75  sz += cellsize[i];
76  }
77 
78  // balance when order is unrestricted (identical cells not together)
79  // i.e. pieces are cellsize
80  double best_balance = 0.0;
81  auto inwarp = lpt(nwarp, cellsize, &best_balance);
82  printf("best_balance=%g ncell=%ld ntype=%ld nwarp=%ld\n",
83  best_balance,
84  ncell,
85  typedispl.size() - 1,
86  nwarp);
87 
88  // order the roots for balance
89  for (size_t i = 0; i < ncell; ++i) {
90  TNode* nd = nodevec[i];
91  nd->groupindex = inwarp[i];
92  }
93  std::sort(nodevec.begin(), nodevec.begin() + ncell, warpcmp);
94  for (size_t i = 0; i < nodevec.size(); ++i) {
95  TNode* nd = nodevec[i];
96  for (size_t j = 0; j < nd->children.size(); ++j) {
97  nd->children[j]->groupindex = nd->groupindex;
98  }
99  nd->nodevec_index = i;
100  }
101 
102  return nwarp;
103 }
104 } // namespace coreneuron
coreneuron::cellorder_nwarp
int cellorder_nwarp
Definition: balance.cpp:24
lpt.hpp
lpt
std::vector< std::size_t > lpt(std::size_t nbag, std::vector< std::size_t > &pieces, double *bal)
Definition: lpt.cpp:25
coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12
coreneuron::ncell
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;}}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) if(nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams loop if(nt->compute_gpu)) for(int icore=0;icore< ncore;icore+=warpsize) { int iwarp=icore/warpsize;int ic=icore &(warpsize - 1);int ncycle=ncycles[iwarp];int *stride=strides+stridedispl[iwarp];int root=rootbegin[iwarp];int lastroot=rootbegin[iwarp+1];int firstnode=nodebegin[iwarp];int lastnode=nodebegin[iwarp+1];triang_interleaved2(nt, ic, ncycle, stride, lastnode);bksub_interleaved2(nt, root+ic, lastroot, ic, ncycle, stride, firstnode);} nrn_pragma_acc(wait(nt->stream_id))}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
Definition: cellorder.cpp:636
coreneuron::TNode
TNode is the tree node that represents the tree of the compartments.
Definition: tnode.hpp:23
coreneuron::i
int i
Definition: cellorder.cpp:485
tnode.hpp
coreneuron::warp_balance
size_t warp_balance(size_t ncell, VecTNode &nodevec)
Use of the LPT (Least Processing Time) algorithm to create balanced groups of cells.
Definition: balance.cpp:43
coreneuron::TNode::groupindex
size_t groupindex
Cell ID that this compartment belongs to.
Definition: tnode.hpp:54
nrnconf.h
coreneuron::TNode::children
VecTNode children
Definition: tnode.hpp:28
coreneuron::warpcmp
bool warpcmp(const TNode *a, const TNode *b)
Definition: balance.cpp:27
coreneuron::VecTNode
std::vector< TNode * > VecTNode
Definition: tnode.hpp:17
coreneuron::TNode::nodevec_index
size_t nodevec_index
Total number of compartments from the current node and below.
Definition: tnode.hpp:33
coreneuron::cellsize
int * cellsize
Definition: cellorder.cpp:645