Go to the documentation of this file.
99 using TNI = std::pair<TNode*, int>;
100 using HashCnt = std::map<size_t, std::pair<TNode*, int>>;
114 for (
const auto& n: nodevec) {
115 if (n->parent !=
nullptr) {
124 std::map<size_t, size_t> qual;
125 size_t ip_last = 10000000000;
126 for (
size_t i =
ncell;
i < nodevec.size(); ++
i) {
127 size_t ip = nodevec[
i]->parent->nodevec_index;
131 if (ip == ip_last + 1 &&
i % max != 0) {
137 qual[max] += (qcnt / max) * max;
138 size_t x = qcnt % max;
146 qual[max] += (qcnt / max) * max;
147 size_t x = qcnt % max;
155 for (
const auto& q: qual) {
157 printf(
"%6ld %6ld\n", q.first, q.second);
161 printf(
"qual.size=%ld qual total nodes=%ld nodevec.size=%ld\n",
169 size_t maxip =
ncell;
172 std::set<size_t> ipused;
173 for (
size_t i =
ncell;
i < nodevec.size(); ++
i) {
184 if (ipused.find(ip) != ipused.end()) {
195 static_cast<void>(nrace1);
196 static_cast<void>(nrace2);
198 printf(
"nrace = %ld (parent in same group of %ld nodes)\n", nrace1, max);
199 printf(
"nrace = %ld (parent used more than once by same group of %ld nodes)\n", nrace2, max);
205 for (
auto& nd: nodevec) {
207 nd->level = nd->parent->level + 1;
208 if (maxlevel < nd->level) {
209 maxlevel = nd->level;
220 for (
size_t i = nodevec.size() - 1;
true; --
i) {
224 if (lmax <= child->level) {
225 lmax = child->
level + 1;
229 if (maxlevel < lmax) {
244 nodevec[
i]->cellindex =
i;
246 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
248 for (
size_t j = 0; j < nd.
children.size(); ++j) {
262 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
282 std::vector<std::vector<size_t>> n_in_level(maxlevel + 1);
283 for (
auto& n: n_in_level) {
286 for (
const auto& n: nodevec) {
287 n_in_level[n->level][n->groupindex]++;
289 printf(
"n_in_level.size = %ld\n", n_in_level.size());
290 for (
size_t i = 0;
i < n_in_level.size(); ++
i) {
292 for (
const auto& n: n_in_level[
i]) {
342 int* nodeorder =
new int[nnode];
343 for (
int i = 0;
i < nnode; ++
i) {
359 if (nodevec[
i - 1]->hash != nodevec[
i]->hash) {
363 static_cast<void>(ntopol);
365 printf(
"%d distinct tree topologies\n", ntopol);
368 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
377 size_t nnode = nodevec.size();
379 for (
size_t i = 0;
i < nnode; ++
i) {
380 nodevec[
i]->nodevec_index =
i;
381 if (nodevec[
i]->parent ==
nullptr) {
386 for (
size_t i = 0;
i <
ncell; ++
i) {
389 for (
size_t i =
ncell;
i < nnode; ++
i) {
392 printf(
"error i=%ld nodevec_index=%ld parent=%ld\n",
403 size_t nnode = nodevec.size();
404 for (
size_t i = 0;
i < nnode; ++
i) {
405 nodevec[
i]->nodevec_index =
i;
407 for (
size_t i = 0;
i < nnode; ++
i) {
408 TNode& nd = *nodevec[
i];
409 printf(
"%ld p=%d c=%ld l=%ld o=%ld ix=%d pix=%d\n",
411 nd.parent ?
int(nd.parent->nodevec_index) : -1,
416 nd.parent ?
int(nd.parent->nodeindex) : -1);
430 nodevec.reserve(nnode);
431 for (
int i = 0;
i < nnode; ++
i) {
432 nodevec.push_back(
new TNode(
i));
436 for (
int i = nnode - 1;
i >=
ncell; --
i) {
437 nodevec[
i]->parent = nodevec[parent[
i]];
438 nodevec[
i]->mkhash();
439 nodevec[parent[
i]]->children.push_back(nodevec[
i]);
444 nodevec[
i]->mkhash();
473 int* order =
new int[
ncell];
476 nodevec[
i]->treenode_order = order[
i]++;
478 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
480 for (
size_t j = 0; j < nd.
children.size(); ++j) {
492 for (
size_t i = 0;
i < nodevec.size(); ++
i) {
520 for (
size_t i =
ncell;
i < nodevec.size(); ++
i) {
536 for (
size_t i =
ncell;
i < nodevec.size(); ++
i) {
570 for (
size_t i = begin;
i < end; ++
i) {
574 if (
i + diff < end) {
612 nwarp = nodevec[
ncell - 1]->groupindex + 1;
623 for (
size_t i = 0;
i < size_t(
ncell); ++
i) {
624 rootbegin[nodevec[
i]->groupindex + 1] =
i + 1;
626 nodebegin[0] =
ncell;
628 for (
size_t i =
size_t(
ncell);
i < nodevec.size(); ++
i) {
629 nodebegin[nodevec[
i]->groupindex + 1] =
i + 1;
635 for (
size_t iwarp = 0; iwarp < (size_t) nwarp; ++iwarp) {
636 size_t j = size_t(nodebegin[iwarp + 1]);
638 size_t i = nodebegin[iwarp];
646 stridedispl[iwarp + 1] = stridedispl[iwarp] + nc;
653 for (
size_t iwarp = 0; iwarp < (size_t) nwarp; ++iwarp) {
654 size_t j = size_t(nodebegin[iwarp + 1]);
655 size_t i = nodebegin[iwarp];
664 printf(
"warp rootbegin nodebegin stridedispl\n");
665 for (
int i = 0;
i <= nwarp; ++
i) {
666 printf(
"%4d %4d %4d %4d\n",
i, rootbegin[
i], nodebegin[
i], stridedispl[
i]);
static void set_groupindex(VecTNode &nodevec)
Initialization of the groupindex (groups)
size_t dist2child(TNode *nd)
size_t hash
Hash algorithm that generates a hash based on the hash of the children and the number of compartments ...
size_t level_from_root(VecTNode &)
static void tree_analysis(int *parent, int nnode, int ncell, VecTNode &)
Perform tree preparation for interleaving strategies.
int * node_order(int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
Function that returns a permutation of length nnode.
int nodeindex
Initialized index / groupsize.
static size_t stride_length(size_t begin, size_t end, VecTNode &nodevec)
int interleave_permute_type
static bool interleave_comp(TNode *a, TNode *b)
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
static void admin1(int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize)
std::vector< TNI > TNIVec
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;}}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) if(nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams loop if(nt->compute_gpu)) for(int icore=0;icore< ncore;icore+=warpsize) { int iwarp=icore/warpsize;int ic=icore &(warpsize - 1);int ncycle=ncycles[iwarp];int *stride=strides+stridedispl[iwarp];int root=rootbegin[iwarp];int lastroot=rootbegin[iwarp+1];int firstnode=nodebegin[iwarp];int lastnode=nodebegin[iwarp+1];triang_interleaved2(nt, ic, ncycle, stride, lastnode);bksub_interleaved2(nt, root+ic, lastroot, ic, ncycle, stride, firstnode);} nrn_pragma_acc(wait(nt->stream_id))}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
TNode is the tree node that represents the tree of the compartments.
static void set_cellindex(int ncell, VecTNode &nodevec)
Set the cellindex to distinguish the different cells.
static void check(VecTNode &)
std::map< size_t, std::pair< TNode *, int > > HashCnt
size_t groupindex
Cell ID that this compartment belongs to.
void group_order2(VecTNode &, size_t groupsize, size_t ncell)
Implementation of the advanced interleaving strategy (interleave_permute_type == 2)
size_t treesize
Hash value generated by mkhash.
size_t cellindex
level of this compartment in the tree
static void admin2(int ncell, VecTNode &nodevec, int &nwarp, int &nstride, int *&stridedispl, int *&strides, int *&rootbegin, int *&nodebegin, int *&ncycles)
Prepare for solve_interleaved2.
static bool tnode_earlier(TNode *a, TNode *b)
Function to order trees by size, hash and nodeindex.
size_t treenode_order
index in nodevec that is set in check(). In cell permute 2 this is set as Breadth First traversal
size_t level
For cell permute 1 (Interleaved):
static void quality(VecTNode &nodevec, size_t max=32)
static void node_interleave_order(int ncell, VecTNode &)
Naive interleaving strategy (interleave_permute_type == 1)
static bool ptr_tnode_earlier(TNode *a, TNode *b)
std::vector< TNode * > VecTNode
size_t nodevec_index
Total number of compartments from the current node and below.
size_t level_from_leaf(VecTNode &)
std::pair< TNode *, int > TNI
void * ecalloc_align(size_t n, size_t size, size_t alignment)
int int int int int int firstnode
#define nrn_assert(x)
assert()-like macro, independent of NDEBUG status
static void ident_statistic(VecTNode &nodevec, size_t ncell)