CoreNEURON
cellorder.hpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 
9 #pragma once
10 
12 #include <algorithm>
13 namespace coreneuron {
14 
15 /**
16  * \brief Function that performs the permutation of the cells such that the
17  * execution threads access coalesced memory.
18  *
19  * \param ith NrnThread to access
20  * \param ncell number of cells in NrnThread
21  * \param nnode total number of compartments in the ncell cells
22  * \param parent parent indices of cells
23  *
24  * \return int* order, interleaved order of the cells
25  */
26 int* interleave_order(int ith, int ncell, int nnode, int* parent);
27 
30 
31 /**
32  *
33  * \brief Solve the Hines matrices based on the interleave_permute_type (1 or 2).
34  *
35  * For interleave_permute_type == 1 : Naive interleaving -> Each execution thread deals with one
36  * Hines matrix (cell). For interleave_permute_type == 2 : Advanced interleaving -> Each Hines
37  * matrix is solved by multiple execution threads (with coalesced memory access as well).
38  */
39 extern void solve_interleaved(int ith);
40 
41 class InterleaveInfo; // forward declaration
42 /**
43  *
44  * \brief CUDA branch of the solve_interleaved with interleave_permute_type == 2.
45  *
46  * This branch is activated at runtime with the --cuda-interface CLI flag.
47  */
48 void solve_interleaved2_launcher(NrnThread* nt, InterleaveInfo* info, int ncore, void* stream);
49 
51  public:
52  InterleaveInfo() = default;
56  int nwarp = 0; // used only by interleave2
57  int nstride = 0;
58  int* stridedispl = nullptr; // interleave2: nwarp+1
59  int* stride = nullptr; // interleave2: stride length is stridedispl[nwarp]
60  int* firstnode = nullptr; // interleave2: rootbegin nwarp+1 displacements
61  int* lastnode = nullptr; // interleave2: nodebegin nwarp+1 displacements
62  int* cellsize = nullptr; // interleave2: ncycles nwarp
63 
64  // statistics (nwarp of each)
65  size_t* nnode = nullptr;
66  size_t* ncycle = nullptr;
67  size_t* idle = nullptr;
68  size_t* cache_access = nullptr;
69  size_t* child_race = nullptr;
70 
71  private:
72  void swap(InterleaveInfo& info);
73 };
74 
75 /**
76  * \brief Function that returns a permutation of length nnode.
77  *
78  * There are two permutation strategies:
79  * For interleave_permute_type == 1 : Naive interleaving -> Each execution thread deals with one
80  * Hines matrix (cell). For interleave_permute_type == 2 : Advanced interleaving -> Each Hines
81  * matrix is solved by multiple execution threads (with coalesced memory access as well).
82  *
83  * \param ncell number of cells
84  * \param nnode total number of compartments in the ncell cells
85  * \param parents parent indices of the cells
86  * \param nwarp number of warps
87  * \param nstride nstride is the maximum cell size (not counting root)
88  * \param stride stride[i] is the number of cells with an ith node:
89  * using stride[i] we know how many positions to move in order to
90  * access the next element of the same cell (given that the cells are
91  * ordered with the treenode_order).
92  * \param firstnode firstnode[i] is the index of the first nonroot node of the cell
93  * \param lastnode lastnode[i] is the index of the last node of the cell
94  * \param cellsize cellsize is the number of nodes in the cell not counting root.
95  * \param stridedispl nwarp + 1 displacements into stride; stridedispl[nwarp] is the stride length
96  * \return int* : a permutation of length nnode
97  */
98 int* node_order(int ncell,
99  int nnode,
100  int* parents,
101  int& nwarp,
102  int& nstride,
103  int*& stride,
104  int*& firstnode,
105  int*& lastnode,
106  int*& cellsize,
107  int*& stridedispl);
108 
/**
 * \brief Copy the first n elements of src into a newly allocated array.
 *
 * The destination array is obtained with new[] (so it has to be released with delete[]) and its
 * address is stored in dest. Whatever dest held before the call is overwritten, not freed.
 *
 * \param dest receives the pointer to the new array of n elements
 * \param src start of the range to copy; must provide at least n readable elements
 * \param n number of elements to allocate and copy
 */
template <typename T>
void copy_array(T*& dest, T* src, size_t n) {
    dest = new T[n];
    std::copy(src, src + n, dest);
}
115 
116 // copy src array to dest with NRN_SOA_BYTE_ALIGN ecalloc_align allocation
117 template <typename T>
118 void copy_align_array(T*& dest, T* src, size_t n) {
119  dest = static_cast<T*>(ecalloc_align(n, sizeof(T)));
120  std::copy(src, src + n, dest);
121 }
122 
123 #ifndef INTERLEAVE_DEBUG
124 #define INTERLEAVE_DEBUG 0
125 #endif
126 
127 #if INTERLEAVE_DEBUG
128 void mk_cell_indices();
129 #endif
130 } // namespace coreneuron
coreneuron::InterleaveInfo::nnode
size_t * nnode
Definition: cellorder.hpp:65
coreneuron::interleave_order
int * interleave_order(int ith, int ncell, int nnode, int *parent)
Function that performs the permutation of the cells such that the execution threads access coalesced ...
Definition: cellorder.cpp:290
coreneuron::InterleaveInfo::InterleaveInfo
InterleaveInfo()=default
coreneuron::InterleaveInfo::nstride
int nstride
Definition: cellorder.hpp:57
coreneuron::node_order
int * node_order(int ncell, int nnode, int *parents, int &nwarp, int &nstride, int *&stride, int *&firstnode, int *&lastnode, int *&cellsize, int *&stridedispl)
Function that returns a permutation of length nnode.
Definition: cellorder1.cpp:300
coreneuron::lastnode
int int int int lastnode
Definition: cellorder.cpp:482
coreneuron::copy_align_array
void copy_align_array(T *&dest, T *src, size_t n)
Definition: cellorder.hpp:118
MemoryManaged
for gpu builds with unified memory support
Definition: memory.h:172
coreneuron::InterleaveInfo::~InterleaveInfo
~InterleaveInfo()
Definition: cellorder.cpp:77
coreneuron::InterleaveInfo::swap
void swap(InterleaveInfo &info)
Definition: cellorder.cpp:32
coreneuron::InterleaveInfo::lastnode
int * lastnode
Definition: cellorder.hpp:61
coreneuron::InterleaveInfo::nwarp
int nwarp
Definition: cellorder.hpp:56
coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12
coreneuron::ncell
icycle< ncycle;++icycle) { int istride=stride[icycle];nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for(int icore=0;icore< warpsize;++icore) { int i=ii+icore;if(icore< istride) { int ip=GPU_PARENT(i);GPU_RHS(i) -=GPU_B(i) *GPU_RHS(ip);GPU_RHS(i)/=GPU_D(i);} i+=istride;} ii+=istride;}}void solve_interleaved2(int ith) { NrnThread *nt=nrn_threads+ith;InterleaveInfo &ii=interleave_info[ith];int nwarp=ii.nwarp;if(nwarp==0) return;int ncore=nwarp *warpsize;int *ncycles=ii.cellsize;int *stridedispl=ii.stridedispl;int *strides=ii.stride;int *rootbegin=ii.firstnode;int *nodebegin=ii.lastnode;nrn_pragma_acc(parallel loop gang present(nt[0:1], strides[0:nstride], ncycles[0:nwarp], stridedispl[0:nwarp+1], rootbegin[0:nwarp+1], nodebegin[0:nwarp+1]) if(nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams loop if(nt->compute_gpu)) for(int icore=0;icore< ncore;icore+=warpsize) { int iwarp=icore/warpsize;int ic=icore &(warpsize - 1);int ncycle=ncycles[iwarp];int *stride=strides+stridedispl[iwarp];int root=rootbegin[iwarp];int lastroot=rootbegin[iwarp+1];int firstnode=nodebegin[iwarp];int lastnode=nodebegin[iwarp+1];triang_interleaved2(nt, ic, ncycle, stride, lastnode);bksub_interleaved2(nt, root+ic, lastroot, ic, ncycle, stride, firstnode);} nrn_pragma_acc(wait(nt->stream_id))}void solve_interleaved1(int ith) { NrnThread *nt=nrn_threads+ith;int ncell=nt-> ncell
Definition: cellorder.cpp:636
coreneuron::InterleaveInfo::stridedispl
int * stridedispl
Definition: cellorder.hpp:58
coreneuron::solve_interleaved2_launcher
void solve_interleaved2_launcher(NrnThread *nt, InterleaveInfo *info, int ncore, void *stream)
CUDA branch of the solve_interleaved with interleave_permute_type == 2.
coreneuron::InterleaveInfo::cellsize
int * cellsize
Definition: cellorder.hpp:62
coreneuron::destroy_interleave_info
void destroy_interleave_info()
Definition: cellorder.cpp:101
coreneuron::InterleaveInfo::firstnode
int * firstnode
Definition: cellorder.hpp:60
coreneuron::InterleaveInfo::cache_access
size_t * cache_access
Definition: cellorder.hpp:68
coreneuron::InterleaveInfo::idle
size_t * idle
Definition: cellorder.hpp:67
coreneuron::InterleaveInfo::operator=
InterleaveInfo & operator=(const InterleaveInfo &)
Definition: cellorder.cpp:66
coreneuron::InterleaveInfo::stride
int * stride
Definition: cellorder.hpp:59
coreneuron::solve_interleaved
void solve_interleaved(int ith)
Solve the Hines matrices based on the interleave_permute_type (1 or 2).
coreneuron::copy_array
void copy_array(T *&dest, T *src, size_t n)
Definition: cellorder.hpp:111
coreneuron::create_interleave_info
void create_interleave_info()
Definition: cellorder.cpp:96
coreneuron::nstride
int nstride
Definition: cellorder.cpp:641
coreneuron::InterleaveInfo::ncycle
size_t * ncycle
Definition: cellorder.hpp:66
coreneuron::stride
int int int * stride
Definition: cellorder.cpp:482
coreneuron::InterleaveInfo
Definition: cellorder.hpp:50
coreneuron::ecalloc_align
void * ecalloc_align(size_t n, size_t size, size_t alignment)
coreneuron::firstnode
int int int int int int firstnode
Definition: cellorder.cpp:532
coreneuron::cellsize
int * cellsize
Definition: cellorder.cpp:645
coreneuron::InterleaveInfo::child_race
size_t * child_race
Definition: cellorder.hpp:69
memory.h