CoreNEURON
node_permute.cpp
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================
*/

/*
Below, the sense of permutation is reversed. Though consistent, a forward
permutation should be defined (and the code should eventually be transformed)
so that
 v: original vector
 p: forward permutation
 pv: permuted vector
 pv[i] = v[p[i]]
and
 pinv: inverse permutation
 pv[pinv[i]] = v[i]
Note: pinv[p[i]] = i = p[pinv[i]]
*/
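
/*
For concreteness, a small sketch of that (not yet adopted) convention, using
hypothetical values:

    std::vector<double> v{10., 20., 30.};  // original vector
    std::vector<int> p{2, 0, 1};           // forward permutation
    std::vector<double> pv(3);
    std::vector<int> pinv(3);
    for (int i = 0; i < 3; ++i) {
        pv[i] = v[p[i]];   // pv   = {30, 10, 20}
        pinv[p[i]] = i;    // pinv = {1, 2, 0}
    }
    // pv[pinv[i]] == v[i] and pinv[p[i]] == i == p[pinv[i]] for all i
*/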

/*
Permute nodes.

To make Gaussian elimination on the GPU more efficient.

Permutation vector p applied to a data vector moves data_original[i]
to data[p[i]].
That suffices for node properties such as area[i], a[i], b[i]. e.g.
 area[p[i]] <- area_original[i]

Notice that p on the left side is a forward permutation. On the right side
it serves as the inverse permutation, i.e.
 area_original[i] <- area_permuted[p[i]]

But things get a bit more complicated when the data is an integer index into
the original data.

For example:

parent[i] needs to be transformed so that
 parent[p[i]] <- p[parent_original[i]] except that if parent_original[j] = -1
 then parent[p[j]] = -1
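
As a sketch (hypothetical 4-node example) of the convention actually used
below: with p = {0, 2, 1, 3},

    area_original   = {a0, a1, a2, a3}  ->  area[p[i]] = area_original[i]
                                            area = {a0, a2, a1, a3}
    parent_original = {-1, 0, 0, 1}     ->  parent[p[i]] = p[parent_original[i]]
                                            parent = {-1, 0, 0, 2}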

A membrane mechanism nodelist (a subset of nodes) needs to be at least
minimally transformed so that
 nodelist_new[k] <- p[nodelist_original[k]]
This does not affect the order of the membrane mechanism property data.

However, computation is more efficient if nodelist_new is further permuted
(sorted) so that it follows, as much as possible, the permuted node ordering,
i.e. increasing node order. Call this further, mechanism-specific nodelist
permutation p_m; it is applied to the above nodelist_new and has the same
size as nodelist, i.e.
 nodelist[p_m[k]] <- nodelist_new[k]

Notice the similarity to the parent case...
 nodelist[p_m[k]] = p[nodelist_original[k]]
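
Continuing the sketch above: if the mechanism originally lives on nodes {3, 1},
then nodelist_original = {3, 1}, nodelist_new = {p[3], p[1]} = {3, 2}, and
sorting into increasing node order gives p_m = {1, 0}, so that
 nodelist = {2, 3} with nodelist[p_m[k]] = nodelist_new[k]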

Now the membrane mechanism node data does need to be permuted to have an
order consistent with the new nodelist. There are nm instances of the
mechanism, each with sz data values (consider AoS layout). The data
permutation is
for k=[0:nm] for isz=[0:sz]
 data_m[p_m[k]*sz + isz] = data_m_original[k*sz + isz]

For an SoA layout the indexing is k + isz*nm (where nm may include padding).
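
A small index sketch (hypothetical sizes): with nm = 2, sz = 3 and p_m = {1, 0},
instance k = 0, value isz = 2 moves as

    AoS: data_m[p_m[0]*3 + 2] = data_m[5] <- data_m_original[0*3 + 2] = data_m_original[2]
    SoA (padded nm = 4): data_m[p_m[0] + 2*4] = data_m[9] <- data_m_original[0 + 2*4] = data_m_original[8]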

A more complicated case is a mechanism's dparam array (nm instances, each with
dsz values). Some of those values are indices into another mechanism (e.g.
pointers to ion properties) or into voltage or area, depending on the
semantics of the value. We can use the above data_m permutation but then need
to update the values according to the permutation of the object the value
indexes into. Consider the permutation of the target object to be p_t. Then a
value
 iold = pdata_m(k, isz) - data_t
refers, for a target in AoS format, to
 k_t = iold / sz_t and isz_t = iold % sz_t (= iold - k_t*sz_t)
and, for a target in SoA format, to
 k_t = iold % nm_t and isz_t = iold / nm_t (where nm_t may include padding).
With k_t_new = p_t[k_t], then, for AoS, inew = k_t_new*sz_t + isz_t
or, for SoA, inew = k_t_new + isz_t*nm_t
so pdata_m(k, isz) = inew + data_t

*/
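
/*
A worked sketch of the dparam update above (hypothetical sizes): suppose the
target has nm_t = 3 instances (padded to 4 for SoA), sz_t = 5 values per
instance, target permutation p_t = {2, 0, 1}, and
iold = pdata_m(k, isz) - data_t = 6.

    AoS target: k_t = 6 / 5 = 1, isz_t = 6 % 5 = 1
                inew = p_t[1]*5 + 1 = 0*5 + 1 = 1
    SoA target: k_t = 6 % 4 = 2, isz_t = 6 / 4 = 1
                inew = p_t[2] + 1*4 = 1 + 4 = 5

In both cases the stored value becomes pdata_m(k, isz) = inew + data_t.
update_pdata_values() below applies this arithmetic per dparam semantics
(area, diam, POINTER, ion).
*/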

#include <vector>
#include <utility>
#include <algorithm>

// CoreNEURON headers used below (NrnThread, Memb_list, Layout, corenrn, nrn_assert)
#include "coreneuron/sim/multicore.hpp"
#include "coreneuron/nrniv/nrniv_decl.h"
#include "coreneuron/io/nrn_setup.hpp"
#include "coreneuron/utils/nrn_assert.h"
#include "coreneuron/coreneuron.hpp"

namespace coreneuron {
template <typename T>
void permute(T* data, int cnt, int sz, int layout, int* p) {
    // data(p[icnt], isz) <- data(icnt, isz)
    // this does not change the values, merely rearranges them in place.
    // assert len(p) == cnt
    if (!p) {
        return;
    }
    int n = cnt * sz;
    if (n < 1) {
        return;
    }

    if (layout == Layout::SoA) {  // for SoA, n might be larger due to cnt padding
        n = nrn_soa_padded_size(cnt, layout) * sz;
    }

    T* data_orig = new T[n];
    for (int i = 0; i < n; ++i) {
        data_orig[i] = data[i];
    }

    for (int icnt = 0; icnt < cnt; ++icnt) {
        for (int isz = 0; isz < sz; ++isz) {
            // note that when layout == Layout::SoA, nrn_i_layout takes into account SoA padding.
            int i = nrn_i_layout(icnt, cnt, isz, sz, layout);
            int ip = nrn_i_layout(p[icnt], cnt, isz, sz, layout);
            data[ip] = data_orig[i];
        }
    }

    delete[] data_orig;
}
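
/*
Usage sketch (hypothetical values, ignoring SoA padding for readability):
permuting cnt = 3 instances with sz = 2 values each.

    double d[6] = {10, 11, 12, 20, 21, 22};  // SoA: all value-0 entries, then all value-1
    int p[3] = {2, 0, 1};                    // instance i moves to slot p[i]
    // permute(d, 3, 2, Layout::SoA, p) would leave
    // d == {11, 12, 10, 21, 22, 20}
*/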

int* inverse_permute(int* p, int n) {
    int* pinv = new int[n];
    for (int i = 0; i < n; ++i) {
        pinv[p[i]] = i;
    }
    return pinv;
}

static void invert_permute(int* p, int n) {
    int* pinv = inverse_permute(p, n);
    for (int i = 0; i < n; ++i) {
        p[i] = pinv[i];
    }
    delete[] pinv;
}
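
/*
Example (illustrative): for p = {2, 0, 1}, inverse_permute(p, 3) returns
pinv = {1, 2, 0} since pinv[p[i]] == i; invert_permute(p, 3) overwrites p
with that inverse in place.
*/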

// type_of_ntdata: return the mechanism type (or voltage) for nt._data[i].
// Used for updating POINTER. Analogous to nrn_dblpntr2nrncore in NEURON.
// To reduce search time, consider voltage first, then try a few of the
// previous search result types (kept in type_hints) before falling back to a
// full search.
// Most usage is for voltage. Most of the rest is likely for a specific type.
// Occasionally, e.g. axial current, usage oscillates between two types: a
// SUFFIX version (for non-zero area nodes) and a POINT_PROCESS version (for
// zero area nodes).
// full_search: helper for type_of_ntdata. Returns the mech type for
// nt._data[i] and updates type_hints.

static std::vector<int> type_hints;

static int full_search(NrnThread& nt, double* pd) {
    int type = -1;
    for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) {
        Memb_list* ml = tml->ml;
        int n = corenrn.get_prop_param_size()[tml->index] * ml->_nodecount_padded;
        if (pd >= ml->data && pd < ml->data + n) {
            type = tml->index;
            // insert into type_hints
            int i = 0;
            for (int type_hint: type_hints) {
                if (type < type_hint) {
                    break;
                }
                i++;
            }
            type_hints.insert(type_hints.begin() + i, type);
            break;
        }
    }
    assert(type > 0);
    return type;
}

// no longer static because also used by POINTER in nrn_checkpoint.cpp
int type_of_ntdata(NrnThread& nt, int i, bool reset) {
    double* pd = nt._data + i;
    assert(pd >= nt._actual_v);
    if (pd < nt._actual_area) {  // voltage first (area just after voltage)
        return voltage;
    }
    assert(size_t(i) < nt._ndata);
    // then check the type hints. When inserting a hint, keep in type order
    if (reset) {
        type_hints.clear();
    }
    for (int type: type_hints) {
        Memb_list* ml = nt._ml_list[type];
        if (pd >= ml->data) {  // this or later
            int n = corenrn.get_prop_param_size()[type] * ml->_nodecount_padded;
            if (pd < ml->data + n) {  // this is the one
                return type;
            }
        } else {  // earlier
            return full_search(nt, pd);
        }
    }
    // after the last type_hints
    return full_search(nt, pd);
}

static void update_pdata_values(Memb_list* ml, int type, NrnThread& nt) {
    // assumes AoS to SoA transformation already made since we are using
    // nrn_i_layout to determine indices into both ml->pdata and into the target data
    int psz = corenrn.get_prop_dparam_size()[type];
    if (psz == 0) {
        return;
    }
    if (corenrn.get_is_artificial()[type]) {
        return;
    }
    int* semantics = corenrn.get_memb_func(type).dparam_semantics;
    if (!semantics) {
        return;
    }
    int* pdata = ml->pdata;
    int layout = corenrn.get_mech_data_layout()[type];
    int cnt = ml->nodecount;
    // ml padding does not matter (but target padding does matter)

    // interesting semantics are -1 (area), -5 (pointer), -9 (diam), or 0-999 (ion variables)
    for (int i = 0; i < psz; ++i) {
        int s = semantics[i];
        if (s == -1) {  // area
            int area0 = nt._actual_area - nt._data;  // includes padding if relevant
            int* p_target = nt._permute;
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                // *pd is the original integer into nt._data. It needs to be
                // replaced by the permuted value.

                // This is ok whether or not area changed by padding, since the
                // old *pd was updated appropriately by the earlier AoS to SoA
                // transformation.
                int ix = *pd - area0;  // original integer into the area array
                nrn_assert((ix >= 0) && (ix < nt.end));
                int ixnew = p_target[ix];
                *pd = ixnew + area0;
            }
        } else if (s == -9) {  // diam
            int diam0 = nt._actual_diam - nt._data;  // includes padding if relevant
            int* p_target = nt._permute;
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                // *pd is the original integer into nt._data. It needs to be
                // replaced by the permuted value.

                // This is ok whether or not diam changed by padding, since the
                // old *pd was updated appropriately by the earlier AoS to SoA
                // transformation.
                int ix = *pd - diam0;  // original integer into the actual_diam array
                nrn_assert((ix >= 0) && (ix < nt.end));
                int ixnew = p_target[ix];
                *pd = ixnew + diam0;
            }
        } else if (s == -5) {  // POINTER
            // assume pointer into nt._data. Most likely voltage.
            // If not voltage, most likely the same mechanism for all indices.
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                int etype = type_of_ntdata(nt, *pd, iml == 0);
                if (etype == voltage) {
                    int v0 = nt._actual_v - nt._data;
                    int* e_target = nt._permute;
                    int ix = *pd - v0;  // original integer into the voltage array
                    nrn_assert((ix >= 0) && (ix < nt.end));
                    int ixnew = e_target[ix];
                    *pd = ixnew + v0;
                } else if (etype > 0) {
                    // about the same as for the ion case below, but check each instance
                    Memb_list* eml = nt._ml_list[etype];
                    int edata0 = eml->data - nt._data;
                    int ecnt = eml->nodecount;
                    int esz = corenrn.get_prop_param_size()[etype];
                    int elayout = corenrn.get_mech_data_layout()[etype];
                    int* e_permute = eml->_permute;
                    int i_ecnt, i_esz, padded_ecnt;
                    int ix = *pd - edata0;
                    if (elayout == Layout::AoS) {
                        padded_ecnt = ecnt;
                        i_ecnt = ix / esz;
                        i_esz = ix % esz;
                    } else {  // SoA
                        assert(elayout == Layout::SoA);
                        padded_ecnt = nrn_soa_padded_size(ecnt, elayout);
                        i_ecnt = ix % padded_ecnt;
                        i_esz = ix / padded_ecnt;
                    }
                    int i_ecnt_new = e_permute ? e_permute[i_ecnt] : i_ecnt;
                    int ix_new = nrn_i_layout(i_ecnt_new, ecnt, i_esz, esz, elayout);
                    *pd = ix_new + edata0;
                } else {
                    nrn_assert(0);
                }
            }
        } else if (s >= 0 && s < 1000) {  // ion
            int etype = s;
            int elayout = corenrn.get_mech_data_layout()[etype];
            Memb_list* eml = nt._ml_list[etype];
            int edata0 = eml->data - nt._data;
            int ecnt = eml->nodecount;
            int esz = corenrn.get_prop_param_size()[etype];
            int* e_permute = eml->_permute;
            for (int iml = 0; iml < cnt; ++iml) {
                int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout);
                int ix = *pd - edata0;
                // from ix determine i_ecnt and i_esz (need to permute i_ecnt)
                int i_ecnt, i_esz, padded_ecnt;
                if (elayout == Layout::AoS) {
                    padded_ecnt = ecnt;
                    i_ecnt = ix / esz;
                    i_esz = ix % esz;
                } else {  // SoA
                    assert(elayout == Layout::SoA);
                    padded_ecnt = nrn_soa_padded_size(ecnt, elayout);
                    i_ecnt = ix % padded_ecnt;
                    i_esz = ix / padded_ecnt;
                }
                int i_ecnt_new = e_permute[i_ecnt];
                int ix_new = nrn_i_layout(i_ecnt_new, ecnt, i_esz, esz, elayout);
                *pd = ix_new + edata0;
            }
        }
    }
}

void node_permute(int* vec, int n, int* permute) {
    for (int i = 0; i < n; ++i) {
        if (vec[i] >= 0) {
            vec[i] = permute[vec[i]];
        }
    }
}
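
/*
Example (illustrative): node_permute only remaps the stored values; it does
not reorder the vector itself. With p = {0, 2, 1, 3}:

    int parent[4] = {-1, 0, 0, 1};
    // node_permute(parent, 4, p) -> {-1, 0, 0, 2}   (-1 entries are preserved)
*/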

void permute_ptr(int* vec, int n, int* p) {
    permute(vec, n, 1, 1, p);
}

void permute_data(double* vec, int n, int* p) {
    permute(vec, n, 1, 1, p);
}

void permute_ml(Memb_list* ml, int type, NrnThread& nt) {
    int sz = corenrn.get_prop_param_size()[type];
    int psz = corenrn.get_prop_dparam_size()[type];
    int layout = corenrn.get_mech_data_layout()[type];
    permute(ml->data, ml->nodecount, sz, layout, ml->_permute);
    permute(ml->pdata, ml->nodecount, psz, layout, ml->_permute);

    update_pdata_values(ml, type, nt);
}

int nrn_index_permute(int ix, int type, Memb_list* ml) {
    int* p = ml->_permute;
    if (!p) {
        return ix;
    }
    int layout = corenrn.get_mech_data_layout()[type];
    if (layout == Layout::AoS) {
        int sz = corenrn.get_prop_param_size()[type];
        int i_cnt = ix / sz;
        int i_sz = ix % sz;
        return p[i_cnt] * sz + i_sz;
    } else {
        assert(layout == Layout::SoA);
        int padded_cnt = nrn_soa_padded_size(ml->nodecount, layout);
        int i_cnt = ix % padded_cnt;
        int i_sz = ix / padded_cnt;
        return i_sz * padded_cnt + p[i_cnt];
    }
}
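
/*
Example (illustrative, assuming nrn_soa_padded_size(3, SoA) == 4): with
nodecount = 3, sz = 2 and ml->_permute = {2, 0, 1},

    AoS: ix = 1*2 + 1 = 3 (instance 1, value 1) -> p[1]*2 + 1 = 1
    SoA: ix = 1 + 1*4 = 5 (instance 1, value 1) -> 1*4 + p[1] = 4
*/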

#if CORENRN_DEBUG
static void pr(const char* s, int* x, int n) {
    printf("%s:", s);
    for (int i = 0; i < n; ++i) {
        printf(" %d %d", i, x[i]);
    }
    printf("\n");
}

static void pr(const char* s, double* x, int n) {
    printf("%s:", s);
    for (int i = 0; i < n; ++i) {
        printf(" %d %g", i, x[i]);
    }
    printf("\n");
}
#endif

// note that sort_indices has the sense of an inverse permutation in that
// sort_indices[0] is the index of the smallest value in the values array

static bool nrn_index_sort_cmp(const std::pair<int, int>& a, const std::pair<int, int>& b) {
    bool result = false;
    if (a.first < b.first) {
        result = true;
    } else if (a.first == b.first) {
        if (a.second < b.second) {
            result = true;
        }
    }
    return result;
}

static int* nrn_index_sort(int* values, int n) {
    std::vector<std::pair<int, int>> vi(n);
    for (int i = 0; i < n; ++i) {
        vi[i].first = values[i];
        vi[i].second = i;
    }
    std::sort(vi.begin(), vi.end(), nrn_index_sort_cmp);
    int* sort_indices = new int[n];
    for (int i = 0; i < n; ++i) {
        sort_indices[i] = vi[i].second;
    }
    return sort_indices;
}
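
/*
Example (illustrative): for values = {3, 1, 3, 0}, nrn_index_sort returns
sort_indices = {3, 1, 0, 2}: index 3 holds the smallest value, then index 1,
and the two equal values keep their original relative order because ties are
broken by the original index.
*/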

void permute_nodeindices(Memb_list* ml, int* p) {
    // nodeindices values are permuted according to p (that per se does
    // not reorder the vector).

    node_permute(ml->nodeindices, ml->nodecount, p);

    // Then the new node indices are sorted by
    // increasing index. Instances using the same node stay in the same
    // original relative order so that their contributions to rhs, d (if any)
    // remain in the same order (except for GPU parallelism).
    // That becomes ml->_permute

    ml->_permute = nrn_index_sort(ml->nodeindices, ml->nodecount);
    invert_permute(ml->_permute, ml->nodecount);
    permute_ptr(ml->nodeindices, ml->nodecount, ml->_permute);
}
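
/*
Worked sketch (hypothetical values) of the whole sequence: with node
permutation p = {0, 2, 1, 3} and original nodeindices = {3, 0, 1},

    node_permute    -> nodeindices = {3, 0, 2}      (values remapped by p)
    nrn_index_sort  -> sort_indices = {1, 2, 0}     (inverse sense: smallest first)
    invert_permute  -> ml->_permute = {2, 0, 1}     (forward permutation)
    permute_ptr     -> nodeindices = {0, 2, 3}      (increasing node order)

The same ml->_permute is later used by permute_ml to reorder the mechanism's
data and pdata consistently.
*/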
}  // namespace coreneuron