CoreNEURON
multisend_setup.cpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 
9 #include <cstdio>
10 #include <cmath>
11 #include <numeric>
12 
13 #if CORENRN_DEBUG
14 #include <fstream>
15 #include <iomanip>
16 #endif
17 
25 /*
26 For very large numbers of processors and cells and fanout, it is taking
a long time to figure out each cell's target list given the input gids
(gid2in) on each host. e.g. 240 seconds for 2^25 cells, 1k connections
per cell, and 128K cores; and 340 seconds for two phase exchange.
30 To reduce this setup time we experiment with a very different algorithm in which
31 we construct a gid target host list on host gid%nhost and copy that list to
32 the source host owning the gid.
33 */
34 
35 #if NRN_MULTISEND
36 namespace coreneuron {
// Aliases for the global presyn maps: gid2in (input) and gid2out (output).
using Gid2IPS = std::map<int, InputPreSyn*>;
using Gid2PS = std::map<int, PreSyn*>;
39 
40 #if CORENRN_DEBUG
41 template <typename T>
42 static void celldebug(const char* p, T& map) {
43  std::string fname = std::string("debug.") + std::to_string(nrnmpi_myid);
44  std::ofstream f(fname, std::ios::app);
45  f << std::endl << p << std::endl;
46  int rank = nrnmpi_myid;
47  f << " " << std::setw(2) << std::setfill('0') << rank << ":";
48  for (const auto& m: map) {
49  int gid = m.first;
50  f << " " << std::setw(2) << std::setfill('0') << gid << ":";
51  }
52  f << std::endl;
53 }
54 
55 static void alltoalldebug(const char* p,
56  const std::vector<int>& s,
57  const std::vector<int>& scnt,
58  const std::vector<int>& sdispl,
59  const std::vector<int>& r,
60  const std::vector<int>& rcnt,
61  const std::vector<int>& rdispl) {
62  std::string fname = std::string("debug.") + std::to_string(nrnmpi_myid);
63  std::ofstream f(fname, std::ios::app);
64  f << std::endl << p << std::endl;
65  int rank = nrnmpi_myid;
66  f << " rank " << rank << std::endl;
67  for (int i = 0; i < nrnmpi_numprocs; ++i) {
68  f << " s" << i << " : " << scnt[i] << " " << sdispl[i] << " :";
69  for (int j = sdispl[i]; j < sdispl[i + 1]; ++j) {
70  f << " " << std::setw(2) << std::setfill('0') << s[j] << ":";
71  }
72  f << std::endl;
73  }
74  for (int i = 0; i < nrnmpi_numprocs; ++i) {
75  f << " r" << i << " : " << rcnt[i] << " " << rdispl[i] << " :";
76  for (int j = rdispl[i]; j < rdispl[i + 1]; ++j) {
77  f << " " << std::setw(2) << std::setfill('0') << r[j] << ":";
78  }
79  f << std::endl;
80  }
81 }
82 #else
// No-op stub used when CORENRN_DEBUG is disabled.
template <typename T>
static void celldebug(const char*, T&) {}
// No-op stub used when CORENRN_DEBUG is disabled.
static void alltoalldebug(const char*,
                          const std::vector<int>&,
                          const std::vector<int>&,
                          const std::vector<int>&,
                          const std::vector<int>&,
                          const std::vector<int>&,
                          const std::vector<int>&) {}
92 #endif
93 
94 #if CORENRN_DEBUG
// Dump every output gid's phase-1 target rank list to this rank's debug file.
// Layout per gid inside targets_phase1, starting at ps->multisend_index_:
// [total, count, rank0, rank1, ...] (written by fill_multisend_lists).
void phase1debug(int* targets_phase1) {
    std::string fname = std::string("debug.") + std::to_string(nrnmpi_myid);
    std::ofstream f(fname, std::ios::app);
    f << std::endl << "phase1debug " << nrnmpi_myid;
    for (auto& g: gid2out) {
        PreSyn* ps = g.second;
        f << std::endl << " " << std::setw(2) << std::setfill('0') << ps->gid_ << ":";
        int* ranks = targets_phase1 + ps->multisend_index_;
        // ranks[0] is the total target count; ranks[1] is the phase-1 count.
        int n = ranks[1];
        ranks += 2;
        for (int i = 0; i < n; ++i) {
            f << " " << std::setw(2) << std::setfill('0') << ranks[i];
        }
    }
    f << std::endl;
}
111 
// Dump every input gid's phase-2 relay target list to this rank's debug file.
// Layout per gid inside targets_phase2, starting at
// ps->multisend_phase2_index_: [count, rank0, rank1, ...]
// (written by fill_multisend_lists).
void phase2debug(int* targets_phase2) {
    std::string fname = std::string("debug.") + std::to_string(nrnmpi_myid);
    std::ofstream f(fname, std::ios::app);
    f << std::endl << "phase2debug " << nrnmpi_myid;
    for (auto& g: gid2in) {
        int gid = g.first;
        InputPreSyn* ps = g.second;
        f << std::endl << " " << std::setw(2) << std::setfill('0') << gid << ":";
        int j = ps->multisend_phase2_index_;
        if (j >= 0) {  // -1 means this gid is not a phase-2 sender
            int* ranks = targets_phase2 + j;
            int cnt = ranks[0];
            ranks += 1;
            for (int i = 0; i < cnt; ++i) {
                f << " " << std::setw(2) << std::setfill('0') << ranks[i];
            }
        }
    }
    f << std::endl;
}
132 #endif
133 
// Exclusive prefix sum of `acnt`: returns a vector of size acnt.size() + 1
// where element 0 is 0 and element k+1 is the sum of acnt[0..k]. Suitable as
// an MPI displacement array (last element is the total count).
static std::vector<int> newoffset(const std::vector<int>& acnt) {
    std::vector<int> offsets(acnt.size() + 1, 0);
    for (std::size_t k = 0; k < acnt.size(); ++k) {
        offsets[k + 1] = offsets[k] + acnt[k];
    }
    return offsets;
}
140 
// Exchange per-rank send counts so each rank learns how much it will receive.
// input: scnt (number of ints this rank sends to each peer);
// output: pair of (rcnt, rdispl) = receive counts and their offsets.
static std::pair<std::vector<int>, std::vector<int>> all2allv_helper(const std::vector<int>& scnt) {
    int np = nrnmpi_numprocs;
    // One int exchanged with every peer; c doubles as both send and receive
    // counts, rdispl (0,1,2,...) as both displacement arrays for the exchange.
    std::vector<int> c(np, 1);
    std::vector<int> rdispl = newoffset(c);
    std::vector<int> rcnt(np, 0);
    // NOTE(review): the leading `nrnmpi_int_alltoallv(` call line is missing
    // from this extract; the line below carries its argument list.
        scnt.data(), c.data(), rdispl.data(), rcnt.data(), c.data(), rdispl.data());
    // Re-derive displacements from the counts actually received.
    rdispl = newoffset(rcnt);
    return std::make_pair(std::move(rcnt), std::move(rdispl));
}
152 
153 /*
154 define following to 1 if desire space/performance information such as:
155 all2allv_int gidin to intermediate space=1552 total=37345104 time=0.000495835
156 all2allv_int gidout space=528 total=37379376 time=1.641e-05
157 all2allv_int lists space=3088 total=37351312 time=4.4708e-05
158 */
159 
160 #define all2allv_perf 0
161 
// Variable-size all-to-all exchange of ints.
// input: s (packed send buffer), scnt/sdispl (per-rank counts/offsets into s),
//        dmes (label for debug/perf output);
// output: pair of (r, rdispl) = packed receive buffer and per-rank offsets.
static std::pair<std::vector<int>, std::vector<int>> all2allv_int(const std::vector<int>& s,
                                                                  const std::vector<int>& scnt,
                                                                  const std::vector<int>& sdispl,
                                                                  const char* dmes) {
#if all2allv_perf
    double tm = nrn_wtime();
#endif
    int np = nrnmpi_numprocs;

    // First learn the receive counts/offsets, then size the receive buffer.
    std::vector<int> rcnt;
    std::vector<int> rdispl;
    std::tie(rcnt, rdispl) = all2allv_helper(scnt);
    std::vector<int> r(rdispl[np], 0);
    // NOTE(review): the leading `nrnmpi_int_alltoallv(` call line is missing
    // from this extract; the line below carries its argument list.
        s.data(), scnt.data(), sdispl.data(), r.data(), rcnt.data(), rdispl.data());
    alltoalldebug(dmes, s, scnt, sdispl, r, rcnt, rdispl);

#if all2allv_perf
    if (nrnmpi_myid == 0) {
        int nb = 4 * nrnmpi_numprocs + sdispl[nrnmpi_numprocs] + rdispl[nrnmpi_numprocs];
        tm = nrn_wtime() - tm;
        printf("all2allv_int %s space=%d total=%g time=%g\n", dmes, nb, nrn_mallinfo(), tm);
    }
#endif
    return std::make_pair(std::move(r), std::move(rdispl));
}
189 
// Target-rank list for one gid, built on the intermediate (gid % nhost) rank
// and then shipped to the rank that owns the source gid.
class TarList {
  public:
    TarList();
    virtual ~TarList();
    virtual void alloc();
    // The destructor frees the owning raw arrays below, so copying would
    // double-delete: explicitly forbid copy construction and assignment.
    // (No copies are made in this file; all uses are via TarList*.)
    TarList(const TarList&) = delete;
    TarList& operator=(const TarList&) = delete;
    int size;   // number of target ranks in list, or (after phase2organize)
                // the number of phase-2 groups.
    int* list;  // owned array of target ranks.
    int rank;   // owning source rank, or -1 if unknown/non-spiking.

    int* indices;  // indices of list for groups of phase2 targets.
    // If indices is not null, then size is one less than
    // the size of the indices list where indices[size] = the size of
    // the list. Indices[0] is 0 and list[indices[i]] is the rank
    // to send the ith group of phase2 targets.
};
205 
// Map from gid to its (heap-allocated) target-rank list on the intermediate rank.
using Int2TarList = std::map<int, TarList*>;
207 
208 TarList::TarList()
209  : size(0)
210  , list(nullptr)
211  , rank(-1)
212  , indices(nullptr) {}
213 
214 TarList::~TarList() {
215  delete[] list;
216  delete[] indices;
217 }
218 
219 void TarList::alloc() {
220  if (size) {
221  list = new int[size];
222  }
223 }
224 
225 // for two phase
226 
227 static nrnran123_State* ranstate{nullptr};
228 
229 static void random_init(int i) {
230  if (!ranstate) {
231  ranstate = nrnran123_newstream(i, 0);
232  }
233 }
234 
// Draw the next unsigned integer from the shared Random123 stream.
// Precondition: random_init() has been called (ranstate != nullptr).
static unsigned int get_random() {
    return nrnran123_ipick(ranstate);
}
238 
239 // Avoid warnings if the global index is changed on subsequent psolve.
240 static void random_delete() {
241  if (ranstate) {
242  nrnran123_deletestream(ranstate);
243  ranstate = nullptr;
244  }
245 }
246 
static int iran(int i1, int i2) {
    // discrete uniform random integer from i1 to i2 inclusive. Must
    // work if i1 == i2.
    // (The modulo draw has a slight bias when the range does not divide
    // 2^32; acceptable here since it only picks a phase-2 sender.)
    if (i1 == i2) {
        return i1;
    }
    int i3 = i1 + get_random() % (i2 - i1 + 1);
    return i3;
}
256 
// Split a flat target list into ~sqrt(n) phase-2 groups.
// When grouping happens, on return: tl->size is the group count n,
// tl->indices[i] is the start of group i (indices[n] = original list size),
// and tl->list[indices[i]] is the randomly chosen rank that relays the
// spike to the rest of group i in phase 2.
static void phase2organize(TarList* tl) {
    int nt = tl->size;
    int n = int(sqrt(double(nt)));
    // change to about 20
    if (n > 1) {  // do not bother if not many connections
        // equal as possible group sizes
        tl->indices = new int[n + 1];
        tl->indices[n] = tl->size;
        tl->size = n;
        for (int i = 0; i < n; ++i) {
            tl->indices[i] = (i * nt) / n;
        }
        // Note: not sure the following is true anymore but it could be.
        // This distribution is very biased (if 0 is a phase1 target
        // it is always a phase2 sender. So now choose a random
        // target in the subset and make that the phase2 sender
        // (need to switch the indices[i] target and the one chosen)
        for (int i = 0; i < n; ++i) {
            int i1 = tl->indices[i];
            int i2 = tl->indices[i + 1] - 1;
            // need discrete uniform random integer from i1 to i2
            int i3 = iran(i1, i2);
            // Swap so the chosen phase-2 sender sits at the head of its group.
            int itar = tl->list[i1];
            tl->list[i1] = tl->list[i3];
            tl->list[i3] = itar;
        }
    }
}
285 
286 // end of twophase
287 
288 /*
Setting up target lists uses a lot of temporary memory. It is conceivable
that this can be done prior to creating any cells or connections. I.e.
gid2out is presently known from pc.set_gid2node(gid,...). Gid2in is presently
292 known from NetCon = pc.gid_connect(gid, target) and it is quite a style
293 and hoc network programming change to use something like pc.need_gid(gid)
294 before cells with their synapses are created since one would have to imagine
295 that the hoc network setup code would have to be executed in a virtual
296 or 'abstract' fashion without actually creating, cells, targets, or NetCons.
297 Anyway, to potentially support this in the future, we write setup_target_lists
298 to not use any PreSyn information.
299 */
300 
// Build the flat gid/size/list encoding (setup_target_lists), then unpack it
// into the per-PreSyn target arrays (fill_multisend_lists).
static std::vector<int> setup_target_lists(bool);
static void fill_multisend_lists(bool, const std::vector<int>&, int*&, int*&);
303 
304 void nrn_multisend_setup_targets(bool use_phase2, int*& targets_phase1, int*& targets_phase2) {
305  auto r = setup_target_lists(use_phase2);
306 
307  // initialize as unused
308  for (auto& g: gid2out) {
309  PreSyn* ps = g.second;
310  ps->multisend_index_ = -1;
311  }
312 
313  // Only will be not -1 if non-nullptr input is a phase 2 sender.
314  for (auto& g: gid2in) {
315  InputPreSyn* ps = g.second;
316  ps->multisend_phase2_index_ = -1;
317  }
318 
319  fill_multisend_lists(use_phase2, r, targets_phase1, targets_phase2);
320 
321  // phase1debug(targets_phase1);
322  // phase2debug(targets_phase2);
323 }
324 
// Some notes about threads and the rank lists.
// Assume all MPI message sent and received from a single thread (0).
// gid2in and gid2out are rank wide lists for all threads
//
// Unpack the flat encoding `r` produced by setup_target_lists() into the
// newly allocated targets_phase1 / targets_phase2 arrays, and point each
// PreSyn (multisend_index_) / InputPreSyn (multisend_phase2_index_) at its
// slice. Two passes over r: first to size the arrays, then to copy.
static void fill_multisend_lists(bool use_phase2,
                                 const std::vector<int>& r,
                                 int*& targets_phase1,
                                 int*& targets_phase2) {
    // sequence of gid, size, [totalsize], list
    // Note that totalsize is there only for output gid's and use_phase2.
    // Using this sequence, copy lists to proper phase
    // 1 and phase 2 lists. (Phase one lists found in gid2out_ and phase
    // two lists found in gid2in_.
    int phase1_index = 0;
    int phase2_index = 0;
    // Count and fill in multisend_index and multisend_phase2_index_
    // From the counts can allocate targets_phase1 and targets_phase2
    // Then can iterate again and copy r to proper target locations.
    for (std::size_t i = 0; i < r.size();) {
        InputPreSyn* ips = nullptr;
        int gid = r[i++];
        int size = r[i++];
        if (use_phase2) {  // look in gid2in first
            auto gid2in_it = gid2in.find(gid);
            if (gid2in_it != gid2in.end()) {  // phase 2 target list
                ips = gid2in_it->second;
                ips->multisend_phase2_index_ = phase2_index;
                phase2_index += 1 + size;  // count + ranks
                i += size;
            }
        }
        if (!ips) {  // phase 1 target list (or whole list if use_phase2 is 0)
            auto gid2out_it = gid2out.find(gid);
            assert(gid2out_it != gid2out.end());
            PreSyn* ps = gid2out_it->second;
            ps->multisend_index_ = phase1_index;
            phase1_index += 2 + size;  // total + count + ranks
            if (use_phase2) {
                i++;  // skip the extra totalsize header element
            }
            i += size;
        }
    }

    targets_phase1 = new int[phase1_index];
    targets_phase2 = new int[phase2_index];

    // Second pass: copy the rank lists into their reserved slices.
    // printf("%d sz=%d\n", nrnmpi_myid, r.size());
    for (std::size_t i = 0; i < r.size();) {
        InputPreSyn* ips = nullptr;
        int gid = r[i++];
        int size = r[i++];
        if (use_phase2) {  // look in gid2in first
            auto gid2in_it = gid2in.find(gid);
            if (gid2in_it != gid2in.end()) {  // phase 2 target list
                ips = gid2in_it->second;
                int p = ips->multisend_phase2_index_;
                int* ranks = targets_phase2 + p;
                // Phase-2 slice layout: [count, rank0, rank1, ...]
                ranks[0] = size;
                ranks += 1;
                // printf("%d i=%d gid=%d phase2 size=%d\n", nrnmpi_myid, i, gid, size);
                for (int j = 0; j < size; ++j) {
                    ranks[j] = r[i++];
                    // printf("%d j=%d rank=%d\n", nrnmpi_myid, j, ranks[j]);
                    assert(ranks[j] != nrnmpi_myid);
                }
            }
        }
        if (!ips) {  // phase 1 target list (or whole list if use_phase2 is 0)
            auto gid2out_it = gid2out.find(gid);
            assert(gid2out_it != gid2out.end());
            PreSyn* ps = gid2out_it->second;
            int p = ps->multisend_index_;
            int* ranks = targets_phase1 + p;
            int total = size;
            if (use_phase2) {
                total = r[i++];
            }
            // Phase-1 slice layout: [total, count, rank0, rank1, ...]
            ranks[0] = total;
            ranks[1] = size;
            ranks += 2;
            // printf("%d i=%d gid=%d phase1 size=%d total=%d\n", nrnmpi_myid, i, gid, size, total);
            for (int j = 0; j < size; ++j) {
                ranks[j] = r[i++];
                // printf("%d j=%d rank=%d\n", nrnmpi_myid, j, ranks[j]);
                // There never was a possibility of send2self
                // because an output presyn is never in gid2in_.
                assert(ranks[j] != nrnmpi_myid);
            }
        }
    }

    // compute max_ntarget_host and max_multisend_targets
    // NOTE(review): these maxima are computed but neither stored nor returned
    // in this chunk -- confirm whether their consumer was removed or lives
    // outside this view.
    int max_ntarget_host = 0;
    int max_multisend_targets = 0;
    for (auto& g: gid2out) {
        PreSyn* ps = g.second;
        if (ps->output_index_ >= 0) {  // only ones that generate spikes
            int i = ps->multisend_index_;
            if (i >= 0) {  // only if the gid has targets on other ranks.
                max_ntarget_host = std::max(targets_phase1[i], max_ntarget_host);
                max_multisend_targets = std::max(targets_phase1[i + 1], max_multisend_targets);
            }
        }
    }
    if (use_phase2) {
        for (auto& g: gid2in) {
            InputPreSyn* ps = g.second;
            int i = ps->multisend_phase2_index_;
            if (i >= 0) {
                max_multisend_targets = std::max(max_multisend_targets, targets_phase2[i]);
            }
        }
    }
}
440 
441 // Return the vector encoding a sequence of gid, target list size, and target list
442 static std::vector<int> setup_target_lists(bool use_phase2) {
443  int nhost = nrnmpi_numprocs;
444 
445  // Construct hash table for finding the target rank list for a given gid.
446  Int2TarList gid2tarlist;
447 
448  celldebug<Gid2PS>("output gid", gid2out);
449  celldebug<Gid2IPS>("input gid", gid2in);
450 
451  // What are the target ranks for a given input gid. All the ranks
452  // with the same input gid send that gid to the intermediate
453  // gid%nhost rank. The intermediate rank can then construct the
454  // list of target ranks for the gids it gets.
455 
456  {
457  // scnt1 is number of input gids from target
458  std::vector<int> scnt1(nhost, 0);
459  for (const auto& g: gid2in) {
460  int gid = g.first;
461  ++scnt1[gid % nhost];
462  }
463 
464  // s1 are the input gids from target to be sent to the various intermediates
465  const std::vector<int> sdispl1 = newoffset(scnt1);
466  // Make an usable copy
467  auto sdispl1_ = sdispl1;
468  std::vector<int> s1(sdispl1[nhost], 0);
469  for (const auto& g: gid2in) {
470  int gid = g.first;
471  s1[sdispl1_[gid % nhost]++] = gid;
472  }
473 
474  std::vector<int> r1;
475  std::vector<int> rdispl1;
476  std::tie(r1, rdispl1) = all2allv_int(s1, scnt1, sdispl1, "gidin to intermediate");
477  // r1 is the gids received by this intermediate rank from all other ranks.
478 
479  // Now figure out the size of the target list for each distinct gid in r1.
480  for (const auto& gid: r1) {
481  if (gid2tarlist.find(gid) == gid2tarlist.end()) {
482  gid2tarlist[gid] = new TarList{};
483  gid2tarlist[gid]->size = 0;
484  }
485  auto tar = gid2tarlist[gid];
486  ++(tar->size);
487  }
488 
489  // Conceptually, now the intermediate is the mpi source and the gid
490  // sources are the mpi destination in regard to target lists.
491  // It would be possible at this point, but confusing,
492  // to allocate a s[rdispl1[nhost]] and figure out scnt and sdispl by
493  // by getting the counts and gids from the ranks that own the source
494  // gids. In this way we could organize s without having to allocate
495  // individual target lists on the intermediate and then allocate
496  // another large s buffer to receive a copy of them. However for
497  // this processing we already require two large buffers for input
498  // gid's so there is no real savings of space.
499  // So let's do the simple obvious sequence and now complete the
500  // target lists.
501 
502  // Allocate the target lists (and set size to 0 (we will recount when filling).
503  for (const auto& g: gid2tarlist) {
504  TarList* tl = g.second;
505  tl->alloc();
506  tl->size = 0;
507  }
508 
509  // fill the target lists
510  for (int rank = 0; rank < nhost; ++rank) {
511  int b = rdispl1[rank];
512  int e = rdispl1[rank + 1];
513  for (int i = b; i < e; ++i) {
514  const auto itl_it = gid2tarlist.find(r1[i]);
515  if (itl_it != gid2tarlist.end()) {
516  TarList* tl = itl_it->second;
517  tl->list[tl->size] = rank;
518  tl->size++;
519  }
520  }
521  }
522  }
523 
524  {
525  // Now the intermediate hosts have complete target lists and
526  // the sources know the intermediate host from the gid2out_ map.
527  // We could potentially organize here for two-phase exchange as well.
528 
529  // Which target lists are desired by the source rank?
530 
531  // Ironically, for round robin distributions, the target lists are
532  // already on the proper source rank so the following code should
533  // be tested for random distributions of gids.
534  // How many on the source rank?
535  std::vector<int> scnt2(nhost, 0);
536  for (auto& g: gid2out) {
537  int gid = g.first;
538  PreSyn* ps = g.second;
539  if (ps->output_index_ >= 0) { // only ones that generate spikes
540  ++scnt2[gid % nhost];
541  }
542  }
543  const auto sdispl2 = newoffset(scnt2);
544  auto sdispl2_ = sdispl2;
545 
546  // what are the gids of those target lists
547  std::vector<int> s2(sdispl2[nhost], 0);
548  for (auto& g: gid2out) {
549  int gid = g.first;
550  PreSyn* ps = g.second;
551  if (ps->output_index_ >= 0) { // only ones that generate spikes
552  s2[sdispl2_[gid % nhost]++] = gid;
553  }
554  }
555  std::vector<int> r2;
556  std::vector<int> rdispl2;
557  std::tie(r2, rdispl2) = all2allv_int(s2, scnt2, sdispl2, "gidout");
558 
559  // fill in the tl->rank for phase 1 target lists
560  // r2 is an array of source spiking gids
561  // tl is list associating input gids with list of target ranks.
562  for (int rank = 0; rank < nhost; ++rank) {
563  int b = rdispl2[rank];
564  int e = rdispl2[rank + 1];
565  for (int i = b; i < e; ++i) {
566  // note that there may be input gids with no corresponding
567  // output gid so that the find may not return true and in
568  // that case the tl->rank remains -1.
569  // For example multisplit gids or simulation of a subset of
570  // cells.
571  const auto itl_it = gid2tarlist.find(r2[i]);
572  if (itl_it != gid2tarlist.end()) {
573  TarList* tl = itl_it->second;
574  tl->rank = rank;
575  }
576  }
577  }
578  }
579 
580  if (use_phase2) {
581  random_init(nrnmpi_myid + 1);
582  for (const auto& gid2tar: gid2tarlist) {
583  TarList* tl = gid2tar.second;
584  if (tl->rank >= 0) { // only if output gid is spike generating
585  phase2organize(tl);
586  }
587  }
588  random_delete();
589  }
590 
591  // For clarity, use the all2allv_int style of information flow
592  // from source to destination as above
593  // and also use a uniform code
594  // for copying one and two phase information from a TarList to
595  // develop the s, scnt, and sdispl3 buffers. That is, a buffer list
596  // section in s for either a one-phase list or the much shorter
597  // (individually) lists for first and second phases, has a
598  // gid, size, totalsize header for each list where totalsize
599  // is only present if the gid is an output gid (for
600  // NrnMultisend_Send.ntarget_host used for conservation).
601  // Note that totalsize is tl->indices[tl->size]
602 
603  // how much to send to each rank
604  std::vector<int> scnt3(nhost, 0);
605  for (const auto& gid2tar: gid2tarlist) {
606  TarList* tl = gid2tar.second;
607  if (tl->rank < 0) {
608  // When the output gid does not generate spikes, that rank
609  // is not interested if there is a target list for it.
610  // If the output gid does not exist, there is no rank.
611  // In either case ignore this target list.
612  continue;
613  }
614  if (tl->indices) {
615  // indices[size] is the size of list but size of those
616  // are the sublist phase 2 destination ranks which
617  // don't get sent as part of the phase 2 target list.
618  // Also there is a phase 1 target list of size so there
619  // are altogether size+1 target lists.
620  // (one phase 1 list and size phase 2 lists)
621  scnt3[tl->rank] += tl->size + 2; // gid, size, list
622  for (int i = 0; i < tl->size; ++i) {
623  scnt3[tl->list[tl->indices[i]]] += tl->indices[i + 1] - tl->indices[i] + 1;
624  // gid, size, list
625  }
626  } else {
627  // gid, list size, list
628  scnt3[tl->rank] += tl->size + 2;
629  }
630  if (use_phase2) {
631  // The phase 1 header has as its third element, the
632  // total list size (needed for conservation);
633  scnt3[tl->rank] += 1;
634  }
635  }
636  const auto sdispl4 = newoffset(scnt3);
637  auto sdispl4_ = sdispl4;
638  std::vector<int> s3(sdispl4[nhost], 0);
639  // what to send to each rank
640  for (const auto& gid2tar: gid2tarlist) {
641  int gid = gid2tar.first;
642  TarList* tl = gid2tar.second;
643  if (tl->rank < 0) {
644  continue;
645  }
646  if (tl->indices) {
647  s3[sdispl4_[tl->rank]++] = gid;
648  s3[sdispl4_[tl->rank]++] = tl->size;
649  if (use_phase2) {
650  s3[sdispl4_[tl->rank]++] = tl->indices[tl->size];
651  }
652  for (int i = 0; i < tl->size; ++i) {
653  s3[sdispl4_[tl->rank]++] = tl->list[tl->indices[i]];
654  }
655  for (int i = 0; i < tl->size; ++i) {
656  int rank = tl->list[tl->indices[i]];
657  s3[sdispl4_[rank]++] = gid;
658  assert(tl->indices[i + 1] > tl->indices[i]);
659  s3[sdispl4_[rank]++] = tl->indices[i + 1] - tl->indices[i] - 1;
660  for (int j = tl->indices[i] + 1; j < tl->indices[i + 1]; ++j) {
661  s3[sdispl4_[rank]++] = tl->list[j];
662  }
663  }
664  } else {
665  // gid, list size, list
666  s3[sdispl4_[tl->rank]++] = gid;
667  s3[sdispl4_[tl->rank]++] = tl->size;
668  if (use_phase2) {
669  s3[sdispl4_[tl->rank]++] = tl->size;
670  }
671  for (int i = 0; i < tl->size; ++i) {
672  s3[sdispl4_[tl->rank]++] = tl->list[i];
673  }
674  }
675  delete tl;
676  }
677  std::vector<int> r_return;
678  std::vector<int> rdispl3;
679  std::tie(r_return, rdispl3) = all2allv_int(s3, scnt3, sdispl4, "lists");
680  return r_return;
681 }
682 } // namespace coreneuron
683 #endif // NRN_MULTISEND
multisend.hpp
nrnran123.h
utils.hpp
coreneuron::nrnmpi_numprocs
int nrnmpi_numprocs
Definition: nrnmpi_def_cinc.cpp:10
nrnmpidec.h
coreneuron::nrnmpi_int_alltoallv
mpi_function< cnrn_make_integral_constant_t(nrnmpi_int_alltoallv_impl)> nrnmpi_int_alltoallv
Definition: nrnmpidec.cpp:34
coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12
coreneuron::i
int i
Definition: cellorder.cpp:485
nrniv_decl.h
i
#define i
Definition: md1redef.h:19
coreneuron::nrnran123_deletestream
void nrnran123_deletestream(nrnran123_State *s, bool use_unified_memory)
Definition: nrnran123.cpp:201
coreneuron::nrn_multisend_setup_targets
void nrn_multisend_setup_targets(bool use_phase2, int *&targets_phase1, int *&targets_phase2)
nrnmpi.hpp
coreneuron::gid2in
std::map< int, InputPreSyn * > gid2in
Definition: nrn_setup.cpp:158
cnt
#define cnt
Definition: tqueue.hpp:44
coreneuron::gid2out
std::map< int, PreSyn * > gid2out
Maps for ouput and input presyns.
Definition: nrn_setup.cpp:157
coreneuron::nrn_wtime
double nrn_wtime()
Definition: utils.cpp:22
coreneuron::np
static int np
Definition: mpispike.cpp:25
coreneuron::nrn_mallinfo
double nrn_mallinfo(void)
Returns current memory usage in KBs.
Definition: memory_utils.cpp:43
memory_utils.h
Function prototypes for the functions providing information about simulator memory usage.
coreneuron::nrnmpi_myid
int nrnmpi_myid
Definition: nrnmpi_def_cinc.cpp:11