// Ordered map from an int key to an InputPreSyn pointer
// (the key is presumably a global cell id, going by the alias name).
37 using Gid2IPS = std::map<int, InputPreSyn*>;
// Ordered map from an int key to a PreSyn pointer (presumably keyed by gid as well).
38 using Gid2PS = std::map<int, PreSyn*>;
// Debug helper: appends the label `p` followed by a dump of `map` to the
// file "debug.<nrnmpi_myid>".
// NOTE(review): the template header and the declarations of `rank` and `gid`
// are on lines not visible in this excerpt.
42 static void celldebug(
const char* p, T& map) {
// File name is "debug." plus nrnmpi_myid (presumably this process's MPI rank).
43 std::string fname = std::string(
"debug.") + std::to_string(
nrnmpi_myid);
// std::ios::app: output is appended, so successive calls accumulate in one file.
44 std::ofstream f(fname, std::ios::app);
// Blank line, then the caller-supplied label on its own line.
45 f << std::endl << p << std::endl;
// `rank` is written zero-padded to width 2 and followed by ':'.
47 f <<
" " << std::setw(2) << std::setfill(
'0') << rank <<
":";
// One zero-padded `gid` field per entry of `map`.
48 for (
const auto& m: map) {
50 f <<
" " << std::setw(2) << std::setfill(
'0') << gid <<
":";
// Debug helper: appends a dump of the send side (s, scnt, sdispl) and the
// receive side (r, rcnt, rdispl) of an exchange to "debug.<nrnmpi_myid>",
// preceded by the label `p`.
// NOTE(review): the declaration of `rank` and the loops that define `i` are on
// lines not visible in this excerpt.
55 static void alltoalldebug(
const char* p,
56 const std::vector<int>& s,
57 const std::vector<int>& scnt,
58 const std::vector<int>& sdispl,
59 const std::vector<int>& r,
60 const std::vector<int>& rcnt,
61 const std::vector<int>& rdispl) {
62 std::string fname = std::string(
"debug.") + std::to_string(
nrnmpi_myid);
// Append mode: earlier debug output in the same file is kept.
63 std::ofstream f(fname, std::ios::app);
64 f << std::endl << p << std::endl;
66 f <<
" rank " << rank << std::endl;
// Send side, for index i: the count scnt[i] and the displacement sdispl[i] ...
68 f <<
" s" <<
i <<
" : " << scnt[
i] <<
" " << sdispl[
i] <<
" :";
// ... then the entries of s in [sdispl[i], sdispl[i + 1]).
// The upper bound reads sdispl[i + 1], one element past index i.
69 for (
int j = sdispl[
i]; j < sdispl[
i + 1]; ++j) {
70 f <<
" " << std::setw(2) << std::setfill(
'0') << s[j] <<
":";
// Receive side, same layout: rcnt[i], rdispl[i], then r[rdispl[i] .. rdispl[i + 1]).
75 f <<
" r" <<
i <<
" : " << rcnt[
i] <<
" " << rdispl[
i] <<
" :";
76 for (
int j = rdispl[
i]; j < rdispl[
i + 1]; ++j) {
77 f <<
" " << std::setw(2) << std::setfill(
'0') << r[j] <<
":";
// No-op celldebug: takes a label and a map and does nothing.
// NOTE(review): presumably the alternative to the file-writing celldebug,
// selected by a preprocessor conditional not visible in this excerpt — confirm.
84 static void celldebug(
const char*, T&) {}
// No-op alltoalldebug: takes a label and six int vectors and does nothing.
// NOTE(review): presumably the alternative to the file-writing alltoalldebug,
// selected by a preprocessor conditional not visible in this excerpt — confirm.
85 static void alltoalldebug(
const char*,
86 const std::vector<int>&,
87 const std::vector<int>&,
88 const std::vector<int>&,
89 const std::vector<int>&,
90 const std::vector<int>&,
91 const std::vector<int>&) {}
// Debug helper: for each PreSyn reached through `g.second`, appends its gid_
// and a run of entries from `targets_phase1` to "debug.<nrnmpi_myid>".
// NOTE(review): the loop that defines `g` and the definition of `n` are on
// lines not visible in this excerpt.
95 void phase1debug(
int* targets_phase1) {
96 std::string fname = std::string(
"debug.") + std::to_string(
nrnmpi_myid);
// Append mode: earlier debug output in the same file is kept.
97 std::ofstream f(fname, std::ios::app);
100 PreSyn* ps = g.second;
// Start a new line with the zero-padded gid_ followed by ':'.
101 f << std::endl <<
" " << std::setw(2) << std::setfill(
'0') << ps->gid_ <<
":";
// multisend_index_ is used as this PreSyn's offset into targets_phase1.
102 int* ranks = targets_phase1 + ps->multisend_index_;
// Print n entries starting at that offset (n is set on a line not visible here).
105 for (
int i = 0;
i < n; ++
i) {
106 f <<
" " << std::setw(2) << std::setfill(
'0') << ranks[
i];
// Debug helper: for each InputPreSyn reached through `g.second`, appends `gid`
// and a run of entries from `targets_phase2` to "debug.<nrnmpi_myid>".
// NOTE(review): the loop that defines `g`, and the definitions of `gid` and
// `cnt`, are on lines not visible in this excerpt.
112 void phase2debug(
int* targets_phase2) {
113 std::string fname = std::string(
"debug.") + std::to_string(
nrnmpi_myid);
// Append mode: earlier debug output in the same file is kept.
114 std::ofstream f(fname, std::ios::app);
118 InputPreSyn* ps = g.second;
119 f << std::endl <<
" " << std::setw(2) << std::setfill(
'0') << gid <<
":";
// multisend_phase2_index_ is used as this InputPreSyn's offset into targets_phase2.
120 int j = ps->multisend_phase2_index_;
122 int* ranks = targets_phase2 + j;
// Print cnt entries starting at that offset.
125 for (
int i = 0;
i <
cnt; ++
i) {
126 f <<
" " << std::setw(2) << std::setfill(
'0') << ranks[
i];
// Builds an offset table from a list of counts: aoff has acnt.size() + 1
// entries and aoff[k + 1] receives acnt[0] + ... + acnt[k].
// NOTE(review): the return statement is not visible in this excerpt;
// presumably aoff is returned.
134 static std::vector<int> newoffset(
const std::vector<int>& acnt) {
// The vector is value-initialized, so every entry (including aoff[0]) starts at 0.
135 std::vector<int> aoff(acnt.size() + 1);
// Inclusive running sums are written starting at index 1; index 0 is not written here.
137 std::partial_sum(acnt.begin(), acnt.end(), aoff.begin() + 1);
// Given the per-destination send counts `scnt`, obtains the matching receive
// counts and returns the pair (rcnt, rdispl), where rdispl = newoffset(rcnt).
// NOTE(review): the definition of `np` and the name of the function called on
// original line 147/148 are not visible in this excerpt.
142 static std::pair<std::vector<int>, std::vector<int>> all2allv_helper(
const std::vector<int>& scnt) {
// c: np entries, all 1. It is passed below as both the send and the receive counts.
144 std::vector<int> c(
np, 1);
// Offsets for the all-ones counts c.
145 std::vector<int> rdispl = newoffset(c);
146 std::vector<int> rcnt(
np, 0);
// Argument list of a call whose callee is on a line not visible here
// (presumably an all-to-all-v exchange that sends one int of scnt to each rank
// and fills rcnt with one int from each rank — TODO confirm).
148 scnt.data(), c.data(), rdispl.data(), rcnt.data(), c.data(), rdispl.data());
// Recompute the displacements, this time for the received counts.
149 rdispl = newoffset(rcnt);
150 return std::make_pair(std::move(rcnt), std::move(rdispl));
// Compile-time switch, defined as 0 here.
// NOTE(review): presumably guards the timing/memory report in all2allv_int;
// the conditionals that test it are not visible in this excerpt.
160 #define all2allv_perf 0
// Exchanges the int buffer `s` (with per-destination counts `scnt` and
// displacements `sdispl`) and returns the pair (r, rdispl): the received data
// and the receive displacements.
// NOTE(review): the rest of the parameter list (which supplies `dmes`), the
// definitions of `np`, `nb` and `tm`, and the callee of the exchange call are on
// lines not visible in this excerpt.
163 static std::pair<std::vector<int>, std::vector<int>> all2allv_int(
const std::vector<int>& s,
164 const std::vector<int>& scnt,
165 const std::vector<int>& sdispl,
172 std::vector<int> rcnt;
173 std::vector<int> rdispl;
// Receive counts and displacements that correspond to scnt.
174 std::tie(rcnt, rdispl) = all2allv_helper(scnt);
// rdispl[np] is used as the total number of ints to receive; r starts zero-filled.
175 std::vector<int> r(rdispl[
np], 0);
// Argument list of the exchange call: send buffer, counts and displacements,
// followed by the receive buffer, counts and displacements.
177 s.data(), scnt.data(), sdispl.data(), r.data(), rcnt.data(), rdispl.data());
// Dump both sides of the exchange via alltoalldebug.
178 alltoalldebug(dmes, s, scnt, sdispl, r, rcnt, rdispl);
// Report line labelled with dmes (presumably only built when all2allv_perf is non-zero).
184 printf(
"all2allv_int %s space=%d total=%g time=%g\n", dmes, nb,
nrn_mallinfo(), tm);
// rcnt is not part of the returned pair.
187 return std::make_pair(std::move(r), std::move(rdispl));
194 virtual void alloc();
// Ordered map from an int key to a TarList pointer
// (setup_target_lists uses a gid as the key).
206 using Int2TarList = std::map<int, TarList*>;
212 , indices(nullptr) {}
214 TarList::~TarList() {
// Allocates `list` as a new int array with `size` elements.
// NOTE(review): original line 220 is not visible and may guard this allocation.
// The array comes from new[]; its release is presumably in the destructor,
// whose body is not visible in this excerpt.
219 void TarList::alloc() {
221 list =
new int[size];
// File-local random stream handle: null until random_init assigns it,
// and read by get_random.
227 static nrnran123_State* ranstate{
nullptr};
// Creates the random stream used by get_random and stores it in `ranstate`.
// `i` is passed as the first argument of nrnran123_newstream, 0 as the second.
// NOTE(review): original line 230 is not visible and may guard the assignment.
229 static void random_init(
int i) {
231 ranstate = nrnran123_newstream(
i, 0);
// Returns the result of nrnran123_ipick on the file-local stream `ranstate`.
// NOTE(review): presumably random_init must have been called first — confirm.
235 static unsigned int get_random() {
236 return nrnran123_ipick(ranstate);
240 static void random_delete() {
// Picks a pseudo-random integer derived from get_random(); for i2 >= i1 the
// value i1 + (get_random() % (i2 - i1 + 1)) lies in the closed range [i1, i2].
// NOTE(review): original lines 248-252 and the return statement are not visible
// in this excerpt.
247 static int iran(
int i1,
int i2) {
// get_random() returns unsigned int, so the modulo and the addition are done
// in unsigned arithmetic before the result is converted to int.
253 int i3 = i1 + get_random() % (i2 - i1 + 1);
// Splits tl->list into n = floor(sqrt(nt)) contiguous sub-ranges whose
// boundaries are stored in tl->indices, then, within each sub-range, moves a
// randomly chosen element to the sub-range's first slot.
// NOTE(review): the definition of `nt` (presumably the list length), any guard
// around the allocation, and the remainder of the swap are on lines not
// visible in this excerpt.
257 static void phase2organize(TarList* tl) {
259 int n = int(sqrt(
double(nt)));
// n + 1 boundaries; the final boundary is set to tl->size.
263 tl->indices =
new int[n + 1];
264 tl->indices[n] = tl->size;
// Boundary i is (i * nt) / n in integer arithmetic.
266 for (
int i = 0;
i < n; ++
i) {
267 tl->indices[
i] = (
i * nt) / n;
// For each sub-range [i1, i2] (both ends inclusive) ...
274 for (
int i = 0;
i < n; ++
i) {
275 int i1 = tl->indices[
i];
276 int i2 = tl->indices[
i + 1] - 1;
// ... choose a position i3 with iran(i1, i2) and copy list[i3] into the first
// slot. The old first value is saved in itar (presumably stored into list[i3]
// on a line not visible here).
278 int i3 = iran(i1, i2);
279 int itar = tl->list[i1];
280 tl->list[i1] = tl->list[i3];
// Forward declarations; both functions are defined later in this file.
// setup_target_lists returns an int vector that is later passed to
// fill_multisend_lists as its second argument.
301 static std::vector<int> setup_target_lists(
bool);
// fill_multisend_lists receives its two int* arguments by reference and
// assigns newly allocated arrays to them.
302 static void fill_multisend_lists(
bool,
const std::vector<int>&,
int*&,
int*&);
305 auto r = setup_target_lists(use_phase2);
309 PreSyn* ps = g.second;
310 ps->multisend_index_ = -1;
315 InputPreSyn* ps = g.second;
316 ps->multisend_phase2_index_ = -1;
319 fill_multisend_lists(use_phase2, r, targets_phase1, targets_phase2);
// Walks the flat int buffer `r` twice. The first pass assigns each gid its
// offset into the phase-1 or phase-2 target array and totals the required
// lengths; the arrays are then allocated with new[] and handed back through
// `targets_phase1` / `targets_phase2`; the second pass fills them. A last
// section scans the arrays for maxima.
// NOTE(review): the code that decodes `gid` and `size` from `r`, advances `i`,
// writes into `ranks`, and uses the two maxima is on lines not visible in this
// excerpt, as are the loop headers that define `g`.
329 static void fill_multisend_lists(
bool use_phase2,
330 const std::vector<int>& r,
331 int*& targets_phase1,
332 int*& targets_phase2) {
// Running lengths of the two output arrays.
338 int phase1_index = 0;
339 int phase2_index = 0;
// Pass 1. The loop header has no increment; `i` is advanced inside the body.
343 for (std::size_t
i = 0;
i < r.size();) {
344 InputPreSyn* ips =
nullptr;
// A gid found in gid2in gets a phase-2 record of 1 + size ints.
348 auto gid2in_it =
gid2in.find(gid);
349 if (gid2in_it !=
gid2in.end()) {
350 ips = gid2in_it->second;
351 ips->multisend_phase2_index_ = phase2_index;
352 phase2_index += 1 + size;
// A gid looked up in gid2out must be present (assert) and gets a phase-1
// record of 2 + size ints. Presumably this is the alternative branch to the
// gid2in case — the branching lines are not visible.
357 auto gid2out_it =
gid2out.find(gid);
358 assert(gid2out_it !=
gid2out.end());
359 PreSyn* ps = gid2out_it->second;
360 ps->multisend_index_ = phase1_index;
361 phase1_index += 2 + size;
// Allocate the outputs with the totals computed above. The arrays are raw
// new[] allocations returned through the reference parameters.
369 targets_phase1 =
new int[phase1_index];
370 targets_phase2 =
new int[phase2_index];
// Pass 2: same traversal of r, now writing into the arrays.
373 for (std::size_t
i = 0;
i < r.size();) {
374 InputPreSyn* ips =
nullptr;
378 auto gid2in_it =
gid2in.find(gid);
379 if (gid2in_it !=
gid2in.end()) {
380 ips = gid2in_it->second;
// Start of this gid's phase-2 record, at the offset assigned in pass 1.
381 int p = ips->multisend_phase2_index_;
382 int* ranks = targets_phase2 + p;
386 for (
int j = 0; j < size; ++j) {
394 auto gid2out_it =
gid2out.find(gid);
395 assert(gid2out_it !=
gid2out.end());
396 PreSyn* ps = gid2out_it->second;
// Start of this gid's phase-1 record, at the offset assigned in pass 1.
397 int p = ps->multisend_index_;
398 int* ranks = targets_phase1 + p;
407 for (
int j = 0; j < size; ++j) {
// Maxima over the filled arrays.
418 int max_ntarget_host = 0;
419 int max_multisend_targets = 0;
// Only PreSyns with output_index_ >= 0 are considered. Slot 0 of a phase-1
// record feeds max_ntarget_host and slot 1 feeds max_multisend_targets.
421 PreSyn* ps = g.second;
422 if (ps->output_index_ >= 0) {
423 int i = ps->multisend_index_;
425 max_ntarget_host = std::max(targets_phase1[
i], max_ntarget_host);
426 max_multisend_targets = std::max(targets_phase1[
i + 1], max_multisend_targets);
// Slot 0 of a phase-2 record also feeds max_multisend_targets.
432 InputPreSyn* ps = g.second;
433 int i = ps->multisend_phase2_index_;
435 max_multisend_targets = std::max(max_multisend_targets, targets_phase2[
i]);
// Builds per-gid target lists through three all2allv_int exchanges, labelled
// "gidin to intermediate", "gidout" and "lists". The result of the last
// exchange is stored in r_return.
// NOTE(review): the return statement and many interior lines are not visible
// in this excerpt: the definitions of `nhost`, `gid`, `r1`, `r2`, several loop
// headers, branch conditions, and the size/rank bookkeeping on each TarList.
442 static std::vector<int> setup_target_lists(
bool use_phase2) {
// gid -> TarList*, filled from the first exchange below.
446 Int2TarList gid2tarlist;
// Debug dumps of the two gid maps.
448 celldebug<Gid2PS>(
"output gid",
gid2out);
449 celldebug<Gid2IPS>(
"input gid",
gid2in);
// Exchange 1: each gid2in entry is bucketed to rank gid % nhost.
// First count the entries per destination ...
458 std::vector<int> scnt1(nhost, 0);
459 for (
const auto& g:
gid2in) {
461 ++scnt1[gid % nhost];
// ... then turn the counts into displacements. sdispl1_ is a mutable copy
// used as a per-destination write cursor while filling s1.
465 const std::vector<int> sdispl1 = newoffset(scnt1);
467 auto sdispl1_ = sdispl1;
// sdispl1[nhost] is the total number of ints to send.
468 std::vector<int> s1(sdispl1[nhost], 0);
469 for (
const auto& g:
gid2in) {
471 s1[sdispl1_[gid % nhost]++] = gid;
475 std::vector<int> rdispl1;
476 std::tie(r1, rdispl1) = all2allv_int(s1, scnt1, sdispl1,
"gidin to intermediate");
// For every received gid, create its TarList on first sight with size 0.
// (Presumably the size is then counted up on a line not visible here.)
480 for (
const auto& gid: r1) {
481 if (gid2tarlist.find(gid) == gid2tarlist.end()) {
482 gid2tarlist[gid] =
new TarList{};
483 gid2tarlist[gid]->size = 0;
485 auto tar = gid2tarlist[gid];
// Per-TarList step whose body is not visible in this excerpt.
503 for (
const auto& g: gid2tarlist) {
504 TarList* tl = g.second;
// r1[rdispl1[rank] .. rdispl1[rank + 1]) holds the gids received from `rank`;
// that rank is written into the matching TarList at position tl->size
// (presumably followed by an increment of tl->size, not visible here).
510 for (
int rank = 0; rank < nhost; ++rank) {
511 int b = rdispl1[rank];
512 int e = rdispl1[rank + 1];
513 for (
int i = b;
i < e; ++
i) {
514 const auto itl_it = gid2tarlist.find(r1[
i]);
515 if (itl_it != gid2tarlist.end()) {
516 TarList* tl = itl_it->second;
517 tl->list[tl->size] = rank;
// Exchange 2: PreSyns with output_index_ >= 0 are bucketed to rank gid % nhost,
// using the same count / displacement / cursor scheme as exchange 1.
535 std::vector<int> scnt2(nhost, 0);
538 PreSyn* ps = g.second;
539 if (ps->output_index_ >= 0) {
540 ++scnt2[gid % nhost];
543 const auto sdispl2 = newoffset(scnt2);
544 auto sdispl2_ = sdispl2;
547 std::vector<int> s2(sdispl2[nhost], 0);
550 PreSyn* ps = g.second;
551 if (ps->output_index_ >= 0) {
552 s2[sdispl2_[gid % nhost]++] = gid;
556 std::vector<int> rdispl2;
557 std::tie(r2, rdispl2) = all2allv_int(s2, scnt2, sdispl2,
"gidout");
// For each gid received from `rank`, look up its TarList. What is stored on it
// is on lines not visible here (presumably the sending rank is recorded as
// tl->rank, which is read below).
562 for (
int rank = 0; rank < nhost; ++rank) {
563 int b = rdispl2[rank];
564 int e = rdispl2[rank + 1];
565 for (
int i = b;
i < e; ++
i) {
571 const auto itl_it = gid2tarlist.find(r2[
i]);
572 if (itl_it != gid2tarlist.end()) {
573 TarList* tl = itl_it->second;
// Per-TarList step whose body is not visible in this excerpt.
582 for (
const auto& gid2tar: gid2tarlist) {
583 TarList* tl = gid2tar.second;
// Exchange 3, counting step: number of ints destined for each rank.
604 std::vector<int> scnt3(nhost, 0);
605 for (
const auto& gid2tar: gid2tarlist) {
606 TarList* tl = gid2tar.second;
// Layout that uses tl->indices: tl->rank gets size + 2 ints, and the rank stored
// at the start of sub-range i gets (indices[i + 1] - indices[i]) + 1 ints.
621 scnt3[tl->rank] += tl->size + 2;
622 for (
int i = 0;
i < tl->size; ++
i) {
623 scnt3[tl->list[tl->indices[
i]]] += tl->indices[
i + 1] - tl->indices[
i] + 1;
// Layout without indices: tl->rank gets size + 2 ints.
628 scnt3[tl->rank] += tl->size + 2;
// One further int for tl->rank. The condition guarding this is not visible.
633 scnt3[tl->rank] += 1;
// Displacements and write cursors for the third exchange
// (named sdispl4 although they are derived from scnt3).
636 const auto sdispl4 = newoffset(scnt3);
637 auto sdispl4_ = sdispl4;
638 std::vector<int> s3(sdispl4[nhost], 0);
// Filling step; mirrors the counting step above.
640 for (
const auto& gid2tar: gid2tarlist) {
641 int gid = gid2tar.first;
642 TarList* tl = gid2tar.second;
// Layout with indices, record for tl->rank: gid, tl->size, the final boundary
// indices[tl->size], then the first element of every sub-range.
647 s3[sdispl4_[tl->rank]++] = gid;
648 s3[sdispl4_[tl->rank]++] = tl->size;
650 s3[sdispl4_[tl->rank]++] = tl->indices[tl->size];
652 for (
int i = 0;
i < tl->size; ++
i) {
653 s3[sdispl4_[tl->rank]++] = tl->list[tl->indices[
i]];
// Record for the rank at the start of each sub-range: gid, the number of
// remaining members (sub-range length - 1), then those members.
655 for (
int i = 0;
i < tl->size; ++
i) {
656 int rank = tl->list[tl->indices[
i]];
657 s3[sdispl4_[rank]++] = gid;
// Every sub-range must be non-empty.
658 assert(tl->indices[
i + 1] > tl->indices[
i]);
659 s3[sdispl4_[rank]++] = tl->indices[
i + 1] - tl->indices[
i] - 1;
660 for (
int j = tl->indices[
i] + 1; j < tl->indices[
i + 1]; ++j) {
661 s3[sdispl4_[rank]++] = tl->list[j];
// Layout without indices, record for tl->rank: gid, tl->size, tl->size once
// more (original line 669, presumably under a condition not visible here),
// then every list entry.
666 s3[sdispl4_[tl->rank]++] = gid;
667 s3[sdispl4_[tl->rank]++] = tl->size;
669 s3[sdispl4_[tl->rank]++] = tl->size;
671 for (
int i = 0;
i < tl->size; ++
i) {
672 s3[sdispl4_[tl->rank]++] = tl->list[
i];
// Final exchange. r_return receives the data; rdispl3 is not used in the
// visible lines.
677 std::vector<int> r_return;
678 std::vector<int> rdispl3;
679 std::tie(r_return, rdispl3) = all2allv_int(s3, scnt3, sdispl4,
"lists");
683 #endif // NRN_MULTISEND