CoreNEURON
partrans.cpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 
#include "coreneuron/nrnconf.h"
#include "coreneuron/sim/multicore.hpp"
#include "coreneuron/mpi/nrnmpi.h"
#include "coreneuron/network/partrans.hpp"
#include "coreneuron/apps/corenrn_parameters.hpp"
#include "coreneuron/mpi/core/nrnmpi.hpp"
16 // This is the computational code for src->target transfer (e.g. gap junction)
17 // simulation.
18 // The setup code is in partrans_setup.cpp
19 
20 namespace coreneuron {
22 
23 using namespace nrn_partrans;
24 
26 
27 // MPI_Alltoallv buffer info
28 double* nrn_partrans::insrc_buf_; // Receive buffer for gap voltages
29 double* nrn_partrans::outsrc_buf_; // Send buffer for gap voltages
34 
36  // copy source values to outsrc_buf_ and mpi transfer to insrc_buf
37 
38  // note that same source value (usually voltage) may get copied to
39  // several locations in outsrc_buf
40 
41  // gather the source values. can be done in parallel
42  for (int tid = 0; tid < nrn_nthread; ++tid) {
43  auto& ttd = transfer_thread_data_[tid];
44  auto* nt = &nrn_threads[tid];
45  int n = int(ttd.outsrc_indices.size());
46  if (n == 0) {
47  continue;
48  }
49  double* src_data = nt->_data;
50  int* src_indices = ttd.src_indices.data();
51 
52  // gather sources on gpu and copy to cpu, cpu scatters to outsrc_buf
53  double* src_gather = ttd.src_gather.data();
54  size_t n_src_gather = ttd.src_gather.size();
55 
56  nrn_pragma_acc(parallel loop present(src_indices [0:n_src_gather],
57  src_data [0:nt->_ndata],
58  src_gather [0:n_src_gather]) if (nt->compute_gpu)
59  async(nt->stream_id))
60  nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
61  for (std::size_t i = 0; i < n_src_gather; ++i) {
62  src_gather[i] = src_data[src_indices[i]];
63  }
64  nrn_pragma_acc(update host(src_gather [0:n_src_gather]) if (nt->compute_gpu)
65  async(nt->stream_id))
66  nrn_pragma_omp(target update from(src_gather [0:n_src_gather]) if (nt->compute_gpu))
67  }
68 
69  // copy gathered source values to outsrc_buf_
70  bool compute_gpu = false;
71  for (int tid = 0; tid < nrn_nthread; ++tid) {
72  if (nrn_threads[tid].compute_gpu) {
73  compute_gpu = true;
74  nrn_pragma_acc(wait(nrn_threads[tid].stream_id))
75  }
77  size_t n_outsrc_indices = ttd.outsrc_indices.size();
78  int* outsrc_indices = ttd.outsrc_indices.data();
79  double* src_gather = ttd.src_gather.data();
80  int* src_gather_indices = ttd.gather2outsrc_indices.data();
81  for (size_t i = 0; i < n_outsrc_indices; ++i) {
82  outsrc_buf_[outsrc_indices[i]] = src_gather[src_gather_indices[i]];
83  }
84  }
85  static_cast<void>(compute_gpu);
86 
87  // transfer
88  int n_insrc_buf = insrcdspl_[nrnmpi_numprocs];
89 #if NRNMPI
90  if (corenrn_param.mpi_enable) { // otherwise insrc_buf_ == outsrc_buf_
94  } else
95 #endif
96  { // Use the multiprocess code even for one process to aid debugging
97  // For nrnmpi_numprocs == 1, insrc_buf_ and outsrc_buf_ are same size.
98  for (int i = 0; i < n_insrc_buf; ++i) {
100  }
101  }
102 
103  // insrc_buf_ will get copied to targets via nrnthread_v_transfer
104  nrn_pragma_acc(update device(insrc_buf_ [0:n_insrc_buf]) if (compute_gpu))
105  nrn_pragma_omp(target update to(insrc_buf_ [0:n_insrc_buf]) if (compute_gpu))
106 }
107 
109  // Copy insrc_buf_ values to the target locations. (An insrc_buf_ value
110  // may be copied to several target locations.
112  size_t ntar = ttd.tar_indices.size();
113  int* tar_indices = ttd.tar_indices.data();
114  int* insrc_indices = ttd.insrc_indices.data();
115  double* tar_data = _nt->_data;
116  // last element in the displacement vector gives total length
117 #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
118  defined(_OPENACC)
119  int n_insrc_buf = insrcdspl_[nrnmpi_numprocs];
120  int ndata = _nt->_ndata;
121 #endif
122  nrn_pragma_acc(parallel loop copyin(tar_indices [0:ntar])
123  present(insrc_indices [0:ntar],
124  tar_data [0:ndata],
125  insrc_buf_ [0:n_insrc_buf]) if (_nt->compute_gpu)
126  async(_nt->stream_id))
127  nrn_pragma_omp(target teams distribute parallel for simd map(to: tar_indices[0:ntar]) if(_nt->compute_gpu))
128  for (size_t i = 0; i < ntar; ++i) {
129  tar_data[tar_indices[i]] = insrc_buf_[insrc_indices[i]];
130  }
131 }
132 
134  // Ensure index vectors, src_gather, and insrc_buf_ are on the gpu.
135  if (insrcdspl_) {
136  // TODO: we don't actually need to copy here, just allocate + associate
137  // storage on the device
139  }
140  for (int tid = 0; tid < nrn_nthread; ++tid) {
141  const NrnThread* nt = nrn_threads + tid;
142  if (!nt->compute_gpu) {
143  continue;
144  }
145 
146  const TransferThreadData& ttd = transfer_thread_data_[tid];
147 
148  if (!ttd.src_indices.empty()) {
149  cnrn_target_copyin(ttd.src_indices.data(), ttd.src_indices.size());
150  // TODO: we don't actually need to copy here, just allocate +
151  // associate storage on the device.
152  cnrn_target_copyin(ttd.src_gather.data(), ttd.src_gather.size());
153  }
154 
155  if (ttd.insrc_indices.size()) {
156  cnrn_target_copyin(ttd.insrc_indices.data(), ttd.insrc_indices.size());
157  }
158  }
159 }
160 
162  if (insrcdspl_) {
163  int n_insrc_buf = insrcdspl_[nrnmpi_numprocs];
164  cnrn_target_delete(insrc_buf_, n_insrc_buf);
165  }
166  for (int tid = 0; tid < nrn_nthread; ++tid) {
167  const NrnThread* nt = nrn_threads + tid;
168  if (!nt->compute_gpu) {
169  continue;
170  }
171 
173 
174  if (!ttd.src_indices.empty()) {
175  cnrn_target_delete(ttd.src_indices.data(), ttd.src_indices.size());
176  cnrn_target_delete(ttd.src_gather.data(), ttd.src_gather.size());
177  }
178 
179  if (!ttd.insrc_indices.empty()) {
180  cnrn_target_delete(ttd.insrc_indices.data(), ttd.insrc_indices.size());
181  }
182  }
183 }
184 } // namespace coreneuron
coreneuron::nrn_partrans::insrc_buf_
double * insrc_buf_
Definition: partrans.cpp:28
coreneuron::nrn_nthread
int nrn_nthread
Definition: multicore.cpp:55
coreneuron::nrn_partrans::TransferThreadData::src_gather
std::vector< double > src_gather
Definition: partrans.hpp:80
coreneuron::nrnmpi_barrier
mpi_function< cnrn_make_integral_constant_t(nrnmpi_barrier_impl)> nrnmpi_barrier
Definition: nrnmpidec.cpp:42
coreneuron::nrnmpi_numprocs
int nrnmpi_numprocs
Definition: nrnmpi_def_cinc.cpp:10
coreneuron::nrn_partrans::copy_gap_indices_to_device
void copy_gap_indices_to_device()
Definition: partrans.cpp:133
coreneuron::nrn_partrans::outsrc_buf_
double * outsrc_buf_
Definition: partrans.cpp:29
coreneuron::NrnThread::id
int id
Definition: multicore.hpp:99
nrn_pragma_omp
nrn_pragma_acc(routine seq) nrn_pragma_omp(declare target) philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron nrn_pragma_omp(end declare target) namespace coreneuron
Provide a helper function in global namespace that is declared target for OpenMP offloading to functi...
Definition: nrnran123.h:69
coreneuron::cnrn_target_delete
void cnrn_target_delete(std::string_view file, int line, T *h_ptr, std::size_t len=1)
Definition: offload.hpp:132
coreneuron::nrn_partrans::TransferThreadData::gather2outsrc_indices
std::vector< int > gather2outsrc_indices
Definition: partrans.hpp:81
coreneuron::nrn_partrans::insrcdspl_
int * insrcdspl_
Definition: partrans.hpp:116
coreneuron::NrnThread::compute_gpu
int compute_gpu
Definition: multicore.hpp:136
coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12
coreneuron::nrn_partrans::TransferThreadData::outsrc_indices
std::vector< int > outsrc_indices
Definition: partrans.hpp:82
corenrn_parameters.hpp
coreneuron::i
int i
Definition: cellorder.cpp:485
coreneuron::cnrn_target_copyin
T * cnrn_target_copyin(std::string_view file, int line, const T *h_ptr, std::size_t len=1)
Definition: offload.hpp:110
coreneuron::nrn_partrans::outsrcdspl_
int * outsrcdspl_
Definition: partrans.hpp:116
coreneuron::update
void update(NrnThread *_nt)
Definition: fadvance_core.cpp:201
nrnmpi.hpp
coreneuron::NrnThread::_ndata
size_t _ndata
Definition: multicore.hpp:103
coreneuron::NrnThread
Definition: multicore.hpp:75
coreneuron::nrn_partrans::outsrccnt_
int * outsrccnt_
Definition: partrans.hpp:116
partrans.hpp
coreneuron::corenrn_param
corenrn_parameters corenrn_param
Printing method.
Definition: corenrn_parameters.cpp:268
coreneuron::NrnThread::stream_id
int stream_id
Definition: multicore.hpp:137
coreneuron::NrnThread::_data
double * _data
Definition: multicore.hpp:106
coreneuron::nrn_threads
NrnThread * nrn_threads
Definition: multicore.cpp:56
coreneuron::nrnmpi_v_transfer
void nrnmpi_v_transfer()
Definition: partrans.cpp:35
nrnconf.h
coreneuron::nrnmpi_dbl_alltoallv
mpi_function< cnrn_make_integral_constant_t(nrnmpi_dbl_alltoallv_impl)> nrnmpi_dbl_alltoallv
Definition: nrnmpidec.cpp:36
coreneuron::nrn_partrans::TransferThreadData::src_indices
std::vector< int > src_indices
Definition: partrans.hpp:79
coreneuron::nrnthread_v_transfer
void nrnthread_v_transfer(NrnThread *_nt)
Definition: partrans.cpp:108
coreneuron::nrn_partrans::TransferThreadData::tar_indices
std::vector< int > tar_indices
Definition: partrans.hpp:85
multicore.hpp
coreneuron::nrn_partrans::TransferThreadData::insrc_indices
std::vector< int > insrc_indices
Definition: partrans.hpp:84
coreneuron::corenrn_parameters_data::mpi_enable
bool mpi_enable
Initialization seed for random number generator (int)
Definition: corenrn_parameters.hpp:59
coreneuron::nrn_partrans::TransferThreadData
The basic problem is to copy sources to targets.
Definition: partrans.hpp:78
coreneuron::nrn_have_gaps
bool nrn_have_gaps
variables defined in coreneuron library
Definition: partrans.cpp:21
coreneuron::nrn_pragma_acc
nrn_pragma_acc(routine vector) static void triang_interleaved2(NrnThread *nt
Definition: ivocvect.cpp:30
coreneuron::if
if(ncell==0)
Definition: cellorder.cpp:637
coreneuron::nrn_partrans::insrccnt_
int * insrccnt_
Definition: partrans.cpp:30
nrnmpi.h
coreneuron::nrn_partrans::transfer_thread_data_
TransferThreadData * transfer_thread_data_
Definition: partrans.cpp:25
coreneuron::nrn_partrans::delete_gap_indices_from_device
void delete_gap_indices_from_device()
Definition: partrans.cpp:161