Loading [MathJax]/extensions/tex2jax.js
CoreNEURON
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
solve_core.cpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================.
7 */
8 
9 #include "coreneuron/nrnconf.h"
12 namespace coreneuron {
14 
15 static void triang(NrnThread*), bksub(NrnThread*);
16 
17 /* solve the matrix equation */
20  solve_interleaved(_nt->id);
21  } else {
22  triang(_nt);
23  bksub(_nt);
24  }
25 }
26 
27 /** @todo OpenACC GPU offload is sequential/slow. Because --cell-permute=0 and
28  * --gpu is forbidden anyway, no OpenMP target offload equivalent is implemented.
29  */
30 
31 /* triangularization of the matrix equations */
32 static void triang(NrnThread* _nt) {
33  int i2 = _nt->ncell;
34  int i3 = _nt->end;
35 
36  double* vec_a = &(VEC_A(0));
37  double* vec_b = &(VEC_B(0));
38  double* vec_d = &(VEC_D(0));
39  double* vec_rhs = &(VEC_RHS(0));
40  int* parent_index = _nt->_v_parent_index;
41 
42  nrn_pragma_acc(parallel loop seq present(
43  vec_a [0:i3], vec_b [0:i3], vec_d [0:i3], vec_rhs [0:i3], parent_index [0:i3])
44  async(_nt->stream_id) if (_nt->compute_gpu))
45  nrn_pragma_omp(target if (_nt->compute_gpu))
46  for (int i = i3 - 1; i >= i2; --i) {
47  double p = vec_a[i] / vec_d[i];
48  vec_d[parent_index[i]] -= p * vec_b[i];
49  vec_rhs[parent_index[i]] -= p * vec_rhs[i];
50  }
51 }
52 
53 /* back substitution to finish solving the matrix equations */
54 static void bksub(NrnThread* _nt) {
55  int i1 = 0;
56  int i2 = i1 + _nt->ncell;
57  int i3 = _nt->end;
58 
59  double* vec_b = &(VEC_B(0));
60  double* vec_d = &(VEC_D(0));
61  double* vec_rhs = &(VEC_RHS(0));
62  int* parent_index = _nt->_v_parent_index;
63 
64  nrn_pragma_acc(parallel loop seq present(vec_d [0:i2], vec_rhs [0:i2])
65  async(_nt->stream_id) if (_nt->compute_gpu))
66  nrn_pragma_omp(target if (_nt->compute_gpu))
67  for (int i = i1; i < i2; ++i) {
68  vec_rhs[i] /= vec_d[i];
69  }
70 
72  parallel loop seq present(vec_b [0:i3], vec_d [0:i3], vec_rhs [0:i3], parent_index [0:i3])
73  async(_nt->stream_id) if (_nt->compute_gpu))
74  nrn_pragma_omp(target if (_nt->compute_gpu))
75  for (int i = i2; i < i3; ++i) {
76  vec_rhs[i] -= vec_b[i] * vec_rhs[parent_index[i]];
77  vec_rhs[i] /= vec_d[i];
78  }
79 
80  if (_nt->compute_gpu) {
81  nrn_pragma_acc(wait(_nt->stream_id))
82  }
83 }
84 } // namespace coreneuron
VEC_RHS
#define VEC_RHS(i)
Definition: nrnconf.h:30
coreneuron::NrnThread::id
int id
Definition: multicore.hpp:99
nrn_pragma_omp
nrn_pragma_acc(routine seq) nrn_pragma_omp(declare target) philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron nrn_pragma_omp(end declare target) namespace coreneuron
Provide a helper function in global namespace that is declared target for OpenMP offloading to functi...
Definition: nrnran123.h:69
coreneuron::nrn_solve_minimal
void nrn_solve_minimal(NrnThread *)
Definition: solve_core.cpp:18
coreneuron::NrnThread::compute_gpu
int compute_gpu
Definition: multicore.hpp:136
VEC_A
#define VEC_A(i)
Definition: nrnconf.h:27
coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12
coreneuron::NrnThread::_v_parent_index
int * _v_parent_index
Definition: multicore.hpp:126
coreneuron::i
int i
Definition: cellorder.cpp:485
coreneuron::bksub
static void bksub(NrnThread *)
Definition: solve_core.cpp:54
coreneuron::NrnThread
Definition: multicore.hpp:75
coreneuron::NrnThread::stream_id
int stream_id
Definition: multicore.hpp:137
coreneuron::solve_interleaved
void solve_interleaved(int ith)
Solve the Hines matrices based on the interleave_permute_type (1 or 2).
nrnconf.h
cellorder.hpp
coreneuron::use_solve_interleave
bool use_solve_interleave
Definition: solve_core.cpp:13
multicore.hpp
coreneuron::NrnThread::end
int end
Definition: multicore.hpp:98
coreneuron::nrn_pragma_acc
nrn_pragma_acc(routine vector) static void triang_interleaved2(NrnThread *nt
Definition: ivocvect.cpp:30
coreneuron::if
if(ncell==0)
Definition: cellorder.cpp:637
coreneuron::NrnThread::ncell
int ncell
Definition: multicore.hpp:97
VEC_B
#define VEC_B(i)
Definition: nrnconf.h:28
coreneuron::triang
static void triang(NrnThread *)
Definition: solve_core.cpp:32
VEC_D
#define VEC_D(i)
Definition: nrnconf.h:29