CoreNEURON
offload.hpp
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================
7 */
8 #pragma once
// Helper that turns its argument into a string literal so that it can be handed to _Pragma.
9 #define nrn_pragma_stringify(x) #x
// Select which offload programming model the nrn_pragma_acc / nrn_pragma_omp macros emit
// pragmas for. Exactly one of three configurations is chosen:
//  - GPU build preferring OpenMP offload, compiled with OpenMP enabled:
//    nrn_pragma_omp(x) becomes `#pragma omp x`, nrn_pragma_acc(x) expands to nothing, and
//    <omp.h> is included.
10 #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
11 #define nrn_pragma_acc(x)
12 #define nrn_pragma_omp(x) _Pragma(nrn_pragma_stringify(omp x))
13 #include <omp.h>
//  - GPU build not preferring OpenMP offload, compiled with OpenACC enabled:
//    nrn_pragma_acc(x) becomes `#pragma acc x`, nrn_pragma_omp(x) expands to nothing, and
//    <openacc.h> is included.
14 #elif defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
15  defined(_OPENACC)
16 #define nrn_pragma_acc(x) _Pragma(nrn_pragma_stringify(acc x))
17 #define nrn_pragma_omp(x)
18 #include <openacc.h>
//  - Any other configuration: both macros expand to nothing, so annotated code compiles
//    without any offload pragmas and neither runtime header is included.
19 #else
20 #define nrn_pragma_acc(x)
21 #define nrn_pragma_omp(x)
22 #endif
23 
24 #include <cstddef>
25 #include <stdexcept>
26 #include <string_view>
27 
28 namespace coreneuron {
// Hooks called by the cnrn_target_* function templates defined further down in this header.
// Only the declarations live here; the definitions are not part of this header.
// NOTE(review): presumably these report the operation when CORENEURON_GPU_DEBUG is enabled
// (the error message in cnrn_target_deviceptr_or_present points users at that setting) --
// confirm against the definitions.
//
// In all of them `file`/`line` identify the call site, `typeid_T` is typeid(T) of the element
// type, `sizeof_T` is sizeof(T), and `len` is the element count (not a byte count) as passed by
// the calling template.

// Called by cnrn_target_copyin() after the data was copied, with the resulting device pointer.
29 void cnrn_target_copyin_debug(std::string_view file,
30  int line,
31  std::size_t sizeof_T,
32  std::type_info const& typeid_T,
33  void const* h_ptr,
34  std::size_t len,
35  void* d_ptr);
// Called first thing by cnrn_target_delete(), before the device copy is released.
36 void cnrn_target_delete_debug(std::string_view file,
37  int line,
38  std::size_t sizeof_T,
39  std::type_info const& typeid_T,
40  void const* h_ptr,
41  std::size_t len);
// Called by cnrn_target_deviceptr_or_present() when must_be_present_or_null is true, with the
// device pointer that was found (possibly null).
42 void cnrn_target_deviceptr_debug(std::string_view file,
43  int line,
44  std::type_info const& typeid_T,
45  void const* h_ptr,
46  void* d_ptr);
// Called by cnrn_target_deviceptr_or_present() when must_be_present_or_null is false, with the
// device pointer that was found (possibly null).
47 void cnrn_target_is_present_debug(std::string_view file,
48  int line,
49  std::type_info const& typeid_T,
50  void const* h_ptr,
51  void* d_ptr);
// Called first thing by cnrn_target_memcpy_to_device(), before the host-to-device copy.
52 void cnrn_target_memcpy_to_device_debug(std::string_view file,
53  int line,
54  std::size_t sizeof_T,
55  std::type_info const& typeid_T,
56  void const* h_ptr,
57  std::size_t len,
58  void* d_ptr);
// Enable the home-grown "present table" for GPU builds without unified memory that are compiled
// with an NVHPC release up to and including 22.3.
//
// The version test is a lexicographic comparison on (major, minor): every release with a major
// version below 22 qualifies regardless of its minor version, and within the 22.x series only
// minors up to 3 do. Comparing the two components independently would wrongly skip releases
// such as 21.7 or 21.11.
#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_UNIFIED_MEMORY) && \
    defined(__NVCOMPILER_MAJOR__) && defined(__NVCOMPILER_MINOR__) &&       \
    ((__NVCOMPILER_MAJOR__ < 22) ||                                          \
     ((__NVCOMPILER_MAJOR__ == 22) && (__NVCOMPILER_MINOR__ <= 3)))
// Homegrown implementation for buggy NVHPC versions (<=22.3), see
// https://forums.developer.nvidia.com/t/acc-deviceptr-does-not-work-in-openacc-code-dynamically-loaded-from-a-shared-library/211599
#define CORENEURON_ENABLE_PRESENT_TABLE
// Replacement for acc_deviceptr(). Returns {device pointer, error flag}; the caller casts the
// first member to the element type and throws if the second member is true.
std::pair<void*, bool> cnrn_target_deviceptr_impl(bool must_be_present_or_null, void const* h_ptr);
// Records a host -> device mapping after a copyin. `len` is the size in bytes.
void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len);
// Removes a mapping before the device copy is deleted. `len` is the size in bytes.
void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len);
#endif
69 
/**
 * @brief Find the device pointer that corresponds to a host pointer.
 *
 * Common implementation behind the cnrn_target_deviceptr() and cnrn_target_is_present()
 * macros; which of the two is being served is selected by @p must_be_present_or_null.
 *
 * @param file                    Source file of the call site.
 * @param line                    Source line of the call site.
 * @param must_be_present_or_null true for "deviceptr" semantics, false for "is present"
 *                                semantics. Selects which debug hook is called and is forwarded
 *                                to the backend-specific lookup.
 * @param h_ptr                   Host pointer to look up.
 * @return The device pointer, or nullptr if the selected backend did not produce one.
 * @throws std::runtime_error in builds without a GPU backend when @p must_be_present_or_null
 *         is true and @p h_ptr is non-null, or when the present-table lookup reports an error.
 */
template <typename T>
T* cnrn_target_deviceptr_or_present(std::string_view file,
                                    int line,
                                    bool must_be_present_or_null,
                                    const T* h_ptr) {
    T* device_ptr{nullptr};
    bool lookup_failed{false};
#if defined(CORENEURON_ENABLE_PRESENT_TABLE)
    // Home-grown table lookup; yields the device pointer together with an error flag.
    auto const lookup = cnrn_target_deviceptr_impl(must_be_present_or_null, h_ptr);
    device_ptr = static_cast<T*>(lookup.first);
    lookup_failed = lookup.second;
#elif defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
    defined(_OPENACC)
    device_ptr = static_cast<T*>(acc_deviceptr(const_cast<T*>(h_ptr)));
#elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
    defined(_OPENMP)
    // Only translate the pointer if the caller insists on it or the data is actually mapped.
    if (must_be_present_or_null || omp_target_is_present(h_ptr, omp_get_default_device())) {
        nrn_pragma_omp(target data use_device_ptr(h_ptr))
        { device_ptr = const_cast<T*>(h_ptr); }
    }
#else
    // No offload backend available: a null result is only acceptable for a null host pointer
    // or for a pure presence query.
    if (h_ptr && must_be_present_or_null) {
        throw std::runtime_error(
            "cnrn_target_deviceptr() not implemented without OpenACC/OpenMP and gpu build");
    }
#endif
    // Report the outcome before a possible error is raised below.
    if (!must_be_present_or_null) {
        cnrn_target_is_present_debug(file, line, typeid(T), h_ptr, device_ptr);
    } else {
        cnrn_target_deviceptr_debug(file, line, typeid(T), h_ptr, device_ptr);
    }
    if (lookup_failed) {
        throw std::runtime_error(
            "cnrn_target_deviceptr() encountered an error, you may want to try setting "
            "CORENEURON_GPU_DEBUG=1");
    }
    return device_ptr;
}
108 
/**
 * @brief Copy @p len elements starting at @p h_ptr to the device.
 *
 * Uses acc_copyin() in OpenACC builds and a `target enter data map(to: ...)` directive in
 * OpenMP offload builds. When the present table is enabled the new mapping is recorded in it.
 * The copyin debug hook is called with the resulting device pointer.
 *
 * @param file  Source file of the call site.
 * @param line  Source line of the call site.
 * @param h_ptr Host pointer to the first element.
 * @param len   Number of elements of type T (defaults to 1).
 * @return Device pointer corresponding to @p h_ptr.
 * @throws std::runtime_error in builds without an OpenACC/OpenMP GPU backend.
 */
template <typename T>
T* cnrn_target_copyin(std::string_view file, int line, const T* h_ptr, std::size_t len = 1) {
    // Byte size of the transfer; not every backend branch below needs it.
    [[maybe_unused]] const std::size_t num_bytes = sizeof(T) * len;
    T* device_ptr{nullptr};
#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
    defined(_OPENACC)
    device_ptr = static_cast<T*>(acc_copyin(const_cast<T*>(h_ptr), num_bytes));
#elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
    defined(_OPENMP)
    // Map the data first, then ask the runtime for the matching device address.
    nrn_pragma_omp(target enter data map(to : h_ptr[:len]))
    nrn_pragma_omp(target data use_device_ptr(h_ptr))
    { device_ptr = const_cast<T*>(h_ptr); }
#else
    throw std::runtime_error(
        "cnrn_target_copyin() not implemented without OpenACC/OpenMP and gpu build");
#endif
#if defined(CORENEURON_ENABLE_PRESENT_TABLE)
    cnrn_target_copyin_update_present_table(h_ptr, device_ptr, num_bytes);
#endif
    cnrn_target_copyin_debug(file, line, sizeof(T), typeid(T), h_ptr, len, device_ptr);
    return device_ptr;
}
130 
/**
 * @brief Release the device copy of @p len elements starting at @p h_ptr.
 *
 * The delete debug hook is called first; if the present table is enabled the mapping is
 * removed from it next; finally the backend (acc_delete() or an OpenMP
 * `target exit data map(delete: ...)` directive) releases the device data.
 *
 * @param file  Source file of the call site.
 * @param line  Source line of the call site.
 * @param h_ptr Host pointer to the first element.
 * @param len   Number of elements of type T (defaults to 1).
 * @throws std::runtime_error in builds without an OpenACC/OpenMP GPU backend.
 */
template <typename T>
void cnrn_target_delete(std::string_view file, int line, T* h_ptr, std::size_t len = 1) {
    cnrn_target_delete_debug(file, line, sizeof(T), typeid(T), h_ptr, len);
    // Byte size of the region; not every backend branch below needs it.
    [[maybe_unused]] const std::size_t num_bytes = sizeof(T) * len;
#if defined(CORENEURON_ENABLE_PRESENT_TABLE)
    cnrn_target_delete_update_present_table(h_ptr, num_bytes);
#endif
#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
    defined(_OPENACC)
    acc_delete(h_ptr, num_bytes);
#elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
    defined(_OPENMP)
    nrn_pragma_omp(target exit data map(delete : h_ptr[:len]))
#else
    throw std::runtime_error(
        "cnrn_target_delete() not implemented without OpenACC/OpenMP and gpu build");
#endif
}
148 
/**
 * @brief Copy @p len elements from host memory to an existing device buffer.
 *
 * The memcpy debug hook is called first, then the copy is issued through
 * acc_memcpy_to_device() (OpenACC) or omp_target_memcpy() (OpenMP offload).
 *
 * @param file  Source file of the call site.
 * @param line  Source line of the call site.
 * @param d_ptr Destination pointer on the device.
 * @param h_ptr Source pointer on the host.
 * @param len   Number of elements of type T (defaults to 1).
 * @throws std::runtime_error in builds without an OpenACC/OpenMP GPU backend.
 */
template <typename T>
void cnrn_target_memcpy_to_device(std::string_view file,
                                  int line,
                                  T* d_ptr,
                                  const T* h_ptr,
                                  std::size_t len = 1) {
    cnrn_target_memcpy_to_device_debug(file, line, sizeof(T), typeid(T), h_ptr, len, d_ptr);
    // Byte size of the transfer; unused when no backend is compiled in.
    [[maybe_unused]] const std::size_t num_bytes = sizeof(T) * len;
#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
    defined(_OPENACC)
    acc_memcpy_to_device(d_ptr, const_cast<T*>(h_ptr), num_bytes);
#elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
    defined(_OPENMP)
    // Destination is the default device, source is the initial (host) device; both buffers
    // are copied from their start, hence the zero offsets.
    omp_target_memcpy(d_ptr,
                      const_cast<T*>(h_ptr),
                      num_bytes,
                      0,
                      0,
                      omp_get_default_device(),
                      omp_get_initial_device());
#else
    throw std::runtime_error(
        "cnrn_target_memcpy_to_device() not implemented without OpenACC/OpenMP and gpu build");
#endif
}
173 
/**
 * @brief Refresh the device copy of @p len elements from their host values.
 *
 * Looks up the device pointer that corresponds to @p h_ptr (with
 * must_be_present_or_null = true) and then copies @p len elements from the host to that
 * device location.
 *
 * @param file  Source file of the call site.
 * @param line  Source line of the call site.
 * @param h_ptr Host pointer to the first element.
 * @param len   Number of elements of type T to update (defaults to 1).
 * @throws std::runtime_error propagated from the device pointer lookup or the copy.
 */
template <typename T>
void cnrn_target_update_on_device(std::string_view file,
                                  int line,
                                  const T* h_ptr,
                                  std::size_t len = 1) {
    auto* d_ptr = cnrn_target_deviceptr_or_present(file, line, true, h_ptr);
    // Forward the element count: without it the copy silently falls back to the default of a
    // single element no matter what the caller asked for.
    cnrn_target_memcpy_to_device(file, line, d_ptr, h_ptr, len);
}
182 
// Call-site wrappers: each macro forwards to the function template above, inserting __FILE__
// and __LINE__ of the call site as the leading `file`/`line` arguments. Most macros share
// their name with the function they forward to; this works because a macro is not expanded
// again inside its own replacement, and it means code after this point can no longer call
// those templates with explicit file/line arguments.
// cnrn_target_is_present and cnrn_target_deviceptr both forward to
// cnrn_target_deviceptr_or_present, with must_be_present_or_null set to false and true
// respectively.
183 // Replace with std::source_location once we have C++20
184 #define cnrn_target_copyin(...) cnrn_target_copyin(__FILE__, __LINE__, __VA_ARGS__)
185 #define cnrn_target_delete(...) cnrn_target_delete(__FILE__, __LINE__, __VA_ARGS__)
186 #define cnrn_target_is_present(...) \
187  cnrn_target_deviceptr_or_present(__FILE__, __LINE__, false, __VA_ARGS__)
188 #define cnrn_target_deviceptr(...) \
189  cnrn_target_deviceptr_or_present(__FILE__, __LINE__, true, __VA_ARGS__)
190 #define cnrn_target_memcpy_to_device(...) \
191  cnrn_target_memcpy_to_device(__FILE__, __LINE__, __VA_ARGS__)
192 #define cnrn_target_update_on_device(...) \
193  cnrn_target_update_on_device(__FILE__, __LINE__, __VA_ARGS__)
194 
195 } // namespace coreneuron
coreneuron::cnrn_target_delete_debug
void cnrn_target_delete_debug(std::string_view file, int line, std::size_t sizeof_T, std::type_info const &typeid_T, void const *h_ptr, std::size_t len)
Definition: nrn_acc_manager.cpp:111
coreneuron::cnrn_target_update_on_device
void cnrn_target_update_on_device(std::string_view file, int line, const T *h_ptr, std::size_t len=1)
Definition: offload.hpp:175
data
Definition: alignment.cpp:18
coreneuron::cnrn_target_delete
void cnrn_target_delete(std::string_view file, int line, T *h_ptr, std::size_t len=1)
Definition: offload.hpp:132
coreneuron::cnrn_target_memcpy_to_device
void cnrn_target_memcpy_to_device(std::string_view file, int line, T *d_ptr, const T *h_ptr, std::size_t len=1)
Definition: offload.hpp:150
coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12
coreneuron::cnrn_target_copyin
T * cnrn_target_copyin(std::string_view file, int line, const T *h_ptr, std::size_t len=1)
Definition: offload.hpp:110
coreneuron::cnrn_target_copyin_debug
void cnrn_target_copyin_debug(std::string_view file, int line, std::size_t sizeof_T, std::type_info const &typeid_T, void const *h_ptr, std::size_t len, void *d_ptr)
Definition: nrn_acc_manager.cpp:97
nrn_pragma_omp
#define nrn_pragma_omp(x)
Definition: offload.hpp:21
coreneuron::cnrn_target_deviceptr_or_present
T * cnrn_target_deviceptr_or_present(std::string_view file, int line, bool must_be_present_or_null, const T *h_ptr)
Definition: offload.hpp:71
coreneuron::cnrn_target_memcpy_to_device_debug
void cnrn_target_memcpy_to_device_debug(std::string_view file, int line, std::size_t sizeof_T, std::type_info const &typeid_T, void const *h_ptr, std::size_t len, void *d_ptr)
Definition: nrn_acc_manager.cpp:146
coreneuron::cnrn_target_is_present_debug
void cnrn_target_is_present_debug(std::string_view file, int line, std::type_info const &typeid_T, void const *h_ptr, void *d_ptr)
Definition: nrn_acc_manager.cpp:135
coreneuron::cnrn_target_deviceptr_debug
void cnrn_target_deviceptr_debug(std::string_view file, int line, std::type_info const &typeid_T, void const *h_ptr, void *d_ptr)
Definition: nrn_acc_manager.cpp:124