CoreNEURON
memory.h
Go to the documentation of this file.
1 /*
2 # =============================================================================
3 # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
4 #
5 # See top-level LICENSE file for details.
6 # =============================================================================.
7 */
8 
9 #pragma once
10 
11 #include <cstdint>
12 #include <cstring>
13 #include <memory>
14 
17 
18 #if !defined(NRN_SOA_BYTE_ALIGN)
19 // for layout 0, every range variable array must be aligned by at least 16 bytes (the size of the
20 // simd memory bus)
21 #define NRN_SOA_BYTE_ALIGN (8 * sizeof(double))
22 #endif
23 
24 namespace coreneuron {
25 /**
26  * @brief Check if GPU support is enabled.
27  *
28  * This returns true if GPU support was enabled at compile time and at runtime
29  * via coreneuron.gpu = True and/or --gpu, otherwise it returns false.
30  */
31 bool gpu_enabled();
32 
33 /** @brief Allocate unified memory in GPU builds iff GPU enabled, otherwise new
34  */
35 void* allocate_unified(std::size_t num_bytes);
36 
37 /** @brief Deallocate memory allocated by `allocate_unified`.
38  */
39 void deallocate_unified(void* ptr, std::size_t num_bytes);
40 
41 /** @brief C++ allocator that uses [de]allocate_unified.
42  */
43 template <typename T>
45  using value_type = T;
46 
47  unified_allocator() = default;
48 
49  template <typename U>
51 
52  value_type* allocate(std::size_t n) {
53  return static_cast<value_type*>(allocate_unified(n * sizeof(value_type)));
54  }
55 
56  void deallocate(value_type* p, std::size_t n) noexcept {
57  deallocate_unified(p, n * sizeof(value_type));
58  }
59 };
60 
61 template <typename T, typename U>
62 bool operator==(unified_allocator<T> const&, unified_allocator<U> const&) noexcept {
63  return true;
64 }
65 
66 template <typename T, typename U>
67 bool operator!=(unified_allocator<T> const& x, unified_allocator<U> const& y) noexcept {
68  return !(x == y);
69 }
70 
/** @brief Allocator-aware deleter for use with std::unique_ptr.
 *
 * Stores a copy of an allocator and, when invoked on a pointer, destroys the
 * pointed-to object and releases storage for exactly one element through
 * std::allocator_traits.
 *
 * This is copied from https://stackoverflow.com/a/23132307. See also
 * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0316r0.html,
 * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0211r3.html, and
 * boost::allocate_unique<...>.
 * Hopefully std::allocate_unique will be included in C++23.
 */
template <typename Alloc>
struct alloc_deleter {
    /// Pointer type handled by the deleter, as defined by the allocator traits.
    using pointer = typename std::allocator_traits<Alloc>::pointer;

    alloc_deleter() = default;  // OL210813 addition

    /// Keep a copy of the allocator that will later release the object.
    alloc_deleter(const Alloc& a)
        : m_alloc(a) {}

    /// Destroy the object at p, then deallocate its single-element storage.
    void operator()(pointer p) const {
        using traits = std::allocator_traits<Alloc>;
        // operator() is const, so work on a mutable copy of the stored allocator
        Alloc scratch(m_alloc);
        traits::destroy(scratch, std::addressof(*p));
        traits::deallocate(scratch, p, 1);
    }

  private:
    Alloc m_alloc;
};
96 
97 template <typename T, typename Alloc, typename... Args>
98 auto allocate_unique(const Alloc& alloc, Args&&... args) {
99  using AT = std::allocator_traits<Alloc>;
100  static_assert(std::is_same<typename AT::value_type, std::remove_cv_t<T>>{}(),
101  "Allocator has the wrong value_type");
102 
103  Alloc a(alloc);
104  auto p = AT::allocate(a, 1);
105  try {
106  AT::construct(a, std::addressof(*p), std::forward<Args>(args)...);
107  using D = alloc_deleter<Alloc>;
108  return std::unique_ptr<T, D>(p, D(a));
109  } catch (...) {
110  AT::deallocate(a, p, 1);
111  throw;
112  }
113 }
114 } // namespace coreneuron
115 
116 /// for gpu builds with unified memory support
117 #ifdef CORENEURON_UNIFIED_MEMORY
118 
119 #include <cuda_runtime_api.h>
120 
121 // TODO : error handling for CUDA routines
122 inline void alloc_memory(void*& pointer, size_t num_bytes, size_t /*alignment*/) {
123  cudaMallocManaged(&pointer, num_bytes);
124 }
125 
126 inline void calloc_memory(void*& pointer, size_t num_bytes, size_t /*alignment*/) {
127  alloc_memory(pointer, num_bytes, 64);
128  cudaMemset(pointer, 0, num_bytes);
129 }
130 
131 inline void free_memory(void* pointer) {
132  cudaFree(pointer);
133 }
134 
135 /**
136  * A base class providing overloaded new and delete operators for CUDA allocation
137  *
138  * Classes that should be allocated on the GPU should inherit from this class. Additionally they
139  * may need to implement a special copy-construtor. This is documented here:
140  * \link: https://devblogs.nvidia.com/unified-memory-in-cuda-6/
141  */
142 class MemoryManaged {
143  public:
144  void* operator new(size_t len) {
145  void* ptr;
146  cudaMallocManaged(&ptr, len);
147  cudaDeviceSynchronize();
148  return ptr;
149  }
150 
151  void* operator new[](size_t len) {
152  void* ptr;
153  cudaMallocManaged(&ptr, len);
154  cudaDeviceSynchronize();
155  return ptr;
156  }
157 
158  void operator delete(void* ptr) {
159  cudaDeviceSynchronize();
160  cudaFree(ptr);
161  }
162 
163  void operator delete[](void* ptr) {
164  cudaDeviceSynchronize();
165  cudaFree(ptr);
166  }
167 };
168 
169 
170 /// for cpu builds use std::aligned_alloc (or std::malloc when no alignment is requested)
171 #else
/// Empty counterpart of the unified-memory MemoryManaged base class, used when
/// CORENEURON_UNIFIED_MEMORY is not defined.
class MemoryManaged {
    // does nothing by default
};
175 
176 #include <cstdlib>
177 
178 inline void alloc_memory(void*& pointer, size_t num_bytes, size_t alignment) {
179  size_t fill = 0;
180  if (alignment > 0) {
181  if (num_bytes % alignment != 0) {
182  size_t multiple = num_bytes / alignment;
183  fill = alignment * (multiple + 1) - num_bytes;
184  }
185  nrn_assert((pointer = std::aligned_alloc(alignment, num_bytes + fill)) != nullptr);
186  } else {
187  nrn_assert((pointer = std::malloc(num_bytes)) != nullptr);
188  }
189 }
190 
191 inline void calloc_memory(void*& pointer, size_t num_bytes, size_t alignment) {
192  alloc_memory(pointer, num_bytes, alignment);
193  memset(pointer, 0, num_bytes);
194 }
195 
/**
 * Release memory with the C library free function.
 *
 * \param pointer Address to release.
 */
inline void free_memory(void* pointer) {
    std::free(pointer);
}
199 
200 #endif
201 
202 namespace coreneuron {
203 
204 /** Independent function to compute the needed chunkding,
205  the chunk argument is the number of doubles the chunk is chunkded upon.
206 */
207 template <int chunk>
208 inline int soa_padded_size(int cnt, int layout) {
209  int imod = cnt % chunk;
210  if (layout == Layout::AoS)
211  return cnt;
212  if (imod) {
213  int idiv = cnt / chunk;
214  return (idiv + 1) * chunk;
215  }
216  return cnt;
217 }
218 
/** Check for the pointer alignment.
 *
 *  \param pointer Address to test.
 *  \param alignment Required alignment in bytes. 0 means "no alignment
 *                   requirement" and is satisfied by every address.
 *  \return true if the address is a multiple of \a alignment (or alignment is 0).
 */
inline bool is_aligned(void* pointer, std::size_t alignment) {
    if (alignment == 0) {
        // avoid a modulo by zero
        return true;
    }
    return (reinterpret_cast<std::uintptr_t>(pointer) % alignment) == 0;
}
224 
225 /**
226  * Allocate aligned memory. This will be unified memory if the corresponding
227  * CMake option is set. This must be freed with the free_memory method.
228  *
229  * \param size Size of buffer to allocate in bytes.
230  * \param alignment Memory alignment, defaults to NRN_SOA_BYTE_ALIGN. Pass 0 for no alignment.
231  */
232 inline void* emalloc_align(size_t size, size_t alignment = NRN_SOA_BYTE_ALIGN) {
233  void* memptr;
234  alloc_memory(memptr, size, alignment);
235  if (alignment != 0) {
236  nrn_assert(is_aligned(memptr, alignment));
237  }
238  return memptr;
239 }
240 
241 /**
242  * Allocate the aligned memory and set it to 0. This will be unified memory if
243  * the corresponding CMake option is set. This must be freed with the
244  * free_memory method.
245  *
246  * \param n Number of objects to allocate
247  * \param size Size of buffer for each object to allocate in bytes.
248  * \param alignment Memory alignment, defaults to NRN_SOA_BYTE_ALIGN. Pass 0 for no alignment.
249  *
250  * \note the allocated size will be \code n*size
251  */
252 inline void* ecalloc_align(size_t n, size_t size, size_t alignment = NRN_SOA_BYTE_ALIGN) {
253  void* p;
254  if (n == 0) {
255  return nullptr;
256  }
257  calloc_memory(p, n * size, alignment);
258  if (alignment != 0) {
259  nrn_assert(is_aligned(p, alignment));
260  }
261  return p;
262 }
263 } // namespace coreneuron
free_memory
void free_memory(void *pointer)
Definition: memory.h:196
coreneuron::allocate_unified
void * allocate_unified(std::size_t num_bytes)
Allocate unified memory in GPU builds iff GPU enabled, otherwise new.
Definition: memory.cpp:26
coreneuron::operator==
bool operator==(unified_allocator< T > const &, unified_allocator< U > const &) noexcept
Definition: memory.h:62
MemoryManaged
for gpu builds with unified memory support
Definition: memory.h:172
coreneuron
THIS FILE IS AUTO GENERATED DONT MODIFY IT.
Definition: corenrn_parameters.cpp:12
coreneuron::unified_allocator::allocate
value_type * allocate(std::size_t n)
Definition: memory.h:52
nrniv_decl.h
alloc_memory
void alloc_memory(void *&pointer, size_t num_bytes, size_t alignment)
Definition: memory.h:178
coreneuron::allocate_unique
auto allocate_unique(const Alloc &alloc, Args &&... args)
Definition: memory.h:98
cnt
#define cnt
Definition: tqueue.hpp:44
coreneuron::alloc_deleter::alloc_deleter
alloc_deleter()=default
coreneuron::alloc_deleter::operator()
void operator()(pointer p) const
Definition: memory.h:87
coreneuron::AoS
@ AoS
Definition: nrniv_decl.h:69
calloc_memory
void calloc_memory(void *&pointer, size_t num_bytes, size_t alignment)
Definition: memory.h:191
coreneuron::alloc_deleter::pointer
typename std::allocator_traits< Alloc >::pointer pointer
Definition: memory.h:85
coreneuron::operator!=
bool operator!=(unified_allocator< T > const &x, unified_allocator< U > const &y) noexcept
Definition: memory.h:67
coreneuron::unified_allocator::unified_allocator
unified_allocator()=default
coreneuron::unified_allocator::deallocate
void deallocate(value_type *p, std::size_t n) noexcept
Definition: memory.h:56
NRN_SOA_BYTE_ALIGN
#define NRN_SOA_BYTE_ALIGN
Definition: memory.h:21
coreneuron::unified_allocator::value_type
T value_type
Definition: memory.h:45
coreneuron::alloc_deleter::alloc_deleter
alloc_deleter(const Alloc &a)
Definition: memory.h:82
coreneuron::gpu_enabled
bool gpu_enabled()
Check if GPU support is enabled.
Definition: memory.cpp:18
coreneuron::deallocate_unified
void deallocate_unified(void *ptr, std::size_t num_bytes)
Deallocate memory allocated by allocate_unified.
Definition: memory.cpp:44
coreneuron::unified_allocator::unified_allocator
unified_allocator(unified_allocator< U > const &) noexcept
Definition: memory.h:50
coreneuron::emalloc_align
void * emalloc_align(size_t size, size_t alignment)
coreneuron::alloc_deleter
Allocator-aware deleter for use with std::unique_ptr.
Definition: memory.h:80
coreneuron::ecalloc_align
void * ecalloc_align(size_t n, size_t size, size_t alignment)
nrn_assert
#define nrn_assert(x)
assert()-like macro, independent of NDEBUG status
Definition: nrn_assert.h:33
nrn_assert.h
coreneuron::alloc_deleter::a
Alloc a
Definition: memory.h:94
coreneuron::unified_allocator
C++ allocator that uses [de]allocate_unified.
Definition: memory.h:44