14 #ifdef CORENEURON_USE_BOOST_POOL
15 #include <boost/pool/pool_alloc.hpp>
16 #include <unordered_map>
// CORENRN_HOST_DEVICE expands to __host__ __device__ when compiling for GPU
// with OpenMP offload under nvcc (__CUDACC__), and to nothing otherwise.
// NOTE(review): the #else/#endif lines of this conditional are missing from
// this excerpt.
25 #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
26 defined(_OPENMP) && defined(__CUDACC__)
27 #define CORENRN_HOST_DEVICE __host__ __device__
29 #define CORENRN_HOST_DEVICE
33 #ifdef CORENEURON_USE_BOOST_POOL
// Allocator backend used with boost::fast_pool_allocator to place Random123
// stream state in memory reachable from both host and device (presumably CUDA
// unified memory -- the actual allocation call is missing from this excerpt;
// TODO confirm). It records the byte size of each live block in m_block_sizes
// so that free() can recover the size from just the pointer.
// NOTE(review): several lines of this struct (the allocation call, the
// deallocation call, closing braces) are absent from this excerpt.
44 struct random123_allocate_unified {
45 using size_type = std::size_t;
46 using difference_type = std::size_t;
// Allocate `bytes` bytes and remember the size under the returned pointer.
47 static char* malloc(
const size_type bytes) {
// Serialise access to m_block_sizes; the static_cast silences any
// unused-variable warning about the guard.
48 std::lock_guard<std::mutex>
const lock{m_mutex};
49 static_cast<void>(lock);
// NOTE(review): the line that obtains `buffer` is missing from this excerpt.
51 m_block_sizes[buffer] = bytes;
52 return reinterpret_cast<char*
>(buffer);
// Release a block previously returned by malloc(): look up and erase its
// recorded size before the (missing from this excerpt) deallocation call.
54 static void free(
char*
const block) {
55 std::lock_guard<std::mutex>
const lock{m_mutex};
56 static_cast<void>(lock);
57 auto const iter = m_block_sizes.find(block);
// Freeing a pointer we never allocated is a programmer error.
58 assert(iter != m_block_sizes.end());
59 auto const size = iter->second;
60 m_block_sizes.erase(iter);
// Static bookkeeping shared by all instances of this stateless allocator.
63 static std::mutex m_mutex;
64 static std::unordered_map<void*, std::size_t> m_block_sizes;
// Out-of-line definitions of the allocator's static members.
67 std::mutex random123_allocate_unified::m_mutex{};
68 std::unordered_map<void*, std::size_t> random123_allocate_unified::m_block_sizes{};
// Pool allocator for nrnran123_State instances whose backing storage comes
// from random123_allocate_unified via Boost's fast pool.
70 using random123_allocator =
71 boost::fast_pool_allocator<coreneuron::nrnran123_State, random123_allocate_unified>;
// Number of currently-live Random123 streams; accessed elsewhere in this file
// under g_instance_count_mutex.
81 std::size_t g_instance_count{};
// Qualifiers applied to the global Philox key: device __constant__ storage in
// one configuration (presumably CUDA builds -- the guarding #if is not
// visible in this excerpt), nothing otherwise. NOTE(review): the surrounding
// #if/#else/#endif lines are missing from this excerpt.
84 #define g_k_qualifiers __device__ __constant__
86 #define g_k_qualifiers
// Accessor for the process-global Philox key. noinline: presumably to force a
// single out-of-line definition across host/device compilation -- TODO
// confirm; the function body is missing from this excerpt.
92 __attribute__((noinline)) philox4x32_key_t& global_state() {
// Fragment of another function: runs Philox4x32 on a stream's counter `s->c`
// keyed by the global key.
99 return philox4x32(s->
c, global_state());
// Fragment of another function: returns the live-stream count.
104 return g_instance_count;
// Fragment of another function: returns the first word of the global key,
// which set_globalindex() below compares against the requested global index.
109 return global_state().v[0];
// Fragment of nrnran123_set_globalindex(gix): updates the global Philox key
// under the instance-count mutex and mirrors it to the device.
// NOTE(review): several lines (the start of the warning stream, the CUDA #if
// guards, the host-side assignment, closing braces) are missing from this
// excerpt.
115 auto& g_k = global_state();
117 std::lock_guard<OMP_Mutex> _{g_instance_count_mutex};
// Tail of a warning emitted when the global index is changed while streams
// are active: already-created streams keep the old key value.
120 <<
"nrnran123_set_globalindex(" << gix
121 <<
") called when a non-zero number of Random123 streams (" << g_instance_count
122 <<
") were active. This is not safe, some streams will remember the old value ("
123 << g_k.v[0] <<
')' << std::endl;
// Skip the update entirely when the new index equals the current one.
126 if (g_k.v[0] != gix) {
// Copy the updated key to the device-side symbol of the same name, then
// synchronise so the new value is visible before any kernel reads it.
131 auto const code = cudaMemcpyToSymbol(g_k, &g_k,
sizeof(g_k));
132 assert(code == cudaSuccess);
135 auto const code = cudaDeviceSynchronize();
136 assert(code == cudaSuccess);
// Fragment of a stream-construction function (signature truncated above this
// excerpt): `use_unified_memory` selects allocation of the nrnran123_State
// from the unified-memory pool. NOTE(review): many lines are missing from
// this excerpt.
171 bool use_unified_memory) {
// Unified memory is only meaningful in GPU builds; reject it otherwise.
177 #ifndef CORENEURON_ENABLE_GPU
178 if (use_unified_memory) {
179 throw std::runtime_error(
"Tried to use CUDA unified memory in a non-GPU build.")
183 if (use_unified_memory) {
// Allocate from the pool and take ownership of the raw pointer; the matching
// deletion path rebuilds a unique_ptr with alloc_deleter to release it.
184 s = coreneuron::allocate_unique<nrnran123_State>(random123_allocator{}).release();
// Initialise the new stream's sequence state, then adjust the live-stream
// count under its mutex.
192 nrnran123_setseq(s, 0, 0);
194 std::lock_guard<OMP_Mutex> _{g_instance_count_mutex};
// Fragment of the matching stream-destruction function: adjusts the
// live-stream count and releases pool-allocated state. NOTE(review): lines
// are missing from this excerpt.
202 #ifndef CORENEURON_ENABLE_GPU
// Unified memory is only meaningful in GPU builds; reject it otherwise.
203 if (use_unified_memory) {
204 throw std::runtime_error(
"Tried to use CUDA unified memory in a non-GPU build.")
// Adjust the live-stream count under its mutex.
208 std::lock_guard<OMP_Mutex> _{g_instance_count_mutex};
211 if (use_unified_memory) {
// Rebind ownership of `s` to a unique_ptr whose deleter returns the storage
// to the unified-memory pool; destruction happens at scope exit.
212 std::unique_ptr<nrnran123_State, coreneuron::alloc_deleter<random123_allocator>> _{s};