 |
User Guide
|
Go to the documentation of this file.
39 std::ostringstream present_clause;
40 present_clause <<
"present(inst";
43 present_clause <<
", nrb";
45 present_clause <<
", node_index, data, voltage, indexes, thread";
47 present_clause <<
", vec_rhs, vec_d";
50 present_clause <<
')';
51 printer->fmt_line(
"nrn_pragma_acc(parallel loop {} async(nt->stream_id) if(nt->compute_gpu))",
52 present_clause.str());
53 printer->add_line(
"nrn_pragma_omp(target teams distribute parallel for if(nt->compute_gpu))");
58 printer->add_line(
"nrn_pragma_acc(atomic update)");
59 printer->add_line(
"nrn_pragma_omp(atomic update)");
64 printer->add_line(
"#include <coreneuron/utils/offload.hpp>");
65 printer->add_line(
"#include <cuda_runtime_api.h>");
70 return "C++-OpenAcc (api-compatibility)";
82 "static inline void* mem_alloc(size_t num, size_t size, size_t alignment = 16)");
85 cudaMallocManaged(&ptr, num*size);
86 cudaMemset(ptr, 0, num*size);
92 printer->push_block("static inline void mem_free(void* ptr)");
93 printer->add_line(
"cudaFree(ptr);");
109 printer->push_block(
"static inline void coreneuron_abort()");
110 printer->add_line(R
"(printf("Error : Issue while running OpenACC kernel \n");)");
111 printer->add_line("assert(0==1);");
116 printer->push_block(
"if (nt->compute_gpu)");
118 printer->add_line(
"i = nsb->_cnt++;");
120 printer->add_line(
"i = nsb->_cnt++;");
144 printer->add_line(
"nrn_pragma_acc(data present(nt, ml) if(nt->compute_gpu))");
161 printer->add_line(
"#pragma acc serial present(inst, indexes, weights) if(nt->compute_gpu)");
179 printer->fmt_line(
"vec_rhs[node_id] {} rhs;", rhs_op);
183 printer->fmt_line(
"vec_d[node_id] {} g;", d_op);
193 printer->push_block(
"if (nt->nrn_fast_imem)");
197 printer->fmt_line(
"nt->nrn_fast_imem->nrn_sav_rhs[node_id] {} rhs;", rhs_op);
201 printer->fmt_line(
"nt->nrn_fast_imem->nrn_sav_d[node_id] {} g;", d_op);
232 printer->push_block(
"if (nt->compute_gpu)");
242 printer->push_block(
"if(nt->compute_gpu)");
243 printer->add_multi_line(R
"CODE(
244 double* device_vec = cnrn_target_copyin(vec, vec_size / sizeof(double));
245 void* device_ns = cnrn_target_deviceptr(*ns);
246 ThreadDatum* device_thread = cnrn_target_deviceptr(thread);
248 printer->fmt_line("cnrn_target_memcpy_to_device(&(device_thread[{}]._pvoid), &device_ns);",
250 printer->fmt_line(
"cnrn_target_memcpy_to_device(&(device_thread[dith{}()].pval), &device_vec);",
261 "static inline void copy_instance_to_device(NrnThread* nt, Memb_list* ml, {} const* inst);",
263 printer->fmt_line(
"static inline void delete_instance_from_device({}* inst);",
269 std::vector<std::string>
const& ptr_members) {
274 "static inline void copy_instance_to_device(NrnThread* nt, Memb_list* ml, {} const* inst)",
276 printer->push_block(
"if (!nt->compute_gpu)");
279 printer->fmt_line(
"auto tmp = *inst;");
280 printer->add_line(
"auto* d_inst = cnrn_target_is_present(inst);");
281 printer->push_block(
"if (!d_inst)");
282 printer->add_line(
"d_inst = cnrn_target_copyin(inst);");
284 for (
auto const& ptr_mem: ptr_members) {
285 printer->fmt_line(
"tmp.{0} = cnrn_target_deviceptr(tmp.{0});", ptr_mem);
287 printer->add_multi_line(R
"CODE(
288 cnrn_target_memcpy_to_device(d_inst, &tmp);
289 auto* d_ml = cnrn_target_deviceptr(ml);
290 void* d_inst_void = d_inst;
291 cnrn_target_memcpy_to_device(&(d_ml->instance), &d_inst_void);
296 printer->fmt_push_block(
"static inline void delete_instance_from_device({}* inst)",
298 printer->push_block(
"if (cnrn_target_is_present(inst))");
299 printer->add_line(
"cnrn_target_delete(inst);");
310 printer->add_line(
"copy_instance_to_device(nt, ml, inst);");
318 printer->add_line(
"delete_instance_from_device(inst);");
323 printer->add_line(
"nrn_pragma_acc(update device (deriv_advance_flag) if(nt->compute_gpu))");
324 printer->add_line(
"nrn_pragma_omp(target update to(deriv_advance_flag) if(nt->compute_gpu))");
329 printer->add_line(
"nrn_pragma_acc(atomic capture)");
330 printer->add_line(
"nrn_pragma_omp(atomic capture)");
335 printer->push_block(
"if(nt->compute_gpu)");
336 printer->add_line(
"nrn_pragma_acc(wait(nt->stream_id))");
342 printer->add_line(
"nrn_pragma_acc(update self(nsb->_cnt))");
343 printer->add_line(
"nrn_pragma_omp(target update from(nsb->_cnt))");
349 printer->push_block(
"if (nsb && nt->compute_gpu)");
351 printer->add_line(
"update_net_send_buffer_on_host(nt, nsb);");
357 printer->push_block(
"if (nt->compute_gpu)");
358 printer->add_line(
"nrn_pragma_acc(update device(nsb->_cnt))");
359 printer->add_line(
"nrn_pragma_omp(target update to(nsb->_cnt))");
365 printer->fmt_line(
"#pragma acc update device({}) if (nt->compute_gpu)",
void print_net_send_buf_count_update_to_host() const override
update NetSendBuffer_t count from device to host
void print_device_stream_wait() const override
Print the code to synchronise/wait on stream specific to NrnThread.
void print_kernel_data_present_annotation_block_begin() override
annotations like "acc enter data present(...)" for main kernel
void print_parallel_iteration_hint(BlockType type, const ast::Block *block) override
ivdep like annotation for channel iterations
Visitor for printing C++ code with OpenACC backend
void print_net_send_buffering_grow() override
Replace default implementation by a no-op since the buffer cannot be grown up during gpu execution.
void print_newtonspace_transfer_to_device() const override
transfer newtonspace structure to device
void print_nrn_cur_matrix_shadow_reduction() override
reduction to matrix elements from shadow vectors
void print_net_send_buffering_cnt_update() const override
print atomic update of NetSendBuffer_t cnt
void print_net_init_acc_serial_annotation_block_begin() override
start of annotation "acc kernels" for net_init kernel
codegen::CodegenInfo info
All ast information for code generation.
std::string global_struct_instance() const
Name of the (host-only) global instance of global_struct
void print_deriv_advance_flag_transfer_to_device() const override
update derivimplicit advance flag on the gpu device
int derivimplicit_list_num
slist/dlist id for derivimplicit block
int thread_data_index
thread_data_index indicates number of threads being allocated.
const char * operator_for_d() const noexcept
Operator for diagonal vector update (matrix update)
bool nrn_cur_reduction_loop_required() override
if reduction block in nrn_cur required
encapsulates code generation backend implementations
Auto generated AST classes declaration.
static constexpr char NTHREAD_DT_VARIABLE[]
dt variable in neuron thread structure
void print_device_atomic_capture_annotation() const override
print atomic capture pragma
void print_fast_imem_calculation() override
fast membrane current calculation
void print_atomic_reduction_pragma() override
atomic update pragma for reduction statements
Base class for all block scoped nodes.
void print_instance_struct_transfer_routine_declarations() override
declare helper functions for copying the instance struct to the device
bool artificial_cell
if mod file is artificial cell
@ Equation
breakpoint block
void print_nrn_cur_matrix_shadow_update() override
update to matrix elements with/without shadow vectors
void print_instance_struct_copy_to_device() override
call helper function for copying the instance struct to the device
void print_rhs_d_shadow_variables() override
setup method for setting matrix shadow vectors
virtual void print_memory_allocation_routine() const
Print memory allocation routine.
@ NetReceive
net_receive block
bool point_process
if mod file is point process
void print_dt_update_to_device() const override
update dt from host to device
std::string get_variable_name(const std::string &name, bool use_instance=true) const override
Determine variable name in the structure of mechanism properties.
void print_global_variable_device_update_annotation() override
update global variable from host to the device
bool electrode_current
if electrode current specified
void print_kernel_data_present_annotation_block_end() override
end of annotation like "acc enter data"
BlockType
Helper to represent various block types.
void print_abort_routine() const override
abort routine
Auto generated AST classes declaration.
std::string instance_struct() const
Name of structure that wraps range variables.
void print_backend_includes() override
common includes : standard c++, coreneuron and backend specific
const char * operator_for_rhs() const noexcept
Operator for rhs vector update (matrix update)
void print_instance_struct_delete_from_device() override
call helper function that deletes the instance struct from the device
std::unique_ptr< CodePrinter > printer
Code printer object for target (C++)
void print_net_send_buf_update_to_host() const override
update NetSendBuffer_t from device to host
void print_net_send_buf_count_update_to_device() const override
update NetSendBuffer_t count from host to device
void print_instance_struct_transfer_routines(const std::vector< std::string > &ptr_members) override
define helper functions for copying the instance struct to the device
Auto generated AST classes declaration.
void print_net_init_acc_serial_annotation_block_end() override
end of annotation "acc kernels" for net_init kernel
std::string backend_name() const override
name of the code generation backend
void print_memory_allocation_routine() const override
memory allocation routine