14 #define BOOST_TEST_MODULE CoreNEURON solver 
   15 #include <boost/test/included/unit_test.hpp> 
   25 namespace utf = boost::unit_test;
 
   29     std::vector<double> d, 
rhs;
 
   48         return os << 
"SolverImplementation::CellPermute0_CPU";
 
   50         return os << 
"SolverImplementation::CellPermute0_GPU";
 
   52         return os << 
"SolverImplementation::CellPermute1_CPU";
 
   54         return os << 
"SolverImplementation::CellPermute1_GPU";
 
   56         return os << 
"SolverImplementation::CellPermute2_CPU";
 
   58         return os << 
"SolverImplementation::CellPermute2_GPU";
 
   60         return os << 
"SolverImplementation::CellPermute2_CUDA";
 
   62         throw std::runtime_error(
"Invalid SolverImplementation");
 
   69     int num_segments_per_cell{3};
 
   70     std::function<double(
int, 
int)> produce_a{[](
auto, 
auto) { 
return 3.14159; }},
 
   71         produce_b{[](
auto, 
auto) { 
return 42.0; }}, produce_d{[](
auto, 
auto) { 
return 7.0; }},
 
   72         produce_rhs{[](
auto, 
auto) { 
return -16.0; }};
 
  107         int num_cells_remaining{config.num_cells}, total_cells{};
 
  108         for (
auto ithread = 0; ithread < 
nrn_nthread; ++ithread) {
 
  113             total_cells += nt.ncell;
 
  114             num_cells_remaining -= nt.ncell;
 
  116             nt.end = nt.ncell * config.num_segments_per_cell;
 
  119             nt._ndata = padded_size * 4;
 
  120             nt._data = 
static_cast<double*
>(
emalloc_align(nt._ndata * 
sizeof(
double)));
 
  121             auto* vec_rhs = (nt._actual_rhs = nt._data + 0 * padded_size);
 
  122             auto* vec_d = (nt._actual_d = nt._data + 1 * padded_size);
 
  123             auto* vec_a = (nt._actual_a = nt._data + 2 * padded_size);
 
  124             auto* vec_b = (nt._actual_b = nt._data + 3 * padded_size);
 
  125             auto* parent_indices =
 
  126                 (nt._v_parent_index = 
static_cast<int*
>(
emalloc_align(padded_size * 
sizeof(
int))));
 
  132                                     nseg = config.num_segments_per_cell](
auto icell, 
auto iseg) {
 
  136                     return ncell + icell * (nseg - 1) + iseg - 1;
 
  139             for (
auto icell = 0; icell < nt.ncell; ++icell) {
 
  140                 for (
auto iseg = 0; iseg < config.num_segments_per_cell; ++iseg) {
 
  141                     auto const global_index = get_index(icell, iseg);
 
  142                     vec_a[global_index] = config.produce_a(icell, iseg);
 
  143                     vec_b[global_index] = config.produce_b(icell, iseg);
 
  144                     vec_d[global_index] = config.produce_d(icell, iseg);
 
  145                     vec_rhs[global_index] = config.produce_rhs(icell, iseg);
 
  155                     auto const parent_id = iseg ? get_index(icell, (iseg - 1) / 2) : -1;
 
  156                     parent_indices[global_index] = parent_id;
 
  160             for (
auto i = 0; 
i < nt.end; ++
i) {
 
  164                     BOOST_REQUIRE(parent_indices[
i] == -1);
 
  169                 BOOST_REQUIRE(nt._permute);
 
  184             std::cout << 
"CellPermute0_GPU is a nonstandard configuration, copying data to the " 
  185                          "device may produce warnings:";
 
  191             std::cout << 
"\n...no more warnings expected" << std::endl;
 
  194         BOOST_REQUIRE(total_cells == config.num_cells);
 
  195         BOOST_REQUIRE(num_cells_remaining == 0);
 
  202         for (
auto& nt: *
this) {
 
  204             delete[] std::exchange(nt._permute, 
nullptr);
 
  205             free_memory(std::exchange(nt._v_parent_index, 
nullptr));
 
  212         std::vector<SolverData> ret{
static_cast<std::size_t
>(
nrn_nthread)};
 
  223             for (
auto i = 0; 
i < nt.end; ++
i) {
 
  225                 auto const p_i = nt._permute ? nt._permute[
i] : 
i;
 
  227                 auto const p_parent = nt._v_parent_index[p_i];
 
  229                 auto const parent = p_parent == -1
 
  233                 sd.d[
i] = nt._actual_d[p_i];
 
  234                 sd.parent_index[
i] = parent;
 
  235                 sd.rhs[
i] = nt._actual_rhs[p_i];
 
  238             for (
auto i = 0; 
i < nt.end; ++
i) {
 
  248         for (
auto& thread: *
this) {
 
  261 template <
typename... Args>
 
  265     return threads.dump_solver_data();
 
  273 #ifdef CORENEURON_ENABLE_GPU 
  288     BOOST_REQUIRE(solver_data.find(ref_impl) != solver_data.end());
 
  289     auto const& ref_data = solver_data.at(ref_impl);
 
  290     for (
auto const& [impl, impl_data]: solver_data) {
 
  292         BOOST_REQUIRE(impl_data.size() == ref_data.size());
 
  293         std::cout << 
"Comparing " << impl << 
" to " << ref_impl << std::endl;
 
  294         for (
auto n_thread = 0ul; n_thread < impl_data.size(); ++n_thread) {
 
  296             BOOST_REQUIRE(impl_data[n_thread].d.size() == ref_data[n_thread].d.size());
 
  297             BOOST_REQUIRE(impl_data[n_thread].parent_index.size() ==
 
  298                           ref_data[n_thread].parent_index.size());
 
  299             BOOST_REQUIRE(impl_data[n_thread].rhs.size() == ref_data[n_thread].rhs.size());
 
  300             BOOST_TEST(impl_data[n_thread].d == ref_data[n_thread].d,
 
  301                        boost::test_tools::per_element());
 
  302             BOOST_TEST(impl_data[n_thread].parent_index == ref_data[n_thread].parent_index,
 
  303                        boost::test_tools::per_element());
 
  304             BOOST_TEST(impl_data[n_thread].rhs == ref_data[n_thread].rhs,
 
  305                        boost::test_tools::per_element());
 
  310 template <
typename... Args>
 
  312     std::map<SolverImplementation, std::vector<SolverData>> solver_data;
 
  314         solver_data[impl] = 
solve_and_dump(impl, std::forward<Args>(args)...);
 
  326     constexpr std::size_t segments = 32;
 
  330     for (
auto const& [impl, 
data]: solver_data) {
 
  331         BOOST_REQUIRE(
data.size() == 1);  
 
  332         BOOST_REQUIRE(
data[0].d.size() == segments);
 
  333         BOOST_REQUIRE(
data[0].parent_index.size() == segments);
 
  334         BOOST_REQUIRE(
data[0].rhs.size() == segments);
 
  359     config.num_threads = 2;
 
  364     std::mt19937_64 gen{42};
 
  366     config.
produce_a = [g = gen, d = std::normal_distribution{1.0, 0.1}](
int icell,
 
  370     config.produce_b = [g = gen, d = std::normal_distribution{7.0, 0.2}](int, int) 
mutable {
 
  373     config.produce_d = [g = gen, d = std::normal_distribution{-0.1, 0.01}](int, int) 
mutable {
 
  376     config.produce_rhs = [g = gen, d = std::normal_distribution{-15.0, 2.0}](int, int) 
mutable {
 
  384     config.num_segments_per_cell = 4096;
 
  390     config.num_cells = 1024;