14 #define BOOST_TEST_MODULE CoreNEURON solver
15 #include <boost/test/included/unit_test.hpp>
25 namespace utf = boost::unit_test;
29 std::vector<double> d,
rhs;
48 return os <<
"SolverImplementation::CellPermute0_CPU";
50 return os <<
"SolverImplementation::CellPermute0_GPU";
52 return os <<
"SolverImplementation::CellPermute1_CPU";
54 return os <<
"SolverImplementation::CellPermute1_GPU";
56 return os <<
"SolverImplementation::CellPermute2_CPU";
58 return os <<
"SolverImplementation::CellPermute2_GPU";
60 return os <<
"SolverImplementation::CellPermute2_CUDA";
62 throw std::runtime_error(
"Invalid SolverImplementation");
69 int num_segments_per_cell{3};
70 std::function<double(
int,
int)> produce_a{[](
auto,
auto) {
return 3.14159; }},
71 produce_b{[](
auto,
auto) {
return 42.0; }}, produce_d{[](
auto,
auto) {
return 7.0; }},
72 produce_rhs{[](
auto,
auto) {
return -16.0; }};
107 int num_cells_remaining{config.num_cells}, total_cells{};
108 for (
auto ithread = 0; ithread <
nrn_nthread; ++ithread) {
113 total_cells += nt.ncell;
114 num_cells_remaining -= nt.ncell;
116 nt.end = nt.ncell * config.num_segments_per_cell;
119 nt._ndata = padded_size * 4;
120 nt._data =
static_cast<double*
>(
emalloc_align(nt._ndata *
sizeof(
double)));
121 auto* vec_rhs = (nt._actual_rhs = nt._data + 0 * padded_size);
122 auto* vec_d = (nt._actual_d = nt._data + 1 * padded_size);
123 auto* vec_a = (nt._actual_a = nt._data + 2 * padded_size);
124 auto* vec_b = (nt._actual_b = nt._data + 3 * padded_size);
125 auto* parent_indices =
126 (nt._v_parent_index =
static_cast<int*
>(
emalloc_align(padded_size *
sizeof(
int))));
132 nseg = config.num_segments_per_cell](
auto icell,
auto iseg) {
136 return ncell + icell * (nseg - 1) + iseg - 1;
139 for (
auto icell = 0; icell < nt.ncell; ++icell) {
140 for (
auto iseg = 0; iseg < config.num_segments_per_cell; ++iseg) {
141 auto const global_index = get_index(icell, iseg);
142 vec_a[global_index] = config.produce_a(icell, iseg);
143 vec_b[global_index] = config.produce_b(icell, iseg);
144 vec_d[global_index] = config.produce_d(icell, iseg);
145 vec_rhs[global_index] = config.produce_rhs(icell, iseg);
155 auto const parent_id = iseg ? get_index(icell, (iseg - 1) / 2) : -1;
156 parent_indices[global_index] = parent_id;
160 for (
auto i = 0;
i < nt.end; ++
i) {
164 BOOST_REQUIRE(parent_indices[
i] == -1);
169 BOOST_REQUIRE(nt._permute);
184 std::cout <<
"CellPermute0_GPU is a nonstandard configuration, copying data to the "
185 "device may produce warnings:";
191 std::cout <<
"\n...no more warnings expected" << std::endl;
194 BOOST_REQUIRE(total_cells == config.num_cells);
195 BOOST_REQUIRE(num_cells_remaining == 0);
202 for (
auto& nt: *
this) {
204 delete[] std::exchange(nt._permute,
nullptr);
205 free_memory(std::exchange(nt._v_parent_index,
nullptr));
212 std::vector<SolverData> ret{
static_cast<std::size_t
>(
nrn_nthread)};
223 for (
auto i = 0;
i < nt.end; ++
i) {
225 auto const p_i = nt._permute ? nt._permute[
i] :
i;
227 auto const p_parent = nt._v_parent_index[p_i];
229 auto const parent = p_parent == -1
233 sd.d[
i] = nt._actual_d[p_i];
234 sd.parent_index[
i] = parent;
235 sd.rhs[
i] = nt._actual_rhs[p_i];
238 for (
auto i = 0;
i < nt.end; ++
i) {
248 for (
auto& thread: *
this) {
261 template <
typename... Args>
265 return threads.dump_solver_data();
273 #ifdef CORENEURON_ENABLE_GPU
288 BOOST_REQUIRE(solver_data.find(ref_impl) != solver_data.end());
289 auto const& ref_data = solver_data.at(ref_impl);
290 for (
auto const& [impl, impl_data]: solver_data) {
292 BOOST_REQUIRE(impl_data.size() == ref_data.size());
293 std::cout <<
"Comparing " << impl <<
" to " << ref_impl << std::endl;
294 for (
auto n_thread = 0ul; n_thread < impl_data.size(); ++n_thread) {
296 BOOST_REQUIRE(impl_data[n_thread].d.size() == ref_data[n_thread].d.size());
297 BOOST_REQUIRE(impl_data[n_thread].parent_index.size() ==
298 ref_data[n_thread].parent_index.size());
299 BOOST_REQUIRE(impl_data[n_thread].rhs.size() == ref_data[n_thread].rhs.size());
300 BOOST_TEST(impl_data[n_thread].d == ref_data[n_thread].d,
301 boost::test_tools::per_element());
302 BOOST_TEST(impl_data[n_thread].parent_index == ref_data[n_thread].parent_index,
303 boost::test_tools::per_element());
304 BOOST_TEST(impl_data[n_thread].rhs == ref_data[n_thread].rhs,
305 boost::test_tools::per_element());
310 template <
typename... Args>
312 std::map<SolverImplementation, std::vector<SolverData>> solver_data;
314 solver_data[impl] =
solve_and_dump(impl, std::forward<Args>(args)...);
326 constexpr std::size_t segments = 32;
330 for (
auto const& [impl,
data]: solver_data) {
331 BOOST_REQUIRE(
data.size() == 1);
332 BOOST_REQUIRE(
data[0].d.size() == segments);
333 BOOST_REQUIRE(
data[0].parent_index.size() == segments);
334 BOOST_REQUIRE(
data[0].rhs.size() == segments);
359 config.num_threads = 2;
364 std::mt19937_64 gen{42};
366 config.
produce_a = [g = gen, d = std::normal_distribution{1.0, 0.1}](
int icell,
370 config.produce_b = [g = gen, d = std::normal_distribution{7.0, 0.2}](int, int)
mutable {
373 config.produce_d = [g = gen, d = std::normal_distribution{-0.1, 0.01}](int, int)
mutable {
376 config.produce_rhs = [g = gen, d = std::normal_distribution{-15.0, 2.0}](int, int)
mutable {
384 config.num_segments_per_cell = 4096;
390 config.num_cells = 1024;