49#include "communication.hpp"
53#include "system/System.hpp"
61#include <boost/mpi/collectives/all_reduce.hpp>
62#include <boost/mpi/collectives/reduce.hpp>
64#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
65#include <Kokkos_Core.hpp>
86#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
90 auto const diff = std::abs(value - reference);
91 using FT = std::remove_cvref_t<
decltype(
diff)>;
92 auto constexpr atol = std::is_same_v<FT, float> ?
FT{2
e-4} :
FT{1
e-6};
93 auto constexpr rtol = std::is_same_v<FT, float> ?
FT{5
e-5} :
FT{1
e-5};
94 auto const non_zero = std::abs(reference) !=
FT{0};
100template <
typename FloatType, Arch Architecture,
class FFTConfig>
106 for (
auto const &p : get_system().
cell_structure->local_particles()) {
107 if (p.dipm() != 0.) {
114 boost::mpi::all_reduce(
comm_cart,
local_n, dp3m.sum_dip_part, std::plus<>());
118 std::size_t
n_c_part,
double sum_q2,
122 std::size_t
n_c_part,
double sum_q2,
129 double sum_q2,
double x1,
double x2,
double xacc,
132template <
typename FloatType, Arch Architecture,
class FFTConfig>
135 auto const &box_geo = *get_system().
box_geo;
138 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
142 phi /= 3. * box_geo.length()[0] * Utils::int_pow<3>(dp3m.params.mesh[0]);
143 return phi * std::numbers::pi;
146template <
typename FloatType, Arch Architecture,
class FFTConfig>
150 assert(dp3m.params.alpha > 0.);
152 auto const &
system = get_system();
153 auto const &box_geo = *
system.box_geo;
154 auto const &local_geo = *
system.local_geo;
157 dp3m.params.cao3 = Utils::int_pow<3>(dp3m.params.cao);
158 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
161 dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo,
verlet_skin, 0.);
162 dp3m.fft_buffers->init_halo();
163 dp3m.fft->init(dp3m.params);
164 dp3m.mesh.ks_pnum = dp3m.fft->get_ks_pnum();
165 dp3m.fft_buffers->init_meshes(dp3m.fft->get_ca_mesh_size());
166 dp3m.update_mesh_views();
167#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
168 dp3m.heffte.world_size =
comm_cart.size();
169 dp3m.heffte.fft = std::make_shared<P3MFFT<FloatType, FFTConfig>>(
170 ::comm_cart, dp3m.params.mesh, dp3m.local_mesh.ld_no_halo,
172 dp3m.resize_heffte_buffers();
174 dp3m.calc_differential_operator();
184#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
189 auto const &aosoa = cell_structure.get_aosoa();
190 auto const &unique_particles = cell_structure.get_unique_particles();
191 auto const n_part = cell_structure.count_local_particles();
194 "InterpolateDipoles", std::size_t{0
u}, n_part, [&](
auto p_index) {
196 auto const p_pos = aosoa.get_vector_at(aosoa.position,
p_index);
197 auto const dip = unique_particles.at(
p_index)->calc_dip();
200 p_pos, dp3m.params.ai, dp3m.local_mesh);
201 dp3m.inter_weights.store_at(
p_index, weights);
203 dp3m.local_mesh, weights, [&dip,
tid, &dp3m](
int ind,
double w) {
204 dp3m.rs_fields_kokkos(tid, 0u, ind) += value_type(w * dip[0u]);
205 dp3m.rs_fields_kokkos(tid, 1u, ind) += value_type(w * dip[1u]);
206 dp3m.rs_fields_kokkos(tid, 2u, ind) += value_type(w * dip[2u]);
212 Kokkos::RangePolicy<execution_space> policy(std::size_t{0},
213 dp3m.local_mesh.size);
214 Kokkos::parallel_for(
"ReduceInterpolatedDipoles", policy,
216 for (
int dir = 0; dir < 3; ++dir) {
219 acc += dp3m.rs_fields_kokkos(
tid, dir, i);
221 dp3m.mesh.rs_fields[dir][i] += acc;
222#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
223 dp3m.heffte.rs_dipole_density[dir][i] += acc;
236 real_pos, dp3m.params.ai, dp3m.local_mesh);
238 dp3m.local_mesh, weights, [&dip, &dp3m](
int ind,
double w) {
239 dp3m.mesh.rs_fields[0u][ind] += value_type(w * dip[0u]);
240 dp3m.mesh.rs_fields[1u][ind] += value_type(w * dip[1u]);
241 dp3m.mesh.rs_fields[2u][ind] += value_type(w * dip[2u]);
242#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
243 dp3m.heffte.rs_dipole_density[0u][ind] += value_type(w * dip[0u]);
244 dp3m.heffte.rs_dipole_density[1u][ind] += value_type(w * dip[1u]);
245 dp3m.heffte.rs_dipole_density[2u][ind] += value_type(w * dip[2u]);
249 dp3m.inter_weights.template
store<cao>(weights);
255template <
typename FloatType, Arch Architecture,
class FFTConfig>
259#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
260 Utils::integral_parameter<int, AssignDipole, p3m_min_cao, p3m_max_cao>(
261 dp3m.params.cao, dp3m, *get_system().cell_structure);
263 for (
auto const &p : get_system().
cell_structure->local_particles()) {
264 if (p.dipm() != 0.) {
265 Utils::integral_parameter<int, AssignDipole, p3m_min_cao, p3m_max_cao>(
266 dp3m.params.cao, dp3m, p.pos(), p.calc_dip());
277 assert(cao == dp3m.inter_weights.cao());
279 auto const kernel = [
d_rs, &dp3m](
auto const &pref,
auto &
p_torque,
284 [&
E, &dp3m,
d_rs](
int ind,
double w) {
286 E[d_rs] += w * double(dp3m.mesh.rs_scalar[ind]);
290#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
300#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
301 auto const n_part = dp3m.inter_weights.size();
305 "AssignTorques", std::size_t{0
u}, n_part, [&](std::size_t
p_index) {
306 auto const &p = *unique_particles.at(
p_index);
307 if (p.dipm() != 0.) {
316 if (p.dipm() != 0.) {
329 assert(cao == dp3m.inter_weights.cao());
331 auto const kernel = [
d_rs, &dp3m](
auto const &pref,
auto &
p_force,
338 E[0u] += w * double(dp3m.mesh.rs_fields[0u][ind]);
339 E[1u] += w * double(dp3m.mesh.rs_fields[1u][ind]);
340 E[2u] += w * double(dp3m.mesh.rs_fields[2u][ind]);
343#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
351#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
352 auto const n_part = dp3m.inter_weights.size();
356 "AssignForcesDip", std::size_t{0
u}, n_part, [&](std::size_t
p_index) {
357 auto const &p = *unique_particles.at(
p_index);
358 if (p.dipm() != 0.) {
367 if (p.dipm() != 0.) {
377#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
378template <
typename FloatType,
class FFTConfig>
383 static_cast<std::size_t
>(
Utils::product(this->local_mesh.dim_no_halo));
385 static_cast<std::size_t
>(
Utils::product(heffte.fft->ks_local_size()));
386 for (
auto d : {0
u, 1u, 2u}) {
397template <
typename FloatType, Arch Architecture,
class FFTConfig>
402 auto const &
system = get_system();
403 auto const &box_geo = *
system.box_geo;
413#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
414 auto constexpr r2c_dir = FFTConfig::r2c_dir;
415 auto const rs_local_size = dp3m.heffte.fft->rs_local_size();
416 auto const local_size = dp3m.heffte.fft->ks_local_size();
418 if constexpr (FFTConfig::use_r2c) {
422 auto const local_origin = dp3m.heffte.fft->ks_local_ld_index();
431 dp3m.resize_heffte_buffers();
434 if (dp3m.sum_mu2 > 0.) {
436 dp3m.fft_buffers->perform_vector_halo_gather();
437 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
438 dp3m.fft->forward_fft(rs_mesh);
440 dp3m.update_mesh_views();
442#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
443 if (dp3m.heffte.world_size == 1) {
445 std::array<FloatType *, 3u> rs_fields = {
446 {dp3m.heffte.rs_dipole_density[0
u].data(),
447 dp3m.heffte.rs_dipole_density[1u].data(),
448 dp3m.heffte.rs_dipole_density[2u].data()}};
449 dp3m.heffte.halo_comm.gather_grid(
::comm_cart, rs_fields,
450 dp3m.local_mesh.dim);
452 for (
auto dir : {0
u, 1u, 2u}) {
455 FFTConfig::r_space_order>(
456 dp3m.heffte.rs_dipole_density[dir], dp3m.local_mesh.dim,
457 dp3m.local_mesh.n_halo_ld,
458 dp3m.local_mesh.dim - dp3m.local_mesh.n_halo_ur);
465 auto constexpr KX = 1,
KY = 2,
KZ = 0;
474 dp3m.heffte.ks_dipole_density[dir].data());
476 if (
not dp3m.params.tuning) {
481 auto constexpr KX = 2,
KY = 0,
KZ = 1;
485 auto const old_value = std::complex<FloatType>{
506 if (dp3m.sum_mu2 > 0.) {
510 auto index = std::size_t(0
u);
514 auto constexpr KX = 2, KY = 0, KZ = 1;
515 auto const shift = local_index + dp3m.mesh.start;
516 auto const &d_op = dp3m.d_op[0u];
517 auto const &mesh_dip = dp3m.mesh.rs_fields;
519 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
520 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
521 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
524 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
525 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
526 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
528 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
529 std::advance(it_energy, 1);
531#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
532 if (dp3m.heffte.world_size == 1) {
539 auto const &
mesh_dip = dp3m.heffte.ks_dipole_density;
570 if (dp3m.energy_correction == 0.)
571 calc_energy_correction();
575 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
576 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
579 energy += prefactor * dp3m.energy_correction / box_geo.volume();
589 if (dp3m.sum_mu2 > 0.) {
590 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0
u];
591 dp3m.ks_scalar.resize(dp3m.local_mesh.size);
594 auto index{std::size_t(0
u)};
598 auto constexpr KX = 2, KY = 0, KZ = 1;
599 auto const shift = local_index + dp3m.mesh.start;
600 auto const &d_op = dp3m.d_op[0u];
601 auto const &mesh_dip = dp3m.mesh.rs_fields;
603 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
604 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
605 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
608 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
609 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
610 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
612 *it_ks_scalar = *it_energy * std::complex<FloatType>{re, im};
617#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
618 if (dp3m.heffte.world_size == 1) {
624 auto const &
mesh_dip = dp3m.heffte.ks_dipole_density;
634 if (
not dp3m.params.tuning) {
635 auto constexpr KX = 2,
KY = 0,
KZ = 1;
651 for (
int d = 0; d < 3; d++) {
655 auto const &offset = dp3m.mesh.start;
656 auto const &d_op = dp3m.d_op[0u];
657 auto const d_op_val = FloatType(d_op[local_index[d] + offset[d]]);
658 auto const &value = *it_ks_scalar;
659 dp3m.mesh.rs_scalar[index] = d_op_val * value.real();
661 dp3m.mesh.rs_scalar[index] = d_op_val * value.imag();
663 std::advance(it_ks_scalar, 1);
665#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
666 if (dp3m.heffte.world_size == 1) {
667 unsigned int constexpr d_ks[3] = {2u, 0
u, 1u};
678 if (
not dp3m.params.tuning) {
679 auto constexpr KX = 2,
KY = 0,
KZ = 1;
683 auto const old_value = std::complex<FloatType>{
694 dp3m.heffte.fft->backward(dp3m.heffte.ks_B_field_storage.data(),
695 dp3m.heffte.rs_B_fields_no_halo[d].data());
697 dp3m.heffte.rs_B_fields[d] =
700 std::span(dp3m.heffte.rs_B_fields_no_halo[d]),
701 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
702 dp3m.local_mesh.n_halo_ur);
705 dp3m.heffte.rs_B_fields[d].data(),
706 dp3m.local_mesh.dim);
709 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
711 dp3m.fft_buffers->perform_scalar_halo_spread();
713 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
714 Utils::integral_parameter<int, AssignTorques, p3m_min_cao, p3m_max_cao>(
728 auto it_force = dp3m.g_force.begin();
730 std::size_t index = 0
u;
732 auto constexpr KX = 2, KY = 0, KZ = 1;
733 auto const shift = local_index + dp3m.mesh.start;
734 auto const &d_op = dp3m.d_op[0u];
735 auto const &mesh_dip = dp3m.mesh.rs_fields;
737 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
738 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
739 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
742 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
743 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
744 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
746 *it_ks_scalar = {*it_force * im, *it_force * (-re)};
752#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
753 if (dp3m.heffte.world_size == 1) {
759 auto const &
mesh_dip = dp3m.heffte.ks_dipole_density;
771 if (
not dp3m.params.tuning) {
772 auto constexpr KX = 2,
KY = 0,
KZ = 1;
788 for (
int d = 0; d < 3; d++) {
789 std::size_t index = 0
u;
792 auto constexpr KX = 2, KY = 0, KZ = 1;
793 auto const shift = local_index + dp3m.mesh.start;
794 auto const &d_op = dp3m.d_op[0u];
795 auto const &mesh_dip = dp3m.mesh.rs_fields;
796 auto const d_op_val = FloatType(d_op[shift[d]]);
797 auto const f = *it_ks_scalar * d_op_val;
798 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.real();
799 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.real();
800 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.real();
802 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.imag();
803 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.imag();
804 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.imag();
806 std::advance(it_ks_scalar, 1);
809#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
810 if (dp3m.heffte.world_size == 1) {
815 auto constexpr KX = 1,
KY = 2,
KZ = 0;
822 auto &
mesh_dip = dp3m.heffte.ks_dipole_density;
833 if (
not FFTConfig::use_r2c
and not dp3m.params.tuning) {
837 for (
int j = 0;
j < 3; ++
j) {
838 auto const old_value = std::complex<FloatType>{
849 for (
int dir = 0
u; dir < 3u; ++dir) {
850 dp3m.heffte.fft->backward(
851 dp3m.heffte.ks_dipole_density[dir].data(),
852 dp3m.heffte.rs_B_fields_no_halo[dir].data());
854 dp3m.heffte.rs_B_fields[d] =
857 std::span(dp3m.heffte.rs_B_fields_no_halo[dir]),
858 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
859 dp3m.local_mesh.n_halo_ur);
863 std::array<FloatType *, 3u>{{dp3m.heffte.rs_B_fields[0
u].data(),
864 dp3m.heffte.rs_B_fields[1u].data(),
865 dp3m.heffte.rs_B_fields[2u].data()}};
866 dp3m.heffte.halo_comm.spread_grid(
::comm_cart, rs_fields,
867 dp3m.local_mesh.dim);
870 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
871 dp3m.fft->backward_fft(rs_mesh);
874 dp3m.fft_buffers->perform_vector_halo_spread();
876 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
904template <
typename FloatType, Arch Architecture,
class FFTConfig>
907 auto const &
system = get_system();
908 auto const &box_geo = *
system.box_geo;
909 auto const particles =
system.cell_structure->local_particles();
910 auto const pref = prefactor * 4. * std::numbers::pi / box_geo.volume() /
911 (2. * dp3m.params.epsilon + 1.);
921 for (
auto const &p : particles) {
922 auto const dip = p.calc_dip();
946 0.5 * pref * boost::mpi::all_reduce(
comm_cart,
sum_e, std::plus<>());
962 for (
auto &p : particles) {
963 auto &torque = p.torque();
965 torque[1u] -= pref *
sumiy[
ip];
966 torque[2u] -= pref *
sumiz[
ip];
974template <
typename FloatType, Arch Architecture,
class FFTConfig>
978 FFTConfig::k_space_order>(
979 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
980 get_system().
box_geo->length_inv());
981#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
982 if (dp3m.heffte.world_size == 1) {
983 dp3m.heffte.g_force =
985 FFTConfig::k_space_order>(
986 dp3m.params, dp3m.heffte.fft->ks_local_ld_index(),
987 dp3m.heffte.fft->ks_local_ur_index(),
988 get_system().
box_geo->length_inv());
989 if constexpr (FFTConfig::use_r2c) {
991 dp3m.heffte.g_force, dp3m.params.mesh,
992 dp3m.heffte.fft->ks_local_size(),
993 dp3m.heffte.fft->ks_local_ld_index());
999template <
typename FloatType, Arch Architecture,
class FFTConfig>
1003 FFTConfig::k_space_order>(
1004 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
1005 get_system().
box_geo->length_inv());
1006#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
1007 if (dp3m.heffte.world_size == 1) {
1008 dp3m.heffte.g_energy =
1010 FFTConfig::k_space_order>(
1011 dp3m.params, dp3m.heffte.fft->ks_local_ld_index(),
1012 dp3m.heffte.fft->ks_local_ur_index(),
1013 get_system().
box_geo->length_inv());
1014 if constexpr (FFTConfig::use_r2c) {
1016 dp3m.heffte.g_energy, dp3m.params.mesh,
1017 dp3m.heffte.fft->ks_local_size(),
1018 dp3m.heffte.fft->ks_local_ld_index());
1024template <
typename FloatType, Arch Architecture,
class FFTConfig>
1027 int m_mesh_max = -1, m_mesh_min = -1;
1028 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
1032 double prefactor,
int timings,
1041 std::optional<std::string>
1048 m_logger = std::make_unique<TuningLogger>(
1056 std::tuple<double, double, double, double>
1058 double r_cut_iL)
const override {
1072 0.0001 * box_geo.length()[0], 5. * box_geo.length()[0], 0.0001,
1097 m_mesh_min =
static_cast<int>(std::round(std::pow(2., std::floor(
expo))));
1100 if (m_tune_limits.first) {
1101 m_mesh_min = *m_tune_limits.first;
1103 if (m_tune_limits.second) {
1104 m_mesh_max = *m_tune_limits.second;
1107 m_mesh_min = m_mesh_max = dp3m.
params.
mesh[0];
1147template <
typename FloatType, Arch Architecture,
class FFTConfig>
1149 auto &
system = get_system();
1150 auto const &box_geo = *
system.box_geo;
1157 if (
not is_tuned()) {
1160 throw std::runtime_error(
1161 "DipolarP3M: no dipolar particles in the system");
1165 system, dp3m, prefactor, tuning.timings, tuning.limits);
1174 system.on_dipoles_change();
1205 [&](
unsigned dim,
int n) {
1206 nm[dim] = shift[dim] + n * mesh;
1215 std::size_t
n_c_part,
double sum_q2,
1219 auto const mesh_i = 1. /
static_cast<double>(mesh);
1237 Utils::int_pow<3>(
static_cast<double>(
n2));
1248 return 8. *
Utils::sqr(std::numbers::pi) / 3. * sum_q2 *
1260 std::size_t
n_c_part,
double sum_q2,
1262 auto constexpr exp_min = -708.4;
1294 double sum_q2,
double x1,
double x2,
double xacc,
1306 if (
f1 *
f2 >= 0.0) {
1307 throw std::runtime_error(
1308 "Root must be bracketed for bisection in dp3m_rtbisection");
1323 throw std::runtime_error(
"Too many bisections in dp3m_rtbisection");
1328 auto const &box_geo = *
system.box_geo;
1329 auto const &local_geo = *
system.local_geo;
1330 for (
auto i = 0
u; i < 3u; i++) {
1333 std::stringstream
msg;
1335 <<
" is larger than half of box dimension " << box_geo.length()[i];
1336 throw std::runtime_error(
msg.str());
1339 std::stringstream
msg;
1341 <<
" is larger than local box dimension " << local_geo.length()[i];
1342 throw std::runtime_error(
msg.str());
1346 if ((box_geo.length()[0] != box_geo.length()[1])
or
1347 (box_geo.length()[1] != box_geo.length()[2])) {
1348 throw std::runtime_error(
"DipolarP3M: requires a cubic box");
1354 if (!box_geo.periodic(0)
or !box_geo.periodic(1)
or !box_geo.periodic(2)) {
1355 throw std::runtime_error(
1356 "DipolarP3M: requires periodicity (True, True, True)");
1361 auto const &local_geo = *
get_system().local_geo;
1364 throw std::runtime_error(
1365 "DipolarP3M: requires the regular or hybrid decomposition cell system");
1369 throw std::runtime_error(
1370 "DipolarP3M: does not work with the hybrid decomposition cell system, "
1371 "if using more than one MPI node");
1377 if (node_grid[0] < node_grid[1]
or node_grid[1] < node_grid[2]) {
1378 throw std::runtime_error(
1379 "DipolarP3M: node grid must be sorted, largest first");
1383template <
typename FloatType, Arch Architecture,
class FFTConfig>
1385 auto const &box_geo = *get_system().
box_geo;
1390 sanity_checks_boxl();
1391 calc_influence_function_force();
1392 calc_influence_function_energy();
1394#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
1401template <
typename FloatType, Arch Architecture,
class FFTConfig>
1404 auto const &box_geo = *get_system().
box_geo;
1405 auto const Ukp3m = calc_average_self_energy_k_space() * box_geo.volume();
1413template <
typename FloatType, Arch Architecture,
class FFTConfig>
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
Vector implementation and trait types for boost qvm interoperability.
Describes a cell structure / cell system.
auto & get_local_torque()
auto const & get_unique_particles() const
ParticleRange local_particles() const
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
TuningAlgorithm::Parameters get_time() override
DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
void on_solver_change() const override
void determine_mesh_limits() override
P3MParameters & get_params() override
std::optional< std::string > layer_correction_veto_r_cut(double) const override
void setup_logger(bool verbose) override
base_type::size_type size() const
void npt_add_virial_contribution(double energy)
std::shared_ptr< CellStructure > cell_structure
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
void zfill(std::size_t size)
Fill cache with zero-initialized data.
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
constexpr auto round_error_prec
Precision below which a double-precision float is assumed to be zero.
__device__ void vector_product(float const *a, float const *b, float *out)
static std::size_t count_magnetic_particles(ParticleRange const &particles)
P3M algorithm for long-range magnetic dipole-dipole interaction.
double dp3m_real_space_error(double box_size, double r_cut_iL, std::size_t n_c_part, double sum_q2, double alpha_L)
Calculate the value of the errors for the REAL part of the force in terms of the splitting parameter ...
double dp3m_rtbisection(double box_size, double r_cut_iL, std::size_t n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy)
Compute the value of alpha through a bisection method.
auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, double mesh_i, int cao, double alpha_L_i)
Tuning dipolar-P3M.
double dp3m_k_space_error(double box_size, int mesh, int cao, std::size_t n_c_part, double sum_q2, double alpha_L)
Calculate the k-space error of dipolar-P3M.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
Routines, row decomposition, data structures and communication for the 3D-FFT.
auto pad_with_zeros_discard_imag(std::span< T > cropped_array, Utils::Vector3i const &cropped_dim, Utils::Vector3i const &pad_left, Utils::Vector3i const &pad_right)
Pad a 3D matrix with zeros to restore halo regions.
auto extract_block(Container const &in_array, Utils::Vector3i const &dimensions, Utils::Vector3i const &start, Utils::Vector3i const &stop)
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
std::vector< FloatType > grid_influence_function_dipolar(P3MParameters const ¶ms, Utils::Vector3i const &n_start, Utils::Vector3i const &n_stop, Utils::Vector3d const &inv_box_l)
Map influence function over a grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, WeightsStorage< cao > const &weights, Kernel kernel)
P3M grid interpolation.
constexpr int p3m_min_cao
Minimal charge assignment order.
constexpr int p3m_max_cao
Maximal charge assignment order.
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
T product(Vector< T, N > const &v)
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
decltype(auto) integral_parameter(T i, Args &&...args)
Generate a call table for an integral non-type template parameter.
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
auto get_analytic_cotangent_sum_kernel(int cao)
Exports for the NpT code.
Common functions for dipolar and charge P3M.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
ESPRESSO_ATTR_ALWAYS_INLINE void kokkos_parallel_range_for(auto const &name, auto start, auto end, auto const &kernel)
Utils::Vector3i node_grid
double calc_surface_term(bool force_flag, bool energy_flag) override
void dipole_assign() override
void scaleby_box_l() override
double long_range_kernel(bool force_flag, bool energy_flag)
Compute the k-space part of forces and energies.
Base class for the magnetostatics P3M algorithm.
double sum_mu2
Sum of square of magnetic dipoles.
p3m_interpolation_cache inter_weights
std::size_t sum_dip_part
number of dipolar particles.
p3m_send_mesh< FloatType > halo_comm
double energy_correction
cached k-space self-energy correction
void resize_heffte_buffers()
struct DipolarP3MState::@1 heffte
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
void sanity_checks_cell_structure() const
P3MParameters const & dp3m_params
void sanity_checks_periodicity() const
void sanity_checks_node_grid() const
void recalc_ld_pos(P3MParameters const ¶ms)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
P3MParameters params
P3M base parameters.
P3MLocalMesh local_mesh
Local mesh geometry information for this MPI rank.
void operator()(auto &dp3m, auto &cell_structure)
void operator()(auto &dp3m, double prefac, int d_rs, CellStructure &cell_structure) const
void operator()(auto &dp3m, double prefac, int d_rs, CellStructure &cell_structure) const