49#include "communication.hpp"
53#include "system/System.hpp"
61#include <boost/mpi/collectives/all_reduce.hpp>
62#include <boost/mpi/collectives/reduce.hpp>
64#include <Kokkos_Core.hpp>
65#include <Kokkos_ScatterView.hpp>
84#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
88 auto const diff = std::abs(value - reference);
89 using FT = std::remove_cvref_t<
decltype(
diff)>;
90 auto constexpr atol = std::is_same_v<FT, float> ?
FT{2
e-4} :
FT{1
e-6};
91 auto constexpr rtol = std::is_same_v<FT, float> ?
FT{5
e-5} :
FT{1
e-5};
92 auto const non_zero = std::abs(reference) !=
FT{0};
98template <
typename FloatType, Arch Architecture,
class FFTConfig>
104 for (
auto const &p : get_system().
cell_structure->local_particles()) {
105 if (p.dipm() != 0.) {
112 boost::mpi::all_reduce(
comm_cart,
local_n, dp3m.sum_dip_part, std::plus<>());
116 std::size_t
n_c_part,
double sum_q2,
120 std::size_t
n_c_part,
double sum_q2,
127 double sum_q2,
double x1,
double x2,
double xacc,
130template <
typename FloatType, Arch Architecture,
class FFTConfig>
133 auto const &box_geo = *get_system().
box_geo;
136 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
140 phi /= 3. * box_geo.length()[0] * Utils::int_pow<3>(dp3m.params.mesh[0]);
141 return phi * std::numbers::pi;
144template <
typename FloatType, Arch Architecture,
class FFTConfig>
148 assert(dp3m.params.alpha > 0.);
150 auto const &
system = get_system();
151 auto const &box_geo = *
system.box_geo;
152 auto const &local_geo = *
system.local_geo;
155 dp3m.params.cao3 = Utils::int_pow<3>(dp3m.params.cao);
156 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
159 dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo,
verlet_skin, 0.);
160 dp3m.fft_buffers->init_halo();
161 dp3m.fft->init(dp3m.params);
162 dp3m.mesh.ks_pnum = dp3m.fft->get_ks_pnum();
163 dp3m.fft_buffers->init_meshes(dp3m.fft->get_ca_mesh_size());
164 dp3m.update_mesh_views();
165#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
166 dp3m.heffte.world_size =
comm_cart.size();
167 dp3m.heffte.fft = std::make_shared<P3MFFT<FloatType, FFTConfig>>(
168 ::comm_cart, dp3m.params.mesh, dp3m.local_mesh.ld_no_halo,
170 dp3m.resize_heffte_buffers();
172 dp3m.calc_differential_operator();
186 auto const &aosoa = cell_structure.get_aosoa();
187 auto const &unique_particles = cell_structure.get_unique_particles();
188 auto const n_part = cell_structure.count_local_particles();
191 "InterpolateDipoles", std::size_t{0
u}, n_part, [&](
auto p_index) {
193 auto const p_pos = aosoa.get_span_at(aosoa.position,
p_index);
194 auto const dip = unique_particles.at(
p_index)->calc_dip();
197 p_pos, dp3m.params.ai, dp3m.local_mesh);
198 dp3m.inter_weights.store_at(
p_index, weights);
200 dp3m.local_mesh, weights, [&dip,
tid, &dp3m](
int ind,
double w) {
201 dp3m.rs_fields_kokkos(tid, 0u, ind) += value_type(w * dip[0u]);
202 dp3m.rs_fields_kokkos(tid, 1u, ind) += value_type(w * dip[1u]);
203 dp3m.rs_fields_kokkos(tid, 2u, ind) += value_type(w * dip[2u]);
209 Kokkos::RangePolicy<execution_space> policy(std::size_t{0},
210 dp3m.local_mesh.size);
211 Kokkos::parallel_for(
"ReduceInterpolatedDipoles", policy,
213 for (
int dir = 0; dir < 3; ++dir) {
216 acc += dp3m.rs_fields_kokkos(
tid, dir, i);
218 dp3m.mesh.rs_fields[dir][i] += acc;
219#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
220 dp3m.heffte.rs_dipole_density[dir][i] += acc;
229template <
typename FloatType, Arch Architecture,
class FFTConfig>
233 Utils::integral_parameter<int, AssignDipole, p3m_min_cao, p3m_max_cao>(
234 dp3m.params.cao, dp3m, *get_system().cell_structure);
242 assert(cao == dp3m.inter_weights.cao());
244 auto const kernel = [
d_rs, &dp3m](
auto const &pref,
auto &
p_torque,
249 [&
E, &dp3m,
d_rs](
int ind,
double w) {
251 E[d_rs] += w * double(dp3m.mesh.rs_scalar[ind]);
256 access(
p_index, 0) -= torque[0];
257 access(
p_index, 1) -= torque[1];
258 access(
p_index, 2) -= torque[2];
261 auto const n_part = dp3m.inter_weights.size();
265 "AssignTorques", std::size_t{0
u}, n_part, [&](std::size_t
p_index) {
266 auto const &p = *unique_particles.at(
p_index);
267 if (p.dipm() != 0.) {
278 assert(cao == dp3m.inter_weights.cao());
280 auto const kernel = [
d_rs, &dp3m](
auto const &pref,
auto &
p_force,
287 E[0u] += w * double(dp3m.mesh.rs_fields[0u][ind]);
288 E[1u] += w * double(dp3m.mesh.rs_fields[1u][ind]);
289 E[2u] += w * double(dp3m.mesh.rs_fields[2u][ind]);
292 auto access =
p_force.access();
296 auto const n_part = dp3m.inter_weights.size();
300 "AssignForcesDip", std::size_t{0
u}, n_part, [&](std::size_t
p_index) {
301 auto const &p = *unique_particles.at(
p_index);
302 if (p.dipm() != 0.) {
310#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
311template <
typename FloatType,
class FFTConfig>
316 static_cast<std::size_t
>(
Utils::product(this->local_mesh.dim_no_halo));
318 static_cast<std::size_t
>(
Utils::product(heffte.fft->ks_local_size()));
319 for (
auto d : {0
u, 1u, 2u}) {
330template <
typename FloatType, Arch Architecture,
class FFTConfig>
335 auto const &
system = get_system();
336 auto const &box_geo = *
system.box_geo;
346#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
347 auto constexpr r2c_dir = FFTConfig::r2c_dir;
348 auto const rs_local_size = dp3m.heffte.fft->rs_local_size();
349 auto const local_size = dp3m.heffte.fft->ks_local_size();
351 if constexpr (FFTConfig::use_r2c) {
355 auto const local_origin = dp3m.heffte.fft->ks_local_ld_index();
364 dp3m.resize_heffte_buffers();
367 if (dp3m.sum_mu2 > 0.) {
369 dp3m.fft_buffers->perform_vector_halo_gather();
370 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
371 dp3m.fft->forward_fft(rs_mesh);
373 dp3m.update_mesh_views();
375#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
376 if (dp3m.heffte.world_size == 1) {
378 std::array<FloatType *, 3u> rs_fields = {
379 {dp3m.heffte.rs_dipole_density[0
u].data(),
380 dp3m.heffte.rs_dipole_density[1u].data(),
381 dp3m.heffte.rs_dipole_density[2u].data()}};
382 dp3m.heffte.halo_comm.gather_grid(
::comm_cart, rs_fields,
383 dp3m.local_mesh.dim);
385 for (
auto dir : {0
u, 1u, 2u}) {
388 FFTConfig::r_space_order>(
389 dp3m.heffte.rs_dipole_density[dir], dp3m.local_mesh.dim,
390 dp3m.local_mesh.n_halo_ld,
391 dp3m.local_mesh.dim - dp3m.local_mesh.n_halo_ur);
398 auto constexpr KX = 1,
KY = 2,
KZ = 0;
407 dp3m.heffte.ks_dipole_density[dir].data());
409 if (
not dp3m.params.tuning) {
414 auto constexpr KX = 2,
KY = 0,
KZ = 1;
418 auto const old_value = std::complex<FloatType>{
439 if (dp3m.sum_mu2 > 0.) {
443 auto index = std::size_t(0
u);
447 auto constexpr KX = 2, KY = 0, KZ = 1;
448 auto const shift = local_index + dp3m.mesh.start;
449 auto const &d_op = dp3m.d_op[0u];
450 auto const &mesh_dip = dp3m.mesh.rs_fields;
452 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
453 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
454 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
457 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
458 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
459 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
461 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
462 std::advance(it_energy, 1);
464#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
465 if (dp3m.heffte.world_size == 1) {
472 auto const &
mesh_dip = dp3m.heffte.ks_dipole_density;
503 if (dp3m.energy_correction == 0.)
504 calc_energy_correction();
508 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
509 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
512 energy += prefactor * dp3m.energy_correction / box_geo.volume();
522 if (dp3m.sum_mu2 > 0.) {
523 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0
u];
524 dp3m.ks_scalar.resize(dp3m.local_mesh.size);
527 auto index{std::size_t(0
u)};
531 auto constexpr KX = 2, KY = 0, KZ = 1;
532 auto const shift = local_index + dp3m.mesh.start;
533 auto const &d_op = dp3m.d_op[0u];
534 auto const &mesh_dip = dp3m.mesh.rs_fields;
536 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
537 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
538 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
541 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
542 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
543 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
545 *it_ks_scalar = *it_energy * std::complex<FloatType>{re, im};
550#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
551 if (dp3m.heffte.world_size == 1) {
557 auto const &
mesh_dip = dp3m.heffte.ks_dipole_density;
567 if (
not dp3m.params.tuning) {
568 auto constexpr KX = 2,
KY = 0,
KZ = 1;
584 for (
int d = 0; d < 3; d++) {
588 auto const &offset = dp3m.mesh.start;
589 auto const &d_op = dp3m.d_op[0u];
590 auto const d_op_val = FloatType(d_op[local_index[d] + offset[d]]);
591 auto const &value = *it_ks_scalar;
592 dp3m.mesh.rs_scalar[index] = d_op_val * value.real();
594 dp3m.mesh.rs_scalar[index] = d_op_val * value.imag();
596 std::advance(it_ks_scalar, 1);
598#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
599 if (dp3m.heffte.world_size == 1) {
600 unsigned int constexpr d_ks[3] = {2u, 0
u, 1u};
611 if (
not dp3m.params.tuning) {
612 auto constexpr KX = 2,
KY = 0,
KZ = 1;
616 auto const old_value = std::complex<FloatType>{
627 dp3m.heffte.fft->backward(dp3m.heffte.ks_B_field_storage.data(),
628 dp3m.heffte.rs_B_fields_no_halo[d].data());
630 dp3m.heffte.rs_B_fields[d] =
633 std::span(dp3m.heffte.rs_B_fields_no_halo[d]),
634 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
635 dp3m.local_mesh.n_halo_ur);
638 dp3m.heffte.rs_B_fields[d].data(),
639 dp3m.local_mesh.dim);
642 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
644 dp3m.fft_buffers->perform_scalar_halo_spread();
646 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
647 Utils::integral_parameter<int, AssignTorques, p3m_min_cao, p3m_max_cao>(
661 auto it_force = dp3m.g_force.begin();
663 std::size_t index = 0
u;
665 auto constexpr KX = 2, KY = 0, KZ = 1;
666 auto const shift = local_index + dp3m.mesh.start;
667 auto const &d_op = dp3m.d_op[0u];
668 auto const &mesh_dip = dp3m.mesh.rs_fields;
670 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
671 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
672 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
675 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
676 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
677 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
679 *it_ks_scalar = {*it_force * im, *it_force * (-re)};
685#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
686 if (dp3m.heffte.world_size == 1) {
692 auto const &
mesh_dip = dp3m.heffte.ks_dipole_density;
704 if (
not dp3m.params.tuning) {
705 auto constexpr KX = 2,
KY = 0,
KZ = 1;
721 for (
int d = 0; d < 3; d++) {
722 std::size_t index = 0
u;
725 auto constexpr KX = 2, KY = 0, KZ = 1;
726 auto const shift = local_index + dp3m.mesh.start;
727 auto const &d_op = dp3m.d_op[0u];
728 auto const &mesh_dip = dp3m.mesh.rs_fields;
729 auto const d_op_val = FloatType(d_op[shift[d]]);
730 auto const f = *it_ks_scalar * d_op_val;
731 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.real();
732 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.real();
733 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.real();
735 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.imag();
736 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.imag();
737 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.imag();
739 std::advance(it_ks_scalar, 1);
742#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
743 if (dp3m.heffte.world_size == 1) {
748 auto constexpr KX = 1,
KY = 2,
KZ = 0;
755 auto &
mesh_dip = dp3m.heffte.ks_dipole_density;
766 if (
not FFTConfig::use_r2c
and not dp3m.params.tuning) {
770 for (
int j = 0;
j < 3; ++
j) {
771 auto const old_value = std::complex<FloatType>{
782 for (
int dir = 0
u; dir < 3u; ++dir) {
783 dp3m.heffte.fft->backward(
784 dp3m.heffte.ks_dipole_density[dir].data(),
785 dp3m.heffte.rs_B_fields_no_halo[dir].data());
787 dp3m.heffte.rs_B_fields[d] =
790 std::span(dp3m.heffte.rs_B_fields_no_halo[dir]),
791 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
792 dp3m.local_mesh.n_halo_ur);
796 std::array<FloatType *, 3u>{{dp3m.heffte.rs_B_fields[0
u].data(),
797 dp3m.heffte.rs_B_fields[1u].data(),
798 dp3m.heffte.rs_B_fields[2u].data()}};
799 dp3m.heffte.halo_comm.spread_grid(
::comm_cart, rs_fields,
800 dp3m.local_mesh.dim);
803 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
804 dp3m.fft->backward_fft(rs_mesh);
807 dp3m.fft_buffers->perform_vector_halo_spread();
809 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
837template <
typename FloatType, Arch Architecture,
class FFTConfig>
840 auto const &
system = get_system();
841 auto const &box_geo = *
system.box_geo;
842 auto const particles =
system.cell_structure->local_particles();
843 auto const pref = prefactor * 4. * std::numbers::pi / box_geo.volume() /
844 (2. * dp3m.params.epsilon + 1.);
854 for (
auto const &p : particles) {
855 auto const dip = p.calc_dip();
879 0.5 * pref * boost::mpi::all_reduce(
comm_cart,
sum_e, std::plus<>());
895 for (
auto &p : particles) {
896 auto &torque = p.torque();
898 torque[1u] -= pref *
sumiy[
ip];
899 torque[2u] -= pref *
sumiz[
ip];
907template <
typename FloatType, Arch Architecture,
class FFTConfig>
911 FFTConfig::k_space_order>(
912 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
913 get_system().
box_geo->length_inv());
914#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
915 if (dp3m.heffte.world_size == 1) {
916 dp3m.heffte.g_force =
918 FFTConfig::k_space_order>(
919 dp3m.params, dp3m.heffte.fft->ks_local_ld_index(),
920 dp3m.heffte.fft->ks_local_ur_index(),
921 get_system().
box_geo->length_inv());
922 if constexpr (FFTConfig::use_r2c) {
924 dp3m.heffte.g_force, dp3m.params.mesh,
925 dp3m.heffte.fft->ks_local_size(),
926 dp3m.heffte.fft->ks_local_ld_index());
932template <
typename FloatType, Arch Architecture,
class FFTConfig>
936 FFTConfig::k_space_order>(
937 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
938 get_system().
box_geo->length_inv());
939#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
940 if (dp3m.heffte.world_size == 1) {
941 dp3m.heffte.g_energy =
943 FFTConfig::k_space_order>(
944 dp3m.params, dp3m.heffte.fft->ks_local_ld_index(),
945 dp3m.heffte.fft->ks_local_ur_index(),
946 get_system().
box_geo->length_inv());
947 if constexpr (FFTConfig::use_r2c) {
949 dp3m.heffte.g_energy, dp3m.params.mesh,
950 dp3m.heffte.fft->ks_local_size(),
951 dp3m.heffte.fft->ks_local_ld_index());
957template <
typename FloatType, Arch Architecture,
class FFTConfig>
960 int m_mesh_max = -1, m_mesh_min = -1;
961 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
965 double prefactor,
int timings,
974 std::optional<std::string>
981 m_logger = std::make_unique<TuningLogger>(
989 std::tuple<double, double, double, double>
991 double r_cut_iL)
const override {
1005 0.0001 * box_geo.length()[0], 5. * box_geo.length()[0], 0.0001,
1030 m_mesh_min =
static_cast<int>(std::round(std::pow(2., std::floor(
expo))));
1033 if (m_tune_limits.first) {
1034 m_mesh_min = *m_tune_limits.first;
1036 if (m_tune_limits.second) {
1037 m_mesh_max = *m_tune_limits.second;
1040 m_mesh_min = m_mesh_max = dp3m.
params.
mesh[0];
1080template <
typename FloatType, Arch Architecture,
class FFTConfig>
1082 auto &
system = get_system();
1083 auto const &box_geo = *
system.box_geo;
1090 if (
not is_tuned()) {
1093 throw std::runtime_error(
1094 "DipolarP3M: no dipolar particles in the system");
1098 system, dp3m, prefactor, tuning.timings, tuning.limits);
1107 system.on_dipoles_change();
1138 [&](
unsigned dim,
int n) {
1139 nm[dim] = shift[dim] + n * mesh;
1148 std::size_t
n_c_part,
double sum_q2,
1152 auto const mesh_i = 1. /
static_cast<double>(mesh);
1170 Utils::int_pow<3>(
static_cast<double>(
n2));
1181 return 8. *
Utils::sqr(std::numbers::pi) / 3. * sum_q2 *
1193 std::size_t
n_c_part,
double sum_q2,
1195 auto constexpr exp_min = -708.4;
1227 double sum_q2,
double x1,
double x2,
double xacc,
1239 if (
f1 *
f2 >= 0.0) {
1240 throw std::runtime_error(
1241 "Root must be bracketed for bisection in dp3m_rtbisection");
1256 throw std::runtime_error(
"Too many bisections in dp3m_rtbisection");
1261 auto const &box_geo = *
system.box_geo;
1262 auto const &local_geo = *
system.local_geo;
1263 for (
auto i = 0
u; i < 3u; i++) {
1266 std::stringstream
msg;
1268 <<
" is larger than half of box dimension " << box_geo.length()[i];
1269 throw std::runtime_error(
msg.str());
1272 std::stringstream
msg;
1274 <<
" is larger than local box dimension " << local_geo.length()[i];
1275 throw std::runtime_error(
msg.str());
1279 if ((box_geo.length()[0] != box_geo.length()[1])
or
1280 (box_geo.length()[1] != box_geo.length()[2])) {
1281 throw std::runtime_error(
"DipolarP3M: requires a cubic box");
1287 if (!box_geo.periodic(0)
or !box_geo.periodic(1)
or !box_geo.periodic(2)) {
1288 throw std::runtime_error(
1289 "DipolarP3M: requires periodicity (True, True, True)");
1294 auto const &local_geo = *
get_system().local_geo;
1297 throw std::runtime_error(
1298 "DipolarP3M: requires the regular or hybrid decomposition cell system");
1302 throw std::runtime_error(
1303 "DipolarP3M: does not work with the hybrid decomposition cell system, "
1304 "if using more than one MPI node");
1310 if (node_grid[0] < node_grid[1]
or node_grid[1] < node_grid[2]) {
1311 throw std::runtime_error(
1312 "DipolarP3M: node grid must be sorted, largest first");
1316template <
typename FloatType, Arch Architecture,
class FFTConfig>
1318 auto const &box_geo = *get_system().
box_geo;
1323 sanity_checks_boxl();
1324 calc_influence_function_force();
1325 calc_influence_function_energy();
1327#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
1334template <
typename FloatType, Arch Architecture,
class FFTConfig>
1337 auto const &box_geo = *get_system().
box_geo;
1338 auto const Ukp3m = calc_average_self_energy_k_space() * box_geo.volume();
1346template <
typename FloatType, Arch Architecture,
class FFTConfig>
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
Vector implementation and trait types for boost qvm interoperability.
Describes a cell structure / cell system.
auto const & get_unique_particles() const
auto get_scatter_torque()
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
TuningAlgorithm::Parameters get_time() override
DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
void on_solver_change() const override
void determine_mesh_limits() override
P3MParameters & get_params() override
std::optional< std::string > layer_correction_veto_r_cut(double) const override
void setup_logger(bool verbose) override
base_type::size_type size() const
void npt_add_virial_contribution(double energy)
std::shared_ptr< CellStructure > cell_structure
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
void zfill(std::size_t size)
Fill cache with zero-initialized data.
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
constexpr auto round_error_prec
Precision below which a double-precision float is assumed to be zero.
__device__ void vector_product(float const *a, float const *b, float *out)
static std::size_t count_magnetic_particles(ParticleRange const &particles)
P3M algorithm for long-range magnetic dipole-dipole interaction.
double dp3m_real_space_error(double box_size, double r_cut_iL, std::size_t n_c_part, double sum_q2, double alpha_L)
Calculate the value of the errors for the REAL part of the force in terms of the splitting parameter ...
double dp3m_rtbisection(double box_size, double r_cut_iL, std::size_t n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy)
Compute the value of alpha through a bisection method.
auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, double mesh_i, int cao, double alpha_L_i)
Tuning dipolar-P3M.
double dp3m_k_space_error(double box_size, int mesh, int cao, std::size_t n_c_part, double sum_q2, double alpha_L)
Calculate the k-space error of dipolar-P3M.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
Routines, row decomposition, data structures and communication for the 3D-FFT.
auto pad_with_zeros_discard_imag(std::span< T > cropped_array, Utils::Vector3i const &cropped_dim, Utils::Vector3i const &pad_left, Utils::Vector3i const &pad_right)
Pad a 3D matrix with zeros to restore halo regions.
auto extract_block(Container const &in_array, Utils::Vector3i const &dimensions, Utils::Vector3i const &start, Utils::Vector3i const &stop)
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
std::vector< FloatType > grid_influence_function_dipolar(P3MParameters const ¶ms, Utils::Vector3i const &n_start, Utils::Vector3i const &n_stop, Utils::Vector3d const &inv_box_l)
Map influence function over a grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, WeightsStorage< cao > const &weights, Kernel kernel)
P3M grid interpolation.
constexpr int p3m_min_cao
Minimal charge assignment order.
constexpr int p3m_max_cao
Maximal charge assignment order.
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
T product(Vector< T, N > const &v)
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
decltype(auto) integral_parameter(T i, Args &&...args)
Generate a call table for an integral non-type template parameter.
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
auto get_analytic_cotangent_sum_kernel(int cao)
Exports for the NpT code.
Common functions for dipolar and charge P3M.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
ESPRESSO_ATTR_ALWAYS_INLINE void kokkos_parallel_range_for(auto const &name, auto start, auto end, auto const &kernel)
Utils::Vector3i node_grid
double calc_surface_term(bool force_flag, bool energy_flag) override
void dipole_assign() override
void scaleby_box_l() override
double long_range_kernel(bool force_flag, bool energy_flag)
Compute the k-space part of forces and energies.
Base class for the magnetostatics P3M algorithm.
double sum_mu2
Sum of square of magnetic dipoles.
p3m_interpolation_cache inter_weights
std::size_t sum_dip_part
number of dipolar particles.
p3m_send_mesh< FloatType > halo_comm
double energy_correction
cached k-space self-energy correction
void resize_heffte_buffers()
struct DipolarP3MState::@1 heffte
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
void sanity_checks_cell_structure() const
P3MParameters const & dp3m_params
void sanity_checks_periodicity() const
void sanity_checks_node_grid() const
void recalc_ld_pos(P3MParameters const ¶ms)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
P3MParameters params
P3M base parameters.
P3MLocalMesh local_mesh
Local mesh geometry information for this MPI rank.
void operator()(auto &dp3m, auto &cell_structure)
void operator()(auto &dp3m, double prefac, int d_rs, CellStructure &cell_structure) const
void operator()(auto &dp3m, double prefac, int d_rs, CellStructure &cell_structure) const