ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
dp3m_heffte.impl.hpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2025 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include <config/config.hpp>
23
24#ifdef ESPRESSO_DP3M
25
27
30
31#include "magnetostatics/dp3m_heffte.hpp" // must be included after dipoles.hpp
32
33#include "fft/fft.hpp"
34#include "p3m/P3MFFT.hpp"
36#include "p3m/TuningLogger.hpp"
37#include "p3m/common.hpp"
40#include "p3m/interpolation.hpp"
41#include "p3m/math.hpp"
42
43#include "BoxGeometry.hpp"
44#include "LocalBox.hpp"
45#include "Particle.hpp"
46#include "PropagationMode.hpp"
49#include "communication.hpp"
50#include "errorhandling.hpp"
52#include "npt.hpp"
53#include "system/System.hpp"
54#include "tuning.hpp"
55
56#include <utils/Vector.hpp>
59#include <utils/math/sqr.hpp>
60
61#include <boost/mpi/collectives/all_reduce.hpp>
62#include <boost/mpi/collectives/reduce.hpp>
63
64#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
65#include <Kokkos_Core.hpp>
66#include <omp.h>
67#endif
68
69#include <algorithm>
70#include <array>
71#include <cstddef>
72#include <cstdio>
73#include <functional>
74#include <iterator>
75#include <numbers>
76#include <optional>
77#include <span>
78#include <sstream>
79#include <stdexcept>
80#include <tuple>
81#include <utility>
82#include <vector>
83
84#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
85#ifndef NDEBUG
86template <typename T>
87bool heffte_almost_equal(T const &value, T const &reference) {
88 auto const diff = std::abs(value - reference);
89 using FT = std::remove_cvref_t<decltype(diff)>;
90 auto constexpr atol = std::is_same_v<FT, float> ? FT{2e-4} : FT{1e-6};
91 auto constexpr rtol = std::is_same_v<FT, float> ? FT{5e-5} : FT{1e-5};
92 auto const non_zero = std::abs(reference) != FT{0};
93 return (diff < atol) or (non_zero and (diff / std::abs(reference) < rtol));
94}
95#endif // not NDEBUG
96#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
97
98template <typename FloatType, Arch Architecture, class FFTConfig>
99void DipolarP3MHeffte<FloatType, Architecture,
101 auto local_n = std::size_t{0u};
102 double local_mu2 = 0.;
103
104 for (auto const &p : get_system().cell_structure->local_particles()) {
105 if (p.dipm() != 0.) {
106 local_mu2 += p.calc_dip().norm2();
107 local_n++;
108 }
109 }
110
111 boost::mpi::all_reduce(comm_cart, local_mu2, dp3m.sum_mu2, std::plus<>());
112 boost::mpi::all_reduce(comm_cart, local_n, dp3m.sum_dip_part, std::plus<>());
113}
114
115inline double dp3m_k_space_error(double box_size, int mesh, int cao,
116 std::size_t n_c_part, double sum_q2,
117 double alpha_L);
118
119inline double dp3m_real_space_error(double box_size, double r_cut_iL,
120 std::size_t n_c_part, double sum_q2,
121 double alpha_L);
122
123/** Compute the value of alpha through a bisection method.
124 * Based on eq. (33) @cite wang01a.
125 */
126double dp3m_rtbisection(double box_size, double r_cut_iL, std::size_t n_c_part,
127 double sum_q2, double x1, double x2, double xacc,
128 double tuned_accuracy);
129
130template <typename FloatType, Arch Architecture, class FFTConfig>
131double DipolarP3MHeffte<FloatType, Architecture,
132 FFTConfig>::calc_average_self_energy_k_space() const {
133 auto const &box_geo = *get_system().box_geo;
134 auto const node_phi =
135 grid_influence_function_self_energy<FloatType, P3M_BRILLOUIN>(
136 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
137
138 double phi = 0.;
139 boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0);
140 phi /= 3. * box_geo.length()[0] * Utils::int_pow<3>(dp3m.params.mesh[0]);
141 return phi * std::numbers::pi;
142}
143
144template <typename FloatType, Arch Architecture, class FFTConfig>
146 assert(dp3m.params.mesh >= Utils::Vector3i::broadcast(1));
147 assert(dp3m.params.cao >= p3m_min_cao and dp3m.params.cao <= p3m_max_cao);
148 assert(dp3m.params.alpha > 0.);
149
150 auto const &system = get_system();
151 auto const &box_geo = *system.box_geo;
152 auto const &local_geo = *system.local_geo;
153 auto const verlet_skin = system.cell_structure->get_verlet_skin();
154
155 dp3m.params.cao3 = Utils::int_pow<3>(dp3m.params.cao);
156 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
157
158 assert(dp3m.fft);
159 dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo, verlet_skin, 0.);
160 dp3m.fft_buffers->init_halo();
161 dp3m.fft->init(dp3m.params);
162 dp3m.mesh.ks_pnum = dp3m.fft->get_ks_pnum();
163 dp3m.fft_buffers->init_meshes(dp3m.fft->get_ca_mesh_size());
164 dp3m.update_mesh_views();
165#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
166 dp3m.heffte.world_size = comm_cart.size();
167 dp3m.heffte.fft = std::make_shared<P3MFFT<FloatType, FFTConfig>>(
168 ::comm_cart, dp3m.params.mesh, dp3m.local_mesh.ld_no_halo,
169 dp3m.local_mesh.ur_no_halo, ::communicator.node_grid);
170 dp3m.resize_heffte_buffers();
171#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
172 dp3m.calc_differential_operator();
173
174 /* fix box length dependent constants */
175 scaleby_box_l();
176
178}
179
180namespace {
181template <int cao> struct AssignDipole {
182#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
183 void operator()(auto &dp3m, auto &cell_structure) {
184 using DipolarP3MState = std::remove_reference_t<decltype(dp3m)>;
185 using value_type = DipolarP3MState::value_type;
186 auto constexpr memory_order = Utils::MemoryOrder::ROW_MAJOR;
187 auto const &aosoa = cell_structure.get_aosoa();
188 auto const &unique_particles = cell_structure.get_unique_particles();
189 auto const n_part = cell_structure.count_local_particles();
190 dp3m.inter_weights.zfill(n_part); // allocate buffer for parallel write
192 "InterpolateDipoles", std::size_t{0u}, n_part, [&](auto p_index) {
193 auto const tid = omp_get_thread_num();
194 auto const p_pos = aosoa.get_vector_at(aosoa.position, p_index);
195 auto const dip = unique_particles.at(p_index)->calc_dip();
196 auto const weights =
197 p3m_calculate_interpolation_weights<cao, memory_order>(
198 p_pos, dp3m.params.ai, dp3m.local_mesh);
199 dp3m.inter_weights.store_at(p_index, weights);
201 dp3m.local_mesh, weights, [&dip, tid, &dp3m](int ind, double w) {
202 dp3m.rs_fields_kokkos(tid, 0u, ind) += value_type(w * dip[0u]);
203 dp3m.rs_fields_kokkos(tid, 1u, ind) += value_type(w * dip[1u]);
204 dp3m.rs_fields_kokkos(tid, 2u, ind) += value_type(w * dip[2u]);
205 });
206 });
207 Kokkos::fence();
208 using execution_space = Kokkos::DefaultExecutionSpace;
209 int num_threads = execution_space().concurrency();
210 Kokkos::RangePolicy<execution_space> policy(std::size_t{0},
211 dp3m.local_mesh.size);
212 Kokkos::parallel_for("ReduceInterpolatedDipoles", policy,
213 [&dp3m, num_threads](std::size_t const i) {
214 for (int dir = 0; dir < 3; ++dir) {
215 value_type acc{};
216 for (int tid = 0; tid < num_threads; ++tid) {
217 acc += dp3m.rs_fields_kokkos(tid, dir, i);
218 }
219 dp3m.mesh.rs_fields[dir][i] += acc;
220#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
221 dp3m.heffte.rs_dipole_density[dir][i] += acc;
222#endif
223 }
224 });
225 Kokkos::fence();
226 }
227#else // ESPRESSO_SHARED_MEMORY_PARALLELISM
228 void operator()(auto &dp3m, Utils::Vector3d const &real_pos,
229 Utils::Vector3d const &dip) const {
230 using DipolarP3MState = std::remove_reference_t<decltype(dp3m)>;
231 using value_type = DipolarP3MState::value_type;
232 auto constexpr memory_order = Utils::MemoryOrder::ROW_MAJOR;
233 auto const weights = p3m_calculate_interpolation_weights<cao, memory_order>(
234 real_pos, dp3m.params.ai, dp3m.local_mesh);
235 p3m_interpolate<cao>(
236 dp3m.local_mesh, weights, [&dip, &dp3m](int ind, double w) {
237 dp3m.mesh.rs_fields[0u][ind] += value_type(w * dip[0u]);
238 dp3m.mesh.rs_fields[1u][ind] += value_type(w * dip[1u]);
239 dp3m.mesh.rs_fields[2u][ind] += value_type(w * dip[2u]);
240#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
241 dp3m.heffte.rs_dipole_density[0u][ind] += value_type(w * dip[0u]);
242 dp3m.heffte.rs_dipole_density[1u][ind] += value_type(w * dip[1u]);
243 dp3m.heffte.rs_dipole_density[2u][ind] += value_type(w * dip[2u]);
244#endif
245 });
246
247 dp3m.inter_weights.template store<cao>(weights);
248 }
249#endif // ESPRESSO_SHARED_MEMORY_PARALLELISM
250};
251} // namespace
252
253template <typename FloatType, Arch Architecture, class FFTConfig>
255 prepare_fft_mesh();
256
257#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
258 Utils::integral_parameter<int, AssignDipole, p3m_min_cao, p3m_max_cao>(
259 dp3m.params.cao, dp3m, *get_system().cell_structure);
260#else // ESPRESSO_SHARED_MEMORY_PARALLELISM
261 for (auto const &p : get_system().cell_structure->local_particles()) {
262 if (p.dipm() != 0.) {
263 Utils::integral_parameter<int, AssignDipole, p3m_min_cao, p3m_max_cao>(
264 dp3m.params.cao, dp3m, p.pos(), p.calc_dip());
265 }
266 }
267#endif // ESPRESSO_SHARED_MEMORY_PARALLELISM
268}
269
270namespace {
271template <int cao> struct AssignTorques {
272 void operator()(auto &dp3m, double prefac, int d_rs,
273 CellStructure &cell_structure) const {
274
275 assert(cao == dp3m.inter_weights.cao());
276
277 auto const kernel = [d_rs, &dp3m](auto const &pref, auto &p_torque,
278 std::size_t p_index) {
279 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
280 Utils::Vector3d E{};
281 p3m_interpolate(dp3m.local_mesh, weights,
282 [&E, &dp3m, d_rs](int ind, double w) {
283 // heFFTe data: dp3m.heffte.ks_scalar.real()
284 E[d_rs] += w * double(dp3m.mesh.rs_scalar[ind]);
285 });
286
287 auto const torque = vector_product(pref, E);
288#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
289 auto const thread_id = omp_get_thread_num();
290 p_torque(p_index, thread_id, 0) -= torque[0];
291 p_torque(p_index, thread_id, 1) -= torque[1];
292 p_torque(p_index, thread_id, 2) -= torque[2];
293#else
294 p_torque -= torque;
295#endif
296 };
297
298#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
299 auto const n_part = dp3m.inter_weights.size();
300 auto const &unique_particles = cell_structure.get_unique_particles();
301 auto &local_torque = cell_structure.get_local_torque();
303 "AssignTorques", std::size_t{0u}, n_part, [&](std::size_t p_index) {
304 auto const &p = *unique_particles.at(p_index);
305 if (p.dipm() != 0.) {
306 kernel(p.calc_dip() * prefac, local_torque, p_index);
307 }
308 });
309#else // ESPRESSO_SHARED_MEMORY_PARALLELISM
310 /* magnetic particle index */
311 auto p_index = std::size_t{0ul};
312
313 for (auto &p : cell_structure.local_particles()) {
314 if (p.dipm() != 0.) {
315 kernel(p.calc_dip() * prefac, p.torque(), p_index);
316 ++p_index;
317 }
318 }
319#endif // ESPRESSO_SHARED_MEMORY_PARALLELISM
320 }
321};
322
323template <int cao> struct AssignForcesDip {
324 void operator()(auto &dp3m, double prefac, int d_rs,
325 CellStructure &cell_structure) const {
326
327 assert(cao == dp3m.inter_weights.cao());
328
329 auto const kernel = [d_rs, &dp3m](auto const &pref, auto &p_force,
330 std::size_t p_index) {
331 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
332
333 Utils::Vector3d E{};
334 p3m_interpolate(dp3m.local_mesh, weights, [&E, &dp3m](int ind, double w) {
335 // heFFTe data: dp3m.heffte.rs_B_fields
336 E[0u] += w * double(dp3m.mesh.rs_fields[0u][ind]);
337 E[1u] += w * double(dp3m.mesh.rs_fields[1u][ind]);
338 E[2u] += w * double(dp3m.mesh.rs_fields[2u][ind]);
339 });
340
341#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
342 auto const thread_id = omp_get_thread_num();
343 p_force(p_index, thread_id, d_rs) += pref * E;
344#else
345 p_force[d_rs] += pref * E;
346#endif
347 };
348
349#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
350 auto const n_part = dp3m.inter_weights.size();
351 auto const &unique_particles = cell_structure.get_unique_particles();
352 auto &local_force = cell_structure.get_local_force();
354 "AssignForcesDip", std::size_t{0u}, n_part, [&](std::size_t p_index) {
355 auto const &p = *unique_particles.at(p_index);
356 if (p.dipm() != 0.) {
357 kernel(p.calc_dip() * prefac, local_force, p_index);
358 }
359 });
360#else // ESPRESSO_SHARED_MEMORY_PARALLELISM
361 /* magnetic particle index */
362 auto p_index = std::size_t{0ul};
363
364 for (auto &p : cell_structure.local_particles()) {
365 if (p.dipm() != 0.) {
366 kernel(p.calc_dip() * prefac, p.force(), p_index);
367 ++p_index;
368 }
369 }
370#endif // ESPRESSO_SHARED_MEMORY_PARALLELISM
371 }
372};
373} // namespace
374
375#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
376template <typename FloatType, class FFTConfig>
378 auto const rs_array_size =
379 static_cast<std::size_t>(Utils::product(this->local_mesh.dim));
380 auto const rs_array_size_no_halo =
381 static_cast<std::size_t>(Utils::product(this->local_mesh.dim_no_halo));
382 auto const fft_mesh_size =
383 static_cast<std::size_t>(Utils::product(heffte.fft->ks_local_size()));
384 for (auto d : {0u, 1u, 2u}) {
385 heffte.rs_dipole_density[d].resize(rs_array_size);
386 heffte.ks_dipole_density[d].resize(fft_mesh_size);
387 heffte.rs_B_fields[d].resize(rs_array_size);
388 heffte.rs_B_fields_no_halo[d].resize(rs_array_size_no_halo);
389 }
390 heffte.ks_B_field_storage.resize(fft_mesh_size);
391 heffte.ks_scalar.resize(fft_mesh_size);
392}
393#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
394
395template <typename FloatType, Arch Architecture, class FFTConfig>
397 bool force_flag, bool energy_flag) {
398 /* k-space energy */
399 double energy = 0.;
400 auto const &system = get_system();
401 auto const &box_geo = *system.box_geo;
402 auto const dipole_prefac = prefactor / Utils::product(dp3m.params.mesh);
403#ifdef ESPRESSO_NPT
404 auto const npt_flag = force_flag and system.has_npt_enabled();
405#else
406 auto constexpr npt_flag = false;
407#endif
408
409 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
410 auto local_index = Utils::Vector3i::broadcast(0);
411#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
412 auto constexpr r2c_dir = FFTConfig::r2c_dir;
413 auto const rs_local_size = dp3m.heffte.fft->rs_local_size();
414 auto const local_size = dp3m.heffte.fft->ks_local_size();
415 auto local_size_full = local_size;
416 if constexpr (FFTConfig::use_r2c) {
417 local_size_full[r2c_dir] -= 1;
418 local_size_full[r2c_dir] *= 2;
419 }
420 auto const local_origin = dp3m.heffte.fft->ks_local_ld_index();
421#ifndef NDEBUG
422 auto const line_stride = local_size_full[0];
423 auto const plane_stride = local_size_full[0] * local_size_full[0];
424#endif
425 auto const &global_size = dp3m.params.mesh;
426 auto const cutoff_left = 1 - local_origin[r2c_dir];
427 auto const cutoff_right = global_size[r2c_dir] / 2 - local_origin[r2c_dir];
428 auto &short_dim = local_index[r2c_dir];
429 dp3m.resize_heffte_buffers();
430#endif
431
432 if (dp3m.sum_mu2 > 0.) {
433 dipole_assign();
434 dp3m.fft_buffers->perform_vector_halo_gather();
435 for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
436 dp3m.fft->forward_fft(rs_mesh);
437 }
438 dp3m.update_mesh_views();
439
440#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
441 if (dp3m.heffte.world_size == 1) {
442 // halo communication of real space dipoles density
443 std::array<FloatType *, 3u> rs_fields = {
444 {dp3m.heffte.rs_dipole_density[0u].data(),
445 dp3m.heffte.rs_dipole_density[1u].data(),
446 dp3m.heffte.rs_dipole_density[2u].data()}};
447 dp3m.heffte.halo_comm.gather_grid(::comm_cart, rs_fields,
448 dp3m.local_mesh.dim);
449
450 for (auto dir : {0u, 1u, 2u}) {
451 // get real-space dipoles density without ghost layers
452 auto rs_field_no_halo = extract_block<Utils::MemoryOrder::ROW_MAJOR,
453 FFTConfig::r_space_order>(
454 dp3m.heffte.rs_dipole_density[dir], dp3m.local_mesh.dim,
455 dp3m.local_mesh.n_halo_ld,
456 dp3m.local_mesh.dim - dp3m.local_mesh.n_halo_ur);
457 // re-order data in row-major
458 std::vector<FloatType> rs_field_no_halo_reorder;
459 rs_field_no_halo_reorder.resize(rs_field_no_halo.size());
460 std::size_t index_row_major = 0u;
461 for_each_3d_order<FFTConfig::k_space_order>(
462 mesh_start, rs_local_size, local_index, [&]() {
463 auto constexpr KX = 1, KY = 2, KZ = 0;
464 auto const index = local_index[KZ] +
465 rs_local_size[0] * local_index[KY] +
466 Utils::sqr(rs_local_size[0]) * local_index[KX];
467 rs_field_no_halo_reorder[index_row_major] =
468 rs_field_no_halo[index];
469 ++index_row_major;
470 });
471 dp3m.heffte.fft->forward(rs_field_no_halo_reorder.data(),
472 dp3m.heffte.ks_dipole_density[dir].data());
473#ifndef NDEBUG
474 if (not dp3m.params.tuning) {
475 std::size_t index_row_major_r2c = 0u;
476 for_each_3d_order<FFTConfig::k_space_order>(
477 mesh_start, local_size, local_index, [&]() {
478 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
479 auto constexpr KX = 2, KY = 0, KZ = 1;
480 auto const index_fft_legacy = local_index[KZ] +
481 line_stride * local_index[KY] +
482 plane_stride * local_index[KX];
483 auto const old_value = std::complex<FloatType>{
484 dp3m.mesh.rs_fields[dir][2 * index_fft_legacy],
485 dp3m.mesh.rs_fields[dir][2 * index_fft_legacy + 1]};
486 auto const &new_value =
487 dp3m.heffte.ks_dipole_density[dir][index_row_major_r2c];
488 assert(heffte_almost_equal(new_value, old_value));
489 ++index_row_major_r2c;
490 }
491 });
492 }
493#endif // not NDEBUG
494 }
495 }
496#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
497 }
498
499 /* === k-space energy calculation === */
500 if (energy_flag or npt_flag) {
501 /*********************
502 Dipolar energy
503 **********************/
504 if (dp3m.sum_mu2 > 0.) {
505 /* i*k differentiation for dipolar gradients:
506 * |(\Fourier{\vect{mu}}(k)\cdot \vect{k})|^2 */
507
508 auto index = std::size_t(0u);
509 auto it_energy = dp3m.g_energy.begin();
510 auto node_energy = 0.;
511 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
512 auto constexpr KX = 2, KY = 0, KZ = 1;
513 auto const shift = local_index + dp3m.mesh.start;
514 auto const &d_op = dp3m.d_op[0u];
515 auto const &mesh_dip = dp3m.mesh.rs_fields;
516 // Re(mu)*k
517 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
518 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
519 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
520 ++index;
521 // Im(mu)*k
522 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
523 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
524 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
525 ++index;
526 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
527 std::advance(it_energy, 1);
528 });
529#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
530 if (dp3m.heffte.world_size == 1) {
531 [[maybe_unused]] auto node_energy_heffte = 0.;
532 std::size_t index_row_major_r2c = 0u;
533 for_each_3d_order<FFTConfig::k_space_order>(
534 mesh_start, local_size, local_index, [&]() {
535 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
536 auto const global_index = local_origin + local_index;
537 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
538 auto const cell_field =
539 mesh_dip[0u][index_row_major_r2c] *
540 FloatType(dp3m.d_op[0u][global_index[1u]]) +
541 mesh_dip[1u][index_row_major_r2c] *
542 FloatType(dp3m.d_op[1u][global_index[2u]]) +
543 mesh_dip[2u][index_row_major_r2c] *
544 FloatType(dp3m.d_op[2u][global_index[0u]]);
545 auto cell_energy = static_cast<double>(
546 dp3m.heffte.g_energy[index_row_major_r2c] *
547 std::norm(cell_field));
548 if (FFTConfig::use_r2c and (short_dim >= cutoff_left and
549 short_dim <= cutoff_right - 1)) {
550 // k-space symmetry: double counting except in the first and
551 // last planes of the short dimension; although the wavevector
552 // points in the opposite direction in the redundant region of
553 // k-space, the product of two components of the wavevector
554 // cancels out the negative sign
555 cell_energy *= 2.;
556 }
557 node_energy_heffte += cell_energy;
558 }
559 ++index_row_major_r2c;
560 });
561 assert(heffte_almost_equal(static_cast<FloatType>(node_energy_heffte),
562 static_cast<FloatType>(node_energy)));
563 }
564#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
565 node_energy *= dipole_prefac * std::numbers::pi * box_geo.length_inv()[0];
566 boost::mpi::reduce(comm_cart, node_energy, energy, std::plus<>(), 0);
567
568 if (dp3m.energy_correction == 0.)
569 calc_energy_correction();
570
571 if (this_node == 0) {
572 /* self energy correction */
573 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
574 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
575
576 /* dipolar energy correction due to systematic Madelung-self effects */
577 energy += prefactor * dp3m.energy_correction / box_geo.volume();
578 }
579 }
580 } // if (energy_flag)
581
582 /* === k-space force calculation === */
583 if (force_flag) {
584 /****************************
585 * DIPOLAR TORQUES (k-space)
586 ****************************/
587 if (dp3m.sum_mu2 > 0.) {
588 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u];
589 dp3m.ks_scalar.resize(dp3m.local_mesh.size);
590 /* fill in ks_scalar array for torque calculation */
591 {
592 auto index{std::size_t(0u)};
593 auto it_energy = dp3m.g_energy.begin();
594 auto it_ks_scalar = dp3m.ks_scalar.begin();
595 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() mutable {
596 auto constexpr KX = 2, KY = 0, KZ = 1;
597 auto const shift = local_index + dp3m.mesh.start;
598 auto const &d_op = dp3m.d_op[0u];
599 auto const &mesh_dip = dp3m.mesh.rs_fields;
600 // Re(mu)*k
601 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
602 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
603 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
604 ++index;
605 // Im(mu)*k
606 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
607 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
608 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
609 ++index;
610 *it_ks_scalar = *it_energy * std::complex<FloatType>{re, im};
611 std::advance(it_energy, 1);
612 std::advance(it_ks_scalar, 1);
613 });
614 }
615#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
616 if (dp3m.heffte.world_size == 1) {
617 std::size_t index_row_major_r2c = 0u;
618 for_each_3d_order<FFTConfig::k_space_order>(
619 mesh_start, local_size, local_index, [&]() {
620 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
621 auto const global_index = local_origin + local_index;
622 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
623 dp3m.heffte.ks_scalar[index_row_major_r2c] =
624 dp3m.heffte.g_energy[index_row_major_r2c] *
625 (mesh_dip[0u][index_row_major_r2c] *
626 FloatType(dp3m.d_op[0u][global_index[1u]]) +
627 mesh_dip[1u][index_row_major_r2c] *
628 FloatType(dp3m.d_op[1u][global_index[2u]]) +
629 mesh_dip[2u][index_row_major_r2c] *
630 FloatType(dp3m.d_op[2u][global_index[0u]]));
631#ifndef NDEBUG
632 if (not dp3m.params.tuning) {
633 auto constexpr KX = 2, KY = 0, KZ = 1;
634 auto const index_fft_legacy = local_index[KZ] +
635 line_stride * local_index[KY] +
636 plane_stride * local_index[KX];
637 assert(heffte_almost_equal(
638 dp3m.heffte.ks_scalar[index_row_major_r2c],
639 dp3m.ks_scalar[index_fft_legacy]));
640 }
641#endif // not NDEBUG
642 ++index_row_major_r2c;
643 }
644 });
645 }
646#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
647
648 /* Torque component loop */
649 for (int d = 0; d < 3; d++) {
650 auto it_ks_scalar = dp3m.ks_scalar.begin();
651 auto index = 0u;
652 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
653 auto const &offset = dp3m.mesh.start;
654 auto const &d_op = dp3m.d_op[0u];
655 auto const d_op_val = FloatType(d_op[local_index[d] + offset[d]]);
656 auto const &value = *it_ks_scalar;
657 dp3m.mesh.rs_scalar[index] = d_op_val * value.real();
658 ++index;
659 dp3m.mesh.rs_scalar[index] = d_op_val * value.imag();
660 ++index;
661 std::advance(it_ks_scalar, 1);
662 });
663#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
664 if (dp3m.heffte.world_size == 1) {
665 unsigned int constexpr d_ks[3] = {2u, 0u, 1u};
666 std::size_t index_row_major_r2c = 0u;
667 for_each_3d_order<FFTConfig::k_space_order>(
668 mesh_start, local_size, local_index, [&]() {
669 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
670 auto const global_index = local_origin + local_index;
671 auto const d_op_val =
672 FloatType(dp3m.d_op[d][global_index[d_ks[d]]]);
673 dp3m.heffte.ks_B_field_storage[index_row_major_r2c] =
674 d_op_val * dp3m.heffte.ks_scalar[index_row_major_r2c];
675#ifndef NDEBUG
676 if (not dp3m.params.tuning) {
677 auto constexpr KX = 2, KY = 0, KZ = 1;
678 auto const index_fft_legacy =
679 local_index[KZ] + line_stride * local_index[KY] +
680 plane_stride * local_index[KX];
681 auto const old_value = std::complex<FloatType>{
682 dp3m.mesh.rs_scalar[2 * index_fft_legacy],
683 dp3m.mesh.rs_scalar[2 * index_fft_legacy + 1]};
684 auto const &new_value =
685 dp3m.heffte.ks_B_field_storage[index_row_major_r2c];
686 assert(heffte_almost_equal(new_value, old_value));
687 }
688#endif // not NDEBUG
689 ++index_row_major_r2c;
690 }
691 });
692 dp3m.heffte.fft->backward(dp3m.heffte.ks_B_field_storage.data(),
693 dp3m.heffte.rs_B_fields_no_halo[d].data());
694 // pad zeros around the B-field in real space for ghost layers
695 dp3m.heffte.rs_B_fields[d] =
696 pad_with_zeros_discard_imag<FFTConfig::r_space_order,
698 std::span(dp3m.heffte.rs_B_fields_no_halo[d]),
699 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
700 dp3m.local_mesh.n_halo_ur);
701 // communicate ghost layers of the B-field in real space
702 dp3m.heffte.halo_comm.spread_grid(::comm_cart,
703 dp3m.heffte.rs_B_fields[d].data(),
704 dp3m.local_mesh.dim);
705 }
706#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
707 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
708 // communicate ghost layers of the B-field in real space
709 dp3m.fft_buffers->perform_scalar_halo_spread();
710 // assign torque component from mesh to particle
711 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
712 Utils::integral_parameter<int, AssignTorques, p3m_min_cao, p3m_max_cao>(
713 dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs,
714 *system.cell_structure);
715 }
716
717 /***************************
718 DIPOLAR FORCES (k-space)
719 ****************************/
720 // Compute forces after torques because the algorithm below overwrites the
721 // grids dp3m.mesh.rs_fields !
722 // Note: I'll do here 9 inverse FFTs. By symmetry, we can reduce this
723 // number to 6 !
724 /* fill in ks_scalar array for force calculation */
725 {
726 auto it_force = dp3m.g_force.begin();
727 auto it_ks_scalar = dp3m.ks_scalar.begin();
728 std::size_t index = 0u;
729 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
730 auto constexpr KX = 2, KY = 0, KZ = 1;
731 auto const shift = local_index + dp3m.mesh.start;
732 auto const &d_op = dp3m.d_op[0u];
733 auto const &mesh_dip = dp3m.mesh.rs_fields;
734 // Re(mu)*k
735 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
736 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
737 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
738 ++index;
739 // Im(mu)*k
740 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
741 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
742 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
743 ++index;
744 *it_ks_scalar = {*it_force * im, *it_force * (-re)};
745 std::advance(it_force, 1);
746 std::advance(it_ks_scalar, 1);
747 });
748 }
749
750#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
751 if (dp3m.heffte.world_size == 1) {
752 std::size_t index_row_major_r2c = 0u;
753 for_each_3d_order<FFTConfig::k_space_order>(
754 mesh_start, local_size, local_index, [&]() {
755 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
756 auto const global_index = local_origin + local_index;
757 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
758 auto const value =
759 dp3m.heffte.g_force[index_row_major_r2c] *
760 (mesh_dip[0u][index_row_major_r2c] *
761 FloatType(dp3m.d_op[0u][global_index[1u]]) +
762 mesh_dip[1u][index_row_major_r2c] *
763 FloatType(dp3m.d_op[1u][global_index[2u]]) +
764 mesh_dip[2u][index_row_major_r2c] *
765 FloatType(dp3m.d_op[2u][global_index[0u]]));
766 dp3m.heffte.ks_scalar[index_row_major_r2c] = {value.imag(),
767 -value.real()};
768#ifndef NDEBUG
769 if (not dp3m.params.tuning) {
770 auto constexpr KX = 2, KY = 0, KZ = 1;
771 auto const index_fft_legacy = local_index[KZ] +
772 line_stride * local_index[KY] +
773 plane_stride * local_index[KX];
774 assert(heffte_almost_equal(
775 dp3m.heffte.ks_scalar[index_row_major_r2c],
776 dp3m.ks_scalar[index_fft_legacy]));
777 }
778#endif // not NDEBUG
779 ++index_row_major_r2c;
780 }
781 });
782 }
783#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
784
785 /* Force component loop */
786 for (int d = 0; d < 3; d++) {
787 std::size_t index = 0u;
788 auto it_ks_scalar = dp3m.ks_scalar.begin();
789 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
790 auto constexpr KX = 2, KY = 0, KZ = 1;
791 auto const shift = local_index + dp3m.mesh.start;
792 auto const &d_op = dp3m.d_op[0u];
793 auto const &mesh_dip = dp3m.mesh.rs_fields;
794 auto const d_op_val = FloatType(d_op[shift[d]]);
795 auto const f = *it_ks_scalar * d_op_val;
796 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.real();
797 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.real();
798 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.real();
799 ++index;
800 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.imag();
801 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.imag();
802 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.imag();
803 ++index;
804 std::advance(it_ks_scalar, 1);
805 });
806
807#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
808 if (dp3m.heffte.world_size == 1) {
809 std::size_t index_row_major_r2c = 0u;
810 for_each_3d_order<FFTConfig::k_space_order>(
811 mesh_start, local_size, local_index, [&]() {
812 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
813 auto constexpr KX = 1, KY = 2, KZ = 0;
814 auto const global_index = local_origin + local_index;
815 auto const remapped_index =
816 local_index[KZ] + local_index[KY] * local_size[KZ] +
817 local_index[KX] * local_size[KZ] * local_size[KY];
818 auto const d_op_val =
819 FloatType(dp3m.d_op[d][global_index[d]]);
820 auto &mesh_dip = dp3m.heffte.ks_dipole_density;
821 mesh_dip[0u][index_row_major_r2c] =
822 FloatType(dp3m.d_op[d][global_index[2u]]) * d_op_val *
823 dp3m.heffte.ks_scalar[remapped_index];
824 mesh_dip[1u][index_row_major_r2c] =
825 FloatType(dp3m.d_op[d][global_index[0u]]) * d_op_val *
826 dp3m.heffte.ks_scalar[remapped_index];
827 mesh_dip[2u][index_row_major_r2c] =
828 FloatType(dp3m.d_op[d][global_index[1u]]) * d_op_val *
829 dp3m.heffte.ks_scalar[remapped_index];
830#ifndef NDEBUG
831 if (not FFTConfig::use_r2c and not dp3m.params.tuning) {
832 auto const index_fft_legacy = local_index[2] +
833 line_stride * local_index[1] +
834 plane_stride * local_index[0];
835 for (int j = 0; j < 3; ++j) {
836 auto const old_value = std::complex<FloatType>{
837 dp3m.mesh.rs_fields[j][2 * index_fft_legacy],
838 dp3m.mesh.rs_fields[j][2 * index_fft_legacy + 1]};
839 auto const &new_value = mesh_dip[j][index_row_major_r2c];
840 assert(heffte_almost_equal(new_value, old_value));
841 }
842 }
843#endif // not NDEBUG
844 ++index_row_major_r2c;
845 }
846 });
847 for (int dir = 0u; dir < 3u; ++dir) {
848 dp3m.heffte.fft->backward(
849 dp3m.heffte.ks_dipole_density[dir].data(),
850 dp3m.heffte.rs_B_fields_no_halo[dir].data());
851 // pad zeros around the B-field in real space for ghost layers
852 dp3m.heffte.rs_B_fields[d] =
853 pad_with_zeros_discard_imag<FFTConfig::r_space_order,
855 std::span(dp3m.heffte.rs_B_fields_no_halo[dir]),
856 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
857 dp3m.local_mesh.n_halo_ur);
858 }
859 // communicate ghost layers of the B-field in real space
860 auto rs_fields =
861 std::array<FloatType *, 3u>{{dp3m.heffte.rs_B_fields[0u].data(),
862 dp3m.heffte.rs_B_fields[1u].data(),
863 dp3m.heffte.rs_B_fields[2u].data()}};
864 dp3m.heffte.halo_comm.spread_grid(::comm_cart, rs_fields,
865 dp3m.local_mesh.dim);
866 }
867#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
868 for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
869 dp3m.fft->backward_fft(rs_mesh);
870 }
871 // communicate ghost layers of the B-field in real space
872 dp3m.fft_buffers->perform_vector_halo_spread();
873 // assign force component from mesh to particle
874 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
875 Utils::integral_parameter<int, AssignForcesDip, p3m_min_cao,
877 dp3m.params.cao, dp3m, dipole_prefac * Utils::sqr(wavenumber), d_rs,
878 *system.cell_structure);
879 }
880 } /* if (dp3m.sum_mu2 > 0) */
881 } /* if (force_flag) */
882
883 if (dp3m.params.epsilon != P3M_EPSILON_METALLIC) {
884 auto const surface_term =
885 calc_surface_term(force_flag, energy_flag or npt_flag);
886 if (this_node == 0) {
887 energy += surface_term;
888 }
889 }
890#ifdef ESPRESSO_NPT
891 if (npt_flag) {
893 }
894#endif
895 if (not energy_flag) {
896 energy = 0.;
897 }
898
899 return energy;
900}
901
902template <typename FloatType, Arch Architecture, class FFTConfig>
904 bool force_flag, bool energy_flag) {
905 auto const &system = get_system();
906 auto const &box_geo = *system.box_geo;
907 auto const particles = system.cell_structure->local_particles();
908 auto const pref = prefactor * 4. * std::numbers::pi / box_geo.volume() /
909 (2. * dp3m.params.epsilon + 1.);
910 auto const n_local_part = particles.size();
911
912 // We put all the dipolar momenta in a the arrays mx,my,mz according to the
913 // id-number of the particles
914 std::vector<double> mx(n_local_part);
915 std::vector<double> my(n_local_part);
916 std::vector<double> mz(n_local_part);
917
918 std::size_t ip = 0u;
919 for (auto const &p : particles) {
920 auto const dip = p.calc_dip();
921 mx[ip] = dip[0u];
922 my[ip] = dip[1u];
923 mz[ip] = dip[2u];
924 ip++;
925 }
926
927 // we will need the sum of all dipolar momenta vectors
928 auto local_dip = Utils::Vector3d{};
929 for (std::size_t i = 0u; i < n_local_part; i++) {
930 local_dip[0u] += mx[i];
931 local_dip[1u] += my[i];
932 local_dip[2u] += mz[i];
933 }
934 auto const box_dip =
935 boost::mpi::all_reduce(comm_cart, local_dip, std::plus<>());
936
937 double energy = 0.;
938 if (energy_flag) {
939 double sum_e = 0.;
940 for (std::size_t i = 0u; i < n_local_part; i++) {
941 sum_e += mx[i] * box_dip[0] + my[i] * box_dip[1] + mz[i] * box_dip[2];
942 }
943 energy =
944 0.5 * pref * boost::mpi::all_reduce(comm_cart, sum_e, std::plus<>());
945 }
946
947 if (force_flag) {
948
949 std::vector<double> sumix(n_local_part);
950 std::vector<double> sumiy(n_local_part);
951 std::vector<double> sumiz(n_local_part);
952
953 for (std::size_t i = 0u; i < n_local_part; i++) {
954 sumix[i] = my[i] * box_dip[2u] - mz[i] * box_dip[1u];
955 sumiy[i] = mz[i] * box_dip[0u] - mx[i] * box_dip[2u];
956 sumiz[i] = mx[i] * box_dip[1u] - my[i] * box_dip[0u];
957 }
958
959 ip = 0u;
960 for (auto &p : particles) {
961 auto &torque = p.torque();
962 torque[0u] -= pref * sumix[ip];
963 torque[1u] -= pref * sumiy[ip];
964 torque[2u] -= pref * sumiz[ip];
965 ip++;
966 }
967 }
968
969 return energy;
970}
971
972template <typename FloatType, Arch Architecture, class FFTConfig>
973void DipolarP3MHeffte<FloatType, Architecture,
974 FFTConfig>::calc_influence_function_force() {
975 dp3m.g_force = grid_influence_function_dipolar<FloatType, 3, P3M_BRILLOUIN,
976 FFTConfig::k_space_order>(
977 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
978 get_system().box_geo->length_inv());
979#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
980 if (dp3m.heffte.world_size == 1) {
981 dp3m.heffte.g_force =
983 FFTConfig::k_space_order>(
984 dp3m.params, dp3m.heffte.fft->ks_local_ld_index(),
985 dp3m.heffte.fft->ks_local_ur_index(),
986 get_system().box_geo->length_inv());
987 if constexpr (FFTConfig::use_r2c) {
988 influence_function_r2c<FFTConfig::r2c_dir>(
989 dp3m.heffte.g_force, dp3m.params.mesh,
990 dp3m.heffte.fft->ks_local_size(),
991 dp3m.heffte.fft->ks_local_ld_index());
992 }
993 }
994#endif
995}
996
997template <typename FloatType, Arch Architecture, class FFTConfig>
998void DipolarP3MHeffte<FloatType, Architecture,
999 FFTConfig>::calc_influence_function_energy() {
1000 dp3m.g_energy = grid_influence_function_dipolar<FloatType, 2, P3M_BRILLOUIN,
1001 FFTConfig::k_space_order>(
1002 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
1003 get_system().box_geo->length_inv());
1004#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
1005 if (dp3m.heffte.world_size == 1) {
1006 dp3m.heffte.g_energy =
1008 FFTConfig::k_space_order>(
1009 dp3m.params, dp3m.heffte.fft->ks_local_ld_index(),
1010 dp3m.heffte.fft->ks_local_ur_index(),
1011 get_system().box_geo->length_inv());
1012 if constexpr (FFTConfig::use_r2c) {
1013 influence_function_r2c<FFTConfig::r2c_dir>(
1014 dp3m.heffte.g_energy, dp3m.params.mesh,
1015 dp3m.heffte.fft->ks_local_size(),
1016 dp3m.heffte.fft->ks_local_ld_index());
1017 }
1018 }
1019#endif
1020}
1021
1022template <typename FloatType, Arch Architecture, class FFTConfig>
1025 int m_mesh_max = -1, m_mesh_min = -1;
1026 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
1027
1028public:
1029 DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m,
1030 double prefactor, int timings,
1031 decltype(m_tune_limits) tune_limits)
1032 : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m},
1033 m_tune_limits{std::move(tune_limits)} {}
1034
1035 P3MParameters &get_params() override { return dp3m.params; }
1036
1037 void on_solver_change() const override { m_system.on_dipoles_change(); }
1038
1039 std::optional<std::string>
1040 layer_correction_veto_r_cut(double) const override {
1041 return {};
1042 }
1043
1044 void setup_logger(bool verbose) override {
1045 auto const &box_geo = *m_system.box_geo;
1046 m_logger = std::make_unique<TuningLogger>(
1047 verbose and this_node == 0, "DipolarP3M", TuningLogger::Mode::Dipolar);
1048 m_logger->tuning_goals(dp3m.params.accuracy, m_prefactor,
1049 box_geo.length()[0], dp3m.sum_dip_part,
1050 dp3m.sum_mu2);
1051 m_logger->log_tuning_start();
1052 }
1053
1054 std::tuple<double, double, double, double>
1056 double r_cut_iL) const override {
1057
1058 double alpha_L, rs_err, ks_err;
1059 auto const &box_geo = *m_system.box_geo;
1060
1061 /* calc maximal real space error for setting */
1062 rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL,
1063 dp3m.sum_dip_part, dp3m.sum_mu2, 0.001);
1064 // alpha cannot be zero for dipoles because real-space formula breaks down
1065
1066 if (std::numbers::sqrt2 * rs_err > dp3m.params.accuracy) {
1067 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
1068 alpha_L = dp3m_rtbisection(
1069 box_geo.length()[0], r_cut_iL, dp3m.sum_dip_part, dp3m.sum_mu2,
1070 0.0001 * box_geo.length()[0], 5. * box_geo.length()[0], 0.0001,
1071 dp3m.params.accuracy);
1072 } else {
1073 /* even alpha=0 is ok, however, we cannot choose it since it kills the
1074 k-space error formula.
1075 Anyways, this very likely NOT the optimal solution */
1076 alpha_L = 0.1;
1077 }
1078
1079 /* calculate real-space and k-space error for this alpha_L */
1080 rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL,
1081 dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L);
1082 ks_err = dp3m_k_space_error(box_geo.length()[0], mesh[0], cao,
1083 dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L);
1084
1085 return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L};
1086 }
1087
1088 void determine_mesh_limits() override {
1089 if (dp3m.params.mesh[0] == -1) {
1090 /* simple heuristic to limit the tried meshes if the accuracy cannot
1091 be obtained with smaller meshes, but normally not all these
1092 meshes have to be tested */
1093 auto const expo = std::log(std::cbrt(dp3m.sum_dip_part)) / std::log(2.);
1094 /* Medium-educated guess for the minimal mesh */
1095 m_mesh_min = static_cast<int>(std::round(std::pow(2., std::floor(expo))));
1096 /* avoid using more than 1 GB of FFT arrays */
1097 m_mesh_max = 128;
1098 if (m_tune_limits.first) {
1099 m_mesh_min = *m_tune_limits.first;
1100 }
1101 if (m_tune_limits.second) {
1102 m_mesh_max = *m_tune_limits.second;
1103 }
1104 } else {
1105 m_mesh_min = m_mesh_max = dp3m.params.mesh[0];
1106 m_logger->report_fixed_mesh(dp3m.params.mesh);
1107 }
1108 }
1109
1111 auto tuned_params = TuningAlgorithm::Parameters{};
1112 auto time_best = time_sentinel;
1113 for (auto tmp_mesh = m_mesh_min; tmp_mesh <= m_mesh_max; tmp_mesh += 2) {
1114 auto trial_params = TuningAlgorithm::Parameters{};
1115 trial_params.mesh = Utils::Vector3i::broadcast(tmp_mesh);
1116 trial_params.cao = cao_best;
1117
1118 auto const trial_time =
1119 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
1120 trial_params.alpha_L, trial_params.accuracy);
1121
1122 /* this mesh does not work at all */
1123 if (trial_time < 0.)
1124 continue;
1125
1126 /* the optimum r_cut for this mesh is the upper limit for higher meshes,
1127 everything else is slower */
1128 m_r_cut_iL_max = trial_params.r_cut_iL;
1129
1130 if (trial_time < time_best) {
1131 /* new optimum */
1133 tuned_params = trial_params;
1134 time_best = tuned_params.time = trial_time;
1135 } else if (trial_time > time_best + time_granularity or
1137 /* no hope of further optimisation */
1138 break;
1139 }
1140 }
1141 return tuned_params;
1142 }
1143};
1144
1145template <typename FloatType, Arch Architecture, class FFTConfig>
1147 auto &system = get_system();
1148 auto const &box_geo = *system.box_geo;
1149 if (dp3m.params.alpha_L == 0. and dp3m.params.alpha != 0.) {
1150 dp3m.params.alpha_L = dp3m.params.alpha * box_geo.length()[0];
1151 }
1152 if (dp3m.params.r_cut_iL == 0. and dp3m.params.r_cut != 0.) {
1153 dp3m.params.r_cut_iL = dp3m.params.r_cut * box_geo.length_inv()[0];
1154 }
1155 if (not is_tuned()) {
1157 if (dp3m.sum_dip_part == 0) {
1158 throw std::runtime_error(
1159 "DipolarP3M: no dipolar particles in the system");
1160 }
1161 try {
1163 system, dp3m, prefactor, tuning.timings, tuning.limits);
1164 parameters.setup_logger(tuning.verbose);
1165 // parameter ranges
1166 parameters.determine_mesh_limits();
1167 parameters.determine_r_cut_limits();
1168 parameters.determine_cao_limits(3);
1169 // run tuning algorithm
1170 parameters.tune();
1171 m_is_tuned = true;
1172 system.on_dipoles_change();
1173 } catch (...) {
1174 dp3m.params.tuning = false;
1175 throw;
1176 }
1177 }
1178 init();
1179}
1180
1181/** Tuning dipolar-P3M */
1182inline auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh,
1183 double mesh_i, int cao, double alpha_L_i) {
1184
1185 auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
1186 auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
1187 auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i);
1188 auto alias1 = 0.;
1189 auto alias2 = 0.;
1190
1191 Utils::Vector3i indices{};
1192 Utils::Vector3i nm{};
1193 Utils::Vector3d fnm{};
1195 mesh_start, mesh_stop, indices,
1196 [&]() {
1197 auto const norm_sq = nm.norm2();
1198 auto const ex = std::exp(-factor1 * norm_sq);
1199 auto const U2 = std::pow(Utils::product(fnm), 2 * cao);
1200 alias1 += Utils::sqr(ex) * norm_sq;
1201 alias2 += U2 * ex * std::pow(shift * nm, 3) / norm_sq;
1202 },
1203 [&](unsigned dim, int n) {
1204 nm[dim] = shift[dim] + n * mesh;
1205 fnm[dim] = math::sinc(nm[dim] * mesh_i);
1206 });
1207
1208 return std::make_pair(alias1, alias2);
1209}
1210
1211/** Calculate the k-space error of dipolar-P3M */
1212inline double dp3m_k_space_error(double box_size, int mesh, int cao,
1213 std::size_t n_c_part, double sum_q2,
1214 double alpha_L) {
1215
1216 auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao);
1217 auto const mesh_i = 1. / static_cast<double>(mesh);
1218 auto const alpha_L_i = 1. / alpha_L;
1219 auto const mesh_stop = Utils::Vector3i::broadcast(mesh / 2);
1220 auto const mesh_start = -mesh_stop;
1221 auto indices = Utils::Vector3i{};
1222 auto values = Utils::Vector3d{};
1223 auto he_q = 0.;
1224
1226 mesh_start, mesh_stop, indices,
1227 [&]() {
1228 if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
1229 auto const n2 = indices.norm2();
1230 auto const cs = Utils::product(values);
1231 auto const [alias1, alias2] =
1232 dp3m_tune_aliasing_sums(indices, mesh, mesh_i, cao, alpha_L_i);
1233 auto const d =
1234 alias1 - Utils::sqr(alias2 / cs) /
1235 Utils::int_pow<3>(static_cast<double>(n2));
1236 /* at high precision, d can become negative due to extinction;
1237 also, don't take values that have no significant digits left*/
1238 if (d > 0. and std::fabs(d / alias1) > round_error_prec)
1239 he_q += d;
1240 }
1241 },
1242 [&values, &mesh_i, cotangent_sum](unsigned dim, int n) {
1243 values[dim] = cotangent_sum(n, mesh_i);
1244 });
1245
1246 return 8. * Utils::sqr(std::numbers::pi) / 3. * sum_q2 *
1247 sqrt(he_q / static_cast<double>(n_c_part)) /
1248 Utils::int_pow<4>(box_size);
1249}
1250
1251/** Calculate the value of the errors for the REAL part of the force in terms
1252 * of the splitting parameter alpha of Ewald. Based on eq. (33) @cite wang01a.
1253 *
1254 * Please note that in this more refined approach we don't use
1255 * eq. (37), but eq. (33) which maintains all the powers in alpha.
1256 */
1257inline double dp3m_real_space_error(double box_size, double r_cut_iL,
1258 std::size_t n_c_part, double sum_q2,
1259 double alpha_L) {
1260 auto constexpr exp_min = -708.4; // for IEEE-compatible double
1261 double d_error_f, d_cc, d_dc, d_con;
1262
1263 auto const d_rcut = r_cut_iL * box_size;
1264 auto const d_rcut2 = Utils::sqr(d_rcut);
1265 auto const d_rcut4 = Utils::sqr(d_rcut2);
1266
1267 auto const d_a2 = Utils::sqr(alpha_L) / Utils::sqr(box_size);
1268 auto const exponent = -d_a2 * d_rcut2;
1269 auto const exp_term = (exponent < exp_min) ? 0. : std::exp(exponent);
1270 auto const d_c = sum_q2 * exp_term;
1271
1272 d_cc = 4. * Utils::sqr(d_a2) * Utils::sqr(d_rcut2) + 6. * d_a2 * d_rcut2 + 3.;
1273
1274 d_dc = 8. * Utils::int_pow<3>(d_a2) * Utils::int_pow<3>(d_rcut2) +
1275 20. * Utils::sqr(d_a2) * d_rcut4 + 30. * d_a2 * d_rcut2 + 15.;
1276
1277 d_con = 1. / sqrt(Utils::int_pow<3>(box_size) * Utils::sqr(d_a2) * d_rcut *
1278 Utils::sqr(d_rcut4) * static_cast<double>(n_c_part));
1279
1280 d_error_f = d_c * d_con *
1281 sqrt((13. / 6.) * Utils::sqr(d_cc) +
1282 (2. / 15.) * Utils::sqr(d_dc) - (13. / 15.) * d_cc * d_dc);
1283
1284 return d_error_f;
1285}
1286
1287/** Using bisection, find the root of a function "func-tuned_accuracy/sqrt(2.)"
1288 * known to lie between x1 and x2. The root, returned as rtbis, will be
1289 * refined until its accuracy is \f$\pm\f$ @p xacc.
1290 */
1291double dp3m_rtbisection(double box_size, double r_cut_iL, std::size_t n_c_part,
1292 double sum_q2, double x1, double x2, double xacc,
1293 double tuned_accuracy) {
1294 constexpr int JJ_RTBIS_MAX = 40;
1295
1296 auto const constant = tuned_accuracy / std::numbers::sqrt2;
1297
1298 auto const f1 =
1299 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, x1) -
1300 constant;
1301 auto const f2 =
1302 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, x2) -
1303 constant;
1304 if (f1 * f2 >= 0.0) {
1305 throw std::runtime_error(
1306 "Root must be bracketed for bisection in dp3m_rtbisection");
1307 }
1308 // Orient the search dx, and set rtb to x1 or x2 ...
1309 double dx;
1310 double rtb = f1 < 0.0 ? (dx = x2 - x1, x1) : (dx = x1 - x2, x2);
1311 for (int j = 1; j <= JJ_RTBIS_MAX; j++) {
1312 auto const xmid = rtb + (dx *= 0.5);
1313 auto const fmid =
1314 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, xmid) -
1315 constant;
1316 if (fmid <= 0.0)
1317 rtb = xmid;
1318 if (fabs(dx) < xacc || fmid == 0.0)
1319 return rtb;
1320 }
1321 throw std::runtime_error("Too many bisections in dp3m_rtbisection");
1322}
1323
1325 auto const &system = get_system();
1326 auto const &box_geo = *system.box_geo;
1327 auto const &local_geo = *system.local_geo;
1328 for (auto i = 0u; i < 3u; i++) {
1329 /* check k-space cutoff */
1330 if (dp3m_params.cao_cut[i] >= box_geo.length_half()[i]) {
1331 std::stringstream msg;
1332 msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i]
1333 << " is larger than half of box dimension " << box_geo.length()[i];
1334 throw std::runtime_error(msg.str());
1335 }
1336 if (dp3m_params.cao_cut[i] >= local_geo.length()[i]) {
1337 std::stringstream msg;
1338 msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i]
1339 << " is larger than local box dimension " << local_geo.length()[i];
1340 throw std::runtime_error(msg.str());
1341 }
1342 }
1343
1344 if ((box_geo.length()[0] != box_geo.length()[1]) or
1345 (box_geo.length()[1] != box_geo.length()[2])) {
1346 throw std::runtime_error("DipolarP3M: requires a cubic box");
1347 }
1348}
1349
1351 auto const &box_geo = *get_system().box_geo;
1352 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
1353 throw std::runtime_error(
1354 "DipolarP3M: requires periodicity (True, True, True)");
1355 }
1356}
1357
1359 auto const &local_geo = *get_system().local_geo;
1360 if (local_geo.cell_structure_type() != CellStructureType::REGULAR and
1361 local_geo.cell_structure_type() != CellStructureType::HYBRID) {
1362 throw std::runtime_error(
1363 "DipolarP3M: requires the regular or hybrid decomposition cell system");
1364 }
1365 if (::communicator.size > 1 and
1366 local_geo.cell_structure_type() == CellStructureType::HYBRID) {
1367 throw std::runtime_error(
1368 "DipolarP3M: does not work with the hybrid decomposition cell system, "
1369 "if using more than one MPI node");
1370 }
1371}
1372
1374 auto const &node_grid = ::communicator.node_grid;
1375 if (node_grid[0] < node_grid[1] or node_grid[1] < node_grid[2]) {
1376 throw std::runtime_error(
1377 "DipolarP3M: node grid must be sorted, largest first");
1378 }
1379}
1380
1381template <typename FloatType, Arch Architecture, class FFTConfig>
1383 auto const &box_geo = *get_system().box_geo;
1384 dp3m.params.r_cut = dp3m.params.r_cut_iL * box_geo.length()[0];
1385 dp3m.params.alpha = dp3m.params.alpha_L * box_geo.length_inv()[0];
1386 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
1387 dp3m.local_mesh.recalc_ld_pos(dp3m.params);
1388 sanity_checks_boxl();
1389 calc_influence_function_force();
1390 calc_influence_function_energy();
1391 dp3m.energy_correction = 0.;
1392#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
1393 if (dp3m.heffte.world_size == 1) {
1394 dp3m.heffte.halo_comm.resize(::comm_cart, dp3m.local_mesh);
1395 }
1396#endif
1397}
1398
1399template <typename FloatType, Arch Architecture, class FFTConfig>
1400void DipolarP3MHeffte<FloatType, Architecture,
1401 FFTConfig>::calc_energy_correction() {
1402 auto const &box_geo = *get_system().box_geo;
1403 auto const Ukp3m = calc_average_self_energy_k_space() * box_geo.volume();
1404 auto const Ewald_volume = Utils::int_pow<3>(dp3m.params.alpha_L);
1405 auto const Eself = -2. * Ewald_volume * std::numbers::inv_sqrtpi / 3.;
1406 dp3m.energy_correction =
1407 -dp3m.sum_mu2 * (Ukp3m + Eself + 2. * std::numbers::pi / 3.);
1408}
1409
1410#ifdef ESPRESSO_NPT
1411template <typename FloatType, Arch Architecture, class FFTConfig>
1412void DipolarP3MHeffte<FloatType, Architecture,
1413 FFTConfig>::npt_add_virial_contribution(double energy)
1414 const {
1415 get_system().npt_add_virial_contribution(energy);
1416}
1417#endif // ESPRESSO_NPT
1418
1419#endif // ESPRESSO_DP3M
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
Vector implementation and trait types for boost qvm interoperability.
Describes a cell structure / cell system.
auto & get_local_force()
auto & get_local_torque()
auto const & get_unique_particles() const
ParticleRange local_particles() const
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
TuningAlgorithm::Parameters get_time() override
DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
void on_solver_change() const override
void determine_mesh_limits() override
P3MParameters & get_params() override
std::optional< std::string > layer_correction_veto_r_cut(double) const override
void setup_logger(bool verbose) override
base_type::size_type size() const
Main system class.
std::shared_ptr< LocalBox > local_geo
void npt_add_virial_contribution(double energy)
Definition npt.cpp:136
std::shared_ptr< CellStructure > cell_structure
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
T norm() const
Definition Vector.hpp:160
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
Definition Vector.hpp:132
void zfill(std::size_t size)
Fill cache with zero-initialized data.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
constexpr auto round_error_prec
Precision below which a double-precision float is assumed to be zero.
Definition config.hpp:38
__device__ void vector_product(float const *a, float const *b, float *out)
static std::size_t count_magnetic_particles(ParticleRange const &particles)
Definition dlc.cpp:400
P3M algorithm for long-range magnetic dipole-dipole interaction.
double dp3m_real_space_error(double box_size, double r_cut_iL, std::size_t n_c_part, double sum_q2, double alpha_L)
Calculate the value of the errors for the REAL part of the force in terms of the splitting parameter ...
double dp3m_rtbisection(double box_size, double r_cut_iL, std::size_t n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy)
Compute the value of alpha through a bisection method.
auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, double mesh_i, int cao, double alpha_L_i)
Tuning dipolar-P3M.
double dp3m_k_space_error(double box_size, int mesh, int cao, std::size_t n_c_part, double sum_q2, double alpha_L)
Calculate the k-space error of dipolar-P3M.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
Routines, row decomposition, data structures and communication for the 3D-FFT.
auto pad_with_zeros_discard_imag(std::span< T > cropped_array, Utils::Vector3i const &cropped_dim, Utils::Vector3i const &pad_left, Utils::Vector3i const &pad_right)
Pad a 3D matrix with zeros to restore halo regions.
auto extract_block(Container const &in_array, Utils::Vector3i const &dimensions, Utils::Vector3i const &start, Utils::Vector3i const &stop)
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
std::vector< FloatType > grid_influence_function_dipolar(P3MParameters const &params, Utils::Vector3i const &n_start, Utils::Vector3i const &n_stop, Utils::Vector3d const &inv_box_l)
Map influence function over a grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, WeightsStorage< cao > const &weights, Kernel kernel)
P3M grid interpolation.
constexpr int p3m_min_cao
Minimal charge assignment order.
Definition math.hpp:48
constexpr int p3m_max_cao
Maximal charge assignment order.
Definition math.hpp:50
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
Definition math.hpp:38
System & get_system()
T product(Vector< T, N > const &v)
Definition Vector.hpp:373
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
Definition sqr.hpp:28
decltype(auto) integral_parameter(T i, Args &&...args)
Generate a call table for an integral non-type template parameter.
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
Definition math.hpp:71
auto get_analytic_cotangent_sum_kernel(int cao)
Definition math.hpp:146
STL namespace.
Exports for the NpT code.
Common functions for dipolar and charge P3M.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
ESPRESSO_ATTR_ALWAYS_INLINE void kokkos_parallel_range_for(auto const &name, auto start, auto end, auto const &kernel)
Utils::Vector3i node_grid
double calc_surface_term(bool force_flag, bool energy_flag) override
void dipole_assign() override
void scaleby_box_l() override
double long_range_kernel(bool force_flag, bool energy_flag)
Compute the k-space part of forces and energies.
Base class for the magnetostatics P3M algorithm.
double sum_mu2
Sum of square of magnetic dipoles.
p3m_interpolation_cache inter_weights
FloatType value_type
std::size_t sum_dip_part
number of dipolar particles.
p3m_send_mesh< FloatType > halo_comm
double energy_correction
cached k-space self-energy correction
void resize_heffte_buffers()
struct DipolarP3MState::@1 heffte
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
void sanity_checks_cell_structure() const
P3MParameters const & dp3m_params
Definition dp3m.hpp:55
void sanity_checks_periodicity() const
void sanity_checks_node_grid() const
void recalc_ld_pos(P3MParameters const &params)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
P3MParameters params
P3M base parameters.
P3MLocalMesh local_mesh
Local mesh geometry information for this MPI rank.
void operator()(auto &dp3m, double prefac, int d_rs, CellStructure &cell_structure) const
void operator()(auto &dp3m, double prefac, int d_rs, CellStructure &cell_structure) const