ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
dp3m_heffte.impl.hpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2025 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include <config/config.hpp>
23
24#ifdef ESPRESSO_DP3M
25
27
30
31#include "magnetostatics/dp3m_heffte.hpp" // must be included after dipoles.hpp
32
33#include "fft/fft.hpp"
34#include "p3m/P3MFFT.hpp"
36#include "p3m/TuningLogger.hpp"
37#include "p3m/common.hpp"
40#include "p3m/interpolation.hpp"
41#include "p3m/math.hpp"
42
43#include "BoxGeometry.hpp"
44#include "LocalBox.hpp"
45#include "Particle.hpp"
46#include "ParticleRange.hpp"
47#include "PropagationMode.hpp"
50#include "communication.hpp"
51#include "errorhandling.hpp"
53#include "npt.hpp"
54#include "system/System.hpp"
55#include "tuning.hpp"
56
57#include <utils/Vector.hpp>
60#include <utils/math/sqr.hpp>
61
62#include <boost/mpi/collectives/all_reduce.hpp>
63#include <boost/mpi/collectives/reduce.hpp>
64
65#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
66#include <Kokkos_Core.hpp>
67#include <omp.h>
68#endif
69
70#include <algorithm>
71#include <array>
72#include <cstddef>
73#include <cstdio>
74#include <functional>
75#include <iterator>
76#include <numbers>
77#include <optional>
78#include <span>
79#include <sstream>
80#include <stdexcept>
81#include <tuple>
82#include <utility>
83#include <vector>
84
85#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
86#ifndef NDEBUG
87template <typename T>
88bool heffte_almost_equal(T const &value, T const &reference) {
89 auto const diff = std::abs(value - reference);
90 using FT = std::remove_cvref_t<decltype(diff)>;
91 auto constexpr atol = std::is_same_v<FT, float> ? FT{2e-4} : FT{1e-6};
92 auto constexpr rtol = std::is_same_v<FT, float> ? FT{5e-5} : FT{1e-5};
93 auto const non_zero = std::abs(reference) != FT{0};
94 return (diff < atol) or (non_zero and (diff / std::abs(reference) < rtol));
95}
96#endif // not NDEBUG
97#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
98
99template <typename FloatType, Arch Architecture, class FFTConfig>
100void DipolarP3MHeffte<FloatType, Architecture,
102 auto local_n = std::size_t{0u};
103 double local_mu2 = 0.;
104
105 for (auto const &p : get_system().cell_structure->local_particles()) {
106 if (p.dipm() != 0.) {
107 local_mu2 += p.calc_dip().norm2();
108 local_n++;
109 }
110 }
111
112 boost::mpi::all_reduce(comm_cart, local_mu2, dp3m.sum_mu2, std::plus<>());
113 boost::mpi::all_reduce(comm_cart, local_n, dp3m.sum_dip_part, std::plus<>());
114}
115
116inline double dp3m_k_space_error(double box_size, int mesh, int cao,
117 std::size_t n_c_part, double sum_q2,
118 double alpha_L);
119
120inline double dp3m_real_space_error(double box_size, double r_cut_iL,
121 std::size_t n_c_part, double sum_q2,
122 double alpha_L);
123
124/** Compute the value of alpha through a bisection method.
125 * Based on eq. (33) @cite wang01a.
126 */
127double dp3m_rtbisection(double box_size, double r_cut_iL, std::size_t n_c_part,
128 double sum_q2, double x1, double x2, double xacc,
129 double tuned_accuracy);
130
131template <typename FloatType, Arch Architecture, class FFTConfig>
132double DipolarP3MHeffte<FloatType, Architecture,
133 FFTConfig>::calc_average_self_energy_k_space() const {
134 auto const &box_geo = *get_system().box_geo;
135 auto const node_phi =
136 grid_influence_function_self_energy<FloatType, P3M_BRILLOUIN>(
137 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
138
139 double phi = 0.;
140 boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0);
141 phi /= 3. * box_geo.length()[0] * Utils::int_pow<3>(dp3m.params.mesh[0]);
142 return phi * std::numbers::pi;
143}
144
145template <typename FloatType, Arch Architecture, class FFTConfig>
147 assert(dp3m.params.mesh >= Utils::Vector3i::broadcast(1));
148 assert(dp3m.params.cao >= p3m_min_cao and dp3m.params.cao <= p3m_max_cao);
149 assert(dp3m.params.alpha > 0.);
150
151 auto const &system = get_system();
152 auto const &box_geo = *system.box_geo;
153 auto const &local_geo = *system.local_geo;
154 auto const verlet_skin = system.cell_structure->get_verlet_skin();
155
156 dp3m.params.cao3 = Utils::int_pow<3>(dp3m.params.cao);
157 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
158
159 assert(dp3m.fft);
160 dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo, verlet_skin, 0.);
161 dp3m.fft_buffers->init_halo();
162 dp3m.fft->init(dp3m.params);
163 dp3m.mesh.ks_pnum = dp3m.fft->get_ks_pnum();
164 dp3m.fft_buffers->init_meshes(dp3m.fft->get_ca_mesh_size());
165 dp3m.update_mesh_views();
166#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
167 dp3m.heffte.world_size = comm_cart.size();
168 dp3m.heffte.fft = std::make_shared<P3MFFT<FloatType, FFTConfig>>(
169 ::comm_cart, dp3m.params.mesh, dp3m.local_mesh.ld_no_halo,
170 dp3m.local_mesh.ur_no_halo, ::communicator.node_grid);
171 dp3m.resize_heffte_buffers();
172#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
173 dp3m.calc_differential_operator();
174
175 /* fix box length dependent constants */
176 scaleby_box_l();
177
179}
180
181namespace {
182template <int cao> struct AssignDipole {
183#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
184 void operator()(auto &dp3m, auto &cell_structure) {
185 using DipolarP3MState = std::remove_reference_t<decltype(dp3m)>;
186 using value_type = DipolarP3MState::value_type;
187 auto constexpr memory_order = Utils::MemoryOrder::ROW_MAJOR;
188 auto const &aosoa = cell_structure.get_aosoa();
189 auto const &unique_particles = cell_structure.get_unique_particles();
190 auto const n_part = cell_structure.count_local_particles();
191 dp3m.inter_weights.zfill(n_part); // allocate buffer for parallel write
193 "InterpolateDipoles", std::size_t{0u}, n_part, [&](auto p_index) {
194 auto const tid = omp_get_thread_num();
195 auto const p_pos = aosoa.get_vector_at(aosoa.position, p_index);
196 auto const dip = unique_particles.at(p_index)->calc_dip();
197 auto const weights =
198 p3m_calculate_interpolation_weights<cao, memory_order>(
199 p_pos, dp3m.params.ai, dp3m.local_mesh);
200 dp3m.inter_weights.store_at(p_index, weights);
202 dp3m.local_mesh, weights, [&dip, tid, &dp3m](int ind, double w) {
203 dp3m.rs_fields_kokkos(tid, 0u, ind) += value_type(w * dip[0u]);
204 dp3m.rs_fields_kokkos(tid, 1u, ind) += value_type(w * dip[1u]);
205 dp3m.rs_fields_kokkos(tid, 2u, ind) += value_type(w * dip[2u]);
206 });
207 });
208 Kokkos::fence();
209 using execution_space = Kokkos::DefaultExecutionSpace;
210 int num_threads = execution_space().concurrency();
211 Kokkos::RangePolicy<execution_space> policy(std::size_t{0},
212 dp3m.local_mesh.size);
213 Kokkos::parallel_for("ReduceInterpolatedDipoles", policy,
214 [&dp3m, num_threads](std::size_t const i) {
215 for (int dir = 0; dir < 3; ++dir) {
216 value_type acc{};
217 for (int tid = 0; tid < num_threads; ++tid) {
218 acc += dp3m.rs_fields_kokkos(tid, dir, i);
219 }
220 dp3m.mesh.rs_fields[dir][i] += acc;
221#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
222 dp3m.heffte.rs_dipole_density[dir][i] += acc;
223#endif
224 }
225 });
226 Kokkos::fence();
227 }
228#else // ESPRESSO_SHARED_MEMORY_PARALLELISM
229 void operator()(auto &dp3m, Utils::Vector3d const &real_pos,
230 Utils::Vector3d const &dip) const {
231 using DipolarP3MState = std::remove_reference_t<decltype(dp3m)>;
232 using value_type = DipolarP3MState::value_type;
233 auto constexpr memory_order = Utils::MemoryOrder::ROW_MAJOR;
234 auto const weights = p3m_calculate_interpolation_weights<cao, memory_order>(
235 real_pos, dp3m.params.ai, dp3m.local_mesh);
236 p3m_interpolate<cao>(
237 dp3m.local_mesh, weights, [&dip, &dp3m](int ind, double w) {
238 dp3m.mesh.rs_fields[0u][ind] += value_type(w * dip[0u]);
239 dp3m.mesh.rs_fields[1u][ind] += value_type(w * dip[1u]);
240 dp3m.mesh.rs_fields[2u][ind] += value_type(w * dip[2u]);
241#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
242 dp3m.heffte.rs_dipole_density[0u][ind] += value_type(w * dip[0u]);
243 dp3m.heffte.rs_dipole_density[1u][ind] += value_type(w * dip[1u]);
244 dp3m.heffte.rs_dipole_density[2u][ind] += value_type(w * dip[2u]);
245#endif
246 });
247
248 dp3m.inter_weights.template store<cao>(weights);
249 }
250#endif // ESPRESSO_SHARED_MEMORY_PARALLELISM
251};
252} // namespace
253
254template <typename FloatType, Arch Architecture, class FFTConfig>
256 [[maybe_unused]] ParticleRange const &particles) {
257 prepare_fft_mesh();
258
259#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
260 Utils::integral_parameter<int, AssignDipole, p3m_min_cao, p3m_max_cao>(
261 dp3m.params.cao, dp3m, *get_system().cell_structure);
262#else // ESPRESSO_SHARED_MEMORY_PARALLELISM
263 for (auto const &p : particles) {
264 if (p.dipm() != 0.) {
265 Utils::integral_parameter<int, AssignDipole, p3m_min_cao, p3m_max_cao>(
266 dp3m.params.cao, dp3m, p.pos(), p.calc_dip());
267 }
268 }
269#endif // ESPRESSO_SHARED_MEMORY_PARALLELISM
270}
271
272namespace {
273template <int cao> struct AssignTorques {
274 void operator()(auto &dp3m, double prefac, int d_rs,
275#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
276 CellStructure &cell_structure
277#else
278 ParticleRange const &particles
279#endif
280 ) const {
281
282 assert(cao == dp3m.inter_weights.cao());
283
284 auto const kernel = [d_rs, &dp3m](auto const &pref, auto &p_torque,
285 std::size_t p_index) {
286 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
287 Utils::Vector3d E{};
288 p3m_interpolate(dp3m.local_mesh, weights,
289 [&E, &dp3m, d_rs](int ind, double w) {
290 // heFFTe data: dp3m.heffte.ks_scalar.real()
291 E[d_rs] += w * double(dp3m.mesh.rs_scalar[ind]);
292 });
293
294 auto const torque = vector_product(pref, E);
295#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
296 auto const thread_id = omp_get_thread_num();
297 p_torque(p_index, thread_id, 0) -= torque[0];
298 p_torque(p_index, thread_id, 1) -= torque[1];
299 p_torque(p_index, thread_id, 2) -= torque[2];
300#else
301 p_torque -= torque;
302#endif
303 };
304
305#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
306 auto const n_part = dp3m.inter_weights.size();
307 auto const &unique_particles = cell_structure.get_unique_particles();
308 auto &local_torque = cell_structure.get_local_torque();
310 "AssignTorques", std::size_t{0u}, n_part, [&](std::size_t p_index) {
311 auto const &p = *unique_particles.at(p_index);
312 if (p.dipm() != 0.) {
313 kernel(p.calc_dip() * prefac, local_torque, p_index);
314 }
315 });
316#else // ESPRESSO_SHARED_MEMORY_PARALLELISM
317 /* magnetic particle index */
318 auto p_index = std::size_t{0ul};
319
320 for (auto &p : particles) {
321 if (p.dipm() != 0.) {
322 kernel(p.calc_dip() * prefac, p.torque(), p_index);
323 ++p_index;
324 }
325 }
326#endif // ESPRESSO_SHARED_MEMORY_PARALLELISM
327 }
328};
329
330template <int cao> struct AssignForcesDip {
331 void operator()(auto &dp3m, double prefac, int d_rs,
332#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
333 CellStructure &cell_structure
334#else
335 ParticleRange const &particles
336#endif
337 ) const {
338
339 assert(cao == dp3m.inter_weights.cao());
340
341 auto const kernel = [d_rs, &dp3m](auto const &pref, auto &p_force,
342 std::size_t p_index) {
343 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
344
345 Utils::Vector3d E{};
346 p3m_interpolate(dp3m.local_mesh, weights, [&E, &dp3m](int ind, double w) {
347 // heFFTe data: dp3m.heffte.rs_B_fields
348 E[0u] += w * double(dp3m.mesh.rs_fields[0u][ind]);
349 E[1u] += w * double(dp3m.mesh.rs_fields[1u][ind]);
350 E[2u] += w * double(dp3m.mesh.rs_fields[2u][ind]);
351 });
352
353#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
354 auto const thread_id = omp_get_thread_num();
355 p_force(p_index, thread_id, d_rs) += pref * E;
356#else
357 p_force[d_rs] += pref * E;
358#endif
359 };
360
361#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
362 auto const n_part = dp3m.inter_weights.size();
363 auto const &unique_particles = cell_structure.get_unique_particles();
364 auto &local_force = cell_structure.get_local_force();
366 "AssignForcesDip", std::size_t{0u}, n_part, [&](std::size_t p_index) {
367 auto const &p = *unique_particles.at(p_index);
368 if (p.dipm() != 0.) {
369 kernel(p.calc_dip() * prefac, local_force, p_index);
370 }
371 });
372#else // ESPRESSO_SHARED_MEMORY_PARALLELISM
373 /* magnetic particle index */
374 auto p_index = std::size_t{0ul};
375
376 for (auto &p : particles) {
377 if (p.dipm() != 0.) {
378 kernel(p.calc_dip() * prefac, p.force(), p_index);
379 ++p_index;
380 }
381 }
382#endif // ESPRESSO_SHARED_MEMORY_PARALLELISM
383 }
384};
385} // namespace
386
387#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
388template <typename FloatType, class FFTConfig>
390 auto const rs_array_size =
391 static_cast<std::size_t>(Utils::product(this->local_mesh.dim));
392 auto const rs_array_size_no_halo =
393 static_cast<std::size_t>(Utils::product(this->local_mesh.dim_no_halo));
394 auto const fft_mesh_size =
395 static_cast<std::size_t>(Utils::product(heffte.fft->ks_local_size()));
396 for (auto d : {0u, 1u, 2u}) {
397 heffte.rs_dipole_density[d].resize(rs_array_size);
398 heffte.ks_dipole_density[d].resize(fft_mesh_size);
399 heffte.rs_B_fields[d].resize(rs_array_size);
400 heffte.rs_B_fields_no_halo[d].resize(rs_array_size_no_halo);
401 }
402 heffte.ks_B_field_storage.resize(fft_mesh_size);
403 heffte.ks_scalar.resize(fft_mesh_size);
404}
405#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
406
407template <typename FloatType, Arch Architecture, class FFTConfig>
409 bool force_flag, bool energy_flag, ParticleRange const &particles) {
410 /* k-space energy */
411 double energy = 0.;
412 auto const &system = get_system();
413 auto const &box_geo = *system.box_geo;
414 auto const dipole_prefac = prefactor / Utils::product(dp3m.params.mesh);
415#ifdef ESPRESSO_NPT
416 auto const npt_flag = force_flag and system.has_npt_enabled();
417#else
418 auto constexpr npt_flag = false;
419#endif
420
421 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
422 auto local_index = Utils::Vector3i::broadcast(0);
423#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
424 auto constexpr r2c_dir = FFTConfig::r2c_dir;
425 auto const rs_local_size = dp3m.heffte.fft->rs_local_size();
426 auto const local_size = dp3m.heffte.fft->ks_local_size();
427 auto local_size_full = local_size;
428 if constexpr (FFTConfig::use_r2c) {
429 local_size_full[r2c_dir] -= 1;
430 local_size_full[r2c_dir] *= 2;
431 }
432 auto const local_origin = dp3m.heffte.fft->ks_local_ld_index();
433#ifndef NDEBUG
434 auto const line_stride = local_size_full[0];
435 auto const plane_stride = local_size_full[0] * local_size_full[0];
436#endif
437 auto const &global_size = dp3m.params.mesh;
438 auto const cutoff_left = 1 - local_origin[r2c_dir];
439 auto const cutoff_right = global_size[r2c_dir] / 2 - local_origin[r2c_dir];
440 auto &short_dim = local_index[r2c_dir];
441 dp3m.resize_heffte_buffers();
442#endif
443
444 if (dp3m.sum_mu2 > 0.) {
445 dipole_assign(particles);
446 dp3m.fft_buffers->perform_vector_halo_gather();
447 for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
448 dp3m.fft->forward_fft(rs_mesh);
449 }
450 dp3m.update_mesh_views();
451
452#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
453 if (dp3m.heffte.world_size == 1) {
454 // halo communication of real space dipoles density
455 std::array<FloatType *, 3u> rs_fields = {
456 {dp3m.heffte.rs_dipole_density[0u].data(),
457 dp3m.heffte.rs_dipole_density[1u].data(),
458 dp3m.heffte.rs_dipole_density[2u].data()}};
459 dp3m.heffte.halo_comm.gather_grid(::comm_cart, rs_fields,
460 dp3m.local_mesh.dim);
461
462 for (auto dir : {0u, 1u, 2u}) {
463 // get real-space dipoles density without ghost layers
464 auto rs_field_no_halo = extract_block<Utils::MemoryOrder::ROW_MAJOR,
465 FFTConfig::r_space_order>(
466 dp3m.heffte.rs_dipole_density[dir], dp3m.local_mesh.dim,
467 dp3m.local_mesh.n_halo_ld,
468 dp3m.local_mesh.dim - dp3m.local_mesh.n_halo_ur);
469 // re-order data in row-major
470 std::vector<FloatType> rs_field_no_halo_reorder;
471 rs_field_no_halo_reorder.resize(rs_field_no_halo.size());
472 std::size_t index_row_major = 0u;
473 for_each_3d_order<FFTConfig::k_space_order>(
474 mesh_start, rs_local_size, local_index, [&]() {
475 auto constexpr KX = 1, KY = 2, KZ = 0;
476 auto const index = local_index[KZ] +
477 rs_local_size[0] * local_index[KY] +
478 Utils::sqr(rs_local_size[0]) * local_index[KX];
479 rs_field_no_halo_reorder[index_row_major] =
480 rs_field_no_halo[index];
481 ++index_row_major;
482 });
483 dp3m.heffte.fft->forward(rs_field_no_halo_reorder.data(),
484 dp3m.heffte.ks_dipole_density[dir].data());
485#ifndef NDEBUG
486 if (not dp3m.params.tuning) {
487 std::size_t index_row_major_r2c = 0u;
488 for_each_3d_order<FFTConfig::k_space_order>(
489 mesh_start, local_size, local_index, [&]() {
490 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
491 auto constexpr KX = 2, KY = 0, KZ = 1;
492 auto const index_fft_legacy = local_index[KZ] +
493 line_stride * local_index[KY] +
494 plane_stride * local_index[KX];
495 auto const old_value = std::complex<FloatType>{
496 dp3m.mesh.rs_fields[dir][2 * index_fft_legacy],
497 dp3m.mesh.rs_fields[dir][2 * index_fft_legacy + 1]};
498 auto const &new_value =
499 dp3m.heffte.ks_dipole_density[dir][index_row_major_r2c];
500 assert(heffte_almost_equal(new_value, old_value));
501 ++index_row_major_r2c;
502 }
503 });
504 }
505#endif // not NDEBUG
506 }
507 }
508#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
509 }
510
511 /* === k-space energy calculation === */
512 if (energy_flag or npt_flag) {
513 /*********************
514 Dipolar energy
515 **********************/
516 if (dp3m.sum_mu2 > 0.) {
517 /* i*k differentiation for dipolar gradients:
518 * |(\Fourier{\vect{mu}}(k)\cdot \vect{k})|^2 */
519
520 auto index = std::size_t(0u);
521 auto it_energy = dp3m.g_energy.begin();
522 auto node_energy = 0.;
523 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
524 auto constexpr KX = 2, KY = 0, KZ = 1;
525 auto const shift = local_index + dp3m.mesh.start;
526 auto const &d_op = dp3m.d_op[0u];
527 auto const &mesh_dip = dp3m.mesh.rs_fields;
528 // Re(mu)*k
529 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
530 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
531 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
532 ++index;
533 // Im(mu)*k
534 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
535 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
536 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
537 ++index;
538 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
539 std::advance(it_energy, 1);
540 });
541#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
542 if (dp3m.heffte.world_size == 1) {
543 [[maybe_unused]] auto node_energy_heffte = 0.;
544 std::size_t index_row_major_r2c = 0u;
545 for_each_3d_order<FFTConfig::k_space_order>(
546 mesh_start, local_size, local_index, [&]() {
547 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
548 auto const global_index = local_origin + local_index;
549 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
550 auto const cell_field =
551 mesh_dip[0u][index_row_major_r2c] *
552 FloatType(dp3m.d_op[0u][global_index[1u]]) +
553 mesh_dip[1u][index_row_major_r2c] *
554 FloatType(dp3m.d_op[1u][global_index[2u]]) +
555 mesh_dip[2u][index_row_major_r2c] *
556 FloatType(dp3m.d_op[2u][global_index[0u]]);
557 auto cell_energy = static_cast<double>(
558 dp3m.heffte.g_energy[index_row_major_r2c] *
559 std::norm(cell_field));
560 if (FFTConfig::use_r2c and (short_dim >= cutoff_left and
561 short_dim <= cutoff_right - 1)) {
562 // k-space symmetry: double counting except in the first and
563 // last planes of the short dimension; although the wavevector
564 // points in the opposite direction in the redundant region of
565 // k-space, the product of two components of the wavevector
566 // cancels out the negative sign
567 cell_energy *= 2.;
568 }
569 node_energy_heffte += cell_energy;
570 }
571 ++index_row_major_r2c;
572 });
573 assert(heffte_almost_equal(static_cast<FloatType>(node_energy_heffte),
574 static_cast<FloatType>(node_energy)));
575 }
576#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
577 node_energy *= dipole_prefac * std::numbers::pi * box_geo.length_inv()[0];
578 boost::mpi::reduce(comm_cart, node_energy, energy, std::plus<>(), 0);
579
580 if (dp3m.energy_correction == 0.)
581 calc_energy_correction();
582
583 if (this_node == 0) {
584 /* self energy correction */
585 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
586 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
587
588 /* dipolar energy correction due to systematic Madelung-self effects */
589 energy += prefactor * dp3m.energy_correction / box_geo.volume();
590 }
591 }
592 } // if (energy_flag)
593
594 /* === k-space force calculation === */
595 if (force_flag) {
596 /****************************
597 * DIPOLAR TORQUES (k-space)
598 ****************************/
599 if (dp3m.sum_mu2 > 0.) {
600 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u];
601 dp3m.ks_scalar.resize(dp3m.local_mesh.size);
602 /* fill in ks_scalar array for torque calculation */
603 {
604 auto index{std::size_t(0u)};
605 auto it_energy = dp3m.g_energy.begin();
606 auto it_ks_scalar = dp3m.ks_scalar.begin();
607 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() mutable {
608 auto constexpr KX = 2, KY = 0, KZ = 1;
609 auto const shift = local_index + dp3m.mesh.start;
610 auto const &d_op = dp3m.d_op[0u];
611 auto const &mesh_dip = dp3m.mesh.rs_fields;
612 // Re(mu)*k
613 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
614 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
615 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
616 ++index;
617 // Im(mu)*k
618 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
619 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
620 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
621 ++index;
622 *it_ks_scalar = *it_energy * std::complex<FloatType>{re, im};
623 std::advance(it_energy, 1);
624 std::advance(it_ks_scalar, 1);
625 });
626 }
627#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
628 if (dp3m.heffte.world_size == 1) {
629 std::size_t index_row_major_r2c = 0u;
630 for_each_3d_order<FFTConfig::k_space_order>(
631 mesh_start, local_size, local_index, [&]() {
632 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
633 auto const global_index = local_origin + local_index;
634 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
635 dp3m.heffte.ks_scalar[index_row_major_r2c] =
636 dp3m.heffte.g_energy[index_row_major_r2c] *
637 (mesh_dip[0u][index_row_major_r2c] *
638 FloatType(dp3m.d_op[0u][global_index[1u]]) +
639 mesh_dip[1u][index_row_major_r2c] *
640 FloatType(dp3m.d_op[1u][global_index[2u]]) +
641 mesh_dip[2u][index_row_major_r2c] *
642 FloatType(dp3m.d_op[2u][global_index[0u]]));
643#ifndef NDEBUG
644 if (not dp3m.params.tuning) {
645 auto constexpr KX = 2, KY = 0, KZ = 1;
646 auto const index_fft_legacy = local_index[KZ] +
647 line_stride * local_index[KY] +
648 plane_stride * local_index[KX];
649 assert(heffte_almost_equal(
650 dp3m.heffte.ks_scalar[index_row_major_r2c],
651 dp3m.ks_scalar[index_fft_legacy]));
652 }
653#endif // not NDEBUG
654 ++index_row_major_r2c;
655 }
656 });
657 }
658#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
659
660 /* Torque component loop */
661 for (int d = 0; d < 3; d++) {
662 auto it_ks_scalar = dp3m.ks_scalar.begin();
663 auto index = 0u;
664 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
665 auto const &offset = dp3m.mesh.start;
666 auto const &d_op = dp3m.d_op[0u];
667 auto const d_op_val = FloatType(d_op[local_index[d] + offset[d]]);
668 auto const &value = *it_ks_scalar;
669 dp3m.mesh.rs_scalar[index] = d_op_val * value.real();
670 ++index;
671 dp3m.mesh.rs_scalar[index] = d_op_val * value.imag();
672 ++index;
673 std::advance(it_ks_scalar, 1);
674 });
675#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
676 if (dp3m.heffte.world_size == 1) {
677 unsigned int constexpr d_ks[3] = {2u, 0u, 1u};
678 std::size_t index_row_major_r2c = 0u;
679 for_each_3d_order<FFTConfig::k_space_order>(
680 mesh_start, local_size, local_index, [&]() {
681 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
682 auto const global_index = local_origin + local_index;
683 auto const d_op_val =
684 FloatType(dp3m.d_op[d][global_index[d_ks[d]]]);
685 dp3m.heffte.ks_B_field_storage[index_row_major_r2c] =
686 d_op_val * dp3m.heffte.ks_scalar[index_row_major_r2c];
687#ifndef NDEBUG
688 if (not dp3m.params.tuning) {
689 auto constexpr KX = 2, KY = 0, KZ = 1;
690 auto const index_fft_legacy =
691 local_index[KZ] + line_stride * local_index[KY] +
692 plane_stride * local_index[KX];
693 auto const old_value = std::complex<FloatType>{
694 dp3m.mesh.rs_scalar[2 * index_fft_legacy],
695 dp3m.mesh.rs_scalar[2 * index_fft_legacy + 1]};
696 auto const &new_value =
697 dp3m.heffte.ks_B_field_storage[index_row_major_r2c];
698 assert(heffte_almost_equal(new_value, old_value));
699 }
700#endif // not NDEBUG
701 ++index_row_major_r2c;
702 }
703 });
704 dp3m.heffte.fft->backward(dp3m.heffte.ks_B_field_storage.data(),
705 dp3m.heffte.rs_B_fields_no_halo[d].data());
706 // pad zeros around the B-field in real space for ghost layers
707 dp3m.heffte.rs_B_fields[d] =
708 pad_with_zeros_discard_imag<FFTConfig::r_space_order,
710 std::span(dp3m.heffte.rs_B_fields_no_halo[d]),
711 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
712 dp3m.local_mesh.n_halo_ur);
713 // communicate ghost layers of the B-field in real space
714 dp3m.heffte.halo_comm.spread_grid(::comm_cart,
715 dp3m.heffte.rs_B_fields[d].data(),
716 dp3m.local_mesh.dim);
717 }
718#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
719 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
720 // communicate ghost layers of the B-field in real space
721 dp3m.fft_buffers->perform_scalar_halo_spread();
722 // assign torque component from mesh to particle
723 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
724#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
725 auto &particle_data = *system.cell_structure;
726#else
727 auto &particle_data = particles;
728#endif
729 Utils::integral_parameter<int, AssignTorques, p3m_min_cao, p3m_max_cao>(
730 dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs,
731 particle_data);
732 }
733
734 /***************************
735 DIPOLAR FORCES (k-space)
736 ****************************/
737 // Compute forces after torques because the algorithm below overwrites the
738 // grids dp3m.mesh.rs_fields !
739 // Note: I'll do here 9 inverse FFTs. By symmetry, we can reduce this
740 // number to 6 !
741 /* fill in ks_scalar array for force calculation */
742 {
743 auto it_force = dp3m.g_force.begin();
744 auto it_ks_scalar = dp3m.ks_scalar.begin();
745 std::size_t index = 0u;
746 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
747 auto constexpr KX = 2, KY = 0, KZ = 1;
748 auto const shift = local_index + dp3m.mesh.start;
749 auto const &d_op = dp3m.d_op[0u];
750 auto const &mesh_dip = dp3m.mesh.rs_fields;
751 // Re(mu)*k
752 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
753 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
754 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
755 ++index;
756 // Im(mu)*k
757 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
758 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
759 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
760 ++index;
761 *it_ks_scalar = {*it_force * im, *it_force * (-re)};
762 std::advance(it_force, 1);
763 std::advance(it_ks_scalar, 1);
764 });
765 }
766
767#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
768 if (dp3m.heffte.world_size == 1) {
769 std::size_t index_row_major_r2c = 0u;
770 for_each_3d_order<FFTConfig::k_space_order>(
771 mesh_start, local_size, local_index, [&]() {
772 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
773 auto const global_index = local_origin + local_index;
774 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
775 auto const value =
776 dp3m.heffte.g_force[index_row_major_r2c] *
777 (mesh_dip[0u][index_row_major_r2c] *
778 FloatType(dp3m.d_op[0u][global_index[1u]]) +
779 mesh_dip[1u][index_row_major_r2c] *
780 FloatType(dp3m.d_op[1u][global_index[2u]]) +
781 mesh_dip[2u][index_row_major_r2c] *
782 FloatType(dp3m.d_op[2u][global_index[0u]]));
783 dp3m.heffte.ks_scalar[index_row_major_r2c] = {value.imag(),
784 -value.real()};
785#ifndef NDEBUG
786 if (not dp3m.params.tuning) {
787 auto constexpr KX = 2, KY = 0, KZ = 1;
788 auto const index_fft_legacy = local_index[KZ] +
789 line_stride * local_index[KY] +
790 plane_stride * local_index[KX];
791 assert(heffte_almost_equal(
792 dp3m.heffte.ks_scalar[index_row_major_r2c],
793 dp3m.ks_scalar[index_fft_legacy]));
794 }
795#endif // not NDEBUG
796 ++index_row_major_r2c;
797 }
798 });
799 }
800#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
801
802 /* Force component loop */
803 for (int d = 0; d < 3; d++) {
804 std::size_t index = 0u;
805 auto it_ks_scalar = dp3m.ks_scalar.begin();
806 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
807 auto constexpr KX = 2, KY = 0, KZ = 1;
808 auto const shift = local_index + dp3m.mesh.start;
809 auto const &d_op = dp3m.d_op[0u];
810 auto const &mesh_dip = dp3m.mesh.rs_fields;
811 auto const d_op_val = FloatType(d_op[shift[d]]);
812 auto const f = *it_ks_scalar * d_op_val;
813 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.real();
814 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.real();
815 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.real();
816 ++index;
817 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.imag();
818 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.imag();
819 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.imag();
820 ++index;
821 std::advance(it_ks_scalar, 1);
822 });
823
824#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
825 if (dp3m.heffte.world_size == 1) {
826 std::size_t index_row_major_r2c = 0u;
827 for_each_3d_order<FFTConfig::k_space_order>(
828 mesh_start, local_size, local_index, [&]() {
829 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
830 auto constexpr KX = 1, KY = 2, KZ = 0;
831 auto const global_index = local_origin + local_index;
832 auto const remapped_index =
833 local_index[KZ] + local_index[KY] * local_size[KZ] +
834 local_index[KX] * local_size[KZ] * local_size[KY];
835 auto const d_op_val =
836 FloatType(dp3m.d_op[d][global_index[d]]);
837 auto &mesh_dip = dp3m.heffte.ks_dipole_density;
838 mesh_dip[0u][index_row_major_r2c] =
839 FloatType(dp3m.d_op[d][global_index[2u]]) * d_op_val *
840 dp3m.heffte.ks_scalar[remapped_index];
841 mesh_dip[1u][index_row_major_r2c] =
842 FloatType(dp3m.d_op[d][global_index[0u]]) * d_op_val *
843 dp3m.heffte.ks_scalar[remapped_index];
844 mesh_dip[2u][index_row_major_r2c] =
845 FloatType(dp3m.d_op[d][global_index[1u]]) * d_op_val *
846 dp3m.heffte.ks_scalar[remapped_index];
847#ifndef NDEBUG
848 if (not FFTConfig::use_r2c and not dp3m.params.tuning) {
849 auto const index_fft_legacy = local_index[2] +
850 line_stride * local_index[1] +
851 plane_stride * local_index[0];
852 for (int j = 0; j < 3; ++j) {
853 auto const old_value = std::complex<FloatType>{
854 dp3m.mesh.rs_fields[j][2 * index_fft_legacy],
855 dp3m.mesh.rs_fields[j][2 * index_fft_legacy + 1]};
856 auto const &new_value = mesh_dip[j][index_row_major_r2c];
857 assert(heffte_almost_equal(new_value, old_value));
858 }
859 }
860#endif // not NDEBUG
861 ++index_row_major_r2c;
862 }
863 });
864 for (int dir = 0u; dir < 3u; ++dir) {
865 dp3m.heffte.fft->backward(
866 dp3m.heffte.ks_dipole_density[dir].data(),
867 dp3m.heffte.rs_B_fields_no_halo[dir].data());
868 // pad zeros around the B-field in real space for ghost layers
869 dp3m.heffte.rs_B_fields[d] =
870 pad_with_zeros_discard_imag<FFTConfig::r_space_order,
872 std::span(dp3m.heffte.rs_B_fields_no_halo[dir]),
873 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
874 dp3m.local_mesh.n_halo_ur);
875 }
876 // communicate ghost layers of the B-field in real space
877 auto rs_fields =
878 std::array<FloatType *, 3u>{{dp3m.heffte.rs_B_fields[0u].data(),
879 dp3m.heffte.rs_B_fields[1u].data(),
880 dp3m.heffte.rs_B_fields[2u].data()}};
881 dp3m.heffte.halo_comm.spread_grid(::comm_cart, rs_fields,
882 dp3m.local_mesh.dim);
883 }
884#endif // ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
885 for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
886 dp3m.fft->backward_fft(rs_mesh);
887 }
888 // communicate ghost layers of the B-field in real space
889 dp3m.fft_buffers->perform_vector_halo_spread();
890 // assign force component from mesh to particle
891 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
892#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
893 auto &particle_data = *system.cell_structure;
894#else
895 auto &particle_data = particles;
896#endif
897 Utils::integral_parameter<int, AssignForcesDip, p3m_min_cao,
899 dp3m.params.cao, dp3m, dipole_prefac * Utils::sqr(wavenumber), d_rs,
900 particle_data);
901 }
902 } /* if (dp3m.sum_mu2 > 0) */
903 } /* if (force_flag) */
904
905 if (dp3m.params.epsilon != P3M_EPSILON_METALLIC) {
906 auto const surface_term =
907 calc_surface_term(force_flag, energy_flag or npt_flag, particles);
908 if (this_node == 0) {
909 energy += surface_term;
910 }
911 }
912#ifdef ESPRESSO_NPT
913 if (npt_flag) {
915 }
916#endif
917 if (not energy_flag) {
918 energy = 0.;
919 }
920
921 return energy;
922}
923
924template <typename FloatType, Arch Architecture, class FFTConfig>
926 bool force_flag, bool energy_flag, ParticleRange const &particles) {
927 auto const &box_geo = *get_system().box_geo;
928 auto const pref = prefactor * 4. * std::numbers::pi / box_geo.volume() /
929 (2. * dp3m.params.epsilon + 1.);
930 auto const n_local_part = particles.size();
931
932 // We put all the dipolar momenta in a the arrays mx,my,mz according to the
933 // id-number of the particles
934 std::vector<double> mx(n_local_part);
935 std::vector<double> my(n_local_part);
936 std::vector<double> mz(n_local_part);
937
938 std::size_t ip = 0u;
939 for (auto const &p : particles) {
940 auto const dip = p.calc_dip();
941 mx[ip] = dip[0u];
942 my[ip] = dip[1u];
943 mz[ip] = dip[2u];
944 ip++;
945 }
946
947 // we will need the sum of all dipolar momenta vectors
948 auto local_dip = Utils::Vector3d{};
949 for (std::size_t i = 0u; i < n_local_part; i++) {
950 local_dip[0u] += mx[i];
951 local_dip[1u] += my[i];
952 local_dip[2u] += mz[i];
953 }
954 auto const box_dip =
955 boost::mpi::all_reduce(comm_cart, local_dip, std::plus<>());
956
957 double energy = 0.;
958 if (energy_flag) {
959 double sum_e = 0.;
960 for (std::size_t i = 0u; i < n_local_part; i++) {
961 sum_e += mx[i] * box_dip[0] + my[i] * box_dip[1] + mz[i] * box_dip[2];
962 }
963 energy =
964 0.5 * pref * boost::mpi::all_reduce(comm_cart, sum_e, std::plus<>());
965 }
966
967 if (force_flag) {
968
969 std::vector<double> sumix(n_local_part);
970 std::vector<double> sumiy(n_local_part);
971 std::vector<double> sumiz(n_local_part);
972
973 for (std::size_t i = 0u; i < n_local_part; i++) {
974 sumix[i] = my[i] * box_dip[2u] - mz[i] * box_dip[1u];
975 sumiy[i] = mz[i] * box_dip[0u] - mx[i] * box_dip[2u];
976 sumiz[i] = mx[i] * box_dip[1u] - my[i] * box_dip[0u];
977 }
978
979 ip = 0u;
980 for (auto &p : particles) {
981 auto &torque = p.torque();
982 torque[0u] -= pref * sumix[ip];
983 torque[1u] -= pref * sumiy[ip];
984 torque[2u] -= pref * sumiz[ip];
985 ip++;
986 }
987 }
988
989 return energy;
990}
991
992template <typename FloatType, Arch Architecture, class FFTConfig>
993void DipolarP3MHeffte<FloatType, Architecture,
994 FFTConfig>::calc_influence_function_force() {
995 dp3m.g_force = grid_influence_function_dipolar<FloatType, 3, P3M_BRILLOUIN,
996 FFTConfig::k_space_order>(
997 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
998 get_system().box_geo->length_inv());
999#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
1000 if (dp3m.heffte.world_size == 1) {
1001 dp3m.heffte.g_force =
1003 FFTConfig::k_space_order>(
1004 dp3m.params, dp3m.heffte.fft->ks_local_ld_index(),
1005 dp3m.heffte.fft->ks_local_ur_index(),
1006 get_system().box_geo->length_inv());
1007 if constexpr (FFTConfig::use_r2c) {
1008 influence_function_r2c<FFTConfig::r2c_dir>(
1009 dp3m.heffte.g_force, dp3m.params.mesh,
1010 dp3m.heffte.fft->ks_local_size(),
1011 dp3m.heffte.fft->ks_local_ld_index());
1012 }
1013 }
1014#endif
1015}
1016
1017template <typename FloatType, Arch Architecture, class FFTConfig>
1018void DipolarP3MHeffte<FloatType, Architecture,
1019 FFTConfig>::calc_influence_function_energy() {
1020 dp3m.g_energy = grid_influence_function_dipolar<FloatType, 2, P3M_BRILLOUIN,
1021 FFTConfig::k_space_order>(
1022 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
1023 get_system().box_geo->length_inv());
1024#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
1025 if (dp3m.heffte.world_size == 1) {
1026 dp3m.heffte.g_energy =
1028 FFTConfig::k_space_order>(
1029 dp3m.params, dp3m.heffte.fft->ks_local_ld_index(),
1030 dp3m.heffte.fft->ks_local_ur_index(),
1031 get_system().box_geo->length_inv());
1032 if constexpr (FFTConfig::use_r2c) {
1033 influence_function_r2c<FFTConfig::r2c_dir>(
1034 dp3m.heffte.g_energy, dp3m.params.mesh,
1035 dp3m.heffte.fft->ks_local_size(),
1036 dp3m.heffte.fft->ks_local_ld_index());
1037 }
1038 }
1039#endif
1040}
1041
1042template <typename FloatType, Arch Architecture, class FFTConfig>
1045 int m_mesh_max = -1, m_mesh_min = -1;
1046 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
1047
1048public:
1049 DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m,
1050 double prefactor, int timings,
1051 decltype(m_tune_limits) tune_limits)
1052 : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m},
1053 m_tune_limits{std::move(tune_limits)} {}
1054
1055 P3MParameters &get_params() override { return dp3m.params; }
1056
1057 void on_solver_change() const override { m_system.on_dipoles_change(); }
1058
1059 std::optional<std::string>
1060 layer_correction_veto_r_cut(double) const override {
1061 return {};
1062 }
1063
1064 void setup_logger(bool verbose) override {
1065 auto const &box_geo = *m_system.box_geo;
1066 m_logger = std::make_unique<TuningLogger>(
1067 verbose and this_node == 0, "DipolarP3M", TuningLogger::Mode::Dipolar);
1068 m_logger->tuning_goals(dp3m.params.accuracy, m_prefactor,
1069 box_geo.length()[0], dp3m.sum_dip_part,
1070 dp3m.sum_mu2);
1071 m_logger->log_tuning_start();
1072 }
1073
1074 std::tuple<double, double, double, double>
1076 double r_cut_iL) const override {
1077
1078 double alpha_L, rs_err, ks_err;
1079 auto const &box_geo = *m_system.box_geo;
1080
1081 /* calc maximal real space error for setting */
1082 rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL,
1083 dp3m.sum_dip_part, dp3m.sum_mu2, 0.001);
1084 // alpha cannot be zero for dipoles because real-space formula breaks down
1085
1086 if (std::numbers::sqrt2 * rs_err > dp3m.params.accuracy) {
1087 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
1088 alpha_L = dp3m_rtbisection(
1089 box_geo.length()[0], r_cut_iL, dp3m.sum_dip_part, dp3m.sum_mu2,
1090 0.0001 * box_geo.length()[0], 5. * box_geo.length()[0], 0.0001,
1091 dp3m.params.accuracy);
1092 } else {
1093 /* even alpha=0 is ok, however, we cannot choose it since it kills the
1094 k-space error formula.
1095 Anyways, this very likely NOT the optimal solution */
1096 alpha_L = 0.1;
1097 }
1098
1099 /* calculate real-space and k-space error for this alpha_L */
1100 rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL,
1101 dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L);
1102 ks_err = dp3m_k_space_error(box_geo.length()[0], mesh[0], cao,
1103 dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L);
1104
1105 return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L};
1106 }
1107
1108 void determine_mesh_limits() override {
1109 if (dp3m.params.mesh[0] == -1) {
1110 /* simple heuristic to limit the tried meshes if the accuracy cannot
1111 be obtained with smaller meshes, but normally not all these
1112 meshes have to be tested */
1113 auto const expo = std::log(std::cbrt(dp3m.sum_dip_part)) / std::log(2.);
1114 /* Medium-educated guess for the minimal mesh */
1115 m_mesh_min = static_cast<int>(std::round(std::pow(2., std::floor(expo))));
1116 /* avoid using more than 1 GB of FFT arrays */
1117 m_mesh_max = 128;
1118 if (m_tune_limits.first) {
1119 m_mesh_min = *m_tune_limits.first;
1120 }
1121 if (m_tune_limits.second) {
1122 m_mesh_max = *m_tune_limits.second;
1123 }
1124 } else {
1125 m_mesh_min = m_mesh_max = dp3m.params.mesh[0];
1126 m_logger->report_fixed_mesh(dp3m.params.mesh);
1127 }
1128 }
1129
1131 auto tuned_params = TuningAlgorithm::Parameters{};
1132 auto time_best = time_sentinel;
1133 for (auto tmp_mesh = m_mesh_min; tmp_mesh <= m_mesh_max; tmp_mesh += 2) {
1134 auto trial_params = TuningAlgorithm::Parameters{};
1135 trial_params.mesh = Utils::Vector3i::broadcast(tmp_mesh);
1136 trial_params.cao = cao_best;
1137
1138 auto const trial_time =
1139 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
1140 trial_params.alpha_L, trial_params.accuracy);
1141
1142 /* this mesh does not work at all */
1143 if (trial_time < 0.)
1144 continue;
1145
1146 /* the optimum r_cut for this mesh is the upper limit for higher meshes,
1147 everything else is slower */
1148 m_r_cut_iL_max = trial_params.r_cut_iL;
1149
1150 if (trial_time < time_best) {
1151 /* new optimum */
1153 tuned_params = trial_params;
1154 time_best = tuned_params.time = trial_time;
1155 } else if (trial_time > time_best + time_granularity or
1157 /* no hope of further optimisation */
1158 break;
1159 }
1160 }
1161 return tuned_params;
1162 }
1163};
1164
1165template <typename FloatType, Arch Architecture, class FFTConfig>
1167 auto &system = get_system();
1168 auto const &box_geo = *system.box_geo;
1169 if (dp3m.params.alpha_L == 0. and dp3m.params.alpha != 0.) {
1170 dp3m.params.alpha_L = dp3m.params.alpha * box_geo.length()[0];
1171 }
1172 if (dp3m.params.r_cut_iL == 0. and dp3m.params.r_cut != 0.) {
1173 dp3m.params.r_cut_iL = dp3m.params.r_cut * box_geo.length_inv()[0];
1174 }
1175 if (not is_tuned()) {
1177 if (dp3m.sum_dip_part == 0) {
1178 throw std::runtime_error(
1179 "DipolarP3M: no dipolar particles in the system");
1180 }
1181 try {
1183 system, dp3m, prefactor, tuning.timings, tuning.limits);
1184 parameters.setup_logger(tuning.verbose);
1185 // parameter ranges
1186 parameters.determine_mesh_limits();
1187 parameters.determine_r_cut_limits();
1188 parameters.determine_cao_limits(3);
1189 // run tuning algorithm
1190 parameters.tune();
1191 m_is_tuned = true;
1192 system.on_dipoles_change();
1193 } catch (...) {
1194 dp3m.params.tuning = false;
1195 throw;
1196 }
1197 }
1198 init();
1199}
1200
1201/** Tuning dipolar-P3M */
1202inline auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh,
1203 double mesh_i, int cao, double alpha_L_i) {
1204
1205 auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
1206 auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
1207 auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i);
1208 auto alias1 = 0.;
1209 auto alias2 = 0.;
1210
1211 Utils::Vector3i indices{};
1212 Utils::Vector3i nm{};
1213 Utils::Vector3d fnm{};
1215 mesh_start, mesh_stop, indices,
1216 [&]() {
1217 auto const norm_sq = nm.norm2();
1218 auto const ex = std::exp(-factor1 * norm_sq);
1219 auto const U2 = std::pow(Utils::product(fnm), 2 * cao);
1220 alias1 += Utils::sqr(ex) * norm_sq;
1221 alias2 += U2 * ex * std::pow(shift * nm, 3) / norm_sq;
1222 },
1223 [&](unsigned dim, int n) {
1224 nm[dim] = shift[dim] + n * mesh;
1225 fnm[dim] = math::sinc(nm[dim] * mesh_i);
1226 });
1227
1228 return std::make_pair(alias1, alias2);
1229}
1230
1231/** Calculate the k-space error of dipolar-P3M */
1232inline double dp3m_k_space_error(double box_size, int mesh, int cao,
1233 std::size_t n_c_part, double sum_q2,
1234 double alpha_L) {
1235
1236 auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao);
1237 auto const mesh_i = 1. / static_cast<double>(mesh);
1238 auto const alpha_L_i = 1. / alpha_L;
1239 auto const mesh_stop = Utils::Vector3i::broadcast(mesh / 2);
1240 auto const mesh_start = -mesh_stop;
1241 auto indices = Utils::Vector3i{};
1242 auto values = Utils::Vector3d{};
1243 auto he_q = 0.;
1244
1246 mesh_start, mesh_stop, indices,
1247 [&]() {
1248 if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
1249 auto const n2 = indices.norm2();
1250 auto const cs = Utils::product(values);
1251 auto const [alias1, alias2] =
1252 dp3m_tune_aliasing_sums(indices, mesh, mesh_i, cao, alpha_L_i);
1253 auto const d =
1254 alias1 - Utils::sqr(alias2 / cs) /
1255 Utils::int_pow<3>(static_cast<double>(n2));
1256 /* at high precision, d can become negative due to extinction;
1257 also, don't take values that have no significant digits left*/
1258 if (d > 0. and std::fabs(d / alias1) > round_error_prec)
1259 he_q += d;
1260 }
1261 },
1262 [&values, &mesh_i, cotangent_sum](unsigned dim, int n) {
1263 values[dim] = cotangent_sum(n, mesh_i);
1264 });
1265
1266 return 8. * Utils::sqr(std::numbers::pi) / 3. * sum_q2 *
1267 sqrt(he_q / static_cast<double>(n_c_part)) /
1268 Utils::int_pow<4>(box_size);
1269}
1270
1271/** Calculate the value of the errors for the REAL part of the force in terms
1272 * of the splitting parameter alpha of Ewald. Based on eq. (33) @cite wang01a.
1273 *
1274 * Please note that in this more refined approach we don't use
1275 * eq. (37), but eq. (33) which maintains all the powers in alpha.
1276 */
1277inline double dp3m_real_space_error(double box_size, double r_cut_iL,
1278 std::size_t n_c_part, double sum_q2,
1279 double alpha_L) {
1280 auto constexpr exp_min = -708.4; // for IEEE-compatible double
1281 double d_error_f, d_cc, d_dc, d_con;
1282
1283 auto const d_rcut = r_cut_iL * box_size;
1284 auto const d_rcut2 = Utils::sqr(d_rcut);
1285 auto const d_rcut4 = Utils::sqr(d_rcut2);
1286
1287 auto const d_a2 = Utils::sqr(alpha_L) / Utils::sqr(box_size);
1288 auto const exponent = -d_a2 * d_rcut2;
1289 auto const exp_term = (exponent < exp_min) ? 0. : std::exp(exponent);
1290 auto const d_c = sum_q2 * exp_term;
1291
1292 d_cc = 4. * Utils::sqr(d_a2) * Utils::sqr(d_rcut2) + 6. * d_a2 * d_rcut2 + 3.;
1293
1294 d_dc = 8. * Utils::int_pow<3>(d_a2) * Utils::int_pow<3>(d_rcut2) +
1295 20. * Utils::sqr(d_a2) * d_rcut4 + 30. * d_a2 * d_rcut2 + 15.;
1296
1297 d_con = 1. / sqrt(Utils::int_pow<3>(box_size) * Utils::sqr(d_a2) * d_rcut *
1298 Utils::sqr(d_rcut4) * static_cast<double>(n_c_part));
1299
1300 d_error_f = d_c * d_con *
1301 sqrt((13. / 6.) * Utils::sqr(d_cc) +
1302 (2. / 15.) * Utils::sqr(d_dc) - (13. / 15.) * d_cc * d_dc);
1303
1304 return d_error_f;
1305}
1306
1307/** Using bisection, find the root of a function "func-tuned_accuracy/sqrt(2.)"
1308 * known to lie between x1 and x2. The root, returned as rtbis, will be
1309 * refined until its accuracy is \f$\pm\f$ @p xacc.
1310 */
1311double dp3m_rtbisection(double box_size, double r_cut_iL, std::size_t n_c_part,
1312 double sum_q2, double x1, double x2, double xacc,
1313 double tuned_accuracy) {
1314 constexpr int JJ_RTBIS_MAX = 40;
1315
1316 auto const constant = tuned_accuracy / std::numbers::sqrt2;
1317
1318 auto const f1 =
1319 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, x1) -
1320 constant;
1321 auto const f2 =
1322 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, x2) -
1323 constant;
1324 if (f1 * f2 >= 0.0) {
1325 throw std::runtime_error(
1326 "Root must be bracketed for bisection in dp3m_rtbisection");
1327 }
1328 // Orient the search dx, and set rtb to x1 or x2 ...
1329 double dx;
1330 double rtb = f1 < 0.0 ? (dx = x2 - x1, x1) : (dx = x1 - x2, x2);
1331 for (int j = 1; j <= JJ_RTBIS_MAX; j++) {
1332 auto const xmid = rtb + (dx *= 0.5);
1333 auto const fmid =
1334 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, xmid) -
1335 constant;
1336 if (fmid <= 0.0)
1337 rtb = xmid;
1338 if (fabs(dx) < xacc || fmid == 0.0)
1339 return rtb;
1340 }
1341 throw std::runtime_error("Too many bisections in dp3m_rtbisection");
1342}
1343
1345 auto const &system = get_system();
1346 auto const &box_geo = *system.box_geo;
1347 auto const &local_geo = *system.local_geo;
1348 for (auto i = 0u; i < 3u; i++) {
1349 /* check k-space cutoff */
1350 if (dp3m_params.cao_cut[i] >= box_geo.length_half()[i]) {
1351 std::stringstream msg;
1352 msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i]
1353 << " is larger than half of box dimension " << box_geo.length()[i];
1354 throw std::runtime_error(msg.str());
1355 }
1356 if (dp3m_params.cao_cut[i] >= local_geo.length()[i]) {
1357 std::stringstream msg;
1358 msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i]
1359 << " is larger than local box dimension " << local_geo.length()[i];
1360 throw std::runtime_error(msg.str());
1361 }
1362 }
1363
1364 if ((box_geo.length()[0] != box_geo.length()[1]) or
1365 (box_geo.length()[1] != box_geo.length()[2])) {
1366 throw std::runtime_error("DipolarP3M: requires a cubic box");
1367 }
1368}
1369
1371 auto const &box_geo = *get_system().box_geo;
1372 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
1373 throw std::runtime_error(
1374 "DipolarP3M: requires periodicity (True, True, True)");
1375 }
1376}
1377
1379 auto const &local_geo = *get_system().local_geo;
1380 if (local_geo.cell_structure_type() != CellStructureType::REGULAR and
1381 local_geo.cell_structure_type() != CellStructureType::HYBRID) {
1382 throw std::runtime_error(
1383 "DipolarP3M: requires the regular or hybrid decomposition cell system");
1384 }
1385 if (::communicator.size > 1 and
1386 local_geo.cell_structure_type() == CellStructureType::HYBRID) {
1387 throw std::runtime_error(
1388 "DipolarP3M: does not work with the hybrid decomposition cell system, "
1389 "if using more than one MPI node");
1390 }
1391}
1392
1394 auto const &node_grid = ::communicator.node_grid;
1395 if (node_grid[0] < node_grid[1] or node_grid[1] < node_grid[2]) {
1396 throw std::runtime_error(
1397 "DipolarP3M: node grid must be sorted, largest first");
1398 }
1399}
1400
1401template <typename FloatType, Arch Architecture, class FFTConfig>
1403 auto const &box_geo = *get_system().box_geo;
1404 dp3m.params.r_cut = dp3m.params.r_cut_iL * box_geo.length()[0];
1405 dp3m.params.alpha = dp3m.params.alpha_L * box_geo.length_inv()[0];
1406 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
1407 dp3m.local_mesh.recalc_ld_pos(dp3m.params);
1408 sanity_checks_boxl();
1409 calc_influence_function_force();
1410 calc_influence_function_energy();
1411 dp3m.energy_correction = 0.;
1412#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
1413 if (dp3m.heffte.world_size == 1) {
1414 dp3m.heffte.halo_comm.resize(::comm_cart, dp3m.local_mesh);
1415 }
1416#endif
1417}
1418
1419template <typename FloatType, Arch Architecture, class FFTConfig>
1420void DipolarP3MHeffte<FloatType, Architecture,
1421 FFTConfig>::calc_energy_correction() {
1422 auto const &box_geo = *get_system().box_geo;
1423 auto const Ukp3m = calc_average_self_energy_k_space() * box_geo.volume();
1424 auto const Ewald_volume = Utils::int_pow<3>(dp3m.params.alpha_L);
1425 auto const Eself = -2. * Ewald_volume * std::numbers::inv_sqrtpi / 3.;
1426 dp3m.energy_correction =
1427 -dp3m.sum_mu2 * (Ukp3m + Eself + 2. * std::numbers::pi / 3.);
1428}
1429
1430#ifdef ESPRESSO_NPT
1431template <typename FloatType, Arch Architecture, class FFTConfig>
1432void DipolarP3MHeffte<FloatType, Architecture,
1433 FFTConfig>::npt_add_virial_contribution(double energy)
1434 const {
1435 get_system().npt_add_virial_contribution(energy);
1436}
1437#endif // ESPRESSO_NPT
1438
1439#endif // ESPRESSO_DP3M
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
Vector implementation and trait types for boost qvm interoperability.
Describes a cell structure / cell system.
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
TuningAlgorithm::Parameters get_time() override
DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
void on_solver_change() const override
void determine_mesh_limits() override
P3MParameters & get_params() override
std::optional< std::string > layer_correction_veto_r_cut(double) const override
void setup_logger(bool verbose) override
A range of particles.
base_type::size_type size() const
Main system class.
std::shared_ptr< LocalBox > local_geo
void npt_add_virial_contribution(double energy)
Definition npt.cpp:137
std::shared_ptr< CellStructure > cell_structure
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
T norm() const
Definition Vector.hpp:160
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
Definition Vector.hpp:132
void zfill(std::size_t size)
Fill cache with zero-initialized data.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
constexpr auto round_error_prec
Precision below which a double-precision float is assumed to be zero.
Definition config.hpp:38
__device__ void vector_product(float const *a, float const *b, float *out)
static int count_magnetic_particles(System::System const &system)
Definition dlc.cpp:399
P3M algorithm for long-range magnetic dipole-dipole interaction.
double dp3m_real_space_error(double box_size, double r_cut_iL, std::size_t n_c_part, double sum_q2, double alpha_L)
Calculate the value of the errors for the REAL part of the force in terms of the splitting parameter ...
double dp3m_rtbisection(double box_size, double r_cut_iL, std::size_t n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy)
Compute the value of alpha through a bisection method.
auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, double mesh_i, int cao, double alpha_L_i)
Tuning dipolar-P3M.
double dp3m_k_space_error(double box_size, int mesh, int cao, std::size_t n_c_part, double sum_q2, double alpha_L)
Calculate the k-space error of dipolar-P3M.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
Routines, row decomposition, data structures and communication for the 3D-FFT.
auto pad_with_zeros_discard_imag(std::span< T > cropped_array, Utils::Vector3i const &cropped_dim, Utils::Vector3i const &pad_left, Utils::Vector3i const &pad_right)
Pad a 3D matrix with zeros to restore halo regions.
auto extract_block(Container const &in_array, Utils::Vector3i const &dimensions, Utils::Vector3i const &start, Utils::Vector3i const &stop)
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
std::vector< FloatType > grid_influence_function_dipolar(P3MParameters const &params, Utils::Vector3i const &n_start, Utils::Vector3i const &n_stop, Utils::Vector3d const &inv_box_l)
Map influence function over a grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, WeightsStorage< cao > const &weights, Kernel kernel)
P3M grid interpolation.
constexpr int p3m_min_cao
Minimal charge assignment order.
Definition math.hpp:48
constexpr int p3m_max_cao
Maximal charge assignment order.
Definition math.hpp:50
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
Definition math.hpp:38
System & get_system()
T product(Vector< T, N > const &v)
Definition Vector.hpp:373
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
Definition sqr.hpp:28
decltype(auto) integral_parameter(T i, Args &&...args)
Generate a call table for an integral non-type template parameter.
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
Definition math.hpp:71
auto get_analytic_cotangent_sum_kernel(int cao)
Definition math.hpp:146
STL namespace.
Exports for the NpT code.
Common functions for dipolar and charge P3M.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
ESPRESSO_ATTR_ALWAYS_INLINE void kokkos_parallel_range_for(auto const &name, auto start, auto end, auto const &kernel)
Utils::Vector3i node_grid
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
double calc_surface_term(bool force_flag, bool energy_flag, ParticleRange const &particles) override
void scaleby_box_l() override
void dipole_assign(ParticleRange const &particles) override
Base class for the magnetostatics P3M algorithm.
double sum_mu2
Sum of square of magnetic dipoles.
p3m_interpolation_cache inter_weights
FloatType value_type
std::size_t sum_dip_part
number of dipolar particles.
p3m_send_mesh< FloatType > halo_comm
double energy_correction
cached k-space self-energy correction
void resize_heffte_buffers()
struct DipolarP3MState::@1 heffte
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
void sanity_checks_cell_structure() const
P3MParameters const & dp3m_params
Definition dp3m.hpp:56
void sanity_checks_periodicity() const
void sanity_checks_node_grid() const
void recalc_ld_pos(P3MParameters const &params)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
P3MParameters params
P3M base parameters.
P3MLocalMesh local_mesh
Local mesh geometry information for this MPI rank.
void operator()(auto &dp3m, double prefac, int d_rs, CellStructure &cell_structure) const
void operator()(auto &dp3m, double prefac, int d_rs, CellStructure &cell_structure) const