ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
p3m_heffte.impl.hpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2026 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include <config/config.hpp>
23
24#ifdef ESPRESSO_P3M
25
28
31#ifdef ESPRESSO_CUDA
34#endif // ESPRESSO_CUDA
36
37#include "electrostatics/p3m_heffte.hpp" // must be included after coulomb.hpp
38
39#include "p3m/P3MFFT.hpp"
41#include "p3m/TuningLogger.hpp"
43#include "p3m/for_each_3d.hpp"
45#include "p3m/math.hpp"
46
47#include "BoxGeometry.hpp"
48#include "LocalBox.hpp"
49#include "Particle.hpp"
51#include "PropagationMode.hpp"
52#include "actor/visitors.hpp"
53#include "aosoa_pack.hpp"
57#include "communication.hpp"
58#include "errorhandling.hpp"
60#include "npt.hpp"
61#include "p3m/send_mesh.hpp"
64#include "system/System.hpp"
65#include "tuning.hpp"
66
67#include <utils/Vector.hpp>
70#include <utils/math/sqr.hpp>
72
73#include <boost/mpi/collectives/all_reduce.hpp>
74#include <boost/mpi/collectives/broadcast.hpp>
75#include <boost/mpi/collectives/reduce.hpp>
76#include <boost/mpi/communicator.hpp>
77#include <boost/range/combine.hpp>
78#include <boost/range/numeric.hpp>
79
80#include <Kokkos_Core.hpp>
81#include <omp.h>
82
83#include <algorithm>
84#include <array>
85#include <cassert>
86#include <complex>
87#include <cstddef>
88#include <functional>
89#include <initializer_list>
90#include <numbers>
91#include <optional>
92#include <span>
93#include <sstream>
94#include <stdexcept>
95#include <string>
96#include <tuple>
97#include <utility>
98#include <vector>
99
100template <typename FloatType>
101std::complex<FloatType>
102multiply_complex_by_imaginary(std::complex<FloatType> const &z, FloatType k) {
103 // Perform the multiplication manually: (re + i*imag) * (i*k)
104 return std::complex<FloatType>(-z.imag() * k, z.real() * k);
105}
106
107template <typename FloatType>
108std::complex<FloatType>
109multiply_complex_by_real(std::complex<FloatType> const &z, FloatType k) {
110 // Perform the multiplication manually: (re + i*imag) * k
111 return std::complex<FloatType>(z.real() * k, z.imag() * k);
112}
113
115 Utils::Vector3i const &mesh) {
116 return mesh[0u] % node_grid[0u] == 0 and mesh[1u] % node_grid[1u] == 0 and
117 mesh[2u] % node_grid[2u] == 0;
118}
119
120template <typename FloatType, Arch Architecture, class FFTConfig>
121void CoulombP3MHeffte<FloatType, Architecture,
122 FFTConfig>::count_charged_particles() {
123 struct Res {
124 std::size_t local_n = std::size_t{0u};
125 double local_q = 0.0;
126 double local_q2 = 0.0;
127 };
128 Reduction::AddPartialResultKernel<Res> kernel = [](Res &acc, auto const &p) {
129 if (p.q() != 0.0) {
130 acc.local_n++;
131 acc.local_q2 += Utils::sqr(p.q());
132 acc.local_q += p.q();
133 }
134 };
135
136 Reduction::ReductionOp<Res> reduce = [](Res &a, Res const &b) {
137 a.local_n += b.local_n;
138 a.local_q += b.local_q;
139 a.local_q2 += b.local_q2;
140 };
141 auto res = reduce_over_local_particles(*(get_system().cell_structure), kernel,
142 reduce);
143
144 boost::mpi::all_reduce(comm_cart, res.local_n, p3m.sum_qpart, std::plus<>());
145 boost::mpi::all_reduce(comm_cart, res.local_q2, p3m.sum_q2, std::plus<>());
146 boost::mpi::all_reduce(comm_cart, res.local_q, p3m.square_sum_q,
147 std::plus<>());
148 p3m.square_sum_q = Utils::sqr(p3m.square_sum_q);
149}
150
151/** Calculate the optimal influence function of @cite hockney88a.
152 * (optimised for force calculations)
153 *
154 * Each node calculates only the values for its domain in k-space.
155 *
156 * See also: @cite hockney88a eq. 8-22 (p. 275). Note the somewhat
157 * different convention for the prefactors, which is described in
158 * @cite deserno98a @cite deserno98b.
159 */
160template <typename FloatType, Arch Architecture, class FFTConfig>
161void CoulombP3MHeffte<FloatType, Architecture,
162 FFTConfig>::calc_influence_function_force() {
163 p3m.g_force = grid_influence_function<FloatType, 1, P3M_BRILLOUIN,
164 FFTConfig::k_space_order>(
165 p3m.params, p3m.fft->ks_local_ld_index(), p3m.fft->ks_local_ur_index(),
166 get_system().box_geo->length_inv());
167 if constexpr (FFTConfig::use_r2c) {
168 influence_function_r2c<FFTConfig::r2c_dir>(p3m.g_force, p3m.params.mesh,
169 p3m.fft->ks_local_size(),
170 p3m.fft->ks_local_ld_index());
171 }
172}
173
174/** Calculate the influence function optimized for the energy and the
175 * self energy correction.
176 */
177template <typename FloatType, Arch Architecture, class FFTConfig>
178void CoulombP3MHeffte<FloatType, Architecture,
179 FFTConfig>::calc_influence_function_energy() {
180 p3m.g_energy = grid_influence_function<FloatType, 0, P3M_BRILLOUIN,
181 FFTConfig::k_space_order>(
182 p3m.params, p3m.fft->ks_local_ld_index(), p3m.fft->ks_local_ur_index(),
183 get_system().box_geo->length_inv());
184 if constexpr (FFTConfig::use_r2c) {
185 influence_function_r2c<FFTConfig::r2c_dir>(p3m.g_energy, p3m.params.mesh,
186 p3m.fft->ks_local_size(),
187 p3m.fft->ks_local_ld_index());
188 }
189}
190
191/** Aliasing sum used by @ref p3m_k_space_error. */
193 Utils::Vector3i const &mesh,
194 Utils::Vector3d const &mesh_i, int cao,
195 double alpha_L_i) {
196
197 auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
198 auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
199 auto constexpr exp_min = -708.4; // for IEEE-compatible double
200 auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i);
201 auto alias1 = 0.;
202 auto alias2 = 0.;
203
204 Utils::Vector3i indices{};
205 Utils::Vector3i nm{};
206 Utils::Vector3d fnm{};
208 mesh_start, mesh_stop, indices,
209 [&]() {
210 auto const norm_sq = nm.norm2();
211 auto const exponent = -factor1 * norm_sq;
212 auto const exp_limit = (exp_min + std::log(norm_sq)) / 2.;
213 auto const ex = (exponent < exp_limit) ? 0. : std::exp(exponent);
214 auto const energy = std::pow(Utils::product(fnm), 2 * cao);
215 alias1 += Utils::sqr(ex) / norm_sq;
216 alias2 += energy * ex * (shift * nm) / norm_sq;
217 },
218 [&](unsigned dim, int n) {
219 nm[dim] = shift[dim] + n * mesh[dim];
220 fnm[dim] = math::sinc(nm[dim] * mesh_i[dim]);
221 });
222
223 return std::make_pair(alias1, alias2);
224}
225
226/** Calculate the real space contribution to the rms error in the force (as
227 * described by Kolafa and Perram).
228 * \param pref Prefactor of Coulomb interaction.
229 * \param r_cut_iL rescaled real space cutoff for p3m method.
230 * \param n_c_part number of charged particles in the system.
231 * \param sum_q2 sum of square of charges in the system
232 * \param alpha_L rescaled Ewald splitting parameter.
233 * \param box_l box dimensions.
234 * \return real space error
235 */
236inline double p3m_real_space_error(double pref, double r_cut_iL,
237 std::size_t n_c_part, double sum_q2,
238 double alpha_L,
239 Utils::Vector3d const &box_l) {
240 auto const volume = Utils::product(box_l);
241 return (2. * pref * sum_q2 * exp(-Utils::sqr(r_cut_iL * alpha_L))) /
242 sqrt(static_cast<double>(n_c_part) * r_cut_iL * box_l[0] * volume);
243}
244
245/** Calculate the analytic expression of the error estimate for the
246 * P3M method in @cite hockney88a (eq. 8-23 p. 275) in
247 * order to obtain the rms error in the force for a system of N
248 * randomly distributed particles in a cubic box (k-space part).
249 * \param pref Prefactor of Coulomb interaction.
250 * \param mesh number of mesh points in one direction.
251 * \param cao charge assignment order.
252 * \param n_c_part number of charged particles in the system.
253 * \param sum_q2 sum of square of charges in the system
254 * \param alpha_L rescaled Ewald splitting parameter.
255 * \param box_l box dimensions.
256 * \return reciprocal (k) space error
257 */
258inline double p3m_k_space_error(double pref, Utils::Vector3i const &mesh,
259 int cao, std::size_t n_c_part, double sum_q2,
260 double alpha_L, Utils::Vector3d const &box_l) {
261
262 auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao);
263 auto const mesh_i = 1. / Utils::Vector3d(mesh);
264 auto const alpha_L_i = 1. / alpha_L;
265 auto const mesh_stop = mesh / 2;
266 auto const mesh_start = -mesh_stop;
267 auto indices = Utils::Vector3i{};
268 auto values = Utils::Vector3d{};
269 auto he_q = 0.;
270
272 mesh_start, mesh_stop, indices,
273 [&]() {
274 if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
275 auto const n2 = indices.norm2();
276 auto const cs = Utils::product(values);
277 auto const [alias1, alias2] =
278 p3m_tune_aliasing_sums(indices, mesh, mesh_i, cao, alpha_L_i);
279 auto const d = alias1 - Utils::sqr(alias2 / cs) / n2;
280 /* at high precision, d can become negative due to extinction;
281 also, don't take values that have no significant digits left*/
282 if (d > 0. and std::fabs(d / alias1) > round_error_prec) {
283 he_q += d;
284 }
285 }
286 },
287 [&values, &mesh_i, cotangent_sum](unsigned dim, int n) {
288 values[dim] = cotangent_sum(n, mesh_i[dim]);
289 });
290
291 return 2. * pref * sum_q2 * sqrt(he_q / static_cast<double>(n_c_part)) /
292 (box_l[1] * box_l[2]);
293}
294
295template <typename FloatType, Arch Architecture, class FFTConfig>
297 assert(p3m.params.mesh >= Utils::Vector3i::broadcast(1));
298 assert(p3m.params.cao >= p3m_min_cao and p3m.params.cao <= p3m_max_cao);
299 assert(p3m.params.alpha > 0.);
300
301 auto const &system = get_system();
302 auto const &box_geo = *system.box_geo;
303 auto const &local_geo = *system.local_geo;
304 auto const skin = system.cell_structure->get_verlet_skin();
305
306 p3m.params.cao3 = Utils::int_pow<3>(p3m.params.cao);
307 p3m.params.recalc_a_ai_cao_cut(box_geo.length());
308
309 sanity_checks();
310
311 auto const &solver = system.coulomb.impl->solver;
312 double elc_layer = 0.;
313 if (auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
314 elc_layer = actor->elc.space_layer;
315 }
316
317 p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer);
318 p3m.fft = std::make_shared<P3MFFT<FloatType, FFTConfig>>(
319 ::comm_cart, p3m.params.mesh, p3m.local_mesh.ld_no_halo,
320 p3m.local_mesh.ur_no_halo, ::communicator.node_grid);
321 auto const rs_array_size =
322 static_cast<std::size_t>(Utils::product(p3m.local_mesh.dim));
323 auto const rs_array_size_no_halo =
324 static_cast<std::size_t>(Utils::product(p3m.local_mesh.dim_no_halo));
325 auto const fft_mesh_size =
326 static_cast<std::size_t>(Utils::product(p3m.fft->ks_local_size()));
327 p3m.rs_charge_density.resize(rs_array_size);
328 p3m.ks_charge_density.resize(fft_mesh_size);
329 for (auto d : {0u, 1u, 2u}) {
330 p3m.ks_E_fields[d].resize(fft_mesh_size);
331 p3m.rs_E_fields[d].resize(rs_array_size);
332 p3m.rs_E_fields_no_halo[d].resize(rs_array_size_no_halo);
333 }
334 p3m.calc_differential_operator();
335
336 /* fix box length dependent constants */
337 scaleby_box_l();
338
339 count_charged_particles();
340}
341
342namespace {
343template <int cao> struct AssignCharge {
344 void operator()(auto &p3m, double q,
345 InterpolationWeights<cao> const &weights) {
346 using CoulombP3MState = std::remove_reference_t<decltype(p3m)>;
347 using value_type = CoulombP3MState::value_type;
348 p3m_interpolate(p3m.local_mesh, weights, [q, &p3m](int ind, double w) {
349 p3m.rs_charge_density[ind] += value_type(w * q);
350 });
351 }
352
353 void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos,
354 p3m_interpolation_cache &inter_weights) {
355 auto constexpr memory_order = Utils::MemoryOrder::ROW_MAJOR;
356 auto const weights = p3m_calculate_interpolation_weights<cao, memory_order>(
357 real_pos.as_span(), p3m.params.ai, p3m.local_mesh);
358 inter_weights.store(weights);
359 this->operator()(p3m, q, weights);
360 }
361
362 void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos) {
363 auto constexpr memory_order = Utils::MemoryOrder::ROW_MAJOR;
364 auto const weights = p3m_calculate_interpolation_weights<cao, memory_order>(
365 real_pos.as_span(), p3m.params.ai, p3m.local_mesh);
366 this->operator()(p3m, q, weights);
367 }
368
369 void operator()(auto &p3m, auto &cell_structure) {
370 using CoulombP3MState = std::remove_reference_t<decltype(p3m)>;
371 using value_type = CoulombP3MState::value_type;
372 auto constexpr memory_order = Utils::MemoryOrder::ROW_MAJOR;
373 auto const &aosoa = cell_structure.get_aosoa();
374 auto const n_part = cell_structure.count_local_particles();
375 p3m.inter_weights.zfill(n_part); // allocate buffer for parallel write
377 "InterpolateCharges", std::size_t{0u}, n_part, [&](auto p_index) {
378 auto const tid = omp_get_thread_num();
379 auto const pos = aosoa.get_span_at(aosoa.position, p_index);
380 auto const q = aosoa.charge(p_index);
381 auto const weights =
382 p3m_calculate_interpolation_weights<cao, memory_order>(
383 pos, p3m.params.ai, p3m.local_mesh);
384 p3m.inter_weights.store_at(p_index, weights);
386 p3m.local_mesh, weights, [&, tid, q](int ind, double w) {
387 p3m.rs_charge_density_kokkos(tid, ind) += value_type(w * q);
388 });
389 });
390 Kokkos::fence();
391 using execution_space = Kokkos::DefaultExecutionSpace;
392 int num_threads = execution_space().concurrency();
393 Kokkos::RangePolicy<execution_space> policy(std::size_t{0},
394 p3m.local_mesh.size);
395 Kokkos::parallel_for("ReduceInterpolatedCharges", policy,
396 [&p3m, num_threads](std::size_t const i) {
397 value_type acc{};
398 for (int tid = 0; tid < num_threads; ++tid) {
399 acc += p3m.rs_charge_density_kokkos(tid, i);
400 }
401 p3m.rs_charge_density.at(i) += acc;
402 });
403 Kokkos::fence();
404 }
405};
406} // namespace
407
408template <typename FloatType, Arch Architecture, class FFTConfig>
410 prepare_fft_mesh(true);
411
412 Utils::integral_parameter<int, AssignCharge, p3m_min_cao, p3m_max_cao>(
413 p3m.params.cao, p3m, *get_system().cell_structure);
414}
415
416template <typename FloatType, Arch Architecture, class FFTConfig>
418 double q, Utils::Vector3d const &real_pos, bool skip_cache) {
419 if (skip_cache) {
420 Utils::integral_parameter<int, AssignCharge, p3m_min_cao, p3m_max_cao>(
421 p3m.params.cao, p3m, q, real_pos);
422 } else {
423 Utils::integral_parameter<int, AssignCharge, p3m_min_cao, p3m_max_cao>(
424 p3m.params.cao, p3m, q, real_pos, p3m.inter_weights);
425 }
426}
427
428namespace {
429template <int cao> struct AssignForces {
430 void operator()(auto &p3m, auto force_prefac,
431 CellStructure &cell_structure) const {
432
433 assert(cao == p3m.inter_weights.cao());
434
435 auto const kernel = [&p3m](auto pref, auto &p_force, std::size_t p_index) {
436 auto const weights = p3m.inter_weights.template load<cao>(p_index);
437
438 Utils::Vector3d force{};
439 p3m_interpolate(p3m.local_mesh, weights,
440 [&force, &p3m](int ind, double w) {
441 force[0u] += w * double(p3m.rs_E_fields[0u][ind]);
442 force[1u] += w * double(p3m.rs_E_fields[1u][ind]);
443 force[2u] += w * double(p3m.rs_E_fields[2u][ind]);
444 });
445
446 auto const thread_id = omp_get_thread_num();
447 p_force(p_index, thread_id, 0) -= pref * force[0];
448 p_force(p_index, thread_id, 1) -= pref * force[1];
449 p_force(p_index, thread_id, 2) -= pref * force[2];
450 };
451
452 auto const n_part = cell_structure.count_local_particles();
453 auto const &aosoa = cell_structure.get_aosoa();
454 auto &local_force = cell_structure.get_local_force();
456 "AssignForces", std::size_t{0u}, n_part, [&](std::size_t p_index) {
457 if (auto const pref = aosoa.charge(p_index) * force_prefac) {
458 kernel(pref, local_force, p_index);
459 }
460 });
461 }
462};
463} // namespace
464
465inline auto calc_dipole_moment(boost::mpi::communicator const &comm,
466 auto const &cs, auto const &box_geo) {
467 auto const local_dip = reduce_over_local_particles<Utils::Vector3d>(
468 cs,
469 [&box_geo](Utils::Vector3d &acc, Particle const &p) {
470 acc += p.q() * box_geo.unfolded_position(p.pos(), p.image_box());
471 },
472 [](Utils::Vector3d &a, Utils::Vector3d const &b) { a = a + b; });
473 return boost::mpi::all_reduce(comm, local_dip, std::plus<>());
474}
475
476template <typename FloatType, Arch Architecture, class FFTConfig>
477void CoulombP3MHeffte<FloatType, Architecture,
478 FFTConfig>::kernel_ks_charge_density() {
479 // halo communication of real space charge density
480 p3m.halo_comm.gather_grid(comm_cart, p3m.rs_charge_density.data(),
481 p3m.local_mesh.dim);
482
483 // get real space charge density without ghost layers
484 auto charge_density_no_halos =
485 extract_block<Utils::MemoryOrder::ROW_MAJOR, FFTConfig::r_space_order>(
486 p3m.rs_charge_density, p3m.local_mesh.dim, p3m.local_mesh.n_halo_ld,
487 p3m.local_mesh.dim - p3m.local_mesh.n_halo_ur);
488
489 // Set up the FFT using the Heffte library.
490 // This is in global mesh coordinates without any ghost layers
491 // The memory layout has to be specified, so the parts of
492 // the mesh held by each MPI rank are assembled correctly.
493 p3m.fft->forward(charge_density_no_halos.data(),
494 p3m.ks_charge_density.data());
495}
496
497template <typename FloatType, Arch Architecture, class FFTConfig>
498void CoulombP3MHeffte<FloatType, Architecture,
499 FFTConfig>::kernel_rs_electric_field() {
500 auto const mesh_start = p3m.fft->ks_local_ld_index();
501 auto const mesh_stop = p3m.fft->ks_local_ur_index();
502 auto const &box_geo = *get_system().box_geo;
503
504 // i*k differentiation
505 auto const wavevector =
506 Utils::Vector3<FloatType>((2. * std::numbers::pi) * box_geo.length_inv());
507
508 // compute electric field, Eq. (3.49) @cite deserno00b
509 for_each_3d_lin<FFTConfig::k_space_order>(
510 mesh_start, mesh_stop,
511 [&](Utils::Vector3i const &indices, int local_index) {
512#ifdef ESPRESSO_ADDITIONAL_CHECKS
513 assert(local_index ==
514 Utils::get_linear_index<FFTConfig::k_space_order>(
515 indices - mesh_start, p3m.fft->ks_local_size()));
516#endif
517 auto const phi_hat = multiply_complex_by_real(
518 p3m.ks_charge_density[local_index], p3m.g_force[local_index]);
519
520 for (auto d : {0u, 1u, 2u}) {
521 // wave vector of the current mesh point
522 auto const k = FloatType(p3m.d_op[d][indices[d]]) * wavevector[d];
523 // electric field in k-space
524 p3m.ks_E_fields[d][local_index] =
526 }
527 });
528
529 // back-transform the k-space electric field to real space
530 auto const size = p3m.local_mesh.ur_no_halo - p3m.local_mesh.ld_no_halo;
531 auto const rs_mesh_size_no_halo = Utils::product(size);
532 for (auto d : {0u, 1u, 2u}) {
533 auto k_space = p3m.ks_E_fields[d].data();
534 auto r_space = p3m.rs_E_fields_no_halo[d].data();
535 p3m.fft->backward(k_space, r_space);
536
537 // add zeros around the E-field in real space to make room for ghost layers
538 auto const begin = p3m.rs_E_fields_no_halo[d].begin();
539 p3m.rs_E_fields[d] =
540 pad_with_zeros_discard_imag<FFTConfig::r_space_order,
542 std::span(begin, rs_mesh_size_no_halo), p3m.local_mesh.dim_no_halo,
543 p3m.local_mesh.n_halo_ld, p3m.local_mesh.n_halo_ur);
544 }
545
546 // ghost communicate the boundary layers of the E-field in real space
547 std::array<FloatType *, 3u> rs_fields = {{p3m.rs_E_fields[0u].data(),
548 p3m.rs_E_fields[1u].data(),
549 p3m.rs_E_fields[2u].data()}};
550 p3m.halo_comm.spread_grid(comm_cart, rs_fields, p3m.local_mesh.dim);
551}
552
553/** @details Calculate the long range electrostatics part of the pressure
554 * tensor. This is part \f$\Pi_{\textrm{dir}, \alpha, \beta}\f$ eq. (2.6)
555 * in @cite essmann95a. The part \f$\Pi_{\textrm{corr}, \alpha, \beta}\f$
556 * eq. (2.8) is not present here since M is the empty set in our simulations.
557 */
558template <typename FloatType, Arch Architecture, class FFTConfig>
561 auto const &box_geo = *get_system().box_geo;
562 Utils::Vector9d node_k_space_pressure_tensor{};
563
564 if (p3m.sum_q2 > 0.) {
566 kernel_ks_charge_density();
567
568 auto constexpr r2c_dir = FFTConfig::r2c_dir;
569 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
570 auto const &global_size = p3m.params.mesh;
571 auto const local_size = p3m.fft->ks_local_size();
572 auto const local_origin = p3m.fft->ks_local_ld_index();
573 auto const half_alpha_inv_sq = Utils::sqr(1. / 2. / p3m.params.alpha);
574 auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv();
575 auto const cutoff_left = 1 - local_origin[r2c_dir];
576 auto const cutoff_right = global_size[r2c_dir] / 2 - local_origin[r2c_dir];
577 auto local_index = Utils::Vector3i::broadcast(0);
578 auto &short_dim = local_index[r2c_dir];
579 auto diagonal = 0.;
580 std::size_t index = 0u;
581 for_each_3d_order<FFTConfig::k_space_order>(
582 mesh_start, local_size, local_index, [&]() {
583 if (short_dim <= cutoff_right) {
584 auto const global_index = local_index + local_origin;
585 auto const kx = p3m.d_op[0u][global_index[0u]] * wavevector[0u];
586 auto const ky = p3m.d_op[1u][global_index[1u]] * wavevector[1u];
587 auto const kz = p3m.d_op[2u][global_index[2u]] * wavevector[2u];
588 auto const norm_sq =
589 Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz);
590
591 if (norm_sq != 0.) {
592 auto cell_energy =
593 static_cast<double>(p3m.g_energy[index] *
594 std::norm(p3m.ks_charge_density[index]));
595 if (short_dim >= cutoff_left and short_dim <= cutoff_right - 1) {
596 // k-space symmetry: double counting except in the first and
597 // last planes of the short dimension; although the wavevector
598 // points in the opposite direction in the redundant region of
599 // k-space, the product of two components of the wavevector
600 // cancels out the negative sign
601 cell_energy *= 2.;
602 }
603 auto const vterm = -2. * (1. / norm_sq + half_alpha_inv_sq);
604 auto const pref = cell_energy * vterm;
605 diagonal += cell_energy;
606 node_k_space_pressure_tensor[0u] += pref * kx * kx; /* sigma_xx */
607 node_k_space_pressure_tensor[1u] += pref * kx * ky; /* sigma_xy */
608 node_k_space_pressure_tensor[2u] += pref * kx * kz; /* sigma_xz */
609 node_k_space_pressure_tensor[4u] += pref * ky * ky; /* sigma_yy */
610 node_k_space_pressure_tensor[5u] += pref * ky * kz; /* sigma_yz */
611 node_k_space_pressure_tensor[8u] += pref * kz * kz; /* sigma_zz */
612 }
613 }
614 ++index;
615 });
616
617 node_k_space_pressure_tensor[0u] += diagonal;
618 node_k_space_pressure_tensor[4u] += diagonal;
619 node_k_space_pressure_tensor[8u] += diagonal;
620 node_k_space_pressure_tensor[3u] = node_k_space_pressure_tensor[1u];
621 node_k_space_pressure_tensor[6u] = node_k_space_pressure_tensor[2u];
622 node_k_space_pressure_tensor[7u] = node_k_space_pressure_tensor[5u];
623 }
624
625 return node_k_space_pressure_tensor * prefactor / (2. * box_geo.volume());
626}
627
628template <typename FloatType, Arch Architecture, class FFTConfig>
630 bool force_flag, bool energy_flag) {
631
632 auto const &system = get_system();
633 auto const &box_geo = *system.box_geo;
634#ifdef ESPRESSO_NPT
635 auto const npt_flag = force_flag and system.has_npt_enabled();
636#else
637 auto constexpr npt_flag = false;
638#endif
639 if (p3m.sum_qpart == 0u) {
640 return 0.;
641 }
642 auto &cell_structure = *system.cell_structure;
643
644 if (not has_actor_of_type<ElectrostaticLayerCorrection>(
645 system.coulomb.impl->solver)) {
647 }
648
649 kernel_ks_charge_density();
650
651 auto const &local_force = cell_structure.get_local_force();
652 auto const &aosoa = cell_structure.get_aosoa();
653
654 // The dipole moment is only needed if we don't have metallic boundaries
655 auto const box_dipole = (p3m.params.epsilon != P3M_EPSILON_METALLIC)
656 ? std::make_optional(calc_dipole_moment(
657 comm_cart, cell_structure, box_geo))
658 : std::nullopt;
659 auto const volume = box_geo.volume();
660 auto const pref =
661 4. * std::numbers::pi / volume / (2. * p3m.params.epsilon + 1.);
662 auto energy = 0.;
663
664 /* === k-space force calculation === */
665 if (force_flag) {
666 kernel_rs_electric_field();
667
668 // assign particle forces
669 auto const force_prefac = prefactor / volume;
670 auto &particle_data = cell_structure;
671 Utils::integral_parameter<int, AssignForces, p3m_min_cao, p3m_max_cao>(
672 p3m.params.cao, p3m, force_prefac, particle_data);
673
674 // add dipole forces
675 // Eq. (3.19) @cite deserno00b
676 if (box_dipole) {
677 auto const dm = prefactor * pref * box_dipole.value();
678 auto const n_part = cell_structure.count_local_particles();
680 "AssignForcesBoxDipole", std::size_t{0u}, n_part,
681 [&aosoa, &local_force, dm](auto p_index) {
682 auto const thread_id = omp_get_thread_num();
683 auto const q = aosoa.charge(p_index);
684 local_force(p_index, thread_id, 0) -= q * dm[0];
685 local_force(p_index, thread_id, 1) -= q * dm[1];
686 local_force(p_index, thread_id, 2) -= q * dm[2];
687 });
688 }
689 }
690
691 /* === k-space energy calculation === */
692 if (energy_flag or npt_flag) {
693 auto constexpr r2c_dir = FFTConfig::r2c_dir;
694 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
695 auto const &global_size = p3m.params.mesh;
696 auto const local_size = p3m.fft->ks_local_size();
697 auto const local_origin = p3m.fft->ks_local_ld_index();
698 auto const cutoff_left = 1 - local_origin[r2c_dir];
699 auto const cutoff_right = global_size[r2c_dir] / 2 - local_origin[r2c_dir];
700 auto local_index = Utils::Vector3i::broadcast(0);
701 auto &short_dim = local_index[r2c_dir];
702 auto node_energy = 0.;
703 std::size_t index = 0u;
704 for_each_3d_order<FFTConfig::k_space_order>(
705 mesh_start, local_size, local_index, [&]() {
706 if (short_dim <= cutoff_right) {
707 auto const &cell_field = p3m.ks_charge_density[index];
708 auto cell_energy = static_cast<double>(p3m.g_energy[index] *
709 std::norm(cell_field));
710 if (short_dim >= cutoff_left and short_dim <= cutoff_right - 1) {
711 // leverage symmetry of k-space: double counting except in the
712 // first and last planes of the short dimension
713 cell_energy += cell_energy;
714 }
715 node_energy += cell_energy;
716 }
717 ++index;
718 });
719 node_energy /= 2. * volume;
720
721 // add up energy contributions from all mpi ranks
722 boost::mpi::reduce(::comm_cart, node_energy, energy, std::plus<>(), 0);
723 if (this_node == 0) {
724 /* self energy correction */
725 // Eq. (3.8) @cite deserno00b
726 energy -= p3m.sum_q2 * p3m.params.alpha * std::numbers::inv_sqrtpi;
727 /* net charge correction */
728 // Eq. (3.11) @cite deserno00b
729 energy -= p3m.square_sum_q * std::numbers::pi /
730 (2. * volume * Utils::sqr(p3m.params.alpha));
731 /* dipole correction */
732 // Eq. (3.9) @cite deserno00b
733 if (box_dipole) {
734 energy += pref * box_dipole.value().norm2();
735 }
736 }
737 energy *= prefactor;
738#ifdef ESPRESSO_NPT
739 if (npt_flag) {
740 get_system().npt_add_virial_contribution(energy);
741 }
742#endif
743 if (not energy_flag) {
744 energy = 0.;
745 }
746 }
747
748 return energy;
749}
750
751template <typename FloatType, Arch Architecture, class FFTConfig>
755 double m_mesh_density_min = -1., m_mesh_density_max = -1.;
756 // indicates if mesh should be tuned
757 bool m_tune_mesh = false;
758 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
759
760protected:
761 P3MParameters &get_params() override { return p3m.params; }
762
763 static constexpr std::tuple<int, int, int> get_memory_layout() {
764 using enum Utils::MemoryOrder;
765 auto constexpr memory_order = FFTConfig::k_space_order;
766 auto constexpr layout_col_major = std::tuple(2, 1, 0);
767 auto constexpr layout_row_major = std::tuple(0, 1, 2);
768 return (memory_order == COLUMN_MAJOR) ? layout_col_major : layout_row_major;
769 }
770
771public:
772 CoulombTuningAlgorithm(System::System &system, auto &input_p3m,
773 double prefactor, int timings,
774 decltype(m_tune_limits) tune_limits)
775 : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m},
776 m_tune_limits{std::move(tune_limits)} {}
777
778 void on_solver_change() const override { m_system.on_coulomb_change(); }
779
780 void setup_logger(bool verbose) override {
781 auto const &box_geo = *m_system.box_geo;
782#ifdef ESPRESSO_CUDA
783 auto const on_gpu = Architecture == Arch::CUDA;
784#else
785 auto const on_gpu = false;
786#endif
787 m_logger = std::make_unique<TuningLogger>(
788 verbose and this_node == 0, (on_gpu) ? "CoulombP3MGPU" : "CoulombP3M",
790 m_logger->tuning_goals(p3m.params.accuracy, m_prefactor,
791 box_geo.length()[0], p3m.sum_qpart, p3m.sum_q2);
792 m_logger->log_tuning_start();
793 }
794
795 std::optional<std::string>
796 layer_correction_veto_r_cut(double r_cut) const override {
797 auto const &solver = m_system.coulomb.impl->solver;
798 if (auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
799 return actor->veto_r_cut(r_cut);
800 }
801 return {};
802 }
803
804 std::optional<std::string> fft_decomposition_veto(
805 Utils::Vector3i const &mesh_size_r_space) const override {
806#ifdef ESPRESSO_CUDA
807 if constexpr (Architecture == Arch::CUDA) {
808 return std::nullopt;
809 }
810#endif
811 auto const [KX, KY, KZ] = get_memory_layout();
812 auto valid_decomposition = false;
813 // calculate box size in k-space
814 Utils::Vector3i mesh_size_k_space = {};
815 boost::mpi::reduce(
816 ::comm_cart, p3m.fft->ks_local_ur_index(), mesh_size_k_space,
817 [](Utils::Vector3i const &lhs, Utils::Vector3i const &rhs) {
818 return Utils::Vector3i{{std::max(lhs[0u], rhs[0u]),
819 std::max(lhs[1u], rhs[1u]),
820 std::max(lhs[2u], rhs[2u])}};
821 },
822 0);
823 if constexpr (FFTConfig::use_r2c) {
824 // adjust for reduced dimension
825 mesh_size_k_space[FFTConfig::r2c_dir] -= 1;
826 mesh_size_k_space[FFTConfig::r2c_dir] *= 2;
827 }
828 // check consistency with box size in real-space
829 if (::this_node == 0) {
830 auto const &node_grid = ::communicator.node_grid;
831 valid_decomposition =
832 (mesh_size_r_space[0u] == mesh_size_k_space[KX] and
833 mesh_size_r_space[1u] == mesh_size_k_space[KY] and
834 mesh_size_r_space[2u] == mesh_size_k_space[KZ] and
835 is_node_grid_compatible_with_mesh(node_grid, mesh_size_r_space));
836 }
837 boost::mpi::broadcast(::comm_cart, valid_decomposition, 0);
838 std::optional<std::string> retval{"conflict with FFT domain decomposition"};
839 if (valid_decomposition) {
840 retval = std::nullopt;
841 }
842 return retval;
843 }
844
845 std::tuple<double, double, double, double>
847 double r_cut_iL) const override {
848
849 auto const &box_geo = *m_system.box_geo;
850 double alpha_L, rs_err, ks_err;
851
852 /* calc maximal real space error for setting */
853 rs_err = p3m_real_space_error(m_prefactor, r_cut_iL, p3m.sum_qpart,
854 p3m.sum_q2, 0., box_geo.length());
855
856 if (std::numbers::sqrt2 * rs_err > p3m.params.accuracy) {
857 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
858 alpha_L = sqrt(log(std::numbers::sqrt2 * rs_err / p3m.params.accuracy)) /
859 r_cut_iL;
860 } else {
861 /* even alpha=0 is ok, however, we cannot choose it since it kills the
862 k-space error formula.
863 Anyways, this very likely NOT the optimal solution */
864 alpha_L = 0.1;
865 }
866
867 /* calculate real-space and k-space error for this alpha_L */
868 rs_err = p3m_real_space_error(m_prefactor, r_cut_iL, p3m.sum_qpart,
869 p3m.sum_q2, alpha_L, box_geo.length());
870#ifdef ESPRESSO_CUDA
871 if constexpr (Architecture == Arch::CUDA) {
872 if (this_node == 0) {
873 ks_err =
874 p3m_k_space_error_gpu(m_prefactor, mesh.data(), cao, p3m.sum_qpart,
875 p3m.sum_q2, alpha_L, box_geo.length().data());
876 }
877 boost::mpi::broadcast(comm_cart, ks_err, 0);
878 } else
879#endif
880 ks_err = p3m_k_space_error(m_prefactor, mesh, cao, p3m.sum_qpart,
881 p3m.sum_q2, alpha_L, box_geo.length());
882
883 return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L};
884 }
885
886 void determine_mesh_limits() override {
887 auto const &box_geo = *m_system.box_geo;
888 auto const mesh_density =
889 static_cast<double>(p3m.params.mesh[0]) * box_geo.length_inv()[0];
890
891 if (p3m.params.mesh == Utils::Vector3i::broadcast(-1)) {
892 /* avoid using more than 1 GB of FFT arrays */
893 auto const normalized_box_dim = std::cbrt(box_geo.volume());
894 auto const max_npart_per_dim = 512.;
895 /* simple heuristic to limit the tried meshes if the accuracy cannot
896 be obtained with smaller meshes, but normally not all these
897 meshes have to be tested */
898 auto const min_npart_per_dim = std::min(
899 max_npart_per_dim, std::cbrt(static_cast<double>(p3m.sum_qpart)));
900 m_mesh_density_min = min_npart_per_dim / normalized_box_dim;
901 m_mesh_density_max = max_npart_per_dim / normalized_box_dim;
902 if (m_tune_limits.first or m_tune_limits.second) {
903 auto const &box_l = box_geo.length();
904 auto const dim = std::max({box_l[0], box_l[1], box_l[2]});
905 if (m_tune_limits.first) {
906 m_mesh_density_min = static_cast<double>(*m_tune_limits.first) / dim;
907 }
908 if (m_tune_limits.second) {
909 m_mesh_density_max = static_cast<double>(*m_tune_limits.second) / dim;
910 }
911 }
912 m_tune_mesh = true;
913 } else {
914 m_mesh_density_min = m_mesh_density_max = mesh_density;
915 assert(p3m.params.mesh[0] >= 1);
916 if (p3m.params.mesh[1] == -1 and p3m.params.mesh[2] == -1) {
917 // determine the two missing values by rescaling by the box length
918 for (auto i : {1u, 2u}) {
919 p3m.params.mesh[i] =
920 static_cast<int>(std::round(mesh_density * box_geo.length()[i]));
921 // make the mesh even in all directions
922 p3m.params.mesh[i] += p3m.params.mesh[i] % 2;
923 }
924 }
925 m_logger->report_fixed_mesh(p3m.params.mesh);
926 }
927 }
928
930 auto const &box_geo = *m_system.box_geo;
931 auto const &solver = m_system.coulomb.impl->solver;
932 auto tuned_params = TuningAlgorithm::Parameters{};
933 auto time_best = time_sentinel;
934 auto mesh_density = m_mesh_density_min;
935 auto current_mesh = p3m.params.mesh;
936 if (m_tune_mesh) {
937 for (auto i : {0u, 1u, 2u}) {
938 current_mesh[i] =
939 static_cast<int>(std::round(box_geo.length()[i] * mesh_density));
940 // make the mesh even in all directions
941 current_mesh[i] += current_mesh[i] % 2;
942 }
943 }
944
945 while (mesh_density <= m_mesh_density_max) {
946 auto trial_params = TuningAlgorithm::Parameters{};
947 trial_params.mesh = current_mesh;
948 trial_params.cao = cao_best;
949 trial_params.cao = cao_best;
950
951 auto const trial_time =
952 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
953 trial_params.alpha_L, trial_params.accuracy);
954
955 if (trial_time >= 0.) {
956 /* the optimum r_cut for this mesh is the upper limit for higher meshes,
957 everything else is slower */
958 if (has_actor_of_type<CoulombP3M>(solver)) {
959 m_r_cut_iL_max = trial_params.r_cut_iL;
960 }
961
962 if (trial_time < time_best) {
963 /* new optimum */
964 reset_n_trials();
965 tuned_params = trial_params;
966 time_best = tuned_params.time = trial_time;
967 } else if (trial_time > time_best + time_granularity or
968 get_n_trials() > max_n_consecutive_trials) {
969 /* no hope of further optimisation */
970 break;
971 }
972 }
973 if (m_tune_mesh) {
974 current_mesh += Utils::Vector3i::broadcast(2);
975 mesh_density = current_mesh[0] / box_geo.length()[0];
976 } else {
977 break;
978 }
979 }
980 return tuned_params;
981 }
982};
983
984template <typename FloatType, Arch Architecture, class FFTConfig>
986 auto &system = get_system();
987 auto const &box_geo = *system.box_geo;
988 if (p3m.params.alpha_L == 0. and p3m.params.alpha != 0.) {
989 p3m.params.alpha_L = p3m.params.alpha * box_geo.length()[0];
990 }
991 if (p3m.params.r_cut_iL == 0. and p3m.params.r_cut != 0.) {
992 p3m.params.r_cut_iL = p3m.params.r_cut * box_geo.length_inv()[0];
993 }
994 if (not is_tuned()) {
995 count_charged_particles();
996 if (p3m.sum_qpart == 0) {
997 throw std::runtime_error(
998 "CoulombP3M: no charged particles in the system");
999 }
1000 try {
1002 system, p3m, prefactor, tuning.timings, tuning.limits);
1003 parameters.setup_logger(tuning.verbose);
1004 // parameter ranges
1005 parameters.determine_mesh_limits();
1006 parameters.determine_r_cut_limits();
1007 parameters.determine_cao_limits(7);
1008 // run tuning algorithm
1009 parameters.tune();
1010 m_is_tuned = true;
1011 system.on_coulomb_change();
1012 } catch (...) {
1013 p3m.params.tuning = false;
1014 throw;
1015 }
1016 }
1017 init();
1018}
1019
1021 auto const &system = get_system();
1022 auto const &box_geo = *system.box_geo;
1023 auto const &local_geo = *system.local_geo;
1024 for (auto i = 0u; i < 3u; i++) {
1025 /* check k-space cutoff */
1026 if (p3m_params.cao_cut[i] >= box_geo.length_half()[i]) {
1027 std::stringstream msg;
1028 msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i]
1029 << " is larger than half of box dimension " << box_geo.length()[i];
1030 throw std::runtime_error(msg.str());
1031 }
1032 if (p3m_params.cao_cut[i] >= local_geo.length()[i]) {
1033 std::stringstream msg;
1034 msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i]
1035 << " is larger than local box dimension " << local_geo.length()[i];
1036 throw std::runtime_error(msg.str());
1037 }
1038 }
1039
1041 if ((box_geo.length()[0] != box_geo.length()[1]) or
1042 (box_geo.length()[1] != box_geo.length()[2]) or
1043 (p3m_params.mesh[0] != p3m_params.mesh[1]) or
1044 (p3m_params.mesh[1] != p3m_params.mesh[2])) {
1045 throw std::runtime_error(
1046 "CoulombP3M: non-metallic epsilon requires cubic box");
1047 }
1048 }
1049}
1050
1052 auto const &box_geo = *get_system().box_geo;
1053 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
1054 throw std::runtime_error(
1055 "CoulombP3M: requires periodicity (True, True, True)");
1056 }
1057}
1058
1060 auto const &local_geo = *get_system().local_geo;
1061 if (local_geo.cell_structure_type() != CellStructureType::REGULAR and
1062 local_geo.cell_structure_type() != CellStructureType::HYBRID) {
1063 throw std::runtime_error(
1064 "CoulombP3M: requires the regular or hybrid decomposition cell system");
1065 }
1066 if (::communicator.size > 1 and
1067 local_geo.cell_structure_type() == CellStructureType::HYBRID) {
1068 throw std::runtime_error(
1069 "CoulombP3M: does not work with the hybrid decomposition cell system, "
1070 "if using more than one MPI node");
1071 }
1072}
1073
1074template <typename FloatType, Arch Architecture, class FFTConfig>
1076 auto const &box_geo = *get_system().box_geo;
1077 p3m.params.r_cut = p3m.params.r_cut_iL * box_geo.length()[0];
1078 p3m.params.alpha = p3m.params.alpha_L * box_geo.length_inv()[0];
1079 p3m.params.recalc_a_ai_cao_cut(box_geo.length());
1081 sanity_checks_boxl();
1082 calc_influence_function_force();
1083 calc_influence_function_energy();
1084 p3m.halo_comm.resize(::comm_cart, p3m.local_mesh);
1085}
1086
1087#ifdef ESPRESSO_CUDA
1088template <typename FloatType, Arch Architecture, class FFTConfig>
1089void CoulombP3MHeffte<FloatType, Architecture,
1090 FFTConfig>::add_long_range_forces_gpu() {
1091 if constexpr (Architecture == Arch::CUDA) {
1092#ifdef ESPRESSO_NPT
1093 if (get_system().has_npt_enabled()) {
1094 get_system().npt_add_virial_contribution(long_range_energy());
1095 }
1096#endif
1097 if (this_node == 0) {
1098 auto &gpu = *get_system().gpu;
1099 p3m_gpu_add_farfield_force(*m_gpu_data, gpu, prefactor,
1100 gpu.n_particles());
1101 }
1102 }
1103}
1104
1105/* Initialize the CPU kernels.
1106 * This operation is time-consuming and sets up data members
1107 * that are only relevant for ELC force corrections, since the
1108 * GPU implementation uses CPU kernels to compute energies.
1109 */
1110template <typename FloatType, Arch Architecture, class FFTConfig>
1112 if constexpr (Architecture == Arch::CUDA) {
1113 auto &system = get_system();
1114 if (has_actor_of_type<ElectrostaticLayerCorrection>(
1115 system.coulomb.impl->solver)) {
1116 init_cpu_kernels();
1117 }
1118 p3m_gpu_init(m_gpu_data, p3m.params.cao, p3m.params.mesh, p3m.params.alpha,
1119 system.box_geo->length(), system.gpu->n_particles());
1120 }
1121}
1122
1123template <typename FloatType, Arch Architecture, class FFTConfig>
1125 if constexpr (Architecture == Arch::CUDA) {
1126 auto &gpu_particle_data = *get_system().gpu;
1127 gpu_particle_data.enable_property(GpuParticleData::prop::force);
1128 gpu_particle_data.enable_property(GpuParticleData::prop::q);
1129 gpu_particle_data.enable_property(GpuParticleData::prop::pos);
1130 }
1131}
1132#endif // ESPRESSO_CUDA
1133
1134#endif // ESPRESSO_P3M
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
Vector implementation and trait types for boost qvm interoperability.
Describes a cell structure / cell system.
auto & get_local_force()
std::size_t count_local_particles() const
void determine_mesh_limits() override
std::optional< std::string > layer_correction_veto_r_cut(double r_cut) const override
TuningAlgorithm::Parameters get_time() override
void setup_logger(bool verbose) override
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
void on_solver_change() const override
CoulombTuningAlgorithm(System::System &system, auto &input_p3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
static constexpr std::tuple< int, int, int > get_memory_layout()
std::optional< std::string > fft_decomposition_veto(Utils::Vector3i const &mesh_size_r_space) const override
P3MParameters & get_params() override
Main system class.
std::shared_ptr< LocalBox > local_geo
void npt_add_virial_contribution(double energy)
Definition npt.cpp:137
std::shared_ptr< GpuParticleData > gpu
bool has_npt_enabled() const
Coulomb::Solver coulomb
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
DEVICE_QUALIFIER constexpr pointer data() noexcept
Definition Array.hpp:132
T norm() const
Definition Vector.hpp:159
constexpr std::span< T, N > as_span() const
Definition Vector.hpp:143
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
Definition Vector.hpp:131
Cache for interpolation weights.
void zfill(std::size_t size)
Fill cache with zero-initialized data.
void store(InterpolationWeights< cao > const &weights)
Push back weights for one point.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
constexpr auto round_error_prec
Precision below which a double-precision float is assumed to be zero.
Definition config.hpp:47
void charge_assign(elc_data const &elc, CoulombP3M &solver, combined_ranges const &p_q_pos_range)
Definition elc.cpp:1124
ELC algorithm for long-range Coulomb interactions.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
auto pad_with_zeros_discard_imag(std::span< T > cropped_array, Utils::Vector3i const &cropped_dim, Utils::Vector3i const &pad_left, Utils::Vector3i const &pad_right)
Pad a 3D matrix with zeros to restore halo regions.
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
std::vector< FloatType > grid_influence_function(P3MParameters const &params, Utils::Vector3i const &n_start, Utils::Vector3i const &n_stop, Utils::Vector3d const &inv_box_l)
Map influence function over a grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, WeightsStorage< cao > const &weights, Kernel kernel)
P3M grid interpolation.
constexpr int p3m_min_cao
Minimal charge assignment order.
Definition math.hpp:48
constexpr int p3m_max_cao
Maximal charge assignment order.
Definition math.hpp:50
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
Definition math.hpp:38
std::function< void(ResultType &, ResultType const &)> ReductionOp
Join two partial reduction results.
std::function< void(ResultType &, Particle const &)> AddPartialResultKernel
Kernel that adds the result from a single particle to a reduction.
System & get_system()
T product(Vector< T, N > const &v)
Definition Vector.hpp:372
VectorXd< 3 > Vector3d
Definition Vector.hpp:184
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
Definition sqr.hpp:28
MemoryOrder
Definition index.hpp:32
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
Definition math.hpp:71
auto get_analytic_cotangent_sum_kernel(int cao)
Definition math.hpp:146
STL namespace.
Exports for the NpT code.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
P3M algorithm for long-range Coulomb interaction.
void p3m_gpu_add_farfield_force(P3MGpuParams &data, GpuParticleData &gpu, double prefactor, std::size_t n_part)
The long-range part of the P3M algorithm.
void p3m_gpu_init(std::shared_ptr< P3MGpuParams > &data, int cao, Utils::Vector3i const &mesh, double alpha, Utils::Vector3d const &box_l, std::size_t n_part)
Initialize the internal data structure of the P3M GPU.
P3M electrostatics on GPU.
double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao, int npart, double sum_q2, double alpha_L, const double *box)
double p3m_k_space_error(double pref, Utils::Vector3i const &mesh, int cao, std::size_t n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the analytic expression of the error estimate for the P3M method in (eq.
std::complex< FloatType > multiply_complex_by_real(std::complex< FloatType > const &z, FloatType k)
auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift, Utils::Vector3i const &mesh, Utils::Vector3d const &mesh_i, int cao, double alpha_L_i)
Aliasing sum used by p3m_k_space_error.
double p3m_real_space_error(double pref, double r_cut_iL, std::size_t n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the real space contribution to the rms error in the force (as described by Kolafa and Perra...
std::complex< FloatType > multiply_complex_by_imaginary(std::complex< FloatType > const &z, FloatType k)
auto calc_dipole_moment(boost::mpi::communicator const &comm, auto const &cs, auto const &box_geo)
bool is_node_grid_compatible_with_mesh(Utils::Vector3i const &node_grid, Utils::Vector3i const &mesh)
ResultType reduce_over_local_particles(CellStructure const &cs, Reduction::AddPartialResultKernel< ResultType > add_partial, Reduction::ReductionOp< ResultType > reduce_op)
performs a reduction over all particles
ESPRESSO_ATTR_ALWAYS_INLINE void kokkos_parallel_range_for(auto const &name, auto start, auto end, auto const &kernel)
Utils::Vector3i node_grid
void tune() override
void assign_charge(double q, Utils::Vector3d const &real_pos, bool skip_cache) override
double long_range_kernel(bool force_flag, bool energy_flag)
Compute the k-space part of forces and energies.
void charge_assign() override
Utils::Vector9d long_range_pressure() override
void scaleby_box_l() override
Base class for the electrostatics P3M algorithm.
std::size_t sum_qpart
number of charged particles.
FloatType value_type
std::shared_ptr< P3MFFT< FloatType, FFTConfig > > fft
p3m_send_mesh< FloatType > halo_comm
double sum_q2
Sum of square of charges.
p3m_interpolation_cache inter_weights
void sanity_checks_periodicity() const
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
void sanity_checks_cell_structure() const
P3MParameters const & p3m_params
Definition p3m.hpp:56
std::unique_ptr< Implementation > impl
Pointer-to-implementation.
static constexpr std::size_t force
static constexpr std::size_t pos
static constexpr std::size_t q
Interpolation weights for one point.
void recalc_ld_pos(P3MParameters const &params)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
int cao
charge assignment order ([0,7]).
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
double epsilon
epsilon of the "surrounding dielectric".
P3MParameters params
P3M base parameters.
P3MLocalMesh local_mesh
Local mesh geometry information for this MPI rank.
Struct holding all information for one particle.
Definition Particle.hpp:435
auto const & q() const
Definition Particle.hpp:578
auto const & image_box() const
Definition Particle.hpp:484
auto const & pos() const
Definition Particle.hpp:471
void operator()(auto &p3m, double q, InterpolationWeights< cao > const &weights)
void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos)
void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos, p3m_interpolation_cache &inter_weights)
void operator()(auto &p3m, auto force_prefac, CellStructure &cell_structure) const