ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
p3m.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2024 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22/** @file
23 *
24 * The corresponding header file is @ref p3m.hpp.
25 */
26
27#include "config/config.hpp"
28
29#ifdef P3M
30
33
36#ifdef CUDA
39#endif // CUDA
40
41#include "fft/fft.hpp"
43#include "p3m/TuningLogger.hpp"
44#include "p3m/for_each_3d.hpp"
46#include "p3m/math.hpp"
47
48#include "BoxGeometry.hpp"
49#include "LocalBox.hpp"
50#include "Particle.hpp"
52#include "ParticleRange.hpp"
53#include "PropagationMode.hpp"
54#include "actor/visitors.hpp"
57#include "communication.hpp"
58#include "errorhandling.hpp"
60#include "npt.hpp"
62#include "system/System.hpp"
63#include "tuning.hpp"
64
65#include <utils/Vector.hpp>
68#include <utils/math/sqr.hpp>
70
71#include <boost/mpi/collectives/all_reduce.hpp>
72#include <boost/mpi/collectives/broadcast.hpp>
73#include <boost/mpi/collectives/reduce.hpp>
74#include <boost/mpi/communicator.hpp>
75#include <boost/range/combine.hpp>
76#include <boost/range/numeric.hpp>
77
78#include <algorithm>
79#include <array>
80#include <cassert>
81#include <complex>
82#include <cstddef>
83#include <functional>
84#include <initializer_list>
85#include <numbers>
86#include <optional>
87#include <span>
88#include <sstream>
89#include <stdexcept>
90#include <string>
91#include <tuple>
92#include <utility>
93
94#ifdef FFTW3_H
95#error "The FFTW3 library shouldn't be visible in this translation unit"
96#endif
97
98template <typename FloatType, Arch Architecture>
100 auto local_n = std::size_t{0u};
101 auto local_q2 = 0.0;
102 auto local_q = 0.0;
103
104 for (auto const &p : get_system().cell_structure->local_particles()) {
105 if (p.q() != 0.0) {
106 local_n++;
107 local_q2 += Utils::sqr(p.q());
108 local_q += p.q();
109 }
110 }
111
112 boost::mpi::all_reduce(comm_cart, local_n, p3m.sum_qpart, std::plus<>());
113 boost::mpi::all_reduce(comm_cart, local_q2, p3m.sum_q2, std::plus<>());
114 boost::mpi::all_reduce(comm_cart, local_q, p3m.square_sum_q, std::plus<>());
115 p3m.square_sum_q = Utils::sqr(p3m.square_sum_q);
116}
117
118/** Calculate the optimal influence function of @cite hockney88a.
119 * (optimised for force calculations)
120 *
121 * Each node calculates only the values for its domain in k-space.
122 *
123 * See also: @cite hockney88a eq. 8-22 (p. 275). Note the somewhat
124 * different convention for the prefactors, which is described in
125 * @cite deserno98a @cite deserno98b.
126 */
127template <typename FloatType, Arch Architecture>
129 auto const [KX, KY, KZ] = p3m.fft->get_permutations();
130 p3m.g_force = grid_influence_function<FloatType, 1, P3M_BRILLOUIN>(
131 p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ,
132 get_system().box_geo->length_inv());
133}
134
135/** Calculate the influence function optimized for the energy and the
136 * self energy correction.
137 */
138template <typename FloatType, Arch Architecture>
140 auto const [KX, KY, KZ] = p3m.fft->get_permutations();
141 p3m.g_energy = grid_influence_function<FloatType, 0, P3M_BRILLOUIN>(
142 p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ,
143 get_system().box_geo->length_inv());
144}
145
146/** Aliasing sum used by @ref p3m_k_space_error. */
148 Utils::Vector3i const &mesh,
149 Utils::Vector3d const &mesh_i, int cao,
150 double alpha_L_i) {
151
152 auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
153 auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
154 auto constexpr exp_min = -708.4; // for IEEE-compatible double
155 auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i);
156 auto alias1 = 0.;
157 auto alias2 = 0.;
158
159 Utils::Vector3i indices{};
160 Utils::Vector3i nm{};
161 Utils::Vector3d fnm{};
163 mesh_start, mesh_stop, indices,
164 [&]() {
165 auto const norm_sq = nm.norm2();
166 auto const exponent = -factor1 * norm_sq;
167 auto const exp_limit = (exp_min + std::log(norm_sq)) / 2.;
168 auto const ex = (exponent < exp_limit) ? 0. : std::exp(exponent);
169 auto const energy = std::pow(Utils::product(fnm), 2 * cao);
170 alias1 += Utils::sqr(ex) / norm_sq;
171 alias2 += energy * ex * (shift * nm) / norm_sq;
172 },
173 [&](unsigned dim, int n) {
174 nm[dim] = shift[dim] + n * mesh[dim];
175 fnm[dim] = math::sinc(nm[dim] * mesh_i[dim]);
176 });
177
178 return std::make_pair(alias1, alias2);
179}
180
181/** Calculate the real space contribution to the rms error in the force (as
182 * described by Kolafa and Perram).
183 * \param pref Prefactor of Coulomb interaction.
184 * \param r_cut_iL rescaled real space cutoff for p3m method.
185 * \param n_c_part number of charged particles in the system.
186 * \param sum_q2 sum of square of charges in the system
187 * \param alpha_L rescaled Ewald splitting parameter.
188 * \param box_l box dimensions.
189 * \return real space error
190 */
191static double p3m_real_space_error(double pref, double r_cut_iL, int n_c_part,
192 double sum_q2, double alpha_L,
193 Utils::Vector3d const &box_l) {
194 auto const volume = Utils::product(box_l);
195 return (2. * pref * sum_q2 * exp(-Utils::sqr(r_cut_iL * alpha_L))) /
196 sqrt(static_cast<double>(n_c_part) * r_cut_iL * box_l[0] * volume);
197}
198
199/** Calculate the analytic expression of the error estimate for the
200 * P3M method in @cite hockney88a (eq. 8-23 p. 275) in
201 * order to obtain the rms error in the force for a system of N
202 * randomly distributed particles in a cubic box (k-space part).
203 * \param pref Prefactor of Coulomb interaction.
204 * \param mesh number of mesh points in one direction.
205 * \param cao charge assignment order.
206 * \param n_c_part number of charged particles in the system.
207 * \param sum_q2 sum of square of charges in the system
208 * \param alpha_L rescaled Ewald splitting parameter.
209 * \param box_l box dimensions.
210 * \return reciprocal (k) space error
211 */
212static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh,
213 int cao, int n_c_part, double sum_q2,
214 double alpha_L, Utils::Vector3d const &box_l) {
215
216 auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao);
217 auto const mesh_i = 1. / Utils::Vector3d(mesh);
218 auto const alpha_L_i = 1. / alpha_L;
219 auto const mesh_stop = mesh / 2;
220 auto const mesh_start = -mesh_stop;
221 auto indices = Utils::Vector3i{};
222 auto values = Utils::Vector3d{};
223 auto he_q = 0.;
224
226 mesh_start, mesh_stop, indices,
227 [&]() {
228 if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
229 auto const n2 = indices.norm2();
230 auto const cs = Utils::product(values);
231 auto const [alias1, alias2] =
232 p3m_tune_aliasing_sums(indices, mesh, mesh_i, cao, alpha_L_i);
233 auto const d = alias1 - Utils::sqr(alias2 / cs) / n2;
234 /* at high precision, d can become negative due to extinction;
235 also, don't take values that have no significant digits left*/
236 if (d > 0. and std::fabs(d / alias1) > ROUND_ERROR_PREC) {
237 he_q += d;
238 }
239 }
240 },
241 [&values, &mesh_i, cotangent_sum](unsigned dim, int n) {
242 values[dim] = cotangent_sum(n, mesh_i[dim]);
243 });
244
245 return 2. * pref * sum_q2 * sqrt(he_q / static_cast<double>(n_c_part)) /
246 (box_l[1] * box_l[2]);
247}
248
249template <typename FloatType, Arch Architecture>
251 assert(p3m.params.mesh >= Utils::Vector3i::broadcast(1));
252 assert(p3m.params.cao >= 1 and p3m.params.cao <= 7);
253 assert(p3m.params.alpha > 0.);
254
255 auto const &system = get_system();
256 auto const &box_geo = *system.box_geo;
257 auto const &local_geo = *system.local_geo;
258 auto const skin = system.cell_structure->get_verlet_skin();
259
260 p3m.params.cao3 = Utils::int_pow<3>(p3m.params.cao);
261 p3m.params.recalc_a_ai_cao_cut(box_geo.length());
262
263 sanity_checks();
264
265 auto const &solver = system.coulomb.impl->solver;
266 double elc_layer = 0.;
267 if (auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
268 elc_layer = actor->elc.space_layer;
269 }
270
271 assert(p3m.fft);
272 p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer);
273 p3m.fft_buffers->init_halo();
274 p3m.fft->init(p3m.params);
275 p3m.mesh.ks_pnum = p3m.fft->get_ks_pnum();
276 p3m.fft_buffers->init_meshes(p3m.fft->get_ca_mesh_size());
277 p3m.update_mesh_views();
278 p3m.calc_differential_operator();
279
280 /* fix box length dependent constants */
281 scaleby_box_l();
282
283 count_charged_particles();
284}
285
286namespace {
287template <int cao> struct AssignCharge {
288 void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos,
289 InterpolationWeights<cao> const &w) {
290 using value_type =
291 typename std::remove_reference_t<decltype(p3m)>::value_type;
292 p3m_interpolate(p3m.local_mesh, w, [q, &p3m](int ind, double w) {
293 p3m.mesh.rs_scalar[ind] += value_type(w * q);
294 });
295 }
296
297 void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos,
298 p3m_interpolation_cache &inter_weights) {
299 auto const w = p3m_calculate_interpolation_weights<cao>(
300 real_pos, p3m.params.ai, p3m.local_mesh);
301 inter_weights.store(w);
302 this->operator()(p3m, q, real_pos, w);
303 }
304
305 void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos) {
306 auto const w = p3m_calculate_interpolation_weights<cao>(
307 real_pos, p3m.params.ai, p3m.local_mesh);
308 this->operator()(p3m, q, real_pos, w);
309 }
310
311 template <typename combined_ranges>
312 void operator()(auto &p3m, combined_ranges const &p_q_pos_range) {
313 for (auto zipped : p_q_pos_range) {
314 auto const p_q = boost::get<0>(zipped);
315 auto const &p_pos = boost::get<1>(zipped);
316 if (p_q != 0.0) {
317 this->operator()(p3m, p_q, p_pos, p3m.inter_weights);
318 }
319 }
320 }
321};
322} // namespace
323
324template <typename FloatType, Arch Architecture>
326 ParticleRange const &particles) {
327 prepare_fft_mesh(true);
328
329 auto p_q_range = ParticlePropertyRange::charge_range(particles);
330 auto p_pos_range = ParticlePropertyRange::pos_range(particles);
331
332 Utils::integral_parameter<int, AssignCharge, 1, 7>(
333 p3m.params.cao, p3m, boost::combine(p_q_range, p_pos_range));
334}
335
336template <typename FloatType, Arch Architecture>
338 double q, Utils::Vector3d const &real_pos, bool skip_cache) {
339 if (skip_cache) {
340 Utils::integral_parameter<int, AssignCharge, 1, 7>(p3m.params.cao, p3m, q,
341 real_pos);
342 } else {
343 Utils::integral_parameter<int, AssignCharge, 1, 7>(
344 p3m.params.cao, p3m, q, real_pos, p3m.inter_weights);
345 }
346}
347
348template <int cao> struct AssignForces {
349 template <typename combined_ranges>
350 void operator()(auto &p3m, double force_prefac,
351 combined_ranges const &p_q_force_range) const {
352
353 assert(cao == p3m.inter_weights.cao());
354
355 /* charged particle counter */
356 auto p_index = std::size_t{0ul};
357
358 for (auto zipped : p_q_force_range) {
359 auto p_q = boost::get<0>(zipped);
360 auto &p_force = boost::get<1>(zipped);
361 if (p_q != 0.0) {
362 auto const pref = p_q * force_prefac;
363 auto const w = p3m.inter_weights.template load<cao>(p_index);
364
365 Utils::Vector3d force{};
366 p3m_interpolate(p3m.local_mesh, w, [&force, &p3m](int ind, double w) {
367 force[0u] += w * double(p3m.mesh.rs_fields[0u][ind]);
368 force[1u] += w * double(p3m.mesh.rs_fields[1u][ind]);
369 force[2u] += w * double(p3m.mesh.rs_fields[2u][ind]);
370 });
371
372 p_force -= pref * force;
373 ++p_index;
374 }
375 }
376 }
377};
378
379template <typename combined_ranges>
380static auto calc_dipole_moment(boost::mpi::communicator const &comm,
381 combined_ranges const &p_q_unfolded_pos_range) {
382 auto const local_dip =
383 boost::accumulate(p_q_unfolded_pos_range, Utils::Vector3d{},
384 [](Utils::Vector3d const &dip, auto const &q_pos) {
385 auto const p_q = boost::get<0>(q_pos);
386 auto const &p_unfolded_pos = boost::get<1>(q_pos);
387 return dip + p_q * p_unfolded_pos;
388 });
389 return boost::mpi::all_reduce(comm, local_dip, std::plus<>());
390}
391
392/** @details Calculate the long range electrostatics part of the pressure
393 * tensor. This is part \f$\Pi_{\textrm{dir}, \alpha, \beta}\f$ eq. (2.6)
394 * in @cite essmann95a. The part \f$\Pi_{\textrm{corr}, \alpha, \beta}\f$
395 * eq. (2.8) is not present here since M is the empty set in our simulations.
396 */
397template <typename FloatType, Arch Architecture>
399 ParticleRange const &particles) {
400 auto const &box_geo = *get_system().box_geo;
401 Utils::Vector9d node_k_space_pressure_tensor{};
402
403 if (p3m.sum_q2 > 0.) {
404 charge_assign(particles);
405 p3m.fft_buffers->perform_scalar_halo_gather();
406 p3m.fft->forward_fft(p3m.fft_buffers->get_scalar_mesh());
407 p3m.update_mesh_views();
408
409 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
410 auto const &mesh_stop = p3m.mesh.size;
411 auto const &offset = p3m.mesh.start;
412 auto const half_alpha_inv_sq = Utils::sqr(1. / 2. / p3m.params.alpha);
413 auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv();
414 auto const [KX, KY, KZ] = p3m.fft->get_permutations();
415 auto indices = Utils::Vector3i{};
416 auto index = std::size_t(0u);
417 auto diagonal = 0.;
418
419 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
420 auto const shift = indices + offset;
421 auto const kx = p3m.d_op[0u][shift[KX]] * wavevector[0u];
422 auto const ky = p3m.d_op[1u][shift[KY]] * wavevector[1u];
423 auto const kz = p3m.d_op[2u][shift[KZ]] * wavevector[2u];
424 auto const norm_sq = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz);
425
426 if (norm_sq != 0.) {
427 auto const node_k_space_energy =
428 double(p3m.g_energy[index] *
429 (Utils::sqr(p3m.mesh.rs_scalar[2u * index + 0u]) +
430 Utils::sqr(p3m.mesh.rs_scalar[2u * index + 1u])));
431 auto const vterm = -2. * (1. / norm_sq + half_alpha_inv_sq);
432 auto const pref = node_k_space_energy * vterm;
433 node_k_space_pressure_tensor[0u] += pref * kx * kx; /* sigma_xx */
434 node_k_space_pressure_tensor[1u] += pref * kx * ky; /* sigma_xy */
435 node_k_space_pressure_tensor[2u] += pref * kx * kz; /* sigma_xz */
436 node_k_space_pressure_tensor[4u] += pref * ky * ky; /* sigma_yy */
437 node_k_space_pressure_tensor[5u] += pref * ky * kz; /* sigma_yz */
438 node_k_space_pressure_tensor[8u] += pref * kz * kz; /* sigma_zz */
439 diagonal += node_k_space_energy;
440 }
441 ++index;
442 });
443
444 node_k_space_pressure_tensor[0u] += diagonal;
445 node_k_space_pressure_tensor[4u] += diagonal;
446 node_k_space_pressure_tensor[8u] += diagonal;
447 node_k_space_pressure_tensor[3u] = node_k_space_pressure_tensor[1u];
448 node_k_space_pressure_tensor[6u] = node_k_space_pressure_tensor[2u];
449 node_k_space_pressure_tensor[7u] = node_k_space_pressure_tensor[5u];
450 }
451
452 return node_k_space_pressure_tensor * prefactor / (2. * box_geo.volume());
453}
454
455template <typename FloatType, Arch Architecture>
457 bool force_flag, bool energy_flag, ParticleRange const &particles) {
458 auto const &system = get_system();
459 auto const &box_geo = *system.box_geo;
460#ifdef NPT
461 auto const npt_flag =
462 force_flag and (system.propagation->integ_switch == INTEG_METHOD_NPT_ISO);
463#else
464 auto constexpr npt_flag = false;
465#endif
466 if (p3m.sum_qpart == 0u) {
467 return 0.;
468 }
469
470 if (p3m.sum_q2 > 0.) {
471 if (not has_actor_of_type<ElectrostaticLayerCorrection>(
472 system.coulomb.impl->solver)) {
473 charge_assign(particles);
474 }
475 p3m.fft_buffers->perform_scalar_halo_gather();
476 p3m.fft->forward_fft(p3m.fft_buffers->get_scalar_mesh());
477 p3m.update_mesh_views();
478 }
479
480 auto p_q_range = ParticlePropertyRange::charge_range(particles);
481 auto p_force_range = ParticlePropertyRange::force_range(particles);
482 auto p_unfolded_pos_range =
484
485 // Note: after these calls, the grids are in the order yzx and not xyz
486 // anymore!!!
487 /* The dipole moment is only needed if we don't have metallic boundaries. */
488 auto const box_dipole =
489 (p3m.params.epsilon != P3M_EPSILON_METALLIC)
490 ? std::make_optional(calc_dipole_moment(
491 comm_cart, boost::combine(p_q_range, p_unfolded_pos_range)))
492 : std::nullopt;
493 auto const volume = box_geo.volume();
494 auto const pref =
495 4. * std::numbers::pi / volume / (2. * p3m.params.epsilon + 1.);
496
497 /* === k-space force calculation === */
498 if (force_flag) {
499 /* i*k differentiation */
500 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
501 auto const &mesh_stop = p3m.mesh.size;
502 auto const &offset = p3m.mesh.start;
503 auto const wavevector = Utils::Vector3<FloatType>((2. * std::numbers::pi) *
504 box_geo.length_inv());
505 auto indices = Utils::Vector3i{};
506 auto index = std::size_t(0u);
507
508 /* compute electric field */
509 // Eq. (3.49) @cite deserno00b
510 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
511 auto const rho_hat =
512 std::complex<FloatType>(p3m.mesh.rs_scalar[2u * index + 0u],
513 p3m.mesh.rs_scalar[2u * index + 1u]);
514 auto const phi_hat = p3m.g_force[index] * rho_hat;
515
516 for (int d = 0; d < 3; d++) {
517 /* direction in r-space: */
518 int d_rs = (d + p3m.mesh.ks_pnum) % 3;
519 /* directions */
520 auto const k = FloatType(p3m.d_op[d_rs][indices[d] + offset[d]]) *
521 wavevector[d_rs];
522
523 /* i*k*(Re+i*Im) = - Im*k + i*Re*k (i=sqrt(-1)) */
524 p3m.mesh.rs_fields[d_rs][2u * index + 0u] = -k * phi_hat.imag();
525 p3m.mesh.rs_fields[d_rs][2u * index + 1u] = +k * phi_hat.real();
526 }
527
528 ++index;
529 });
530
531 auto const check_residuals =
532 not p3m.params.tuning and check_complex_residuals;
533 p3m.fft->check_complex_residuals = check_residuals;
534 for (auto &rs_mesh : p3m.fft_buffers->get_vector_mesh()) {
535 p3m.fft->backward_fft(rs_mesh);
536 }
537 p3m.fft_buffers->perform_vector_halo_spread();
538 p3m.fft->check_complex_residuals = false;
539
540 auto const force_prefac = prefactor / volume;
541 Utils::integral_parameter<int, AssignForces, 1, 7>(
542 p3m.params.cao, p3m, force_prefac,
543 boost::combine(p_q_range, p_force_range));
544
545 // add dipole forces
546 // Eq. (3.19) @cite deserno00b
547 if (box_dipole) {
548 auto const dm = prefactor * pref * box_dipole.value();
549 for (auto zipped : boost::combine(p_q_range, p_force_range)) {
550 auto p_q = boost::get<0>(zipped);
551 auto &p_force = boost::get<1>(zipped);
552 p_force -= p_q * dm;
553 }
554 }
555 }
556
557 /* === k-space energy calculation === */
558 if (energy_flag or npt_flag) {
559 auto node_energy = 0.;
560 auto const mesh_length = Utils::product(p3m.mesh.size);
561 for (int i = 0; i < mesh_length; i++) {
562 // Use the energy optimized influence function for energy!
563 // Eq. (3.40) @cite deserno00b
564 node_energy +=
565 double(p3m.g_energy[i] * (Utils::sqr(p3m.mesh.rs_scalar[2 * i + 0]) +
566 Utils::sqr(p3m.mesh.rs_scalar[2 * i + 1])));
567 }
568 node_energy /= 2. * volume;
569
570 auto energy = 0.;
571 boost::mpi::reduce(comm_cart, node_energy, energy, std::plus<>(), 0);
572 if (this_node == 0) {
573 /* self energy correction */
574 // Eq. (3.8) @cite deserno00b
575 energy -= p3m.sum_q2 * p3m.params.alpha * std::numbers::inv_sqrtpi;
576 /* net charge correction */
577 // Eq. (3.11) @cite deserno00b
578 energy -= p3m.square_sum_q * std::numbers::pi /
579 (2. * volume * Utils::sqr(p3m.params.alpha));
580 /* dipole correction */
581 // Eq. (3.9) @cite deserno00b
582 if (box_dipole) {
583 energy += pref * box_dipole.value().norm2();
584 }
585 }
586 energy *= prefactor;
587#ifdef NPT
588 if (npt_flag) {
590 }
591#endif
592 if (not energy_flag) {
593 energy = 0.;
594 }
595 return energy;
596 }
597
598 return 0.;
599}
600
601template <typename FloatType, Arch Architecture>
604 double m_mesh_density_min = -1., m_mesh_density_max = -1.;
605 // indicates if mesh should be tuned
606 bool m_tune_mesh = false;
607 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
608
609protected:
610 P3MParameters &get_params() override { return p3m.params; }
611
612public:
613 CoulombTuningAlgorithm(System::System &system, auto &input_p3m,
614 double prefactor, int timings,
615 decltype(m_tune_limits) tune_limits)
616 : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m},
617 m_tune_limits{std::move(tune_limits)} {}
618
619 void on_solver_change() const override { m_system.on_coulomb_change(); }
620
621 void setup_logger(bool verbose) override {
622 auto const &box_geo = *m_system.box_geo;
623#ifdef CUDA
624 auto const on_gpu = Architecture == Arch::GPU;
625#else
626 auto const on_gpu = false;
627#endif
628 m_logger = std::make_unique<TuningLogger>(
629 verbose and this_node == 0, (on_gpu) ? "CoulombP3MGPU" : "CoulombP3M",
631 m_logger->tuning_goals(p3m.params.accuracy, m_prefactor,
632 box_geo.length()[0], p3m.sum_qpart, p3m.sum_q2);
633 m_logger->log_tuning_start();
634 }
635
636 std::optional<std::string>
637 layer_correction_veto_r_cut(double r_cut) const override {
638 auto const &solver = m_system.coulomb.impl->solver;
639 if (auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
640 return actor->veto_r_cut(r_cut);
641 }
642 return {};
643 }
644
645 std::optional<std::string> fft_decomposition_veto(
646 Utils::Vector3i const &mesh_size_r_space) const override {
647#ifdef CUDA
648 if constexpr (Architecture == Arch::GPU) {
649 return std::nullopt;
650 }
651#endif
652 using Array3i = Utils::Array<int, 3u>;
653 auto const [KX, KY, KZ] = p3m.fft->get_permutations();
654 auto valid_decomposition = false;
655 Array3i mesh_size_k_space = {};
656 boost::mpi::reduce(
657 ::comm_cart, Array3i(p3m.mesh.stop), mesh_size_k_space,
658 [](Array3i const &lhs, Array3i const &rhs) {
659 return Array3i{{std::max(lhs[0u], rhs[0u]),
660 std::max(lhs[1u], rhs[1u]),
661 std::max(lhs[2u], rhs[2u])}};
662 },
663 0);
664 if (::this_node == 0) {
665 valid_decomposition = (mesh_size_r_space[0u] == mesh_size_k_space[KX] and
666 mesh_size_r_space[1u] == mesh_size_k_space[KY] and
667 mesh_size_r_space[2u] == mesh_size_k_space[KZ]);
668 }
669 boost::mpi::broadcast(::comm_cart, valid_decomposition, 0);
670 std::optional<std::string> retval{"conflict with FFT domain decomposition"};
671 if (valid_decomposition) {
672 retval = std::nullopt;
673 }
674 return retval;
675 }
676
677 std::tuple<double, double, double, double>
679 double r_cut_iL) const override {
680
681 auto const &box_geo = *m_system.box_geo;
682 double alpha_L, rs_err, ks_err;
683
684 /* calc maximal real space error for setting */
685 rs_err = p3m_real_space_error(m_prefactor, r_cut_iL, p3m.sum_qpart,
686 p3m.sum_q2, 0., box_geo.length());
687
688 if (std::numbers::sqrt2 * rs_err > p3m.params.accuracy) {
689 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
690 alpha_L = sqrt(log(std::numbers::sqrt2 * rs_err / p3m.params.accuracy)) /
691 r_cut_iL;
692 } else {
693 /* even alpha=0 is ok, however, we cannot choose it since it kills the
694 k-space error formula.
695 Anyways, this very likely NOT the optimal solution */
696 alpha_L = 0.1;
697 }
698
699 /* calculate real-space and k-space error for this alpha_L */
700 rs_err = p3m_real_space_error(m_prefactor, r_cut_iL, p3m.sum_qpart,
701 p3m.sum_q2, alpha_L, box_geo.length());
702#ifdef CUDA
703 if constexpr (Architecture == Arch::GPU) {
704 if (this_node == 0) {
705 ks_err =
706 p3m_k_space_error_gpu(m_prefactor, mesh.data(), cao, p3m.sum_qpart,
707 p3m.sum_q2, alpha_L, box_geo.length().data());
708 }
709 boost::mpi::broadcast(comm_cart, ks_err, 0);
710 } else
711#endif
712 ks_err = p3m_k_space_error(m_prefactor, mesh, cao, p3m.sum_qpart,
713 p3m.sum_q2, alpha_L, box_geo.length());
714
715 return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L};
716 }
717
718 void determine_mesh_limits() override {
719 auto const &box_geo = *m_system.box_geo;
720 auto const mesh_density =
721 static_cast<double>(p3m.params.mesh[0]) * box_geo.length_inv()[0];
722
723 if (p3m.params.mesh == Utils::Vector3i::broadcast(-1)) {
724 /* avoid using more than 1 GB of FFT arrays */
725 auto const normalized_box_dim = std::cbrt(box_geo.volume());
726 auto const max_npart_per_dim = 512.;
727 /* simple heuristic to limit the tried meshes if the accuracy cannot
728 be obtained with smaller meshes, but normally not all these
729 meshes have to be tested */
730 auto const min_npart_per_dim = std::min(
731 max_npart_per_dim, std::cbrt(static_cast<double>(p3m.sum_qpart)));
732 m_mesh_density_min = min_npart_per_dim / normalized_box_dim;
733 m_mesh_density_max = max_npart_per_dim / normalized_box_dim;
734 if (m_tune_limits.first or m_tune_limits.second) {
735 auto const &box_l = box_geo.length();
736 auto const dim = std::max({box_l[0], box_l[1], box_l[2]});
737 if (m_tune_limits.first) {
738 m_mesh_density_min = static_cast<double>(*m_tune_limits.first) / dim;
739 }
740 if (m_tune_limits.second) {
741 m_mesh_density_max = static_cast<double>(*m_tune_limits.second) / dim;
742 }
743 }
744 m_tune_mesh = true;
745 } else {
746 m_mesh_density_min = m_mesh_density_max = mesh_density;
747 assert(p3m.params.mesh[0] >= 1);
748 if (p3m.params.mesh[1] == -1 and p3m.params.mesh[2] == -1) {
749 // determine the two missing values by rescaling by the box length
750 for (auto i : {1, 2}) {
751 p3m.params.mesh[i] =
752 static_cast<int>(std::round(mesh_density * box_geo.length()[i]));
753 // make the mesh even in all directions
754 p3m.params.mesh[i] += p3m.params.mesh[i] % 2;
755 }
756 }
757 m_logger->report_fixed_mesh(p3m.params.mesh);
758 }
759 }
760
762 auto const &box_geo = *m_system.box_geo;
763 auto const &solver = m_system.coulomb.impl->solver;
764 auto tuned_params = TuningAlgorithm::Parameters{};
765 auto time_best = time_sentinel;
766 auto mesh_density = m_mesh_density_min;
767 while (mesh_density <= m_mesh_density_max) {
768 auto trial_params = TuningAlgorithm::Parameters{};
769 if (m_tune_mesh) {
770 for (auto i : {0, 1, 2}) {
771 trial_params.mesh[i] =
772 static_cast<int>(std::round(box_geo.length()[i] * mesh_density));
773 // make the mesh even in all directions
774 trial_params.mesh[i] += trial_params.mesh[i] % 2;
775 }
776 } else {
777 trial_params.mesh = p3m.params.mesh;
778 }
779 trial_params.cao = cao_best;
780
781 auto const trial_time =
782 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
783 trial_params.alpha_L, trial_params.accuracy);
784
785 if (trial_time >= 0.) {
786 /* the optimum r_cut for this mesh is the upper limit for higher meshes,
787 everything else is slower */
788 if (has_actor_of_type<CoulombP3M>(solver)) {
789 m_r_cut_iL_max = trial_params.r_cut_iL;
790 }
791
792 if (trial_time < time_best) {
793 /* new optimum */
794 reset_n_trials();
795 tuned_params = trial_params;
796 time_best = tuned_params.time = trial_time;
797 } else if (trial_time > time_best + time_granularity or
798 get_n_trials() > max_n_consecutive_trials) {
799 /* no hope of further optimisation */
800 break;
801 }
802 }
803 mesh_density += 0.1;
804 }
805 return tuned_params;
806 }
807};
808
809template <typename FloatType, Arch Architecture>
811 auto &system = get_system();
812 auto const &box_geo = *system.box_geo;
813 if (p3m.params.alpha_L == 0. and p3m.params.alpha != 0.) {
814 p3m.params.alpha_L = p3m.params.alpha * box_geo.length()[0];
815 }
816 if (p3m.params.r_cut_iL == 0. and p3m.params.r_cut != 0.) {
817 p3m.params.r_cut_iL = p3m.params.r_cut * box_geo.length_inv()[0];
818 }
819 if (not is_tuned()) {
820 count_charged_particles();
821 if (p3m.sum_qpart == 0) {
822 throw std::runtime_error(
823 "CoulombP3M: no charged particles in the system");
824 }
825 try {
827 system, p3m, prefactor, tune_timings, tune_limits);
828 parameters.setup_logger(tune_verbose);
829 // parameter ranges
830 parameters.determine_mesh_limits();
831 parameters.determine_r_cut_limits();
832 parameters.determine_cao_limits(7);
833 // run tuning algorithm
834 parameters.tune();
835 m_is_tuned = true;
836 system.on_coulomb_change();
837 } catch (...) {
838 p3m.params.tuning = false;
839 throw;
840 }
841 }
842 init();
843}
844
846 auto const &system = get_system();
847 auto const &box_geo = *system.box_geo;
848 auto const &local_geo = *system.local_geo;
849 for (auto i = 0u; i < 3u; i++) {
850 /* check k-space cutoff */
851 if (p3m_params.cao_cut[i] >= box_geo.length_half()[i]) {
852 std::stringstream msg;
853 msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i]
854 << " is larger than half of box dimension " << box_geo.length()[i];
855 throw std::runtime_error(msg.str());
856 }
857 if (p3m_params.cao_cut[i] >= local_geo.length()[i]) {
858 std::stringstream msg;
859 msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i]
860 << " is larger than local box dimension " << local_geo.length()[i];
861 throw std::runtime_error(msg.str());
862 }
863 }
864
866 if ((box_geo.length()[0] != box_geo.length()[1]) or
867 (box_geo.length()[1] != box_geo.length()[2]) or
868 (p3m_params.mesh[0] != p3m_params.mesh[1]) or
869 (p3m_params.mesh[1] != p3m_params.mesh[2])) {
870 throw std::runtime_error(
871 "CoulombP3M: non-metallic epsilon requires cubic box");
872 }
873 }
874}
875
877 auto const &box_geo = *get_system().box_geo;
878 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
879 throw std::runtime_error(
880 "CoulombP3M: requires periodicity (True, True, True)");
881 }
882}
883
885 auto const &local_geo = *get_system().local_geo;
886 if (local_geo.cell_structure_type() != CellStructureType::REGULAR and
887 local_geo.cell_structure_type() != CellStructureType::HYBRID) {
888 throw std::runtime_error(
889 "CoulombP3M: requires the regular or hybrid decomposition cell system");
890 }
891 if (::communicator.size > 1 and
892 local_geo.cell_structure_type() == CellStructureType::HYBRID) {
893 throw std::runtime_error(
894 "CoulombP3M: does not work with the hybrid decomposition cell system, "
895 "if using more than one MPI node");
896 }
897}
898
900 auto const &node_grid = ::communicator.node_grid;
901 if (node_grid[0] < node_grid[1] || node_grid[1] < node_grid[2]) {
902 throw std::runtime_error(
903 "CoulombP3M: node grid must be sorted, largest first");
904 }
905}
906
907template <typename FloatType, Arch Architecture>
909 auto const &box_geo = *get_system().box_geo;
910 p3m.params.r_cut = p3m.params.r_cut_iL * box_geo.length()[0];
911 p3m.params.alpha = p3m.params.alpha_L * box_geo.length_inv()[0];
912 p3m.params.recalc_a_ai_cao_cut(box_geo.length());
914 sanity_checks_boxl();
915 calc_influence_function_force();
916 calc_influence_function_energy();
917}
918
919#ifdef CUDA
920template <typename FloatType, Arch Architecture>
922 ParticleRange const &particles) {
923 if constexpr (Architecture == Arch::GPU) {
924#ifdef NPT
925 if (get_system().propagation->integ_switch == INTEG_METHOD_NPT_ISO) {
926 auto const energy = long_range_energy(particles);
928 }
929#else
930 static_cast<void>(particles);
931#endif
932 if (this_node == 0) {
933 auto &gpu = get_system().gpu;
934 p3m_gpu_add_farfield_force(*m_gpu_data, gpu, prefactor,
935 gpu.n_particles());
936 }
937 }
938}
939
940/* Initialize the CPU kernels.
941 * This operation is time-consuming and sets up data members
942 * that are only relevant for ELC force corrections, since the
943 * GPU implementation uses CPU kernels to compute energies.
944 */
945template <typename FloatType, Arch Architecture>
947 if constexpr (Architecture == Arch::GPU) {
948 auto &system = get_system();
949 if (has_actor_of_type<ElectrostaticLayerCorrection>(
950 system.coulomb.impl->solver)) {
951 init_cpu_kernels();
952 }
953 p3m_gpu_init(m_gpu_data, p3m.params.cao, p3m.params.mesh, p3m.params.alpha,
954 system.box_geo->length(), system.gpu.n_particles());
955 }
956}
957
958template <typename FloatType, Arch Architecture>
960 if constexpr (Architecture == Arch::GPU) {
961 auto &gpu_particle_data = get_system().gpu;
963 gpu_particle_data.enable_property(GpuParticleData::prop::q);
964 gpu_particle_data.enable_property(GpuParticleData::prop::pos);
965 }
966}
967#endif // CUDA
968
973
974#endif // P3M
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
@ INTEG_METHOD_NPT_ISO
Vector implementation and trait types for boost qvm interoperability.
TuningAlgorithm::Parameters get_time() override
Definition p3m.cpp:761
P3MParameters & get_params() override
Definition p3m.cpp:610
std::optional< std::string > fft_decomposition_veto(Utils::Vector3i const &mesh_size_r_space) const override
Definition p3m.cpp:645
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
Definition p3m.cpp:678
CoulombTuningAlgorithm(System::System &system, auto &input_p3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
Definition p3m.cpp:613
void setup_logger(bool verbose) override
Definition p3m.cpp:621
void on_solver_change() const override
Definition p3m.cpp:619
std::optional< std::string > layer_correction_veto_r_cut(double r_cut) const override
Definition p3m.cpp:637
void determine_mesh_limits() override
Definition p3m.cpp:718
void enable_property(std::size_t property)
std::size_t n_particles() const
A range of particles.
Main system class.
std::shared_ptr< LocalBox > local_geo
GpuParticleData gpu
std::shared_ptr< Propagation > propagation
std::shared_ptr< CellStructure > cell_structure
Coulomb::Solver coulomb
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
T norm() const
Definition Vector.hpp:139
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
Definition Vector.hpp:111
Cache for interpolation weights.
void store(const InterpolationWeights< cao > &w)
Push back weights for one point.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
This file contains the defaults for ESPResSo.
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
Definition config.hpp:66
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
Definition config.hpp:53
void charge_assign(elc_data const &elc, CoulombP3M &solver, combined_ranges const &p_q_pos_range)
Definition elc.cpp:1114
ELC algorithm for long-range Coulomb interactions.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
Routines, row decomposition, data structures and communication for the 3D-FFT.
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, InterpolationWeights< cao > const &weights, Kernel kernel)
P3M grid interpolation.
auto charge_range(ParticleRange const &particles)
auto pos_range(ParticleRange const &particles)
auto force_range(ParticleRange const &particles)
auto unfolded_pos_range(ParticleRange const &particles, BoxGeometry const &box)
T product(Vector< T, N > const &v)
Definition Vector.hpp:375
VectorXd< 3 > Vector3d
Definition Vector.hpp:165
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
Definition sqr.hpp:28
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
Definition math.hpp:53
auto get_analytic_cotangent_sum_kernel(int cao)
Definition math.hpp:128
void npt_add_virial_contribution(double energy)
Definition npt.cpp:141
Exports for the NpT code.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
static auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift, Utils::Vector3i const &mesh, Utils::Vector3d const &mesh_i, int cao, double alpha_L_i)
Aliasing sum used by p3m_k_space_error.
Definition p3m.cpp:147
static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh, int cao, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the analytic expression of the error estimate for the P3M method in (eq.
Definition p3m.cpp:212
static double p3m_real_space_error(double pref, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the real space contribution to the rms error in the force (as described by Kolafa and Perra...
Definition p3m.cpp:191
static auto calc_dipole_moment(boost::mpi::communicator const &comm, combined_ranges const &p_q_unfolded_pos_range)
Definition p3m.cpp:380
P3M algorithm for long-range Coulomb interaction.
void p3m_gpu_add_farfield_force(P3MGpuParams &data, GpuParticleData &gpu, double prefactor, std::size_t n_part)
The long-range part of the P3M algorithm.
void p3m_gpu_init(std::shared_ptr< P3MGpuParams > &data, int cao, Utils::Vector3i const &mesh, double alpha, Utils::Vector3d const &box_l, std::size_t n_part)
Initialize the internal data structure of the P3M GPU.
P3M electrostatics on GPU.
double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao, int npart, double sum_q2, double alpha_L, const double *box)
void operator()(auto &p3m, double force_prefac, combined_ranges const &p_q_force_range) const
Definition p3m.cpp:350
Utils::Vector3i node_grid
void charge_assign(ParticleRange const &particles) override
Definition p3m.cpp:325
void tune() override
Definition p3m.cpp:810
void assign_charge(double q, Utils::Vector3d const &real_pos, bool skip_cache) override
Definition p3m.cpp:337
void calc_influence_function_energy() override
Calculate the influence function optimized for the energy and the self energy correction.
Definition p3m.cpp:139
void scaleby_box_l() override
Definition p3m.cpp:908
void calc_influence_function_force() override
Calculate the optimal influence function of .
Definition p3m.cpp:128
Utils::Vector9d long_range_pressure(ParticleRange const &particles) override
Definition p3m.cpp:398
void init_cpu_kernels()
Definition p3m.cpp:250
void count_charged_particles() override
Definition p3m.cpp:99
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
Definition p3m.cpp:456
void request_gpu() const
Definition p3m.cpp:959
void init_gpu_kernels()
Definition p3m.cpp:946
void add_long_range_forces_gpu(ParticleRange const &particles)
Definition p3m.cpp:921
void sanity_checks_periodicity() const
Definition p3m.cpp:876
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
Definition p3m.cpp:845
void sanity_checks_cell_structure() const
Definition p3m.cpp:884
P3MParameters const & p3m_params
Definition p3m.hpp:57
void sanity_checks_node_grid() const
Definition p3m.cpp:899
std::unique_ptr< Implementation > impl
Pointer-to-implementation.
static constexpr std::size_t force
static constexpr std::size_t pos
static constexpr std::size_t q
Interpolation weights for one point.
void recalc_ld_pos(P3MParameters const &params)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
int cao
charge assignment order ([0,7]).
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
double epsilon
epsilon of the "surrounding dielectric".
DEVICE_QUALIFIER constexpr pointer data() noexcept
Definition Array.hpp:132
void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos, p3m_interpolation_cache &inter_weights)
Definition p3m.cpp:297
void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos, InterpolationWeights< cao > const &w)
Definition p3m.cpp:288
void operator()(auto &p3m, combined_ranges const &p_q_pos_range)
Definition p3m.cpp:312
void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos)
Definition p3m.cpp:305
std::size_t sum_qpart
number of charged particles.
Definition p3m.impl.hpp:50
double sum_q2
Sum of square of charges.
Definition p3m.impl.hpp:52
P3MLocalMesh local_mesh
Local mesh properties.
P3MParameters params
P3M base parameters.
std::unique_ptr< FFTBackend< FloatType > > fft
FFT algorithm.
P3MFFTMesh< FloatType > mesh
Local mesh FFT buffers.