ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
p3m.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2022 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22/** @file
23 *
24 * The corresponding header file is @ref p3m.hpp.
25 */
26
28
29#ifdef P3M
30
35
37#include "p3m/TuningLogger.hpp"
38#include "p3m/fft.hpp"
40
41#include "BoxGeometry.hpp"
42#include "LocalBox.hpp"
43#include "Particle.hpp"
45#include "ParticleRange.hpp"
46#include "PropagationMode.hpp"
47#include "actor/visitors.hpp"
50#include "communication.hpp"
51#include "errorhandling.hpp"
53#include "npt.hpp"
54#include "system/System.hpp"
55#include "tuning.hpp"
56
57#include <utils/Span.hpp>
58#include <utils/Vector.hpp>
59#include <utils/constants.hpp>
62#include <utils/math/sinc.hpp>
63#include <utils/math/sqr.hpp>
64
65#include <boost/mpi/collectives/all_reduce.hpp>
66#include <boost/mpi/collectives/broadcast.hpp>
67#include <boost/mpi/collectives/reduce.hpp>
68#include <boost/mpi/communicator.hpp>
69#include <boost/range/combine.hpp>
70#include <boost/range/numeric.hpp>
71
72#include <algorithm>
73#include <array>
74#include <cassert>
75#include <complex>
76#include <cstddef>
77#include <functional>
78#include <optional>
79#include <sstream>
80#include <stdexcept>
81#include <string>
82#include <utility>
83
85 auto local_n = 0;
86 auto local_q2 = 0.0;
87 auto local_q = 0.0;
88
89 for (auto const &p : get_system().cell_structure->local_particles()) {
90 if (p.q() != 0.0) {
91 local_n++;
92 local_q2 += Utils::sqr(p.q());
93 local_q += p.q();
94 }
95 }
96
97 boost::mpi::all_reduce(comm_cart, local_n, p3m.sum_qpart, std::plus<>());
98 boost::mpi::all_reduce(comm_cart, local_q2, p3m.sum_q2, std::plus<>());
99 boost::mpi::all_reduce(comm_cart, local_q, p3m.square_sum_q, std::plus<>());
101}
102
103/** Calculate the optimal influence function of @cite hockney88a.
104 * (optimised for force calculations)
105 *
106 * Each node calculates only the values for its domain in k-space
107 * (see fft.plan[3].mesh and fft.plan[3].start).
108 *
109 * See also: @cite hockney88a eq. 8-22 (p. 275). Note the somewhat
110 * different convention for the prefactors, which is described in
111 * @cite deserno98a @cite deserno98b.
112 */
113void CoulombP3M::calc_influence_function_force() {
114 auto const start = Utils::Vector3i{p3m.fft.plan[3].start};
115 auto const size = Utils::Vector3i{p3m.fft.plan[3].new_mesh};
116
117 p3m.g_force = grid_influence_function<1>(p3m.params, start, start + size,
118 get_system().box_geo->length());
119}
120
121/** Calculate the influence function optimized for the energy and the
122 * self energy correction.
123 */
124void CoulombP3M::calc_influence_function_energy() {
125 auto const start = Utils::Vector3i{p3m.fft.plan[3].start};
126 auto const size = Utils::Vector3i{p3m.fft.plan[3].new_mesh};
127
128 p3m.g_energy = grid_influence_function<0>(p3m.params, start, start + size,
129 get_system().box_geo->length());
130}
131
132/** Aliasing sum used by @ref p3m_k_space_error. */
133static auto p3m_tune_aliasing_sums(int nx, int ny, int nz,
134 Utils::Vector3i const &mesh,
135 Utils::Vector3d const &mesh_i, int cao,
136 double alpha_L_i) {
137 using Utils::sinc;
138
139 auto const factor1 = Utils::sqr(Utils::pi() * alpha_L_i);
140
141 auto alias1 = 0.;
142 auto alias2 = 0.;
143 for (int mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) {
144 auto const nmx = nx + mx * mesh[0];
145 auto const fnmx = mesh_i[0] * nmx;
146 for (int my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) {
147 auto const nmy = ny + my * mesh[1];
148 auto const fnmy = mesh_i[1] * nmy;
149 for (int mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) {
150 auto const nmz = nz + mz * mesh[2];
151 auto const fnmz = mesh_i[2] * nmz;
152
153 auto const nm2 = Utils::sqr(nmx) + Utils::sqr(nmy) + Utils::sqr(nmz);
154 auto const ex = exp(-factor1 * nm2);
155 auto const ex2 = Utils::sqr(ex);
156
157 auto const U2 = pow(sinc(fnmx) * sinc(fnmy) * sinc(fnmz), 2.0 * cao);
158
159 alias1 += ex2 / nm2;
160 alias2 += U2 * ex * (nx * nmx + ny * nmy + nz * nmz) / nm2;
161 }
162 }
163 }
164 return std::make_pair(alias1, alias2);
165}
166
167/** Calculate the real space contribution to the rms error in the force (as
168 * described by Kolafa and Perram).
169 * \param pref Prefactor of Coulomb interaction.
170 * \param r_cut_iL rescaled real space cutoff for p3m method.
171 * \param n_c_part number of charged particles in the system.
172 * \param sum_q2 sum of square of charges in the system
173 * \param alpha_L rescaled Ewald splitting parameter.
174 * \param box_l box dimensions.
175 * \return real space error
176 */
177static double p3m_real_space_error(double pref, double r_cut_iL, int n_c_part,
178 double sum_q2, double alpha_L,
179 Utils::Vector3d const &box_l) {
180 auto const volume = Utils::product(box_l);
181 return (2. * pref * sum_q2 * exp(-Utils::sqr(r_cut_iL * alpha_L))) /
182 sqrt(static_cast<double>(n_c_part) * r_cut_iL * box_l[0] * volume);
183}
184
185/** Calculate the analytic expression of the error estimate for the
186 * P3M method in @cite hockney88a (eq. 8-23 p. 275) in
187 * order to obtain the rms error in the force for a system of N
188 * randomly distributed particles in a cubic box (k-space part).
189 * \param pref Prefactor of Coulomb interaction.
190 * \param mesh number of mesh points in one direction.
191 * \param cao charge assignment order.
192 * \param n_c_part number of charged particles in the system.
193 * \param sum_q2 sum of square of charges in the system
194 * \param alpha_L rescaled Ewald splitting parameter.
195 * \param box_l box dimensions.
196 * \return reciprocal (k) space error
197 */
198static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh,
199 int cao, int n_c_part, double sum_q2,
200 double alpha_L, Utils::Vector3d const &box_l) {
201 auto const mesh_i =
203 auto const alpha_L_i = 1. / alpha_L;
204 auto he_q = 0.;
205
206 for (int nx = -mesh[0] / 2; nx < mesh[0] / 2; nx++) {
207 auto const ctan_x = p3m_analytic_cotangent_sum(nx, mesh_i[0], cao);
208 for (int ny = -mesh[1] / 2; ny < mesh[1] / 2; ny++) {
209 auto const ctan_y =
210 ctan_x * p3m_analytic_cotangent_sum(ny, mesh_i[1], cao);
211 for (int nz = -mesh[2] / 2; nz < mesh[2] / 2; nz++) {
212 if ((nx != 0) || (ny != 0) || (nz != 0)) {
213 auto const n2 = Utils::sqr(nx) + Utils::sqr(ny) + Utils::sqr(nz);
214 auto const cs =
215 p3m_analytic_cotangent_sum(nz, mesh_i[2], cao) * ctan_y;
216 auto const [alias1, alias2] =
217 p3m_tune_aliasing_sums(nx, ny, nz, mesh, mesh_i, cao, alpha_L_i);
218 auto const d = alias1 - Utils::sqr(alias2 / cs) / n2;
219 /* at high precision, d can become negative due to extinction;
220 also, don't take values that have no significant digits left*/
221 if (d > 0 && (fabs(d / alias1) > ROUND_ERROR_PREC))
222 he_q += d;
223 }
224 }
225 }
226 }
227 return 2. * pref * sum_q2 * sqrt(he_q / static_cast<double>(n_c_part)) /
228 (box_l[1] * box_l[2]);
229}
230
231#ifdef CUDA
232static double p3mgpu_k_space_error(double prefactor,
233 Utils::Vector3i const &mesh, int cao,
234 int npart, double sum_q2, double alpha_L,
235 Utils::Vector3d const &box_l) {
236 auto ks_err = 0.;
237 if (this_node == 0) {
238 ks_err = p3m_k_space_error_gpu(prefactor, mesh.data(), cao, npart, sum_q2,
239 alpha_L, box_l.data());
240 }
241 boost::mpi::broadcast(comm_cart, ks_err, 0);
242 return ks_err;
243}
244#endif
245
248 assert(p3m.params.cao >= 1 and p3m.params.cao <= 7);
249 assert(p3m.params.alpha > 0.);
250
251 auto const &system = get_system();
252 auto const &box_geo = *system.box_geo;
253 auto const &local_geo = *system.local_geo;
254 auto const skin = system.cell_structure->get_verlet_skin();
255
256 p3m.params.cao3 = Utils::int_pow<3>(p3m.params.cao);
257 p3m.params.recalc_a_ai_cao_cut(box_geo.length());
258
260
261 auto const &solver = system.coulomb.impl->solver;
262 double elc_layer = 0.;
263 if (auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
264 elc_layer = actor->elc.space_layer;
265 }
266
267 p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer);
269
270 int ca_mesh_size = fft_init(p3m.local_mesh.dim, p3m.local_mesh.margin,
273 p3m.rs_mesh.resize(ca_mesh_size);
274
275 for (auto &e : p3m.E_mesh) {
276 e.resize(ca_mesh_size);
277 }
278
280
281 /* fix box length dependent constants */
282 scaleby_box_l();
283
285}
286
287CoulombP3M::CoulombP3M(P3MParameters &&parameters, double prefactor,
288 int tune_timings, bool tune_verbose,
289 bool check_complex_residuals)
290 : p3m{std::move(parameters)}, tune_timings{tune_timings},
291 tune_verbose{tune_verbose},
292 check_complex_residuals{check_complex_residuals} {
293
294 if (tune_timings <= 0) {
295 throw std::domain_error("Parameter 'timings' must be > 0");
296 }
297 m_is_tuned = !p3m.params.tuning;
298 p3m.params.tuning = false;
300}
301
302namespace {
303template <int cao> struct AssignCharge {
304 void operator()(p3m_data_struct &p3m, double q,
305 Utils::Vector3d const &real_pos,
306 p3m_interpolation_cache &inter_weights) {
307 auto const w = p3m_calculate_interpolation_weights<cao>(
308 real_pos, p3m.params.ai, p3m.local_mesh);
309
310 inter_weights.store(w);
311
312 p3m_interpolate(p3m.local_mesh, w, [q, &p3m](int ind, double w) {
313 p3m.rs_mesh[ind] += w * q;
314 });
315 }
316
317 void operator()(p3m_data_struct &p3m, double q,
318 Utils::Vector3d const &real_pos) {
320 p3m.local_mesh,
321 p3m_calculate_interpolation_weights<cao>(real_pos, p3m.params.ai,
322 p3m.local_mesh),
323 [q, &p3m](int ind, double w) { p3m.rs_mesh[ind] += w * q; });
324 }
325
326 template <typename combined_ranges>
327 void operator()(p3m_data_struct &p3m, combined_ranges const &p_q_pos_range) {
328 for (auto zipped : p_q_pos_range) {
329 auto const p_q = boost::get<0>(zipped);
330 auto const &p_pos = boost::get<1>(zipped);
331 if (p_q != 0.0) {
332 this->operator()(p3m, p_q, p_pos, p3m.inter_weights);
333 }
334 }
335 }
336};
337} // namespace
338
341
342 /* prepare local FFT mesh */
343 for (int i = 0; i < p3m.local_mesh.size; i++)
344 p3m.rs_mesh[i] = 0.0;
345
346 auto p_q_range = ParticlePropertyRange::charge_range(particles);
347 auto p_pos_range = ParticlePropertyRange::pos_range(particles);
348
349 Utils::integral_parameter<int, AssignCharge, 1, 7>(
350 p3m.params.cao, p3m, boost::combine(p_q_range, p_pos_range));
351}
352
353void CoulombP3M::assign_charge(double q, Utils::Vector3d const &real_pos,
354 p3m_interpolation_cache &inter_weights) {
355 Utils::integral_parameter<int, AssignCharge, 1, 7>(p3m.params.cao, p3m, q,
356 real_pos, inter_weights);
357}
358
359void CoulombP3M::assign_charge(double q, Utils::Vector3d const &real_pos) {
360 Utils::integral_parameter<int, AssignCharge, 1, 7>(p3m.params.cao, p3m, q,
361 real_pos);
362}
363
364template <int cao> struct AssignForces {
365 template <typename combined_ranges>
366 void operator()(p3m_data_struct &p3m, double force_prefac,
367 combined_ranges const &p_q_force_range) const {
369 using Utils::Span;
370 using Utils::Vector;
371
372 assert(cao == p3m.inter_weights.cao());
373
374 /* charged particle counter */
375 auto p_index = std::size_t{0ul};
376
377 for (auto zipped : p_q_force_range) {
378 auto p_q = boost::get<0>(zipped);
379 auto &p_force = boost::get<1>(zipped);
380 if (p_q != 0.0) {
381 auto const pref = p_q * force_prefac;
382 auto const w = p3m.inter_weights.load<cao>(p_index);
383
385 p3m_interpolate(p3m.local_mesh, w, [&force, &p3m](int ind, double w) {
386 force += w * Utils::Vector3d{p3m.E_mesh[0][ind], p3m.E_mesh[1][ind],
387 p3m.E_mesh[2][ind]};
388 });
389
390 p_force -= pref * force;
391 ++p_index;
392 }
393 }
394 }
395};
396
397template <typename combined_ranges>
398static auto calc_dipole_moment(boost::mpi::communicator const &comm,
399 combined_ranges const &p_q_unfolded_pos_range) {
400 auto const local_dip =
401 boost::accumulate(p_q_unfolded_pos_range, Utils::Vector3d{},
402 [](Utils::Vector3d const &dip, auto const &q_pos) {
403 auto const p_q = boost::get<0>(q_pos);
404 auto const &p_unfolded_pos = boost::get<1>(q_pos);
405 return dip + p_q * p_unfolded_pos;
406 });
407 return boost::mpi::all_reduce(comm, local_dip, std::plus<>());
408}
409
410/** @details Calculate the long range electrostatics part of the pressure
411 * tensor. This is part \f$\Pi_{\textrm{dir}, \alpha, \beta}\f$ eq. (2.6)
412 * in @cite essmann95a. The part \f$\Pi_{\textrm{corr}, \alpha, \beta}\f$
413 * eq. (2.8) is not present here since M is the empty set in our simulations.
414 */
417 using namespace detail::FFT_indexing;
418
419 auto const &box_geo = *get_system().box_geo;
420
421 Utils::Vector9d node_k_space_pressure_tensor{};
422
423 if (p3m.sum_q2 > 0.) {
424 charge_assign(particles);
427
428 auto diagonal = 0.;
429 int ind = 0;
430 int j[3];
431 auto const half_alpha_inv_sq = Utils::sqr(1. / 2. / p3m.params.alpha);
432 for (j[0] = 0; j[0] < p3m.fft.plan[3].new_mesh[RX]; j[0]++) {
433 for (j[1] = 0; j[1] < p3m.fft.plan[3].new_mesh[RY]; j[1]++) {
434 for (j[2] = 0; j[2] < p3m.fft.plan[3].new_mesh[RZ]; j[2]++) {
435 auto const kx = 2. * Utils::pi() *
436 p3m.d_op[RX][j[KX] + p3m.fft.plan[3].start[KX]] *
437 box_geo.length_inv()[RX];
438 auto const ky = 2. * Utils::pi() *
439 p3m.d_op[RY][j[KY] + p3m.fft.plan[3].start[KY]] *
440 box_geo.length_inv()[RY];
441 auto const kz = 2. * Utils::pi() *
442 p3m.d_op[RZ][j[KZ] + p3m.fft.plan[3].start[KZ]] *
443 box_geo.length_inv()[RZ];
444 auto const sqk = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz);
445
446 if (sqk != 0.) {
447 auto const node_k_space_energy =
448 p3m.g_energy[ind] * (Utils::sqr(p3m.rs_mesh[2 * ind]) +
449 Utils::sqr(p3m.rs_mesh[2 * ind + 1]));
450 auto const vterm = -2. * (1. / sqk + half_alpha_inv_sq);
451 auto const pref = node_k_space_energy * vterm;
452 node_k_space_pressure_tensor[0] += pref * kx * kx; /* sigma_xx */
453 node_k_space_pressure_tensor[1] += pref * kx * ky; /* sigma_xy */
454 node_k_space_pressure_tensor[2] += pref * kx * kz; /* sigma_xz */
455 node_k_space_pressure_tensor[3] += pref * ky * kx; /* sigma_yx */
456 node_k_space_pressure_tensor[4] += pref * ky * ky; /* sigma_yy */
457 node_k_space_pressure_tensor[5] += pref * ky * kz; /* sigma_yz */
458 node_k_space_pressure_tensor[6] += pref * kz * kx; /* sigma_zx */
459 node_k_space_pressure_tensor[7] += pref * kz * ky; /* sigma_zy */
460 node_k_space_pressure_tensor[8] += pref * kz * kz; /* sigma_zz */
461 diagonal += node_k_space_energy;
462 }
463 ind++;
464 }
465 }
466 }
467 node_k_space_pressure_tensor[0] += diagonal;
468 node_k_space_pressure_tensor[4] += diagonal;
469 node_k_space_pressure_tensor[8] += diagonal;
470 }
471
472 return node_k_space_pressure_tensor * prefactor / (2. * box_geo.volume());
473}
474
475double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag,
476 ParticleRange const &particles) {
477 auto const &system = get_system();
478 auto const &box_geo = *system.box_geo;
479#ifdef NPT
480 auto const npt_flag =
481 force_flag and (system.propagation->integ_switch == INTEG_METHOD_NPT_ISO);
482#else
483 auto constexpr npt_flag = false;
484#endif
485
486 if (p3m.sum_q2 > 0.) {
487 if (not has_actor_of_type<ElectrostaticLayerCorrection>(
488 system.coulomb.impl->solver)) {
489 charge_assign(particles);
490 }
491 /* Gather information for FFT grid inside the nodes domain (inner local
492 * mesh) and perform forward 3D FFT (Charge Assignment Mesh). */
495 }
496
497 auto p_q_range = ParticlePropertyRange::charge_range(particles);
498 auto p_force_range = ParticlePropertyRange::force_range(particles);
499 auto p_unfolded_pos_range =
501
502 // Note: after these calls, the grids are in the order yzx and not xyz
503 // anymore!!!
504 /* The dipole moment is only needed if we don't have metallic boundaries. */
505 auto const box_dipole =
507 ? std::make_optional(calc_dipole_moment(
508 comm_cart, boost::combine(p_q_range, p_unfolded_pos_range)))
509 : std::nullopt;
510 auto const volume = box_geo.volume();
511 auto const pref = 4. * Utils::pi() / volume / (2. * p3m.params.epsilon + 1.);
512
513 /* === k-space force calculation === */
514 if (force_flag) {
515 /* sqrt(-1)*k differentiation */
516 int j[3];
517 int ind = 0;
518 /* compute electric field */
519 // Eq. (3.49) @cite deserno00b
520 for (j[0] = 0; j[0] < p3m.fft.plan[3].new_mesh[0]; j[0]++) {
521 for (j[1] = 0; j[1] < p3m.fft.plan[3].new_mesh[1]; j[1]++) {
522 for (j[2] = 0; j[2] < p3m.fft.plan[3].new_mesh[2]; j[2]++) {
523 auto const rho_hat = std::complex<double>(p3m.rs_mesh[2 * ind + 0],
524 p3m.rs_mesh[2 * ind + 1]);
525 auto const phi_hat = p3m.g_force[ind] * rho_hat;
526
527 for (int d = 0; d < 3; d++) {
528 /* direction in r-space: */
529 int d_rs = (d + p3m.ks_pnum) % 3;
530 /* directions */
531 auto const k = 2. * Utils::pi() *
532 p3m.d_op[d_rs][j[d] + p3m.fft.plan[3].start[d]] *
533 box_geo.length_inv()[d_rs];
534
535 /* i*k*(Re+i*Im) = - Im*k + i*Re*k (i=sqrt(-1)) */
536 p3m.E_mesh[d_rs][2 * ind + 0] = -k * phi_hat.imag();
537 p3m.E_mesh[d_rs][2 * ind + 1] = +k * phi_hat.real();
538 }
539
540 ind++;
541 }
542 }
543 }
544
545 /* Back FFT force component mesh */
546 auto const check_complex = !p3m.params.tuning and check_complex_residuals;
547 for (int d = 0; d < 3; d++) {
548 fft_perform_back(p3m.E_mesh[d].data(), check_complex, p3m.fft, comm_cart);
549 }
550
551 /* redistribute force component mesh */
552 std::array<double *, 3> E_fields = {
553 {p3m.E_mesh[0].data(), p3m.E_mesh[1].data(), p3m.E_mesh[2].data()}};
556
557 auto const force_prefac = prefactor / volume;
558 Utils::integral_parameter<int, AssignForces, 1, 7>(
559 p3m.params.cao, p3m, force_prefac,
560 boost::combine(p_q_range, p_force_range));
561
562 // add dipole forces
563 // Eq. (3.19) @cite deserno00b
564 if (box_dipole) {
565 auto const dm = prefactor * pref * box_dipole.value();
566 for (auto zipped : boost::combine(p_q_range, p_force_range)) {
567 auto p_q = boost::get<0>(zipped);
568 auto &p_force = boost::get<1>(zipped);
569 p_force -= p_q * dm;
570 }
571 }
572 }
573
574 /* === k-space energy calculation === */
575 if (energy_flag or npt_flag) {
576 auto node_energy = 0.;
577 for (int i = 0; i < p3m.fft.plan[3].new_size; i++) {
578 // Use the energy optimized influence function for energy!
579 // Eq. (3.40) @cite deserno00b
580 node_energy += p3m.g_energy[i] * (Utils::sqr(p3m.rs_mesh[2 * i]) +
581 Utils::sqr(p3m.rs_mesh[2 * i + 1]));
582 }
583 node_energy /= 2. * volume;
584
585 auto energy = 0.;
586 boost::mpi::reduce(comm_cart, node_energy, energy, std::plus<>(), 0);
587 if (this_node == 0) {
588 /* self energy correction */
589 // Eq. (3.8) @cite deserno00b
590 energy -= p3m.sum_q2 * p3m.params.alpha * Utils::sqrt_pi_i();
591 /* net charge correction */
592 // Eq. (3.11) @cite deserno00b
593 energy -= p3m.square_sum_q * Utils::pi() /
594 (2. * volume * Utils::sqr(p3m.params.alpha));
595 /* dipole correction */
596 // Eq. (3.9) @cite deserno00b
597 if (box_dipole) {
598 energy += pref * box_dipole.value().norm2();
599 }
600 }
601 energy *= prefactor;
602#ifdef NPT
603 if (npt_flag) {
605 }
606#endif
607 if (not energy_flag) {
608 energy = 0.;
609 }
610 return energy;
611 }
612
613 return 0.;
614}
615
617 p3m_data_struct &p3m;
618 double m_mesh_density_min = -1., m_mesh_density_max = -1.;
619 // indicates if mesh should be tuned
620 bool m_tune_mesh = false;
621
622protected:
623 P3MParameters &get_params() override { return p3m.params; }
624
625public:
627 double prefactor, int timings)
628 : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m} {}
629
630 void on_solver_change() const override { m_system.on_coulomb_change(); }
631
632 void setup_logger(bool verbose) override {
633 auto const &box_geo = *m_system.box_geo;
634#ifdef CUDA
635 auto const &solver = m_system.coulomb.impl->solver;
636 auto const on_gpu = has_actor_of_type<CoulombP3MGPU>(solver);
637#else
638 auto const on_gpu = false;
639#endif
640 m_logger = std::make_unique<TuningLogger>(
641 verbose and this_node == 0, (on_gpu) ? "CoulombP3MGPU" : "CoulombP3M",
643 m_logger->tuning_goals(p3m.params.accuracy, m_prefactor,
644 box_geo.length()[0], p3m.sum_qpart, p3m.sum_q2);
645 m_logger->log_tuning_start();
646 }
647
648 std::optional<std::string>
649 layer_correction_veto_r_cut(double r_cut) const override {
650 auto const &solver = m_system.coulomb.impl->solver;
651 if (auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
652 return actor->veto_r_cut(r_cut);
653 }
654 return {};
655 }
656
657 std::tuple<double, double, double, double>
659 double r_cut_iL) const override {
660
661 auto const &box_geo = *m_system.box_geo;
662 double alpha_L, rs_err, ks_err;
663
664 /* calc maximal real space error for setting */
665 rs_err = p3m_real_space_error(m_prefactor, r_cut_iL, p3m.sum_qpart,
666 p3m.sum_q2, 0., box_geo.length());
667
668 if (Utils::sqrt_2() * rs_err > p3m.params.accuracy) {
669 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
670 alpha_L =
671 sqrt(log(Utils::sqrt_2() * rs_err / p3m.params.accuracy)) / r_cut_iL;
672 } else {
673 /* even alpha=0 is ok, however, we cannot choose it since it kills the
674 k-space error formula.
675 Anyways, this very likely NOT the optimal solution */
676 alpha_L = 0.1;
677 }
678
679 /* calculate real-space and k-space error for this alpha_L */
680 rs_err = p3m_real_space_error(m_prefactor, r_cut_iL, p3m.sum_qpart,
681 p3m.sum_q2, alpha_L, box_geo.length());
682#ifdef CUDA
683 auto const &solver = m_system.coulomb.impl->solver;
684 if (has_actor_of_type<CoulombP3MGPU>(solver)) {
685 ks_err = p3mgpu_k_space_error(m_prefactor, mesh, cao, p3m.sum_qpart,
686 p3m.sum_q2, alpha_L, box_geo.length());
687 } else
688#endif
689 ks_err = p3m_k_space_error(m_prefactor, mesh, cao, p3m.sum_qpart,
690 p3m.sum_q2, alpha_L, box_geo.length());
691
692 return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L};
693 }
694
695 void determine_mesh_limits() override {
696 auto const &box_geo = *m_system.box_geo;
697 auto const mesh_density =
698 static_cast<double>(p3m.params.mesh[0]) * box_geo.length_inv()[0];
699
700 if (p3m.params.mesh == Utils::Vector3i::broadcast(-1)) {
701 /* avoid using more than 1 GB of FFT arrays */
702 auto const normalized_box_dim = std::cbrt(box_geo.volume());
703 auto const max_npart_per_dim = 512.;
704 /* simple heuristic to limit the tried meshes if the accuracy cannot
705 be obtained with smaller meshes, but normally not all these
706 meshes have to be tested */
707 auto const min_npart_per_dim = std::min(
708 max_npart_per_dim, std::cbrt(static_cast<double>(p3m.sum_qpart)));
709 m_mesh_density_min = min_npart_per_dim / normalized_box_dim;
710 m_mesh_density_max = max_npart_per_dim / normalized_box_dim;
711 m_tune_mesh = true;
712 } else {
713 m_mesh_density_min = m_mesh_density_max = mesh_density;
714 assert(p3m.params.mesh[0] >= 1);
715 if (p3m.params.mesh[1] == -1 and p3m.params.mesh[2] == -1) {
716 // determine the two missing values by rescaling by the box length
717 for (int i : {1, 2}) {
718 p3m.params.mesh[i] =
719 static_cast<int>(std::round(mesh_density * box_geo.length()[i]));
720 // make the mesh even in all directions
721 p3m.params.mesh[i] += p3m.params.mesh[i] % 2;
722 }
723 }
724 m_logger->report_fixed_mesh(p3m.params.mesh);
725 }
726 }
727
729 auto const &box_geo = *m_system.box_geo;
730 auto const &solver = m_system.coulomb.impl->solver;
731 auto tuned_params = TuningAlgorithm::Parameters{};
732 auto time_best = time_sentinel;
733 auto mesh_density = m_mesh_density_min;
734 while (mesh_density <= m_mesh_density_max) {
735 auto trial_params = TuningAlgorithm::Parameters{};
736 if (m_tune_mesh) {
737 for (int i : {0, 1, 2}) {
738 trial_params.mesh[i] =
739 static_cast<int>(std::round(box_geo.length()[i] * mesh_density));
740 // make the mesh even in all directions
741 trial_params.mesh[i] += trial_params.mesh[i] % 2;
742 }
743 } else {
744 trial_params.mesh = p3m.params.mesh;
745 }
746 trial_params.cao = cao_best;
747
748 auto const trial_time =
749 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
750 trial_params.alpha_L, trial_params.accuracy);
751
752 if (trial_time >= 0.) {
753 /* the optimum r_cut for this mesh is the upper limit for higher meshes,
754 everything else is slower */
755 if (has_actor_of_type<CoulombP3M>(solver)) {
756 m_r_cut_iL_max = trial_params.r_cut_iL;
757 }
758
759 if (trial_time < time_best) {
760 /* new optimum */
762 tuned_params = trial_params;
763 time_best = tuned_params.time = trial_time;
764 } else if (trial_time > time_best + time_granularity or
766 /* no hope of further optimisation */
767 break;
768 }
769 }
770 mesh_density += 0.1;
771 }
772 return tuned_params;
773 }
774};
775
777 auto &system = get_system();
778 auto const &box_geo = *system.box_geo;
779 if (p3m.params.alpha_L == 0. and p3m.params.alpha != 0.) {
780 p3m.params.alpha_L = p3m.params.alpha * box_geo.length()[0];
781 }
782 if (p3m.params.r_cut_iL == 0. and p3m.params.r_cut != 0.) {
783 p3m.params.r_cut_iL = p3m.params.r_cut * box_geo.length_inv()[0];
784 }
785 if (not is_tuned()) {
787 if (p3m.sum_qpart == 0) {
788 throw std::runtime_error(
789 "CoulombP3M: no charged particles in the system");
790 }
791 try {
792 CoulombTuningAlgorithm parameters(system, p3m, prefactor, tune_timings);
793 parameters.setup_logger(tune_verbose);
794 // parameter ranges
795 parameters.determine_mesh_limits();
796 parameters.determine_r_cut_limits();
797 parameters.determine_cao_limits(7);
798 // run tuning algorithm
799 parameters.tune();
800 m_is_tuned = true;
801 system.on_coulomb_change();
802 } catch (...) {
803 p3m.params.tuning = false;
804 throw;
805 }
806 }
807 init();
808}
809
810void CoulombP3M::sanity_checks_boxl() const {
811 auto const &system = get_system();
812 auto const &box_geo = *system.box_geo;
813 auto const &local_geo = *system.local_geo;
814 for (unsigned int i = 0u; i < 3u; i++) {
815 /* check k-space cutoff */
816 if (p3m.params.cao_cut[i] >= box_geo.length_half()[i]) {
817 std::stringstream msg;
818 msg << "P3M_init: k-space cutoff " << p3m.params.cao_cut[i]
819 << " is larger than half of box dimension " << box_geo.length()[i];
820 throw std::runtime_error(msg.str());
821 }
822 if (p3m.params.cao_cut[i] >= local_geo.length()[i]) {
823 std::stringstream msg;
824 msg << "P3M_init: k-space cutoff " << p3m.params.cao_cut[i]
825 << " is larger than local box dimension " << local_geo.length()[i];
826 throw std::runtime_error(msg.str());
827 }
828 }
829
831 if ((box_geo.length()[0] != box_geo.length()[1]) or
832 (box_geo.length()[1] != box_geo.length()[2]) or
833 (p3m.params.mesh[0] != p3m.params.mesh[1]) or
834 (p3m.params.mesh[1] != p3m.params.mesh[2])) {
835 throw std::runtime_error(
836 "CoulombP3M: non-metallic epsilon requires cubic box");
837 }
838 }
839}
840
841void CoulombP3M::sanity_checks_periodicity() const {
842 auto const &box_geo = *get_system().box_geo;
843 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
844 throw std::runtime_error(
845 "CoulombP3M: requires periodicity (True, True, True)");
846 }
847}
848
849void CoulombP3M::sanity_checks_cell_structure() const {
850 auto const &local_geo = *get_system().local_geo;
851 if (local_geo.cell_structure_type() != CellStructureType::REGULAR and
852 local_geo.cell_structure_type() != CellStructureType::HYBRID) {
853 throw std::runtime_error(
854 "CoulombP3M: requires the regular or hybrid decomposition cell system");
855 }
856 if (::communicator.size > 1 and
857 local_geo.cell_structure_type() == CellStructureType::HYBRID) {
858 throw std::runtime_error(
859 "CoulombP3M: does not work with the hybrid decomposition cell system, "
860 "if using more than one MPI node");
861 }
862}
863
864void CoulombP3M::sanity_checks_node_grid() const {
865 auto const &node_grid = ::communicator.node_grid;
866 if (node_grid[0] < node_grid[1] || node_grid[1] < node_grid[2]) {
867 throw std::runtime_error(
868 "CoulombP3M: node grid must be sorted, largest first");
869 }
870}
871
872void CoulombP3M::scaleby_box_l() {
873 auto const &box_geo = *get_system().box_geo;
874 p3m.params.r_cut = p3m.params.r_cut_iL * box_geo.length()[0];
875 p3m.params.alpha = p3m.params.alpha_L * box_geo.length_inv()[0];
876 p3m.params.recalc_a_ai_cao_cut(box_geo.length());
878 sanity_checks_boxl();
879 calc_influence_function_force();
880 calc_influence_function_energy();
881}
882
883#endif // P3M
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
@ INTEG_METHOD_NPT_ISO
Vector implementation and trait types for boost qvm interoperability.
__global__ float * force
float u[3]
std::optional< std::string > layer_correction_veto_r_cut(double r_cut) const override
Definition p3m.cpp:649
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
Definition p3m.cpp:658
void setup_logger(bool verbose) override
Definition p3m.cpp:632
void determine_mesh_limits() override
Definition p3m.cpp:695
TuningAlgorithm::Parameters get_time() override
Definition p3m.cpp:728
P3MParameters & get_params() override
Definition p3m.cpp:623
void on_solver_change() const override
Definition p3m.cpp:630
CoulombTuningAlgorithm(System::System &system, p3m_data_struct &input_p3m, double prefactor, int timings)
Definition p3m.cpp:626
void set_prefactor(double new_prefactor)
double prefactor
Electrostatics prefactor.
A range of particles.
Main system class.
Coulomb::Solver coulomb
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
A stripped-down version of std::span from C++17.
Definition Span.hpp:38
static Vector< T, N > broadcast(T const &s)
Create a vector that has all entries set to one value.
Definition Vector.hpp:110
T norm() const
Definition Vector.hpp:131
Cache for interpolation weights.
void store(const InterpolationWeights< cao > &w)
Push back weights for one point.
InterpolationWeights< cao > load(std::size_t i) const
Load entry from the cache.
auto cao() const
Charge assignment order the weights are for.
void reset(int cao)
Reset the cache.
void spread_grid(Utils::Span< double * > meshes, const boost::mpi::communicator &comm, const Utils::Vector3i &dim)
void resize(const boost::mpi::communicator &comm, const P3MLocalMesh &local_mesh)
Definition send_mesh.cpp:73
void gather_grid(Utils::Span< double * > meshes, const boost::mpi::communicator &comm, const Utils::Vector3i &dim)
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
Definition common.hpp:45
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
Definition config.hpp:66
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
Definition config.hpp:53
ELC algorithm for long-range Coulomb interactions.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
void fft_perform_forw(double *data, fft_data_struct &fft, const boost::mpi::communicator &comm)
Perform an in-place forward 3D FFT.
Definition fft.cpp:683
void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft, const boost::mpi::communicator &comm)
Perform an in-place backward 3D FFT.
Definition fft.cpp:714
int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, Utils::Vector3i const &global_mesh_dim, Utils::Vector3d const &global_mesh_off, int &ks_pnum, fft_data_struct &fft, Utils::Vector3i const &grid, boost::mpi::communicator const &comm)
Initialize everything connected to the 3D-FFT.
Definition fft.cpp:484
Routines, row decomposition, data structures and communication for the 3D-FFT.
void p3m_interpolate(P3MLocalMesh const &local_mesh, InterpolationWeights< cao > const &weights, Kernel kernel)
P3M grid interpolation.
auto charge_range(ParticleRange const &particles)
auto pos_range(ParticleRange const &particles)
auto force_range(ParticleRange const &particles)
auto unfolded_pos_range(ParticleRange const &particles, BoxGeometry const &box)
T product(Vector< T, N > const &v)
Definition Vector.hpp:359
DEVICE_QUALIFIER constexpr T pi()
Ratio of diameter and circumference of a circle.
Definition constants.hpp:36
DEVICE_QUALIFIER T sinc(T d)
Calculates the sinc-function as sin(PI*x)/(PI*x).
Definition sinc.hpp:45
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
Definition sqr.hpp:26
DEVICE_QUALIFIER constexpr Span< T > make_span(T *p, std::size_t N)
Definition Span.hpp:112
DEVICE_QUALIFIER constexpr T sqrt_2()
Square root of 2.
Definition constants.hpp:64
auto hadamard_division(Vector< T, N > const &a, Vector< U, N > const &b)
Definition Vector.hpp:407
DEVICE_QUALIFIER constexpr Span< std::add_const_t< T > > make_const_span(T *p, std::size_t N)
Definition Span.hpp:122
DEVICE_QUALIFIER constexpr T sqrt_pi_i()
One over square root of pi.
Definition constants.hpp:43
void npt_add_virial_contribution(double energy)
Definition npt.cpp:141
Exports for the NpT code.
static auto p3m_tune_aliasing_sums(int nx, int ny, int nz, Utils::Vector3i const &mesh, Utils::Vector3d const &mesh_i, int cao, double alpha_L_i)
Aliasing sum used by p3m_k_space_error.
Definition p3m.cpp:133
static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh, int cao, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the analytic expression of the error estimate for the P3M method in (eq.
Definition p3m.cpp:198
static double p3m_real_space_error(double pref, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the real space contribution to the rms error in the force (as described by Kolafa and Perra...
Definition p3m.cpp:177
static auto calc_dipole_moment(boost::mpi::communicator const &comm, combined_ranges const &p_q_unfolded_pos_range)
Definition p3m.cpp:398
static double p3mgpu_k_space_error(double prefactor, Utils::Vector3i const &mesh, int cao, int npart, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Definition p3m.cpp:232
P3M algorithm for long-range Coulomb interaction.
P3M electrostatics on GPU.
P3M electrostatics on GPU.
double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao, int npart, double sum_q2, double alpha_L, const double *box)
static __device__ double p3m_analytic_cotangent_sum(int n, double mesh_i)
void operator()(p3m_data_struct &p3m, double force_prefac, combined_ranges const &p_q_force_range) const
Definition p3m.cpp:366
Utils::Vector3i node_grid
void charge_assign(ParticleRange const &particles)
Assign the physical charges using the tabulated charge assignment function.
Definition p3m.cpp:339
bool check_complex_residuals
Definition p3m.hpp:91
void assign_charge(double q, Utils::Vector3d const &real_pos, p3m_interpolation_cache &inter_weights)
Assign a single charge into the current charge grid.
Definition p3m.cpp:353
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
Definition p3m.cpp:475
CoulombP3M(P3MParameters &&parameters, double prefactor, int tune_timings, bool tune_verbose, bool check_complex_residuals)
Definition p3m.cpp:287
bool tune_verbose
Definition p3m.hpp:90
bool is_tuned() const
Definition p3m.hpp:100
Utils::Vector9d long_range_pressure(ParticleRange const &particles)
Compute the k-space part of the pressure tensor.
Definition p3m.cpp:416
void init()
Recalculate all derived parameters.
Definition p3m.cpp:246
void sanity_checks() const
Definition p3m.hpp:116
void count_charged_particles()
Count the number of charged particles and calculate the sum of the squared charges.
Definition p3m.cpp:84
p3m_data_struct p3m
P3M parameters.
Definition p3m.hpp:87
int tune_timings
Definition p3m.hpp:89
void tune()
Tune P3M parameters to desired accuracy.
Definition p3m.cpp:776
std::unique_ptr< Implementation > impl
Pointer-to-implementation.
Utils::Vector3i dim
dimension (size) of local mesh.
Definition common.hpp:186
int size
number of local mesh points.
Definition common.hpp:188
void recalc_ld_pos(P3MParameters const &params)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Definition common.hpp:213
void calc_local_ca_mesh(P3MParameters const &params, LocalBox const &local_geo, double skin, double space_layer)
Calculate properties of the local FFT mesh for the charge assignment process.
Definition common.cpp:78
int margin[6]
number of margin mesh points.
Definition common.hpp:201
Structure to hold P3M parameters and some dependent variables.
Definition common.hpp:67
Utils::Vector3d cao_cut
cutoff for charge assignment.
Definition common.hpp:89
double alpha
unscaled alpha_L for use with fast inline functions only
Definition common.hpp:96
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
Definition common.hpp:75
int cao
charge assignment order ([0,7]).
Definition common.hpp:82
double accuracy
accuracy of the actual parameter set.
Definition common.hpp:84
double alpha_L
Ewald splitting parameter (0.
Definition common.hpp:72
int cao3
number of points unto which a single charge is interpolated, i.e.
Definition common.hpp:102
Utils::Vector3d mesh_off
offset of the first mesh point (lower left corner) from the coordinate origin ([0,...
Definition common.hpp:80
Utils::Vector3d ai
inverse mesh constant.
Definition common.hpp:93
double r_cut
unscaled r_cut_iL for use with fast inline functions only
Definition common.hpp:99
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
Definition common.hpp:167
bool tuning
tuning or production?
Definition common.hpp:69
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0).
Definition common.hpp:77
double epsilon
epsilon of the "surrounding dielectric".
Definition common.hpp:87
DEVICE_QUALIFIER constexpr pointer data() noexcept
Definition Array.hpp:120
void operator()(p3m_data_struct &p3m, double q, Utils::Vector3d const &real_pos)
Definition p3m.cpp:317
void operator()(p3m_data_struct &p3m, double q, Utils::Vector3d const &real_pos, p3m_interpolation_cache &inter_weights)
Definition p3m.cpp:304
void operator()(p3m_data_struct &p3m, combined_ranges const &p_q_pos_range)
Definition p3m.cpp:327
fft_forw_plan plan[4]
Information for forward FFTs.
Definition fft.hpp:154
int start[3]
lower left point of local FFT mesh in global FFT mesh coordinates.
Definition fft.hpp:111
int new_mesh[3]
size of local mesh after communication, also used for actual FFT.
Definition fft.hpp:109
int new_size
size of new mesh (number of mesh points).
Definition fft.hpp:113
std::vector< double > g_energy
Energy optimised influence function (k-space)
P3MParameters params
void calc_differential_operator()
Calculate the Fourier transformed differential operator.
std::array< std::vector< int >, 3 > d_op
Spatial differential operator in k-space.
std::vector< double > g_force
Force optimised influence function (k-space)
int ks_pnum
number of permutations in k_space
P3MLocalMesh local_mesh
local mesh.
Definition p3m.hpp:63
p3m_send_mesh sm
send/recv mesh sizes
Definition p3m.hpp:79
double sum_q2
Sum of square of charges (only on head node).
Definition p3m.hpp:72
fft_vector< double > rs_mesh
real space mesh (local) for CA/FFT.
Definition p3m.hpp:65
double square_sum_q
square of sum of charges (only on head node).
Definition p3m.hpp:74
int sum_qpart
number of charged particles (only on head node).
Definition p3m.hpp:70
fft_data_struct fft
Definition p3m.hpp:81
p3m_interpolation_cache inter_weights
Definition p3m.hpp:76
std::array< fft_vector< double >, 3 > E_mesh
mesh (local) for the electric field.
Definition p3m.hpp:67