Loading [MathJax]/extensions/TeX/AMSmath.js
ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages Concepts
p3m.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2024 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22/** @file
23 *
24 * The corresponding header file is @ref p3m.hpp.
25 */
26
27#include "config/config.hpp"
28
29#ifdef P3M
30
32
35#ifdef CUDA
38#endif // CUDA
39
41
42#include "p3m/P3MFFT.hpp"
44#include "p3m/TuningLogger.hpp"
46#include "p3m/for_each_3d.hpp"
48#include "p3m/math.hpp"
49
50#include "BoxGeometry.hpp"
51#include "LocalBox.hpp"
52#include "Particle.hpp"
54#include "ParticleRange.hpp"
55#include "PropagationMode.hpp"
56#include "actor/visitors.hpp"
59#include "communication.hpp"
60#include "errorhandling.hpp"
62#include "npt.hpp"
63#include "p3m/send_mesh.hpp"
65#include "system/System.hpp"
66#include "tuning.hpp"
67
68#include <utils/Vector.hpp>
71#include <utils/math/sqr.hpp>
73
74#include <boost/mpi/collectives/all_reduce.hpp>
75#include <boost/mpi/collectives/broadcast.hpp>
76#include <boost/mpi/collectives/reduce.hpp>
77#include <boost/mpi/communicator.hpp>
78#include <boost/range/combine.hpp>
79#include <boost/range/numeric.hpp>
80
81#include <algorithm>
82#include <array>
83#include <cassert>
84#include <complex>
85#include <cstddef>
86#include <functional>
87#include <initializer_list>
88#include <numbers>
89#include <optional>
90#include <span>
91#include <sstream>
92#include <stdexcept>
93#include <string>
94#include <tuple>
95#include <utility>
96#include <vector>
97
98template <typename FloatType>
99std::complex<FloatType>
100multiply_complex_by_imaginary(std::complex<FloatType> const &z, FloatType k) {
101 // Perform the multiplication manually: (re + i*imag) * (i*k)
102 return std::complex<FloatType>(-z.imag() * k, z.real() * k);
103}
104
105template <typename FloatType>
106std::complex<FloatType>
107multiply_complex_by_real(std::complex<FloatType> const &z, FloatType k) {
108 // Perform the multiplication manually: (re + i*imag) * k
109 return std::complex<FloatType>(z.real() * k, z.imag() * k);
110}
111
112template <typename FloatType>
113FloatType complex_norm2(std::complex<FloatType> const &z) {
114 return Utils::sqr(z.real()) + Utils::sqr(z.imag());
115}
116
117template <Utils::MemoryOrder order_in, Utils::MemoryOrder order_out, typename T>
118auto transpose(std::span<T> const &flat_array, Utils::Vector3i const &shape) {
119 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
120 Utils::Vector3i indices{};
121 std::vector<T> flat_array_t(flat_array.size());
122 for_each_3d(mesh_start, shape, indices, [&]() {
123 auto const index_in = Utils::get_linear_index(indices, shape, order_in);
124 auto const index_out = Utils::get_linear_index(indices, shape, order_out);
125 flat_array_t[index_out] = flat_array[index_in];
126 });
127 return flat_array_t;
128}
129
131 Utils::Vector3i const &mesh) {
132 return mesh[0u] % node_grid[0u] == 0 and mesh[1u] % node_grid[1u] == 0 and
133 mesh[2u] % node_grid[2u] == 0;
134}
135
136static auto get_size_from_shape(Utils::Vector3i const &shape) {
137 return std::accumulate(shape.cbegin(), shape.cend(), std::size_t{1u},
138 [](std::size_t const acc, int const value) {
139 return acc * static_cast<std::size_t>(value);
140 });
141}
142
143template <typename FloatType, Arch Architecture>
145 auto local_n = std::size_t{0u};
146 auto local_q2 = 0.0;
147 auto local_q = 0.0;
148
149 for (auto const &p : get_system().cell_structure->local_particles()) {
150 if (p.q() != 0.0) {
151 local_n++;
152 local_q2 += Utils::sqr(p.q());
153 local_q += p.q();
154 }
155 }
156
157 boost::mpi::all_reduce(comm_cart, local_n, p3m.sum_qpart, std::plus<>());
158 boost::mpi::all_reduce(comm_cart, local_q2, p3m.sum_q2, std::plus<>());
159 boost::mpi::all_reduce(comm_cart, local_q, p3m.square_sum_q, std::plus<>());
160 p3m.square_sum_q = Utils::sqr(p3m.square_sum_q);
161}
162
163/** Calculate the optimal influence function of @cite hockney88a.
164 * (optimised for force calculations)
165 *
166 * Each node calculates only the values for its domain in k-space.
167 *
168 * See also: @cite hockney88a eq. 8-22 (p. 275). Note the somewhat
169 * different convention for the prefactors, which is described in
170 * @cite deserno98a @cite deserno98b.
171 */
172template <typename FloatType, Arch Architecture>
174 p3m.g_force = grid_influence_function<FloatType, 1, P3M_BRILLOUIN>(
175 p3m.params, p3m.fft->ks_local_ld_index(), p3m.fft->ks_local_ur_index(),
176 get_system().box_geo->length_inv());
177}
178
179/** Calculate the influence function optimized for the energy and the
180 * self energy correction.
181 */
182template <typename FloatType, Arch Architecture>
184 p3m.g_energy = grid_influence_function<FloatType, 0, P3M_BRILLOUIN>(
185 p3m.params, p3m.fft->ks_local_ld_index(), p3m.fft->ks_local_ur_index(),
186 get_system().box_geo->length_inv());
187}
188
189/** Aliasing sum used by @ref p3m_k_space_error. */
191 Utils::Vector3i const &mesh,
192 Utils::Vector3d const &mesh_i, int cao,
193 double alpha_L_i) {
194
195 auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
196 auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
197 auto constexpr exp_min = -708.4; // for IEEE-compatible double
198 auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i);
199 auto alias1 = 0.;
200 auto alias2 = 0.;
201
202 Utils::Vector3i indices{};
203 Utils::Vector3i nm{};
204 Utils::Vector3d fnm{};
206 mesh_start, mesh_stop, indices,
207 [&]() {
208 auto const norm_sq = nm.norm2();
209 auto const exponent = -factor1 * norm_sq;
210 auto const exp_limit = (exp_min + std::log(norm_sq)) / 2.;
211 auto const ex = (exponent < exp_limit) ? 0. : std::exp(exponent);
212 auto const energy = std::pow(Utils::product(fnm), 2 * cao);
213 alias1 += Utils::sqr(ex) / norm_sq;
214 alias2 += energy * ex * (shift * nm) / norm_sq;
215 },
216 [&](unsigned dim, int n) {
217 nm[dim] = shift[dim] + n * mesh[dim];
218 fnm[dim] = math::sinc(nm[dim] * mesh_i[dim]);
219 });
220
221 return std::make_pair(alias1, alias2);
222}
223
224/** Calculate the real space contribution to the rms error in the force (as
225 * described by Kolafa and Perram).
226 * \param pref Prefactor of Coulomb interaction.
227 * \param r_cut_iL rescaled real space cutoff for p3m method.
228 * \param n_c_part number of charged particles in the system.
229 * \param sum_q2 sum of square of charges in the system
230 * \param alpha_L rescaled Ewald splitting parameter.
231 * \param box_l box dimensions.
232 * \return real space error
233 */
234static double p3m_real_space_error(double pref, double r_cut_iL, int n_c_part,
235 double sum_q2, double alpha_L,
236 Utils::Vector3d const &box_l) {
237 auto const volume = Utils::product(box_l);
238 return (2. * pref * sum_q2 * exp(-Utils::sqr(r_cut_iL * alpha_L))) /
239 sqrt(static_cast<double>(n_c_part) * r_cut_iL * box_l[0] * volume);
240}
241
242/** Calculate the analytic expression of the error estimate for the
243 * P3M method in @cite hockney88a (eq. 8-23 p. 275) in
244 * order to obtain the rms error in the force for a system of N
245 * randomly distributed particles in a cubic box (k-space part).
246 * \param pref Prefactor of Coulomb interaction.
247 * \param mesh number of mesh points in one direction.
248 * \param cao charge assignment order.
249 * \param n_c_part number of charged particles in the system.
250 * \param sum_q2 sum of square of charges in the system
251 * \param alpha_L rescaled Ewald splitting parameter.
252 * \param box_l box dimensions.
253 * \return reciprocal (k) space error
254 */
255static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh,
256 int cao, int n_c_part, double sum_q2,
257 double alpha_L, Utils::Vector3d const &box_l) {
258
259 auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao);
260 auto const mesh_i = 1. / Utils::Vector3d(mesh);
261 auto const alpha_L_i = 1. / alpha_L;
262 auto const mesh_stop = mesh / 2;
263 auto const mesh_start = -mesh_stop;
264 auto indices = Utils::Vector3i{};
265 auto values = Utils::Vector3d{};
266 auto he_q = 0.;
267
269 mesh_start, mesh_stop, indices,
270 [&]() {
271 if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
272 auto const n2 = indices.norm2();
273 auto const cs = Utils::product(values);
274 auto const [alias1, alias2] =
275 p3m_tune_aliasing_sums(indices, mesh, mesh_i, cao, alpha_L_i);
276 auto const d = alias1 - Utils::sqr(alias2 / cs) / n2;
277 /* at high precision, d can become negative due to extinction;
278 also, don't take values that have no significant digits left*/
279 if (d > 0. and std::fabs(d / alias1) > ROUND_ERROR_PREC) {
280 he_q += d;
281 }
282 }
283 },
284 [&values, &mesh_i, cotangent_sum](unsigned dim, int n) {
285 values[dim] = cotangent_sum(n, mesh_i[dim]);
286 });
287
288 return 2. * pref * sum_q2 * sqrt(he_q / static_cast<double>(n_c_part)) /
289 (box_l[1] * box_l[2]);
290}
291
292template <typename FloatType, Arch Architecture>
294 assert(p3m.params.mesh >= Utils::Vector3i::broadcast(1));
295 assert(p3m.params.cao >= 1 and p3m.params.cao <= 7);
296 assert(p3m.params.alpha > 0.);
297
298 auto const &system = get_system();
299 auto const &box_geo = *system.box_geo;
300 auto const &local_geo = *system.local_geo;
301 auto const skin = system.cell_structure->get_verlet_skin();
302
303 p3m.params.cao3 = Utils::int_pow<3>(p3m.params.cao);
304 p3m.params.recalc_a_ai_cao_cut(box_geo.length());
305
306 sanity_checks();
307
308 auto const &solver = system.coulomb.impl->solver;
309 double elc_layer = 0.;
310 if (auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
311 elc_layer = actor->elc.space_layer;
312 }
313
314 p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer);
315 p3m.fft = std::make_shared<P3MFFT<FloatType>>(
316 comm_cart, p3m.params.mesh, p3m.local_mesh.ld_no_halo,
317 p3m.local_mesh.ur_no_halo, get_memory_layout());
318 p3m.fft->set_preferred_kspace_decomposition(::communicator.node_grid);
319 auto const rs_array_size = get_size_from_shape(p3m.local_mesh.dim);
320 auto const rs_array_size_no_halo =
321 get_size_from_shape(p3m.local_mesh.dim_no_halo);
322 auto const fft_mesh_size = get_size_from_shape(p3m.fft->ks_local_size());
323 p3m.rs_charge_density.resize(rs_array_size);
324 p3m.ks_charge_density.resize(fft_mesh_size);
325 for (auto d : {0u, 1u, 2u}) {
326 p3m.rs_E_fields[d].resize(rs_array_size);
327 }
328 p3m.ks_E_fields_storage.resize(3u * fft_mesh_size);
329 p3m.rs_E_fields_no_halo.resize(3u * rs_array_size_no_halo);
330 p3m.calc_differential_operator();
331
332 /* fix box length dependent constants */
333 scaleby_box_l();
334
335 count_charged_particles();
336}
337
338namespace {
339template <int cao> struct AssignCharge {
340 void operator()(auto &p3m, double q, InterpolationWeights<cao> const &w) {
341 using value_type =
342 typename std::remove_reference_t<decltype(p3m)>::value_type;
343 p3m_interpolate(p3m.local_mesh, w, [q, &p3m](int ind, double w) {
344 p3m.rs_charge_density[ind] += value_type(w * q);
345 });
346 }
347
348 void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos,
349 p3m_interpolation_cache &inter_weights) {
350 auto constexpr memory_order =
351 std::remove_reference<decltype(p3m)>::type::memory_order;
352 auto const w = p3m_calculate_interpolation_weights<cao, memory_order>(
353 real_pos, p3m.params.ai, p3m.local_mesh);
354 inter_weights.store(w);
355 this->operator()(p3m, q, w);
356 }
357
358 void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos) {
359 auto constexpr memory_order =
360 std::remove_reference<decltype(p3m)>::type::memory_order;
361 auto const w = p3m_calculate_interpolation_weights<cao, memory_order>(
362 real_pos, p3m.params.ai, p3m.local_mesh);
363 this->operator()(p3m, q, w);
364 }
365
366 template <typename combined_ranges>
367 void operator()(auto &p3m, combined_ranges const &p_q_pos_range) {
368 for (auto zipped : p_q_pos_range) {
369 auto const p_q = boost::get<0>(zipped);
370 auto const &p_pos = boost::get<1>(zipped);
371 if (p_q != 0.0) {
372 this->operator()(p3m, p_q, p_pos, p3m.inter_weights);
373 }
374 }
375 }
376};
377} // namespace
378
379template <typename FloatType, Arch Architecture>
381 ParticleRange const &particles) {
382 prepare_fft_mesh(true);
383
384 auto p_q_range = ParticlePropertyRange::charge_range(particles);
385 auto p_pos_range = ParticlePropertyRange::pos_range(particles);
386
387 Utils::integral_parameter<int, AssignCharge, 1, 7>(
388 p3m.params.cao, p3m, boost::combine(p_q_range, p_pos_range));
389}
390
391template <typename FloatType, Arch Architecture>
393 double q, Utils::Vector3d const &real_pos, bool skip_cache) {
394 if (skip_cache) {
395 Utils::integral_parameter<int, AssignCharge, 1, 7>(p3m.params.cao, p3m, q,
396 real_pos);
397 } else {
398 Utils::integral_parameter<int, AssignCharge, 1, 7>(
399 p3m.params.cao, p3m, q, real_pos, p3m.inter_weights);
400 }
401}
402
403template <int cao> struct AssignForces {
404 template <typename combined_ranges>
405 void operator()(auto &p3m, double force_prefac,
406 combined_ranges const &p_q_force_range) const {
407
408 assert(cao == p3m.inter_weights.cao());
409
410 /* charged particle counter */
411 auto p_index = std::size_t{0ul};
412
413 for (auto zipped : p_q_force_range) {
414 auto p_q = boost::get<0>(zipped);
415 auto &p_force = boost::get<1>(zipped);
416 if (p_q != 0.0) {
417 auto const pref = p_q * force_prefac;
418 auto const w = p3m.inter_weights.template load<cao>(p_index);
419
420 Utils::Vector3d force{};
421 p3m_interpolate(p3m.local_mesh, w, [&force, &p3m](int ind, double w) {
422 force[0u] += w * double(p3m.rs_E_fields[0u][ind]);
423 force[1u] += w * double(p3m.rs_E_fields[1u][ind]);
424 force[2u] += w * double(p3m.rs_E_fields[2u][ind]);
425 });
426
427 p_force -= pref * force;
428 ++p_index;
429 }
430 }
431 }
432};
433
434template <typename combined_ranges>
435static auto calc_dipole_moment(boost::mpi::communicator const &comm,
436 combined_ranges const &p_q_unfolded_pos_range) {
437 auto const local_dip =
438 boost::accumulate(p_q_unfolded_pos_range, Utils::Vector3d{},
439 [](Utils::Vector3d const &dip, auto const &q_pos) {
440 auto const p_q = boost::get<0>(q_pos);
441 auto const &p_unfolded_pos = boost::get<1>(q_pos);
442 return dip + p_q * p_unfolded_pos;
443 });
444 return boost::mpi::all_reduce(comm, local_dip, std::plus<>());
445}
446
447template <typename FloatType, Arch Architecture>
449 // halo communication of real space charge density
450 p3m.halo_comm.gather_grid(comm_cart, p3m.rs_charge_density.data(),
451 p3m.local_mesh.dim);
452
453 // get real space charge density without ghost layers
454 auto charge_density_no_halos = extract_block(
455 p3m.rs_charge_density, p3m.local_mesh.dim, p3m.local_mesh.n_halo_ld,
456 p3m.local_mesh.dim - p3m.local_mesh.n_halo_ur,
458
459 // Set up the FFT using the Heffte library.
460 // This is in global mesh coordinates without any ghost layers
461 // The memory layout has to be specified, so the parts of
462 // the mesh held by each MPI rank are assembled correctly.
463 p3m.fft->forward(charge_density_no_halos.data(),
464 p3m.ks_charge_density.data());
465}
466
467template <typename FloatType, Arch Architecture>
469 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
470 auto const &mesh_stop = p3m.fft->ks_local_size();
471 auto const &box_geo = *get_system().box_geo;
472 auto indices = Utils::Vector3i{};
473
474 // hold electric field in k-space
475 std::array<std::span<std::complex<FloatType>>, 3> ks_E_fields;
476 auto const fft_mesh_length = get_size_from_shape(mesh_stop);
477 for (auto d : {0u, 1u, 2u}) {
478 auto const offset = d * fft_mesh_length;
479 auto const begin = p3m.ks_E_fields_storage.begin() + offset;
480 ks_E_fields[d] = {begin, fft_mesh_length};
481 }
482
483 // i*k differentiation
484 auto const wavevector =
485 Utils::Vector3<FloatType>((2. * std::numbers::pi) * box_geo.length_inv());
486
487 // compute electric field, Eq. (3.49) @cite deserno00b
488 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
489 auto const global_index = indices + p3m.fft->ks_local_ld_index();
490 auto const local_index = Utils::get_linear_index(
491 indices, mesh_stop, Utils::MemoryOrder::COLUMN_MAJOR);
492 auto const phi_hat = multiply_complex_by_real(
493 p3m.ks_charge_density[local_index], p3m.g_force[local_index]);
494
495 for (auto d : {0u, 1u, 2u}) {
496 // wave vector of the current mesh point
497 auto const k = FloatType(p3m.d_op[d][global_index[d]]) * wavevector[d];
498 // electric field in k-space
499 ks_E_fields[d][local_index] = multiply_complex_by_imaginary(phi_hat, k);
500 }
501 });
502
503 // back-transform the k-space electric field to real space
504 auto const rs_mesh_size_no_halo =
505 get_size_from_shape(p3m.local_mesh.dim_no_halo);
506 p3m.fft->backward_batch(3, p3m.ks_E_fields_storage.data(),
507 p3m.rs_E_fields_no_halo.data());
508
509 // add zeros around the E-field in real space to make room for ghost layers
510 auto const size = p3m.local_mesh.ur_no_halo - p3m.local_mesh.ld_no_halo;
511 for (auto d : {0u, 1u, 2u}) {
512 auto const offset = d * rs_mesh_size_no_halo;
513 auto const begin = p3m.rs_E_fields_no_halo.begin() + offset;
514 auto f = std::span<std::complex<FloatType>>(begin, rs_mesh_size_no_halo);
517 p3m.rs_E_fields[d] = pad_with_zeros_discard_imag(
518 std::span<std::complex<FloatType>>(f_t), p3m.local_mesh.dim_no_halo,
519 p3m.local_mesh.n_halo_ld, p3m.local_mesh.n_halo_ur);
520 }
521
522 // ghost communicate the boundary layers of the E-field in real space
523 auto field_pointers = std::vector<FloatType *>{p3m.rs_E_fields[0u].data(),
524 p3m.rs_E_fields[1u].data(),
525 p3m.rs_E_fields[2u].data()};
526 p3m.halo_comm.spread_grid(comm_cart, field_pointers, p3m.local_mesh.dim);
527}
528
529/** @details Calculate the long range electrostatics part of the pressure
530 * tensor. This is part \f$\Pi_{\textrm{dir}, \alpha, \beta}\f$ eq. (2.6)
531 * in @cite essmann95a. The part \f$\Pi_{\textrm{corr}, \alpha, \beta}\f$
532 * eq. (2.8) is not present here since M is the empty set in our simulations.
533 */
534template <typename FloatType, Arch Architecture>
536 ParticleRange const &particles) {
537 auto const &box_geo = *get_system().box_geo;
538 Utils::Vector9d node_k_space_pressure_tensor{};
539
540 if (p3m.sum_q2 > 0.) {
541 charge_assign(particles);
542 kernel_ks_charge_density();
543
544 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
545 auto const &mesh_stop = p3m.fft->ks_local_size();
546 auto const &k_size = p3m.fft->ks_local_size();
547 auto const half_alpha_inv_sq = Utils::sqr(1. / 2. / p3m.params.alpha);
548 auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv();
549 auto indices = Utils::Vector3i{};
550 auto diagonal = 0.;
551
552 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
553 auto const global_index = indices + p3m.fft->ks_local_ld_index();
554 auto const kx = p3m.d_op[0u][global_index[0u]] * wavevector[0u];
555 auto const ky = p3m.d_op[1u][global_index[1u]] * wavevector[1u];
556 auto const kz = p3m.d_op[2u][global_index[2u]] * wavevector[2u];
557 auto const norm_sq = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz);
558
559 if (norm_sq != 0.) {
560 auto const index = Utils::get_linear_index(
561 indices, k_size, Utils::MemoryOrder::COLUMN_MAJOR);
562 auto const node_k_space_energy = static_cast<double>(
563 p3m.g_energy[index] * complex_norm2(p3m.ks_charge_density[index]));
564 auto const vterm = -2. * (1. / norm_sq + half_alpha_inv_sq);
565 auto const pref = node_k_space_energy * vterm;
566 node_k_space_pressure_tensor[0u] += pref * kx * kx; /* sigma_xx */
567 node_k_space_pressure_tensor[1u] += pref * kx * ky; /* sigma_xy */
568 node_k_space_pressure_tensor[2u] += pref * kx * kz; /* sigma_xz */
569 node_k_space_pressure_tensor[4u] += pref * ky * ky; /* sigma_yy */
570 node_k_space_pressure_tensor[5u] += pref * ky * kz; /* sigma_yz */
571 node_k_space_pressure_tensor[8u] += pref * kz * kz; /* sigma_zz */
572 diagonal += node_k_space_energy;
573 }
574 });
575
576 node_k_space_pressure_tensor[0u] += diagonal;
577 node_k_space_pressure_tensor[4u] += diagonal;
578 node_k_space_pressure_tensor[8u] += diagonal;
579 node_k_space_pressure_tensor[3u] = node_k_space_pressure_tensor[1u];
580 node_k_space_pressure_tensor[6u] = node_k_space_pressure_tensor[2u];
581 node_k_space_pressure_tensor[7u] = node_k_space_pressure_tensor[5u];
582 }
583
584 return node_k_space_pressure_tensor * prefactor / (2. * box_geo.volume());
585}
586
587template <typename FloatType, Arch Architecture>
589 bool force_flag, bool energy_flag, ParticleRange const &particles) {
590
591 auto const &system = get_system();
592 auto const &box_geo = *system.box_geo;
593#ifdef NPT
594 auto const npt_flag =
595 force_flag and (system.propagation->integ_switch == INTEG_METHOD_NPT_ISO);
596#else
597 auto constexpr npt_flag = false;
598#endif
599 if (p3m.sum_qpart == 0u) {
600 return 0.;
601 }
602
603 if (not has_actor_of_type<ElectrostaticLayerCorrection>(
604 system.coulomb.impl->solver)) {
605 charge_assign(particles);
606 }
607
608 kernel_ks_charge_density();
609
610 // Calculate the dipole term
611 auto p_q_range = ParticlePropertyRange::charge_range(particles);
612 auto p_force_range = ParticlePropertyRange::force_range(particles);
613 auto p_unfolded_pos_range =
615
616 // The dipole moment is only needed if we don't have metallic boundaries
617 auto const box_dipole =
618 (p3m.params.epsilon != P3M_EPSILON_METALLIC)
619 ? std::make_optional(calc_dipole_moment(
620 comm_cart, boost::combine(p_q_range, p_unfolded_pos_range)))
621 : std::nullopt;
622 auto const volume = box_geo.volume();
623 auto const pref =
624 4. * std::numbers::pi / volume / (2. * p3m.params.epsilon + 1.);
625 auto energy = 0.;
626
627 /* === k-space force calculation === */
628 if (force_flag) {
629 kernel_rs_electric_field();
630
631 // assign particle forces
632 auto const force_prefac = prefactor / volume;
633 Utils::integral_parameter<int, AssignForces, 1, 7>(
634 p3m.params.cao, p3m, force_prefac,
635 boost::combine(p_q_range, p_force_range));
636
637 // add dipole forces
638 // Eq. (3.19) @cite deserno00b
639 if (box_dipole) {
640 auto const dm = prefactor * pref * box_dipole.value();
641 for (auto zipped : boost::combine(p_q_range, p_force_range)) {
642 auto p_q = boost::get<0>(zipped);
643 auto &p_force = boost::get<1>(zipped);
644 p_force -= p_q * dm;
645 }
646 }
647 }
648
649 /* === k-space energy calculation === */
650 if (energy_flag or npt_flag) {
651 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
652 auto const &mesh_stop = p3m.fft->ks_local_size();
653 auto indices = Utils::Vector3i{};
654 auto node_energy = 0.;
655 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
656 auto const index = Utils::get_linear_index(
657 indices, mesh_stop, Utils::MemoryOrder::COLUMN_MAJOR);
658 // Use the energy optimized influence function for energy!
659 // Eq. (3.40) @cite deserno00b
660 node_energy += static_cast<double>(
661 p3m.g_energy[index] * complex_norm2(p3m.ks_charge_density[index]));
662 });
663 node_energy /= 2. * volume;
664
665 // add up energy contributions from all mpi ranks
666 boost::mpi::reduce(comm_cart, node_energy, energy, std::plus<>(), 0);
667 if (this_node == 0) {
668 /* self energy correction */
669 // Eq. (3.8) @cite deserno00b
670 energy -= p3m.sum_q2 * p3m.params.alpha * std::numbers::inv_sqrtpi;
671 /* net charge correction */
672 // Eq. (3.11) @cite deserno00b
673 energy -= p3m.square_sum_q * std::numbers::pi /
674 (2. * volume * Utils::sqr(p3m.params.alpha));
675 /* dipole correction */
676 // Eq. (3.9) @cite deserno00b
677 if (box_dipole) {
678 energy += pref * box_dipole.value().norm2();
679 }
680 }
681 energy *= prefactor;
682#ifdef NPT
683 if (npt_flag) {
684 get_system().npt_add_virial_contribution(energy);
685 }
686#endif
687 if (not energy_flag) {
688 energy = 0.;
689 }
690 }
691
692 return energy;
693}
694
695template <typename FloatType, Arch Architecture>
698 double m_mesh_density_min = -1., m_mesh_density_max = -1.;
699 // indicates if mesh should be tuned
700 bool m_tune_mesh = false;
701 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
702
703protected:
704 P3MParameters &get_params() override { return p3m.params; }
705
706public:
707 CoulombTuningAlgorithm(System::System &system, auto &input_p3m,
708 double prefactor, int timings,
709 decltype(m_tune_limits) tune_limits)
710 : TuningAlgorithm(system, prefactor, timings), p3m{input_p3m},
711 m_tune_limits{std::move(tune_limits)} {}
712
713 void on_solver_change() const override { m_system.on_coulomb_change(); }
714
715 void setup_logger(bool verbose) override {
716 auto const &box_geo = *m_system.box_geo;
717#ifdef CUDA
718 auto const on_gpu = Architecture == Arch::GPU;
719#else
720 auto const on_gpu = false;
721#endif
722 m_logger = std::make_unique<TuningLogger>(
723 verbose and this_node == 0, (on_gpu) ? "CoulombP3MGPU" : "CoulombP3M",
725 m_logger->tuning_goals(p3m.params.accuracy, m_prefactor,
726 box_geo.length()[0], p3m.sum_qpart, p3m.sum_q2);
727 m_logger->log_tuning_start();
728 }
729
730 std::optional<std::string>
731 layer_correction_veto_r_cut(double r_cut) const override {
732 auto const &solver = m_system.coulomb.impl->solver;
733 if (auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
734 return actor->veto_r_cut(r_cut);
735 }
736 return {};
737 }
738
739 std::optional<std::string> fft_decomposition_veto(
740 Utils::Vector3i const &mesh_size_r_space) const override {
741#ifdef CUDA
742 if constexpr (Architecture == Arch::GPU) {
743 return std::nullopt;
744 }
745#endif
746 auto const [KX, KY, KZ] = p3m.fft->get_memory_layout();
747 auto valid_decomposition = false;
748 Utils::Vector3i mesh_size_k_space = {};
749 boost::mpi::reduce(
750 ::comm_cart, p3m.fft->ks_local_ur_index(), mesh_size_k_space,
751 [](Utils::Vector3i const &lhs, Utils::Vector3i const &rhs) {
752 return Utils::Vector3i{{std::max(lhs[0u], rhs[0u]),
753 std::max(lhs[1u], rhs[1u]),
754 std::max(lhs[2u], rhs[2u])}};
755 },
756 0);
757 if (::this_node == 0) {
758 auto const &node_grid = ::communicator.node_grid;
759 valid_decomposition =
760 (mesh_size_r_space[0u] == mesh_size_k_space[KX] and
761 mesh_size_r_space[1u] == mesh_size_k_space[KY] and
762 mesh_size_r_space[2u] == mesh_size_k_space[KZ] and
763 is_node_grid_compatible_with_mesh(node_grid, mesh_size_r_space));
764 }
765 boost::mpi::broadcast(::comm_cart, valid_decomposition, 0);
766 std::optional<std::string> retval{"conflict with FFT domain decomposition"};
767 if (valid_decomposition) {
768 retval = std::nullopt;
769 }
770 return retval;
771 }
772
773 std::tuple<double, double, double, double>
775 double r_cut_iL) const override {
776
777 auto const &box_geo = *m_system.box_geo;
778 double alpha_L, rs_err, ks_err;
779
780 /* calc maximal real space error for setting */
781 rs_err = p3m_real_space_error(m_prefactor, r_cut_iL, p3m.sum_qpart,
782 p3m.sum_q2, 0., box_geo.length());
783
784 if (std::numbers::sqrt2 * rs_err > p3m.params.accuracy) {
785 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
786 alpha_L = sqrt(log(std::numbers::sqrt2 * rs_err / p3m.params.accuracy)) /
787 r_cut_iL;
788 } else {
789 /* even alpha=0 is ok, however, we cannot choose it since it kills the
790 k-space error formula.
791 Anyways, this very likely NOT the optimal solution */
792 alpha_L = 0.1;
793 }
794
795 /* calculate real-space and k-space error for this alpha_L */
796 rs_err = p3m_real_space_error(m_prefactor, r_cut_iL, p3m.sum_qpart,
797 p3m.sum_q2, alpha_L, box_geo.length());
798#ifdef CUDA
799 if constexpr (Architecture == Arch::GPU) {
800 if (this_node == 0) {
801 ks_err =
802 p3m_k_space_error_gpu(m_prefactor, mesh.data(), cao, p3m.sum_qpart,
803 p3m.sum_q2, alpha_L, box_geo.length().data());
804 }
805 boost::mpi::broadcast(comm_cart, ks_err, 0);
806 } else
807#endif
808 ks_err = p3m_k_space_error(m_prefactor, mesh, cao, p3m.sum_qpart,
809 p3m.sum_q2, alpha_L, box_geo.length());
810
811 return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L};
812 }
813
814 void determine_mesh_limits() override {
815 auto const &box_geo = *m_system.box_geo;
816 auto const mesh_density =
817 static_cast<double>(p3m.params.mesh[0]) * box_geo.length_inv()[0];
818
819 if (p3m.params.mesh == Utils::Vector3i::broadcast(-1)) {
820 /* avoid using more than 1 GB of FFT arrays */
821 auto const normalized_box_dim = std::cbrt(box_geo.volume());
822 auto const max_npart_per_dim = 512.;
823 /* simple heuristic to limit the tried meshes if the accuracy cannot
824 be obtained with smaller meshes, but normally not all these
825 meshes have to be tested */
826 auto const min_npart_per_dim = std::min(
827 max_npart_per_dim, std::cbrt(static_cast<double>(p3m.sum_qpart)));
828 m_mesh_density_min = min_npart_per_dim / normalized_box_dim;
829 m_mesh_density_max = max_npart_per_dim / normalized_box_dim;
830 if (m_tune_limits.first or m_tune_limits.second) {
831 auto const &box_l = box_geo.length();
832 auto const dim = std::max({box_l[0], box_l[1], box_l[2]});
833 if (m_tune_limits.first) {
834 m_mesh_density_min = static_cast<double>(*m_tune_limits.first) / dim;
835 }
836 if (m_tune_limits.second) {
837 m_mesh_density_max = static_cast<double>(*m_tune_limits.second) / dim;
838 }
839 }
840 m_tune_mesh = true;
841 } else {
842 m_mesh_density_min = m_mesh_density_max = mesh_density;
843 assert(p3m.params.mesh[0] >= 1);
844 if (p3m.params.mesh[1] == -1 and p3m.params.mesh[2] == -1) {
845 // determine the two missing values by rescaling by the box length
846 for (auto i : {1u, 2u}) {
847 p3m.params.mesh[i] =
848 static_cast<int>(std::round(mesh_density * box_geo.length()[i]));
849 // make the mesh even in all directions
850 p3m.params.mesh[i] += p3m.params.mesh[i] % 2;
851 }
852 }
853 m_logger->report_fixed_mesh(p3m.params.mesh);
854 }
855 }
856
858 auto const &box_geo = *m_system.box_geo;
859 auto const &solver = m_system.coulomb.impl->solver;
860 auto tuned_params = TuningAlgorithm::Parameters{};
861 auto time_best = time_sentinel;
862 auto mesh_density = m_mesh_density_min;
863 auto current_mesh = p3m.params.mesh;
864 if (m_tune_mesh) {
865 for (auto i : {0u, 1u, 2u}) {
866 current_mesh[i] =
867 static_cast<int>(std::round(box_geo.length()[i] * mesh_density));
868 // make the mesh even in all directions
869 current_mesh[i] += current_mesh[i] % 2;
870 }
871 }
872
873 while (mesh_density <= m_mesh_density_max) {
874 auto trial_params = TuningAlgorithm::Parameters{};
875 trial_params.mesh = current_mesh;
876 trial_params.cao = cao_best;
877 trial_params.cao = cao_best;
878
879 auto const trial_time =
880 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
881 trial_params.alpha_L, trial_params.accuracy);
882
883 if (trial_time >= 0.) {
884 /* the optimum r_cut for this mesh is the upper limit for higher meshes,
885 everything else is slower */
886 if (has_actor_of_type<CoulombP3M>(solver)) {
887 m_r_cut_iL_max = trial_params.r_cut_iL;
888 }
889
890 if (trial_time < time_best) {
891 /* new optimum */
892 reset_n_trials();
893 tuned_params = trial_params;
894 time_best = tuned_params.time = trial_time;
895 } else if (trial_time > time_best + time_granularity or
896 get_n_trials() > max_n_consecutive_trials) {
897 /* no hope of further optimisation */
898 break;
899 }
900 }
901 if (m_tune_mesh) {
902 current_mesh += Utils::Vector3i::broadcast(2);
903 mesh_density = current_mesh[0] / box_geo.length()[0];
904 } else {
905 break;
906 }
907 }
908 return tuned_params;
909 }
910};
911
912template <typename FloatType, Arch Architecture>
914 auto &system = get_system();
915 auto const &box_geo = *system.box_geo;
916 if (p3m.params.alpha_L == 0. and p3m.params.alpha != 0.) {
917 p3m.params.alpha_L = p3m.params.alpha * box_geo.length()[0];
918 }
919 if (p3m.params.r_cut_iL == 0. and p3m.params.r_cut != 0.) {
920 p3m.params.r_cut_iL = p3m.params.r_cut * box_geo.length_inv()[0];
921 }
922 if (not is_tuned()) {
923 count_charged_particles();
924 if (p3m.sum_qpart == 0) {
925 throw std::runtime_error(
926 "CoulombP3M: no charged particles in the system");
927 }
928 try {
930 system, p3m, prefactor, tune_timings, tune_limits);
931 parameters.setup_logger(tune_verbose);
932 // parameter ranges
933 parameters.determine_mesh_limits();
934 parameters.determine_r_cut_limits();
935 parameters.determine_cao_limits(7);
936 // run tuning algorithm
937 parameters.tune();
938 m_is_tuned = true;
939 system.on_coulomb_change();
940 } catch (...) {
941 p3m.params.tuning = false;
942 throw;
943 }
944 }
945 init();
946}
947
949 auto const &system = get_system();
950 auto const &box_geo = *system.box_geo;
951 auto const &local_geo = *system.local_geo;
952 for (auto i = 0u; i < 3u; i++) {
953 /* check k-space cutoff */
954 if (p3m_params.cao_cut[i] >= box_geo.length_half()[i]) {
955 std::stringstream msg;
956 msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i]
957 << " is larger than half of box dimension " << box_geo.length()[i];
958 throw std::runtime_error(msg.str());
959 }
960 if (p3m_params.cao_cut[i] >= local_geo.length()[i]) {
961 std::stringstream msg;
962 msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i]
963 << " is larger than local box dimension " << local_geo.length()[i];
964 throw std::runtime_error(msg.str());
965 }
966 }
967
969 if ((box_geo.length()[0] != box_geo.length()[1]) or
970 (box_geo.length()[1] != box_geo.length()[2]) or
971 (p3m_params.mesh[0] != p3m_params.mesh[1]) or
972 (p3m_params.mesh[1] != p3m_params.mesh[2])) {
973 throw std::runtime_error(
974 "CoulombP3M: non-metallic epsilon requires cubic box");
975 }
976 }
977}
978
980 auto const &box_geo = *get_system().box_geo;
981 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
982 throw std::runtime_error(
983 "CoulombP3M: requires periodicity (True, True, True)");
984 }
985}
986
988 auto const &local_geo = *get_system().local_geo;
989 if (local_geo.cell_structure_type() != CellStructureType::REGULAR and
990 local_geo.cell_structure_type() != CellStructureType::HYBRID) {
991 throw std::runtime_error(
992 "CoulombP3M: requires the regular or hybrid decomposition cell system");
993 }
994 if (::communicator.size > 1 and
995 local_geo.cell_structure_type() == CellStructureType::HYBRID) {
996 throw std::runtime_error(
997 "CoulombP3M: does not work with the hybrid decomposition cell system, "
998 "if using more than one MPI node");
999 }
1000}
1001
1002template <typename FloatType, Arch Architecture>
1004 auto const &box_geo = *get_system().box_geo;
1005 p3m.params.r_cut = p3m.params.r_cut_iL * box_geo.length()[0];
1006 p3m.params.alpha = p3m.params.alpha_L * box_geo.length_inv()[0];
1007 p3m.params.recalc_a_ai_cao_cut(box_geo.length());
1009 sanity_checks_boxl();
1010 calc_influence_function_force();
1011 calc_influence_function_energy();
1012 p3m.halo_comm.resize(::comm_cart, p3m.local_mesh);
1013}
1014
1015#ifdef CUDA
1016template <typename FloatType, Arch Architecture>
1018 ParticleRange const &particles) {
1019 if constexpr (Architecture == Arch::GPU) {
1020#ifdef NPT
1021 if (get_system().propagation->integ_switch == INTEG_METHOD_NPT_ISO) {
1022 get_system().npt_add_virial_contribution(long_range_energy(particles));
1023 }
1024#else
1025 static_cast<void>(particles);
1026#endif
1027 if (this_node == 0) {
1028 auto &gpu = get_system().gpu;
1029 p3m_gpu_add_farfield_force(*m_gpu_data, gpu, prefactor,
1030 gpu.n_particles());
1031 }
1032 }
1033}
1034
1035/* Initialize the CPU kernels.
1036 * This operation is time-consuming and sets up data members
1037 * that are only relevant for ELC force corrections, since the
1038 * GPU implementation uses CPU kernels to compute energies.
1039 */
1040template <typename FloatType, Arch Architecture>
1042 if constexpr (Architecture == Arch::GPU) {
1043 auto &system = get_system();
1044 if (has_actor_of_type<ElectrostaticLayerCorrection>(
1045 system.coulomb.impl->solver)) {
1046 init_cpu_kernels();
1047 }
1048 p3m_gpu_init(m_gpu_data, p3m.params.cao, p3m.params.mesh, p3m.params.alpha,
1049 system.box_geo->length(), system.gpu.n_particles());
1050 }
1051}
1052
1053template <typename FloatType, Arch Architecture>
1055 if constexpr (Architecture == Arch::GPU) {
1056 auto &gpu_particle_data = get_system().gpu;
1058 gpu_particle_data.enable_property(GpuParticleData::prop::q);
1059 gpu_particle_data.enable_property(GpuParticleData::prop::pos);
1060 }
1061}
1062#endif // CUDA
1063
1064template struct CoulombP3MImpl<float, Arch::CPU>;
1065template struct CoulombP3MImpl<float, Arch::GPU>;
1066template struct CoulombP3MImpl<double, Arch::CPU>;
1067template struct CoulombP3MImpl<double, Arch::GPU>;
1068
1069#endif // P3M
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
@ INTEG_METHOD_NPT_ISO
Vector implementation and trait types for boost qvm interoperability.
TuningAlgorithm::Parameters get_time() override
Definition p3m.cpp:857
P3MParameters & get_params() override
Definition p3m.cpp:704
std::optional< std::string > fft_decomposition_veto(Utils::Vector3i const &mesh_size_r_space) const override
Definition p3m.cpp:739
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
Definition p3m.cpp:774
CoulombTuningAlgorithm(System::System &system, auto &input_p3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
Definition p3m.cpp:707
void setup_logger(bool verbose) override
Definition p3m.cpp:715
void on_solver_change() const override
Definition p3m.cpp:713
std::optional< std::string > layer_correction_veto_r_cut(double r_cut) const override
Definition p3m.cpp:731
void determine_mesh_limits() override
Definition p3m.cpp:814
void enable_property(std::size_t property)
std::size_t n_particles() const
A range of particles.
Main system class.
std::shared_ptr< LocalBox > local_geo
GpuParticleData gpu
void npt_add_virial_contribution(double energy)
Definition npt.cpp:122
std::shared_ptr< Propagation > propagation
std::shared_ptr< CellStructure > cell_structure
Coulomb::Solver coulomb
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
T norm() const
Definition Vector.hpp:139
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
Definition Vector.hpp:111
Cache for interpolation weights.
void store(const InterpolationWeights< cao > &w)
Push back weights for one point.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
This file contains the defaults for ESPResSo.
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
Definition config.hpp:66
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
Definition config.hpp:53
void charge_assign(elc_data const &elc, CoulombP3M &solver, combined_ranges const &p_q_pos_range)
Definition elc.cpp:1114
ELC algorithm for long-range Coulomb interactions.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
auto pad_with_zeros_discard_imag(std::span< T > cropped_array, Utils::Vector3i cropped_dim, Utils::Vector3i pad_left, Utils::Vector3i pad_right)
auto extract_block(Container const &in_array, Utils::Vector3i const &dimensions, Utils::Vector3i const &start, Utils::Vector3i const &stop, Utils::MemoryOrder memory_order, Utils::MemoryOrder output_memory_order)
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, InterpolationWeights< cao > const &weights, Kernel kernel)
P3M grid interpolation.
auto charge_range(ParticleRange const &particles)
auto pos_range(ParticleRange const &particles)
auto force_range(ParticleRange const &particles)
auto unfolded_pos_range(ParticleRange const &particles, BoxGeometry const &box)
System & get_system()
T product(Vector< T, N > const &v)
Definition Vector.hpp:375
VectorXd< 3 > Vector3d
Definition Vector.hpp:165
int get_linear_index(int a, int b, int c, const Vector3i &adim, MemoryOrder memory_order=MemoryOrder::COLUMN_MAJOR)
get the linear index from the position (a,b,c) in a 3D grid of dimensions adim.
Definition index.hpp:43
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
Definition sqr.hpp:28
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
Definition math.hpp:53
auto get_analytic_cotangent_sum_kernel(int cao)
Definition math.hpp:128
Exports for the NpT code.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
static auto get_size_from_shape(Utils::Vector3i const &shape)
Definition p3m.cpp:136
auto transpose(std::span< T > const &flat_array, Utils::Vector3i const &shape)
Definition p3m.cpp:118
static auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift, Utils::Vector3i const &mesh, Utils::Vector3d const &mesh_i, int cao, double alpha_L_i)
Aliasing sum used by p3m_k_space_error.
Definition p3m.cpp:190
std::complex< FloatType > multiply_complex_by_real(std::complex< FloatType > const &z, FloatType k)
Definition p3m.cpp:107
FloatType complex_norm2(std::complex< FloatType > const &z)
Definition p3m.cpp:113
static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh, int cao, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the analytic expression of the error estimate for the P3M method in (eq.
Definition p3m.cpp:255
static bool is_node_grid_compatible_with_mesh(Utils::Vector3i const &node_grid, Utils::Vector3i const &mesh)
Definition p3m.cpp:130
static double p3m_real_space_error(double pref, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the real space contribution to the rms error in the force (as described by Kolafa and Perra...
Definition p3m.cpp:234
std::complex< FloatType > multiply_complex_by_imaginary(std::complex< FloatType > const &z, FloatType k)
Definition p3m.cpp:100
static auto calc_dipole_moment(boost::mpi::communicator const &comm, combined_ranges const &p_q_unfolded_pos_range)
Definition p3m.cpp:435
P3M algorithm for long-range Coulomb interaction.
void p3m_gpu_add_farfield_force(P3MGpuParams &data, GpuParticleData &gpu, double prefactor, std::size_t n_part)
The long-range part of the P3M algorithm.
void p3m_gpu_init(std::shared_ptr< P3MGpuParams > &data, int cao, Utils::Vector3i const &mesh, double alpha, Utils::Vector3d const &box_l, std::size_t n_part)
Initialize the internal data structure of the P3M GPU.
P3M electrostatics on GPU.
double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao, int npart, double sum_q2, double alpha_L, const double *box)
void operator()(auto &p3m, double force_prefac, combined_ranges const &p_q_force_range) const
Definition p3m.cpp:405
Utils::Vector3i node_grid
void charge_assign(ParticleRange const &particles) override
Definition p3m.cpp:380
void tune() override
Definition p3m.cpp:913
void assign_charge(double q, Utils::Vector3d const &real_pos, bool skip_cache) override
Definition p3m.cpp:392
void calc_influence_function_energy() override
Calculate the influence function optimized for the energy and the self energy correction.
Definition p3m.cpp:183
void scaleby_box_l() override
Definition p3m.cpp:1003
void calc_influence_function_force() override
Calculate the optimal influence function of .
Definition p3m.cpp:173
Utils::Vector9d long_range_pressure(ParticleRange const &particles) override
Definition p3m.cpp:535
void init_cpu_kernels()
Definition p3m.cpp:293
void count_charged_particles() override
Definition p3m.cpp:144
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
Definition p3m.cpp:588
void request_gpu() const
Definition p3m.cpp:1054
void init_gpu_kernels()
Definition p3m.cpp:1041
void add_long_range_forces_gpu(ParticleRange const &particles)
Definition p3m.cpp:1017
p3m_send_mesh< FloatType > halo_comm
Definition p3m.impl.hpp:71
std::shared_ptr< P3MFFT< FloatType > > fft
Definition p3m.impl.hpp:72
std::size_t sum_qpart
number of charged particles.
Definition p3m.impl.hpp:57
double sum_q2
Sum of square of charges.
Definition p3m.impl.hpp:59
void sanity_checks_periodicity() const
Definition p3m.cpp:979
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
Definition p3m.cpp:948
void sanity_checks_cell_structure() const
Definition p3m.cpp:987
P3MParameters const & p3m_params
Definition p3m.hpp:57
std::unique_ptr< Implementation > impl
Pointer-to-implementation.
static constexpr std::size_t force
static constexpr std::size_t pos
static constexpr std::size_t q
Interpolation weights for one point.
void recalc_ld_pos(P3MParameters const &params)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
int cao
charge assignment order ([0,7]).
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
double epsilon
epsilon of the "surrounding dielectric".
P3MParameters params
P3M base parameters.
P3MLocalMesh local_mesh
Local mesh geometry information for this MPI rank.
DEVICE_QUALIFIER constexpr pointer data() noexcept
Definition Array.hpp:132
DEVICE_QUALIFIER constexpr const_iterator cbegin() const noexcept
Definition Array.hpp:148
DEVICE_QUALIFIER constexpr const_iterator cend() const noexcept
Definition Array.hpp:160
void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos, p3m_interpolation_cache &inter_weights)
Definition p3m.cpp:348
void operator()(auto &p3m, double q, InterpolationWeights< cao > const &w)
Definition p3m.cpp:340
void operator()(auto &p3m, combined_ranges const &p_q_pos_range)
Definition p3m.cpp:367
void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos)
Definition p3m.cpp:358