Loading [MathJax]/extensions/TeX/AMSmath.js
ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages Concepts
dp3m.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2024 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22/** @file
23 * P3M algorithm for long-range magnetic dipole-dipole interaction.
24 *
25 * By default the magnetic epsilon is metallic = 0.
26 */
27
28#include "config/config.hpp"
29
30#ifdef DP3M
31
34
35#include "fft/fft.hpp"
37#include "p3m/TuningLogger.hpp"
38#include "p3m/common.hpp"
40#include "p3m/interpolation.hpp"
41#include "p3m/math.hpp"
42
43#include "BoxGeometry.hpp"
44#include "LocalBox.hpp"
45#include "Particle.hpp"
46#include "ParticleRange.hpp"
47#include "PropagationMode.hpp"
50#include "communication.hpp"
51#include "errorhandling.hpp"
53#include "npt.hpp"
54#include "system/System.hpp"
55#include "tuning.hpp"
56
57#include <utils/Vector.hpp>
60#include <utils/math/sqr.hpp>
61
62#include <boost/mpi/collectives/all_reduce.hpp>
63#include <boost/mpi/collectives/reduce.hpp>
64
65#include <algorithm>
66#include <array>
67#include <cstddef>
68#include <cstdio>
69#include <functional>
70#include <iterator>
71#include <numbers>
72#include <optional>
73#include <span>
74#include <sstream>
75#include <stdexcept>
76#include <tuple>
77#include <utility>
78#include <vector>
79
80#ifdef FFTW3_H
81#error "The FFTW3 library shouldn't be visible in this translation unit"
82#endif
83
84template <typename FloatType, Arch Architecture>
86 auto local_n = std::size_t{0u};
87 double local_mu2 = 0.;
88
89 for (auto const &p : get_system().cell_structure->local_particles()) {
90 if (p.dipm() != 0.) {
91 local_mu2 += p.calc_dip().norm2();
92 local_n++;
93 }
94 }
95
96 boost::mpi::all_reduce(comm_cart, local_mu2, dp3m.sum_mu2, std::plus<>());
97 boost::mpi::all_reduce(comm_cart, local_n, dp3m.sum_dip_part, std::plus<>());
98}
99
100static double dp3m_k_space_error(double box_size, int mesh, int cao,
101 int n_c_part, double sum_q2, double alpha_L);
102
103static double dp3m_real_space_error(double box_size, double r_cut_iL,
104 int n_c_part, double sum_q2,
105 double alpha_L);
106
107/** Compute the value of alpha through a bisection method.
108 * Based on eq. (33) @cite wang01a.
109 */
110double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part,
111 double sum_q2, double x1, double x2, double xacc,
112 double tuned_accuracy);
113
114template <typename FloatType, Arch Architecture>
115double
117 const {
118 auto const &box_geo = *get_system().box_geo;
119 auto const node_phi =
120 grid_influence_function_self_energy<FloatType, P3M_BRILLOUIN>(
121 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
122
123 double phi = 0.;
124 boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0);
125 phi /= 3. * box_geo.length()[0] * Utils::int_pow<3>(dp3m.params.mesh[0]);
126 return phi * std::numbers::pi;
127}
128
129template <typename FloatType, Arch Architecture>
131 assert(dp3m.params.mesh >= Utils::Vector3i::broadcast(1));
132 assert(dp3m.params.cao >= 1 and dp3m.params.cao <= 7);
133 assert(dp3m.params.alpha > 0.);
134
135 auto const &system = get_system();
136 auto const &box_geo = *system.box_geo;
137 auto const &local_geo = *system.local_geo;
138 auto const verlet_skin = system.cell_structure->get_verlet_skin();
139
140 dp3m.params.cao3 = Utils::int_pow<3>(dp3m.params.cao);
141 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
142
143 assert(dp3m.fft);
144 dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo, verlet_skin, 0.);
145 dp3m.fft_buffers->init_halo();
146 dp3m.fft->init(dp3m.params);
147 dp3m.mesh.ks_pnum = dp3m.fft->get_ks_pnum();
148 dp3m.fft_buffers->init_meshes(dp3m.fft->get_ca_mesh_size());
149 dp3m.update_mesh_views();
150 dp3m.calc_differential_operator();
151
152 /* fix box length dependent constants */
153 scaleby_box_l();
154
156}
157
158namespace {
159template <int cao> struct AssignDipole {
160 void operator()(auto &dp3m, Utils::Vector3d const &real_pos,
161 Utils::Vector3d const &dip) const {
162 using value_type =
163 typename std::remove_reference_t<decltype(dp3m)>::value_type;
164 auto constexpr memory_order = Utils::MemoryOrder::ROW_MAJOR;
165 auto const weights = p3m_calculate_interpolation_weights<cao, memory_order>(
166 real_pos, dp3m.params.ai, dp3m.local_mesh);
167 p3m_interpolate<cao>(
168 dp3m.local_mesh, weights, [&dip, &dp3m](int ind, double w) {
169 dp3m.mesh.rs_fields[0u][ind] += value_type(w * dip[0u]);
170 dp3m.mesh.rs_fields[1u][ind] += value_type(w * dip[1u]);
171 dp3m.mesh.rs_fields[2u][ind] += value_type(w * dip[2u]);
172 });
173
174 dp3m.inter_weights.template store<cao>(weights);
175 }
176};
177} // namespace
178
179template <typename FloatType, Arch Architecture>
181 ParticleRange const &particles) {
182 dp3m.inter_weights.reset(dp3m.params.cao);
183
184 /* prepare local FFT mesh */
185 for (auto &rs_mesh_field : dp3m.mesh.rs_fields)
186 for (int j = 0; j < dp3m.local_mesh.size; j++)
187 rs_mesh_field[j] = 0.;
188
189 for (auto const &p : particles) {
190 if (p.dipm() != 0.) {
191 Utils::integral_parameter<int, AssignDipole, 1, 7>(dp3m.params.cao, dp3m,
192 p.pos(), p.calc_dip());
193 }
194 }
195}
196
197namespace {
198template <int cao> struct AssignTorques {
199 void operator()(auto &dp3m, double prefac, int d_rs,
200 ParticleRange const &particles) const {
201
202 /* magnetic particle index */
203 auto p_index = std::size_t{0ul};
204
205 for (auto &p : particles) {
206 if (p.dipm() != 0.) {
207 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
208
209 Utils::Vector3d E{};
210 p3m_interpolate(dp3m.local_mesh, weights,
211 [&E, &dp3m, d_rs](int ind, double w) {
212 E[d_rs] += w * double(dp3m.mesh.rs_scalar[ind]);
213 });
214
215 p.torque() -= vector_product(p.calc_dip(), prefac * E);
216 ++p_index;
217 }
218 }
219 }
220};
221
222template <int cao> struct AssignForces {
223 void operator()(auto &dp3m, double prefac, int d_rs,
224 ParticleRange const &particles) const {
225
226 /* magnetic particle index */
227 auto p_index = std::size_t{0ul};
228
229 for (auto &p : particles) {
230 if (p.dipm() != 0.) {
231 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
232
233 Utils::Vector3d E{};
234 p3m_interpolate(dp3m.local_mesh, weights,
235 [&E, &dp3m](int ind, double w) {
236 E[0u] += w * double(dp3m.mesh.rs_fields[0u][ind]);
237 E[1u] += w * double(dp3m.mesh.rs_fields[1u][ind]);
238 E[2u] += w * double(dp3m.mesh.rs_fields[2u][ind]);
239 });
240
241 p.force()[d_rs] += p.calc_dip() * prefac * E;
242 ++p_index;
243 }
244 }
245 }
246};
247} // namespace
248
249template <typename FloatType, Arch Architecture>
251 bool force_flag, bool energy_flag, ParticleRange const &particles) {
252 /* k-space energy */
253 double energy = 0.;
254 auto const &system = get_system();
255 auto const &box_geo = *system.box_geo;
256 auto const dipole_prefac = prefactor / Utils::int_pow<3>(dp3m.params.mesh[0]);
257#ifdef NPT
258 auto const npt_flag =
259 force_flag and (system.propagation->integ_switch == INTEG_METHOD_NPT_ISO);
260#else
261 auto constexpr npt_flag = false;
262#endif
263
264 if (dp3m.sum_mu2 > 0.) {
265 dipole_assign(particles);
266 dp3m.fft_buffers->perform_vector_halo_gather();
267 for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
268 dp3m.fft->forward_fft(rs_mesh);
269 }
270 dp3m.update_mesh_views();
271 }
272
273 /* === k-space energy calculation === */
274 if (energy_flag or npt_flag) {
275 /*********************
276 Dipolar energy
277 **********************/
278 if (dp3m.sum_mu2 > 0.) {
279 /* i*k differentiation for dipolar gradients:
280 * |(\Fourier{\vect{mu}}(k)\cdot \vect{k})|^2 */
281
282 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
283 auto const &offset = dp3m.mesh.start;
284 auto const &d_op = dp3m.d_op[0u];
285 auto const &mesh_dip = dp3m.mesh.rs_fields;
286 auto const permutations = dp3m.fft->get_permutations();
287 auto indices = Utils::Vector3i{};
288 auto index = std::size_t(0u);
289 auto it_energy = dp3m.g_energy.begin();
290 auto node_energy = 0.;
291 for_each_3d(mesh_start, dp3m.mesh.size, indices, [&]() {
292 auto const [KX, KY, KZ] = permutations;
293 auto const shift = indices + offset;
294 // Re(mu)*k
295 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
296 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
297 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
298 ++index;
299 // Im(mu)*k
300 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
301 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
302 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
303 ++index;
304 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
305 std::advance(it_energy, 1);
306 });
307
308 node_energy *= dipole_prefac * std::numbers::pi * box_geo.length_inv()[0];
309 boost::mpi::reduce(comm_cart, node_energy, energy, std::plus<>(), 0);
310
311 if (dp3m.energy_correction == 0.)
312 calc_energy_correction();
313
314 if (this_node == 0) {
315 /* self energy correction */
316 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
317 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
318
319 /* dipolar energy correction due to systematic Madelung-self effects */
320 energy += prefactor * dp3m.energy_correction / box_geo.volume();
321 }
322 }
323 } // if (energy_flag)
324
325 /* === k-space force calculation === */
326 if (force_flag) {
327 /****************************
328 * DIPOLAR TORQUES (k-space)
329 ****************************/
330 if (dp3m.sum_mu2 > 0.) {
331 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u];
332 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
333 auto const &mesh_stop = dp3m.mesh.size;
334 auto const offset = dp3m.mesh.start;
335 auto const &d_op = dp3m.d_op[0u];
336 auto const permutations = dp3m.fft->get_permutations();
337 auto &mesh_dip = dp3m.mesh.rs_fields;
338 auto indices = Utils::Vector3i{};
339 auto index = std::size_t(0u);
340 dp3m.ks_scalar.resize(dp3m.mesh.rs_scalar.size());
341
342 /* fill in ks_scalar array for torque calculation */
343 auto it_energy = dp3m.g_energy.begin();
344 index = 0u;
345 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
346 auto const [KX, KY, KZ] = permutations;
347 auto const shift = indices + offset;
348 // Re(mu)*k
349 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
350 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
351 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
352 dp3m.ks_scalar[index] = *it_energy * re;
353 ++index;
354 // Im(mu)*k
355 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
356 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
357 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
358 dp3m.ks_scalar[index] = *it_energy * im;
359 ++index;
360 std::advance(it_energy, 1);
361 });
362
363 /* Force component loop */
364 for (int d = 0; d < 3; d++) {
365 index = 0u;
366 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
367 auto const d_op_val = FloatType(d_op[indices[d] + offset[d]]);
368 dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.ks_scalar[index];
369 ++index;
370 dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.ks_scalar[index];
371 ++index;
372 });
373 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
374 dp3m.fft_buffers->perform_scalar_halo_spread();
375 /* Assign force component from mesh to particle */
376 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
377 Utils::integral_parameter<int, AssignTorques, 1, 7>(
378 dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs, particles);
379 }
380
381 /***************************
382 DIPOLAR FORCES (k-space)
383 ****************************/
384 // Compute forces after torques because the algorithm below overwrites the
385 // grids dp3m.mesh.rs_fields !
386 // Note: I'll do here 9 inverse FFTs. By symmetry, we can reduce this
387 // number to 6 !
388 /* fill in ks_scalar array for force calculation */
389 auto it_force = dp3m.g_force.begin();
390 index = 0u;
391 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
392 auto const [KX, KY, KZ] = permutations;
393 auto const shift = indices + offset;
394 // Re(mu)*k
395 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
396 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
397 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
398 ++index;
399 // Im(mu)*k
400 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
401 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
402 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
403 ++index;
404 dp3m.ks_scalar[index - 2] = *it_force * im;
405 dp3m.ks_scalar[index - 1] = *it_force * (-re);
406 std::advance(it_force, 1);
407 });
408
409 /* Force component loop */
410 for (int d = 0; d < 3; d++) {
411 index = 0u;
412 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
413 auto const [KX, KY, KZ] = permutations;
414 auto const d_op_val = FloatType(d_op[indices[d] + offset[d]]);
415 auto const shift = indices + offset;
416 auto const f1 = d_op_val * dp3m.ks_scalar[index];
417 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f1;
418 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f1;
419 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f1;
420 ++index;
421 auto const f2 = d_op_val * dp3m.ks_scalar[index];
422 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f2;
423 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f2;
424 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f2;
425 ++index;
426 });
427 for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
428 dp3m.fft->backward_fft(rs_mesh);
429 }
430 dp3m.fft_buffers->perform_vector_halo_spread();
431 /* Assign force component from mesh to particle */
432 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
433 Utils::integral_parameter<int, AssignForces, 1, 7>(
434 dp3m.params.cao, dp3m, dipole_prefac * Utils::sqr(wavenumber), d_rs,
435 particles);
436 }
437 } /* if (dp3m.sum_mu2 > 0) */
438 } /* if (force_flag) */
439
440 if (dp3m.params.epsilon != P3M_EPSILON_METALLIC) {
441 auto const surface_term =
442 calc_surface_term(force_flag, energy_flag or npt_flag, particles);
443 if (this_node == 0) {
444 energy += surface_term;
445 }
446 }
447#ifdef NPT
448 if (npt_flag) {
449 get_system().npt_add_virial_contribution(energy);
450 }
451#endif
452 if (not energy_flag) {
453 energy = 0.;
454 }
455
456 return energy;
457}
458
459template <typename FloatType, Arch Architecture>
461 bool force_flag, bool energy_flag, ParticleRange const &particles) {
462 auto const &box_geo = *get_system().box_geo;
463 auto const pref = prefactor * 4. * std::numbers::pi / box_geo.volume() /
464 (2. * dp3m.params.epsilon + 1.);
465 auto const n_local_part = particles.size();
466
467 // We put all the dipolar momenta in a the arrays mx,my,mz according to the
468 // id-number of the particles
469 std::vector<double> mx(n_local_part);
470 std::vector<double> my(n_local_part);
471 std::vector<double> mz(n_local_part);
472
473 std::size_t ip = 0u;
474 for (auto const &p : particles) {
475 auto const dip = p.calc_dip();
476 mx[ip] = dip[0u];
477 my[ip] = dip[1u];
478 mz[ip] = dip[2u];
479 ip++;
480 }
481
482 // we will need the sum of all dipolar momenta vectors
483 auto local_dip = Utils::Vector3d{};
484 for (std::size_t i = 0u; i < n_local_part; i++) {
485 local_dip[0u] += mx[i];
486 local_dip[1u] += my[i];
487 local_dip[2u] += mz[i];
488 }
489 auto const box_dip =
490 boost::mpi::all_reduce(comm_cart, local_dip, std::plus<>());
491
492 double energy = 0.;
493 if (energy_flag) {
494 double sum_e = 0.;
495 for (std::size_t i = 0u; i < n_local_part; i++) {
496 sum_e += mx[i] * box_dip[0] + my[i] * box_dip[1] + mz[i] * box_dip[2];
497 }
498 energy =
499 0.5 * pref * boost::mpi::all_reduce(comm_cart, sum_e, std::plus<>());
500 }
501
502 if (force_flag) {
503
504 std::vector<double> sumix(n_local_part);
505 std::vector<double> sumiy(n_local_part);
506 std::vector<double> sumiz(n_local_part);
507
508 for (std::size_t i = 0u; i < n_local_part; i++) {
509 sumix[i] = my[i] * box_dip[2u] - mz[i] * box_dip[1u];
510 sumiy[i] = mz[i] * box_dip[0u] - mx[i] * box_dip[2u];
511 sumiz[i] = mx[i] * box_dip[1u] - my[i] * box_dip[0u];
512 }
513
514 ip = 0u;
515 for (auto &p : particles) {
516 auto &torque = p.torque();
517 torque[0u] -= pref * sumix[ip];
518 torque[1u] -= pref * sumiy[ip];
519 torque[2u] -= pref * sumiz[ip];
520 ip++;
521 }
522 }
523
524 return energy;
525}
526
527template <typename FloatType, Arch Architecture>
529 dp3m.g_force = grid_influence_function<FloatType, 3, P3M_BRILLOUIN>(
530 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
531 get_system().box_geo->length_inv());
532}
533
534template <typename FloatType, Arch Architecture>
536 dp3m.g_energy = grid_influence_function<FloatType, 2, P3M_BRILLOUIN>(
537 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
538 get_system().box_geo->length_inv());
539}
540
541template <typename FloatType, Arch Architecture>
544 int m_mesh_max = -1, m_mesh_min = -1;
545 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
546
547public:
548 DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m,
549 double prefactor, int timings,
550 decltype(m_tune_limits) tune_limits)
551 : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m},
552 m_tune_limits{std::move(tune_limits)} {}
553
554 P3MParameters &get_params() override { return dp3m.params; }
555
556 void on_solver_change() const override { m_system.on_dipoles_change(); }
557
558 std::optional<std::string>
559 layer_correction_veto_r_cut(double) const override {
560 return {};
561 }
562
563 void setup_logger(bool verbose) override {
564 auto const &box_geo = *m_system.box_geo;
565 m_logger = std::make_unique<TuningLogger>(
566 verbose and this_node == 0, "DipolarP3M", TuningLogger::Mode::Dipolar);
567 m_logger->tuning_goals(dp3m.params.accuracy, m_prefactor,
568 box_geo.length()[0], dp3m.sum_dip_part,
569 dp3m.sum_mu2);
570 m_logger->log_tuning_start();
571 }
572
573 std::tuple<double, double, double, double>
575 double r_cut_iL) const override {
576
577 double alpha_L, rs_err, ks_err;
578 auto const &box_geo = *m_system.box_geo;
579
580 /* calc maximal real space error for setting */
581 rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL,
582 dp3m.sum_dip_part, dp3m.sum_mu2, 0.001);
583 // alpha cannot be zero for dipoles because real-space formula breaks down
584
585 if (std::numbers::sqrt2 * rs_err > dp3m.params.accuracy) {
586 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
587 alpha_L = dp3m_rtbisection(
588 box_geo.length()[0], r_cut_iL, dp3m.sum_dip_part, dp3m.sum_mu2,
589 0.0001 * box_geo.length()[0], 5. * box_geo.length()[0], 0.0001,
590 dp3m.params.accuracy);
591 } else {
592 /* even alpha=0 is ok, however, we cannot choose it since it kills the
593 k-space error formula.
594 Anyways, this very likely NOT the optimal solution */
595 alpha_L = 0.1;
596 }
597
598 /* calculate real-space and k-space error for this alpha_L */
599 rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL,
600 dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L);
601 ks_err = dp3m_k_space_error(box_geo.length()[0], mesh[0], cao,
602 dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L);
603
604 return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L};
605 }
606
607 void determine_mesh_limits() override {
608 if (dp3m.params.mesh[0] == -1) {
609 /* simple heuristic to limit the tried meshes if the accuracy cannot
610 be obtained with smaller meshes, but normally not all these
611 meshes have to be tested */
612 auto const expo = std::log(std::cbrt(dp3m.sum_dip_part)) / std::log(2.);
613 /* Medium-educated guess for the minimal mesh */
614 m_mesh_min = static_cast<int>(std::round(std::pow(2., std::floor(expo))));
615 /* avoid using more than 1 GB of FFT arrays */
616 m_mesh_max = 128;
617 if (m_tune_limits.first) {
618 m_mesh_min = *m_tune_limits.first;
619 }
620 if (m_tune_limits.second) {
621 m_mesh_max = *m_tune_limits.second;
622 }
623 } else {
624 m_mesh_min = m_mesh_max = dp3m.params.mesh[0];
625 m_logger->report_fixed_mesh(dp3m.params.mesh);
626 }
627 }
628
630 auto tuned_params = TuningAlgorithm::Parameters{};
631 auto time_best = time_sentinel;
632 for (auto tmp_mesh = m_mesh_min; tmp_mesh <= m_mesh_max; tmp_mesh += 2) {
633 auto trial_params = TuningAlgorithm::Parameters{};
634 trial_params.mesh = Utils::Vector3i::broadcast(tmp_mesh);
635 trial_params.cao = cao_best;
636
637 auto const trial_time =
638 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
639 trial_params.alpha_L, trial_params.accuracy);
640
641 /* this mesh does not work at all */
642 if (trial_time < 0.)
643 continue;
644
645 /* the optimum r_cut for this mesh is the upper limit for higher meshes,
646 everything else is slower */
647 m_r_cut_iL_max = trial_params.r_cut_iL;
648
649 if (trial_time < time_best) {
650 /* new optimum */
652 tuned_params = trial_params;
653 time_best = tuned_params.time = trial_time;
654 } else if (trial_time > time_best + time_granularity or
656 /* no hope of further optimisation */
657 break;
658 }
659 }
660 return tuned_params;
661 }
662};
663
664template <typename FloatType, Arch Architecture>
666 auto &system = get_system();
667 auto const &box_geo = *system.box_geo;
668 if (dp3m.params.alpha_L == 0. and dp3m.params.alpha != 0.) {
669 dp3m.params.alpha_L = dp3m.params.alpha * box_geo.length()[0];
670 }
671 if (dp3m.params.r_cut_iL == 0. and dp3m.params.r_cut != 0.) {
672 dp3m.params.r_cut_iL = dp3m.params.r_cut * box_geo.length_inv()[0];
673 }
674 if (not is_tuned()) {
676 if (dp3m.sum_dip_part == 0) {
677 throw std::runtime_error(
678 "DipolarP3M: no dipolar particles in the system");
679 }
680 try {
682 system, dp3m, prefactor, tune_timings, tune_limits);
683 parameters.setup_logger(tune_verbose);
684 // parameter ranges
685 parameters.determine_mesh_limits();
686 parameters.determine_r_cut_limits();
687 parameters.determine_cao_limits(3);
688 // run tuning algorithm
689 parameters.tune();
690 m_is_tuned = true;
691 system.on_dipoles_change();
692 } catch (...) {
693 dp3m.params.tuning = false;
694 throw;
695 }
696 }
697 init();
698}
699
700/** Tuning dipolar-P3M */
701static auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh,
702 double mesh_i, int cao, double alpha_L_i) {
703
704 auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
705 auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
706 auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i);
707 auto alias1 = 0.;
708 auto alias2 = 0.;
709
710 Utils::Vector3i indices{};
711 Utils::Vector3i nm{};
712 Utils::Vector3d fnm{};
714 mesh_start, mesh_stop, indices,
715 [&]() {
716 auto const norm_sq = nm.norm2();
717 auto const ex = std::exp(-factor1 * norm_sq);
718 auto const U2 = std::pow(Utils::product(fnm), 2 * cao);
719 alias1 += Utils::sqr(ex) * norm_sq;
720 alias2 += U2 * ex * std::pow(shift * nm, 3) / norm_sq;
721 },
722 [&](unsigned dim, int n) {
723 nm[dim] = shift[dim] + n * mesh;
724 fnm[dim] = math::sinc(nm[dim] * mesh_i);
725 });
726
727 return std::make_pair(alias1, alias2);
728}
729
730/** Calculate the k-space error of dipolar-P3M */
731static double dp3m_k_space_error(double box_size, int mesh, int cao,
732 int n_c_part, double sum_q2, double alpha_L) {
733
734 auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao);
735 auto const mesh_i = 1. / static_cast<double>(mesh);
736 auto const alpha_L_i = 1. / alpha_L;
737 auto const mesh_stop = Utils::Vector3i::broadcast(mesh / 2);
738 auto const mesh_start = -mesh_stop;
739 auto indices = Utils::Vector3i{};
740 auto values = Utils::Vector3d{};
741 auto he_q = 0.;
742
744 mesh_start, mesh_stop, indices,
745 [&]() {
746 if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
747 auto const n2 = indices.norm2();
748 auto const cs = Utils::product(values);
749 auto const [alias1, alias2] =
750 dp3m_tune_aliasing_sums(indices, mesh, mesh_i, cao, alpha_L_i);
751 auto const d =
752 alias1 - Utils::sqr(alias2 / cs) /
753 Utils::int_pow<3>(static_cast<double>(n2));
754 /* at high precision, d can become negative due to extinction;
755 also, don't take values that have no significant digits left*/
756 if (d > 0. and std::fabs(d / alias1) > ROUND_ERROR_PREC)
757 he_q += d;
758 }
759 },
760 [&values, &mesh_i, cotangent_sum](unsigned dim, int n) {
761 values[dim] = cotangent_sum(n, mesh_i);
762 });
763
764 return 8. * Utils::sqr(std::numbers::pi) / 3. * sum_q2 *
765 sqrt(he_q / n_c_part) / Utils::int_pow<4>(box_size);
766}
767
768/** Calculate the value of the errors for the REAL part of the force in terms
769 * of the splitting parameter alpha of Ewald. Based on eq. (33) @cite wang01a.
770 *
771 * Please note that in this more refined approach we don't use
772 * eq. (37), but eq. (33) which maintains all the powers in alpha.
773 */
774static double dp3m_real_space_error(double box_size, double r_cut_iL,
775 int n_c_part, double sum_q2,
776 double alpha_L) {
777 auto constexpr exp_min = -708.4; // for IEEE-compatible double
778 double d_error_f, d_cc, d_dc, d_con;
779
780 auto const d_rcut = r_cut_iL * box_size;
781 auto const d_rcut2 = Utils::sqr(d_rcut);
782 auto const d_rcut4 = Utils::sqr(d_rcut2);
783
784 auto const d_a2 = Utils::sqr(alpha_L) / Utils::sqr(box_size);
785 auto const exponent = -d_a2 * d_rcut2;
786 auto const exp_term = (exponent < exp_min) ? 0. : std::exp(exponent);
787 auto const d_c = sum_q2 * exp_term;
788
789 d_cc = 4. * Utils::sqr(d_a2) * Utils::sqr(d_rcut2) + 6. * d_a2 * d_rcut2 + 3.;
790
791 d_dc = 8. * Utils::int_pow<3>(d_a2) * Utils::int_pow<3>(d_rcut2) +
792 20. * Utils::sqr(d_a2) * d_rcut4 + 30. * d_a2 * d_rcut2 + 15.;
793
794 d_con = 1. / sqrt(Utils::int_pow<3>(box_size) * Utils::sqr(d_a2) * d_rcut *
795 Utils::sqr(d_rcut4) * static_cast<double>(n_c_part));
796
797 d_error_f = d_c * d_con *
798 sqrt((13. / 6.) * Utils::sqr(d_cc) +
799 (2. / 15.) * Utils::sqr(d_dc) - (13. / 15.) * d_cc * d_dc);
800
801 return d_error_f;
802}
803
804/** Using bisection, find the root of a function "func-tuned_accuracy/sqrt(2.)"
805 * known to lie between x1 and x2. The root, returned as rtbis, will be
806 * refined until its accuracy is \f$\pm\f$ @p xacc.
807 */
808double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part,
809 double sum_q2, double x1, double x2, double xacc,
810 double tuned_accuracy) {
811 constexpr int JJ_RTBIS_MAX = 40;
812
813 auto const constant = tuned_accuracy / std::numbers::sqrt2;
814
815 auto const f1 =
816 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, x1) -
817 constant;
818 auto const f2 =
819 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, x2) -
820 constant;
821 if (f1 * f2 >= 0.0) {
822 throw std::runtime_error(
823 "Root must be bracketed for bisection in dp3m_rtbisection");
824 }
825 // Orient the search dx, and set rtb to x1 or x2 ...
826 double dx;
827 double rtb = f1 < 0.0 ? (dx = x2 - x1, x1) : (dx = x1 - x2, x2);
828 for (int j = 1; j <= JJ_RTBIS_MAX; j++) {
829 auto const xmid = rtb + (dx *= 0.5);
830 auto const fmid =
831 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, xmid) -
832 constant;
833 if (fmid <= 0.0)
834 rtb = xmid;
835 if (fabs(dx) < xacc || fmid == 0.0)
836 return rtb;
837 }
838 throw std::runtime_error("Too many bisections in dp3m_rtbisection");
839}
840
842 auto const &system = get_system();
843 auto const &box_geo = *system.box_geo;
844 auto const &local_geo = *system.local_geo;
845 for (auto i = 0u; i < 3u; i++) {
846 /* check k-space cutoff */
847 if (dp3m_params.cao_cut[i] >= box_geo.length_half()[i]) {
848 std::stringstream msg;
849 msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i]
850 << " is larger than half of box dimension " << box_geo.length()[i];
851 throw std::runtime_error(msg.str());
852 }
853 if (dp3m_params.cao_cut[i] >= local_geo.length()[i]) {
854 std::stringstream msg;
855 msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i]
856 << " is larger than local box dimension " << local_geo.length()[i];
857 throw std::runtime_error(msg.str());
858 }
859 }
860
861 if ((box_geo.length()[0] != box_geo.length()[1]) or
862 (box_geo.length()[1] != box_geo.length()[2])) {
863 throw std::runtime_error("DipolarP3M: requires a cubic box");
864 }
865}
866
868 auto const &box_geo = *get_system().box_geo;
869 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
870 throw std::runtime_error(
871 "DipolarP3M: requires periodicity (True, True, True)");
872 }
873}
874
876 auto const &local_geo = *get_system().local_geo;
877 if (local_geo.cell_structure_type() != CellStructureType::REGULAR and
878 local_geo.cell_structure_type() != CellStructureType::HYBRID) {
879 throw std::runtime_error(
880 "DipolarP3M: requires the regular or hybrid decomposition cell system");
881 }
882 if (::communicator.size > 1 and
883 local_geo.cell_structure_type() == CellStructureType::HYBRID) {
884 throw std::runtime_error(
885 "DipolarP3M: does not work with the hybrid decomposition cell system, "
886 "if using more than one MPI node");
887 }
888}
889
891 auto const &node_grid = ::communicator.node_grid;
892 if (node_grid[0] < node_grid[1] or node_grid[1] < node_grid[2]) {
893 throw std::runtime_error(
894 "DipolarP3M: node grid must be sorted, largest first");
895 }
896}
897
898template <typename FloatType, Arch Architecture>
900 auto const &box_geo = *get_system().box_geo;
901 dp3m.params.r_cut = dp3m.params.r_cut_iL * box_geo.length()[0];
902 dp3m.params.alpha = dp3m.params.alpha_L * box_geo.length_inv()[0];
903 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
905 sanity_checks_boxl();
906 calc_influence_function_force();
907 calc_influence_function_energy();
908 dp3m.energy_correction = 0.;
909}
910
911template <typename FloatType, Arch Architecture>
913 auto const &box_geo = *get_system().box_geo;
914 auto const Ukp3m = calc_average_self_energy_k_space() * box_geo.volume();
915 auto const Ewald_volume = Utils::int_pow<3>(dp3m.params.alpha_L);
916 auto const Eself = -2. * Ewald_volume * std::numbers::inv_sqrtpi / 3.;
917 dp3m.energy_correction =
918 -dp3m.sum_mu2 * (Ukp3m + Eself + 2. * std::numbers::pi / 3.);
919}
920
921#ifdef NPT
922template <typename FloatType, Arch Architecture>
924 double energy) const {
925 get_system().npt_add_virial_contribution(energy);
926}
927#endif // NPT
928
931
932#endif // DP3M
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
@ INTEG_METHOD_NPT_ISO
Vector implementation and trait types for boost qvm interoperability.
void on_solver_change() const override
Definition dp3m.cpp:556
void setup_logger(bool verbose) override
Definition dp3m.cpp:563
TuningAlgorithm::Parameters get_time() override
Definition dp3m.cpp:629
P3MParameters & get_params() override
Definition dp3m.cpp:554
DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
Definition dp3m.cpp:548
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
Definition dp3m.cpp:574
std::optional< std::string > layer_correction_veto_r_cut(double) const override
Definition dp3m.cpp:559
void determine_mesh_limits() override
Definition dp3m.cpp:607
A range of particles.
base_type::size_type size() const
Main system class.
std::shared_ptr< LocalBox > local_geo
void npt_add_virial_contribution(double energy)
Definition npt.cpp:122
std::shared_ptr< CellStructure > cell_structure
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
T norm() const
Definition Vector.hpp:139
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
Definition Vector.hpp:111
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
This file contains the defaults for ESPResSo.
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
Definition config.hpp:66
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
Definition config.hpp:53
__device__ void vector_product(float const *a, float const *b, float *out)
static int count_magnetic_particles(System::System const &system)
Definition dlc.cpp:399
static auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, double mesh_i, int cao, double alpha_L_i)
Tuning dipolar-P3M.
Definition dp3m.cpp:701
static double dp3m_k_space_error(double box_size, int mesh, int cao, int n_c_part, double sum_q2, double alpha_L)
Calculate the k-space error of dipolar-P3M.
Definition dp3m.cpp:731
double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy)
Compute the value of alpha through a bisection method.
Definition dp3m.cpp:808
static double dp3m_real_space_error(double box_size, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L)
Calculate the value of the errors for the REAL part of the force in terms of the splitting parameter ...
Definition dp3m.cpp:774
P3M algorithm for long-range magnetic dipole-dipole interaction.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
Routines, row decomposition, data structures and communication for the 3D-FFT.
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, InterpolationWeights< cao > const &weights, Kernel kernel)
P3M grid interpolation.
T product(Vector< T, N > const &v)
Definition Vector.hpp:375
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
Definition sqr.hpp:28
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
Definition math.hpp:53
auto get_analytic_cotangent_sum_kernel(int cao)
Definition math.hpp:128
Exports for the NpT code.
Common functions for dipolar and charge P3M.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
Utils::Vector3i node_grid
void calc_energy_correction() override
Definition dp3m.cpp:912
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
Definition dp3m.cpp:250
double calc_surface_term(bool force_flag, bool energy_flag, ParticleRange const &particles) override
Definition dp3m.cpp:460
void dipole_assign(ParticleRange const &particles) override
Definition dp3m.cpp:180
void init_cpu_kernels()
Definition dp3m.cpp:130
void calc_influence_function_energy() override
Definition dp3m.cpp:535
void count_magnetic_particles() override
Definition dp3m.cpp:85
void npt_add_virial_contribution(double energy) const override
Definition dp3m.cpp:923
void calc_influence_function_force() override
Definition dp3m.cpp:528
void tune() override
Definition dp3m.cpp:665
void scaleby_box_l() override
Definition dp3m.cpp:899
double calc_average_self_energy_k_space() const override
Definition dp3m.cpp:116
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
Definition dp3m.cpp:841
void sanity_checks_cell_structure() const
Definition dp3m.cpp:875
P3MParameters const & dp3m_params
Definition dp3m.hpp:55
void sanity_checks_periodicity() const
Definition dp3m.cpp:867
void sanity_checks_node_grid() const
Definition dp3m.cpp:890
void recalc_ld_pos(P3MParameters const &params)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
P3MParameters params
P3M base parameters.
P3MLocalMesh local_mesh
Local mesh geometry information for this MPI rank.
void operator()(auto &dp3m, Utils::Vector3d const &real_pos, Utils::Vector3d const &dip) const
Definition dp3m.cpp:160
void operator()(auto &dp3m, double prefac, int d_rs, ParticleRange const &particles) const
Definition dp3m.cpp:223
void operator()(auto &dp3m, double prefac, int d_rs, ParticleRange const &particles) const
Definition dp3m.cpp:199
double energy_correction
cached k-space self-energy correction
Definition dp3m.impl.hpp:59
double sum_mu2
Sum of square of magnetic dipoles.
Definition dp3m.impl.hpp:53
std::size_t sum_dip_part
number of dipolar particles.
Definition dp3m.impl.hpp:51