ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
dp3m.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2024 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22/** @file
23 * P3M algorithm for long-range magnetic dipole-dipole interaction.
24 *
25 * By default the magnetic epsilon is metallic = 0.
26 */
27
28#include "config/config.hpp"
29
30#ifdef DP3M
31
34
35#include "fft/fft.hpp"
37#include "p3m/TuningLogger.hpp"
38#include "p3m/common.hpp"
40#include "p3m/interpolation.hpp"
41#include "p3m/math.hpp"
42
43#include "BoxGeometry.hpp"
44#include "LocalBox.hpp"
45#include "Particle.hpp"
46#include "ParticleRange.hpp"
47#include "PropagationMode.hpp"
50#include "communication.hpp"
51#include "errorhandling.hpp"
53#include "npt.hpp"
54#include "system/System.hpp"
55#include "tuning.hpp"
56
57#include <utils/Vector.hpp>
60#include <utils/math/sqr.hpp>
61
62#include <boost/mpi/collectives/all_reduce.hpp>
63#include <boost/mpi/collectives/reduce.hpp>
64
65#include <algorithm>
66#include <array>
67#include <cstddef>
68#include <cstdio>
69#include <functional>
70#include <iterator>
71#include <numbers>
72#include <optional>
73#include <span>
74#include <sstream>
75#include <stdexcept>
76#include <tuple>
77#include <utility>
78#include <vector>
79
80#ifdef FFTW3_H
81#error "The FFTW3 library shouldn't be visible in this translation unit"
82#endif
83
84template <typename FloatType, Arch Architecture>
86 int local_n = 0;
87 double local_mu2 = 0.;
88
89 for (auto const &p : get_system().cell_structure->local_particles()) {
90 if (p.dipm() != 0.) {
91 local_mu2 += p.calc_dip().norm2();
92 local_n++;
93 }
94 }
95
96 boost::mpi::all_reduce(comm_cart, local_mu2, dp3m.sum_mu2, std::plus<>());
97 boost::mpi::all_reduce(comm_cart, local_n, dp3m.sum_dip_part, std::plus<>());
98}
99
100static double dp3m_k_space_error(double box_size, int mesh, int cao,
101 int n_c_part, double sum_q2, double alpha_L);
102
103static double dp3m_real_space_error(double box_size, double r_cut_iL,
104 int n_c_part, double sum_q2,
105 double alpha_L);
106
107/** Compute the value of alpha through a bisection method.
108 * Based on eq. (33) @cite wang01a.
109 */
110double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part,
111 double sum_q2, double x1, double x2, double xacc,
112 double tuned_accuracy);
113
114template <typename FloatType, Arch Architecture>
115double
117 const {
118 auto const &box_geo = *get_system().box_geo;
119 auto const node_phi =
120 grid_influence_function_self_energy<FloatType, P3M_BRILLOUIN>(
121 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
122
123 double phi = 0.;
124 boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0);
125 phi /= 3. * box_geo.length()[0] * Utils::int_pow<3>(dp3m.params.mesh[0]);
126 return phi * std::numbers::pi;
127}
128
129template <typename FloatType, Arch Architecture>
131 assert(dp3m.params.mesh >= Utils::Vector3i::broadcast(1));
132 assert(dp3m.params.cao >= 1 and dp3m.params.cao <= 7);
133 assert(dp3m.params.alpha > 0.);
134
135 auto const &system = get_system();
136 auto const &box_geo = *system.box_geo;
137 auto const &local_geo = *system.local_geo;
138 auto const verlet_skin = system.cell_structure->get_verlet_skin();
139
140 dp3m.params.cao3 = Utils::int_pow<3>(dp3m.params.cao);
141 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
142
143 assert(dp3m.fft);
144 dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo, verlet_skin, 0.);
145 dp3m.fft_buffers->init_halo();
146 dp3m.fft->init(dp3m.params);
147 dp3m.mesh.ks_pnum = dp3m.fft->get_ks_pnum();
148 dp3m.fft_buffers->init_meshes(dp3m.fft->get_ca_mesh_size());
149 dp3m.update_mesh_views();
150 dp3m.calc_differential_operator();
151
152 /* fix box length dependent constants */
153 scaleby_box_l();
154
156}
157
158namespace {
159template <int cao> struct AssignDipole {
160 void operator()(auto &dp3m, Utils::Vector3d const &real_pos,
161 Utils::Vector3d const &dip) const {
162 using value_type =
163 typename std::remove_reference_t<decltype(dp3m)>::value_type;
164 auto const weights = p3m_calculate_interpolation_weights<cao>(
165 real_pos, dp3m.params.ai, dp3m.local_mesh);
166 p3m_interpolate<cao>(
167 dp3m.local_mesh, weights, [&dip, &dp3m](int ind, double w) {
168 dp3m.mesh.rs_fields[0u][ind] += value_type(w * dip[0u]);
169 dp3m.mesh.rs_fields[1u][ind] += value_type(w * dip[1u]);
170 dp3m.mesh.rs_fields[2u][ind] += value_type(w * dip[2u]);
171 });
172
173 dp3m.inter_weights.template store<cao>(weights);
174 }
175};
176} // namespace
177
178template <typename FloatType, Arch Architecture>
180 ParticleRange const &particles) {
181 dp3m.inter_weights.reset(dp3m.params.cao);
182
183 /* prepare local FFT mesh */
184 for (auto &rs_mesh_field : dp3m.mesh.rs_fields)
185 for (int j = 0; j < dp3m.local_mesh.size; j++)
186 rs_mesh_field[j] = 0.;
187
188 for (auto const &p : particles) {
189 if (p.dipm() != 0.) {
190 Utils::integral_parameter<int, AssignDipole, 1, 7>(dp3m.params.cao, dp3m,
191 p.pos(), p.calc_dip());
192 }
193 }
194}
195
196namespace {
197template <int cao> struct AssignTorques {
198 void operator()(auto &dp3m, double prefac, int d_rs,
199 ParticleRange const &particles) const {
200
201 /* magnetic particle index */
202 auto p_index = std::size_t{0ul};
203
204 for (auto &p : particles) {
205 if (p.dipm() != 0.) {
206 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
207
208 Utils::Vector3d E{};
209 p3m_interpolate(dp3m.local_mesh, weights,
210 [&E, &dp3m, d_rs](int ind, double w) {
211 E[d_rs] += w * double(dp3m.mesh.rs_scalar[ind]);
212 });
213
214 p.torque() -= vector_product(p.calc_dip(), prefac * E);
215 ++p_index;
216 }
217 }
218 }
219};
220
221template <int cao> struct AssignForces {
222 void operator()(auto &dp3m, double prefac, int d_rs,
223 ParticleRange const &particles) const {
224
225 /* magnetic particle index */
226 auto p_index = std::size_t{0ul};
227
228 for (auto &p : particles) {
229 if (p.dipm() != 0.) {
230 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
231
232 Utils::Vector3d E{};
233 p3m_interpolate(dp3m.local_mesh, weights,
234 [&E, &dp3m](int ind, double w) {
235 E[0u] += w * double(dp3m.mesh.rs_fields[0u][ind]);
236 E[1u] += w * double(dp3m.mesh.rs_fields[1u][ind]);
237 E[2u] += w * double(dp3m.mesh.rs_fields[2u][ind]);
238 });
239
240 p.force()[d_rs] += p.calc_dip() * prefac * E;
241 ++p_index;
242 }
243 }
244 }
245};
246} // namespace
247
248template <typename FloatType, Arch Architecture>
250 bool force_flag, bool energy_flag, ParticleRange const &particles) {
251 /* k-space energy */
252 double energy = 0.;
253 auto const &system = get_system();
254 auto const &box_geo = *system.box_geo;
255 auto const dipole_prefac = prefactor / Utils::int_pow<3>(dp3m.params.mesh[0]);
256#ifdef NPT
257 auto const npt_flag =
258 force_flag and (system.propagation->integ_switch == INTEG_METHOD_NPT_ISO);
259#else
260 auto constexpr npt_flag = false;
261#endif
262
263 if (dp3m.sum_mu2 > 0.) {
264 dipole_assign(particles);
265 dp3m.fft_buffers->perform_vector_halo_gather();
266 for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
267 dp3m.fft->forward_fft(rs_mesh);
268 }
269 dp3m.update_mesh_views();
270 }
271
272 /* === k-space energy calculation === */
273 if (energy_flag or npt_flag) {
274 /*********************
275 Dipolar energy
276 **********************/
277 if (dp3m.sum_mu2 > 0.) {
278 /* i*k differentiation for dipolar gradients:
279 * |(\Fourier{\vect{mu}}(k)\cdot \vect{k})|^2 */
280
281 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
282 auto const &offset = dp3m.mesh.start;
283 auto const &d_op = dp3m.d_op[0u];
284 auto const &mesh_dip = dp3m.mesh.rs_fields;
285 auto const [KX, KY, KZ] = dp3m.fft->get_permutations();
286 auto indices = Utils::Vector3i{};
287 auto index = std::size_t(0u);
288 auto it_energy = dp3m.g_energy.begin();
289 auto node_energy = 0.;
290 for_each_3d(mesh_start, dp3m.mesh.size, indices, [&]() {
291 auto const shift = indices + offset;
292 // Re(mu)*k
293 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
294 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
295 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
296 ++index;
297 // Im(mu)*k
298 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
299 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
300 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
301 ++index;
302 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
303 std::advance(it_energy, 1);
304 });
305
306 node_energy *= dipole_prefac * std::numbers::pi * box_geo.length_inv()[0];
307 boost::mpi::reduce(comm_cart, node_energy, energy, std::plus<>(), 0);
308
309 if (dp3m.energy_correction == 0.)
310 calc_energy_correction();
311
312 if (this_node == 0) {
313 /* self energy correction */
314 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
315 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
316
317 /* dipolar energy correction due to systematic Madelung-self effects */
318 energy += prefactor * dp3m.energy_correction / box_geo.volume();
319 }
320 }
321 } // if (energy_flag)
322
323 /* === k-space force calculation === */
324 if (force_flag) {
325 /****************************
326 * DIPOLAR TORQUES (k-space)
327 ****************************/
328 if (dp3m.sum_mu2 > 0.) {
329 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u];
330 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
331 auto const &mesh_stop = dp3m.mesh.size;
332 auto const offset = dp3m.mesh.start;
333 auto const &d_op = dp3m.d_op[0u];
334 auto const [KX, KY, KZ] = dp3m.fft->get_permutations();
335 auto &mesh_dip = dp3m.mesh.rs_fields;
336 auto indices = Utils::Vector3i{};
337 auto index = std::size_t(0u);
338 dp3m.ks_scalar.resize(dp3m.mesh.rs_scalar.size());
339
340 /* fill in ks_scalar array for torque calculation */
341 auto it_energy = dp3m.g_energy.begin();
342 index = 0u;
343 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
344 auto const shift = indices + offset;
345 // Re(mu)*k
346 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
347 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
348 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
349 dp3m.ks_scalar[index] = *it_energy * re;
350 ++index;
351 // Im(mu)*k
352 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
353 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
354 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
355 dp3m.ks_scalar[index] = *it_energy * im;
356 ++index;
357 std::advance(it_energy, 1);
358 });
359
360 /* Force component loop */
361 for (int d = 0; d < 3; d++) {
362 index = 0u;
363 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
364 auto const d_op_val = FloatType(d_op[indices[d] + offset[d]]);
365 dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.ks_scalar[index];
366 ++index;
367 dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.ks_scalar[index];
368 ++index;
369 });
370 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
371 dp3m.fft_buffers->perform_scalar_halo_spread();
372 /* Assign force component from mesh to particle */
373 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
374 Utils::integral_parameter<int, AssignTorques, 1, 7>(
375 dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs, particles);
376 }
377
378 /***************************
379 DIPOLAR FORCES (k-space)
380 ****************************/
381 // Compute forces after torques because the algorithm below overwrites the
382 // grids dp3m.mesh.rs_fields !
383 // Note: I'll do here 9 inverse FFTs. By symmetry, we can reduce this
384 // number to 6 !
385 /* fill in ks_scalar array for force calculation */
386 auto it_force = dp3m.g_force.begin();
387 index = 0u;
388 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
389 auto const shift = indices + offset;
390 // Re(mu)*k
391 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
392 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
393 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
394 ++index;
395 // Im(mu)*k
396 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
397 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
398 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
399 ++index;
400 dp3m.ks_scalar[index - 2] = *it_force * im;
401 dp3m.ks_scalar[index - 1] = *it_force * (-re);
402 std::advance(it_force, 1);
403 });
404
405 /* Force component loop */
406 for (int d = 0; d < 3; d++) {
407 index = 0u;
408 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
409 auto const d_op_val = FloatType(d_op[indices[d] + offset[d]]);
410 auto const shift = indices + offset;
411 auto const f1 = d_op_val * dp3m.ks_scalar[index];
412 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f1;
413 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f1;
414 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f1;
415 ++index;
416 auto const f2 = d_op_val * dp3m.ks_scalar[index];
417 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f2;
418 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f2;
419 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f2;
420 ++index;
421 });
422 for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
423 dp3m.fft->backward_fft(rs_mesh);
424 }
425 dp3m.fft_buffers->perform_vector_halo_spread();
426 /* Assign force component from mesh to particle */
427 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
428 Utils::integral_parameter<int, AssignForces, 1, 7>(
429 dp3m.params.cao, dp3m, dipole_prefac * Utils::sqr(wavenumber), d_rs,
430 particles);
431 }
432 } /* if (dp3m.sum_mu2 > 0) */
433 } /* if (force_flag) */
434
435 if (dp3m.params.epsilon != P3M_EPSILON_METALLIC) {
436 auto const surface_term =
437 calc_surface_term(force_flag, energy_flag or npt_flag, particles);
438 if (this_node == 0) {
439 energy += surface_term;
440 }
441 }
442 if (npt_flag) {
444 fprintf(stderr, "dipolar_P3M at this moment is added to p_vir[0]\n");
445 }
446 if (not energy_flag) {
447 energy = 0.;
448 }
449
450 return energy;
451}
452
453template <typename FloatType, Arch Architecture>
455 bool force_flag, bool energy_flag, ParticleRange const &particles) {
456 auto const &box_geo = *get_system().box_geo;
457 auto const pref = prefactor * 4. * std::numbers::pi / box_geo.volume() /
458 (2. * dp3m.params.epsilon + 1.);
459 auto const n_local_part = particles.size();
460
461 // We put all the dipolar momenta in a the arrays mx,my,mz according to the
462 // id-number of the particles
463 std::vector<double> mx(n_local_part);
464 std::vector<double> my(n_local_part);
465 std::vector<double> mz(n_local_part);
466
467 std::size_t ip = 0u;
468 for (auto const &p : particles) {
469 auto const dip = p.calc_dip();
470 mx[ip] = dip[0u];
471 my[ip] = dip[1u];
472 mz[ip] = dip[2u];
473 ip++;
474 }
475
476 // we will need the sum of all dipolar momenta vectors
477 auto local_dip = Utils::Vector3d{};
478 for (std::size_t i = 0u; i < n_local_part; i++) {
479 local_dip[0u] += mx[i];
480 local_dip[1u] += my[i];
481 local_dip[2u] += mz[i];
482 }
483 auto const box_dip =
484 boost::mpi::all_reduce(comm_cart, local_dip, std::plus<>());
485
486 double energy = 0.;
487 if (energy_flag) {
488 double sum_e = 0.;
489 for (std::size_t i = 0u; i < n_local_part; i++) {
490 sum_e += mx[i] * box_dip[0] + my[i] * box_dip[1] + mz[i] * box_dip[2];
491 }
492 energy =
493 0.5 * pref * boost::mpi::all_reduce(comm_cart, sum_e, std::plus<>());
494 }
495
496 if (force_flag) {
497
498 std::vector<double> sumix(n_local_part);
499 std::vector<double> sumiy(n_local_part);
500 std::vector<double> sumiz(n_local_part);
501
502 for (std::size_t i = 0u; i < n_local_part; i++) {
503 sumix[i] = my[i] * box_dip[2u] - mz[i] * box_dip[1u];
504 sumiy[i] = mz[i] * box_dip[0u] - mx[i] * box_dip[2u];
505 sumiz[i] = mx[i] * box_dip[1u] - my[i] * box_dip[0u];
506 }
507
508 ip = 0u;
509 for (auto &p : particles) {
510 auto &torque = p.torque();
511 torque[0u] -= pref * sumix[ip];
512 torque[1u] -= pref * sumiy[ip];
513 torque[2u] -= pref * sumiz[ip];
514 ip++;
515 }
516 }
517
518 return energy;
519}
520
521template <typename FloatType, Arch Architecture>
523 dp3m.g_force = grid_influence_function<FloatType, 3, P3M_BRILLOUIN>(
524 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
525 get_system().box_geo->length_inv());
526}
527
528template <typename FloatType, Arch Architecture>
530 dp3m.g_energy = grid_influence_function<FloatType, 2, P3M_BRILLOUIN>(
531 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
532 get_system().box_geo->length_inv());
533}
534
535template <typename FloatType, Arch Architecture>
538 int m_mesh_max = -1, m_mesh_min = -1;
539 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
540
541public:
542 DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m,
543 double prefactor, int timings,
544 decltype(m_tune_limits) tune_limits)
545 : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m},
546 m_tune_limits{std::move(tune_limits)} {}
547
548 P3MParameters &get_params() override { return dp3m.params; }
549
550 void on_solver_change() const override { m_system.on_dipoles_change(); }
551
552 std::optional<std::string>
553 layer_correction_veto_r_cut(double) const override {
554 return {};
555 }
556
557 void setup_logger(bool verbose) override {
558 auto const &box_geo = *m_system.box_geo;
559 m_logger = std::make_unique<TuningLogger>(
560 verbose and this_node == 0, "DipolarP3M", TuningLogger::Mode::Dipolar);
561 m_logger->tuning_goals(dp3m.params.accuracy, m_prefactor,
562 box_geo.length()[0], dp3m.sum_dip_part,
563 dp3m.sum_mu2);
564 m_logger->log_tuning_start();
565 }
566
567 std::tuple<double, double, double, double>
569 double r_cut_iL) const override {
570
571 double alpha_L, rs_err, ks_err;
572 auto const &box_geo = *m_system.box_geo;
573
574 /* calc maximal real space error for setting */
575 rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL,
576 dp3m.sum_dip_part, dp3m.sum_mu2, 0.001);
577 // alpha cannot be zero for dipoles because real-space formula breaks down
578
579 if (std::numbers::sqrt2 * rs_err > dp3m.params.accuracy) {
580 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
581 alpha_L = dp3m_rtbisection(
582 box_geo.length()[0], r_cut_iL, dp3m.sum_dip_part, dp3m.sum_mu2,
583 0.0001 * box_geo.length()[0], 5. * box_geo.length()[0], 0.0001,
584 dp3m.params.accuracy);
585 } else {
586 /* even alpha=0 is ok, however, we cannot choose it since it kills the
587 k-space error formula.
588 Anyways, this very likely NOT the optimal solution */
589 alpha_L = 0.1;
590 }
591
592 /* calculate real-space and k-space error for this alpha_L */
593 rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL,
594 dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L);
595 ks_err = dp3m_k_space_error(box_geo.length()[0], mesh[0], cao,
596 dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L);
597
598 return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L};
599 }
600
601 void determine_mesh_limits() override {
602 if (dp3m.params.mesh[0] == -1) {
603 /* simple heuristic to limit the tried meshes if the accuracy cannot
604 be obtained with smaller meshes, but normally not all these
605 meshes have to be tested */
606 auto const expo = std::log(std::cbrt(dp3m.sum_dip_part)) / std::log(2.);
607 /* Medium-educated guess for the minimal mesh */
608 m_mesh_min = static_cast<int>(std::round(std::pow(2., std::floor(expo))));
609 /* avoid using more than 1 GB of FFT arrays */
610 m_mesh_max = 128;
611 if (m_tune_limits.first) {
612 m_mesh_min = *m_tune_limits.first;
613 }
614 if (m_tune_limits.second) {
615 m_mesh_max = *m_tune_limits.second;
616 }
617 } else {
618 m_mesh_min = m_mesh_max = dp3m.params.mesh[0];
619 m_logger->report_fixed_mesh(dp3m.params.mesh);
620 }
621 }
622
624 auto tuned_params = TuningAlgorithm::Parameters{};
625 auto time_best = time_sentinel;
626 for (auto tmp_mesh = m_mesh_min; tmp_mesh <= m_mesh_max; tmp_mesh += 2) {
627 auto trial_params = TuningAlgorithm::Parameters{};
628 trial_params.mesh = Utils::Vector3i::broadcast(tmp_mesh);
629 trial_params.cao = cao_best;
630
631 auto const trial_time =
632 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
633 trial_params.alpha_L, trial_params.accuracy);
634
635 /* this mesh does not work at all */
636 if (trial_time < 0.)
637 continue;
638
639 /* the optimum r_cut for this mesh is the upper limit for higher meshes,
640 everything else is slower */
641 m_r_cut_iL_max = trial_params.r_cut_iL;
642
643 if (trial_time < time_best) {
644 /* new optimum */
646 tuned_params = trial_params;
647 time_best = tuned_params.time = trial_time;
648 } else if (trial_time > time_best + time_granularity or
650 /* no hope of further optimisation */
651 break;
652 }
653 }
654 return tuned_params;
655 }
656};
657
658template <typename FloatType, Arch Architecture>
660 auto &system = get_system();
661 auto const &box_geo = *system.box_geo;
662 if (dp3m.params.alpha_L == 0. and dp3m.params.alpha != 0.) {
663 dp3m.params.alpha_L = dp3m.params.alpha * box_geo.length()[0];
664 }
665 if (dp3m.params.r_cut_iL == 0. and dp3m.params.r_cut != 0.) {
666 dp3m.params.r_cut_iL = dp3m.params.r_cut * box_geo.length_inv()[0];
667 }
668 if (not is_tuned()) {
670 if (dp3m.sum_dip_part == 0) {
671 throw std::runtime_error(
672 "DipolarP3M: no dipolar particles in the system");
673 }
674 try {
676 system, dp3m, prefactor, tune_timings, tune_limits);
677 parameters.setup_logger(tune_verbose);
678 // parameter ranges
679 parameters.determine_mesh_limits();
680 parameters.determine_r_cut_limits();
681 parameters.determine_cao_limits(3);
682 // run tuning algorithm
683 parameters.tune();
684 m_is_tuned = true;
685 system.on_dipoles_change();
686 } catch (...) {
687 dp3m.params.tuning = false;
688 throw;
689 }
690 }
691 init();
692}
693
694/** Tuning dipolar-P3M */
695static auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh,
696 double mesh_i, int cao, double alpha_L_i) {
697
698 auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
699 auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
700 auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i);
701 auto alias1 = 0.;
702 auto alias2 = 0.;
703
704 Utils::Vector3i indices{};
705 Utils::Vector3i nm{};
706 Utils::Vector3d fnm{};
708 mesh_start, mesh_stop, indices,
709 [&]() {
710 auto const norm_sq = nm.norm2();
711 auto const ex = std::exp(-factor1 * norm_sq);
712 auto const U2 = std::pow(Utils::product(fnm), 2 * cao);
713 alias1 += Utils::sqr(ex) * norm_sq;
714 alias2 += U2 * ex * std::pow(shift * nm, 3) / norm_sq;
715 },
716 [&](unsigned dim, int n) {
717 nm[dim] = shift[dim] + n * mesh;
718 fnm[dim] = math::sinc(nm[dim] * mesh_i);
719 });
720
721 return std::make_pair(alias1, alias2);
722}
723
724/** Calculate the k-space error of dipolar-P3M */
725static double dp3m_k_space_error(double box_size, int mesh, int cao,
726 int n_c_part, double sum_q2, double alpha_L) {
727
728 auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao);
729 auto const mesh_i = 1. / static_cast<double>(mesh);
730 auto const alpha_L_i = 1. / alpha_L;
731 auto const mesh_stop = Utils::Vector3i::broadcast(mesh / 2);
732 auto const mesh_start = -mesh_stop;
733 auto indices = Utils::Vector3i{};
734 auto values = Utils::Vector3d{};
735 auto he_q = 0.;
736
738 mesh_start, mesh_stop, indices,
739 [&]() {
740 if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
741 auto const n2 = indices.norm2();
742 auto const cs = Utils::product(values);
743 auto const [alias1, alias2] =
744 dp3m_tune_aliasing_sums(indices, mesh, mesh_i, cao, alpha_L_i);
745 auto const d =
746 alias1 - Utils::sqr(alias2 / cs) /
747 Utils::int_pow<3>(static_cast<double>(n2));
748 /* at high precision, d can become negative due to extinction;
749 also, don't take values that have no significant digits left*/
750 if (d > 0. and std::fabs(d / alias1) > ROUND_ERROR_PREC)
751 he_q += d;
752 }
753 },
754 [&values, &mesh_i, cotangent_sum](unsigned dim, int n) {
755 values[dim] = cotangent_sum(n, mesh_i);
756 });
757
758 return 8. * Utils::sqr(std::numbers::pi) / 3. * sum_q2 *
759 sqrt(he_q / n_c_part) / Utils::int_pow<4>(box_size);
760}
761
762/** Calculate the value of the errors for the REAL part of the force in terms
763 * of the splitting parameter alpha of Ewald. Based on eq. (33) @cite wang01a.
764 *
765 * Please note that in this more refined approach we don't use
766 * eq. (37), but eq. (33) which maintains all the powers in alpha.
767 */
768static double dp3m_real_space_error(double box_size, double r_cut_iL,
769 int n_c_part, double sum_q2,
770 double alpha_L) {
771 double d_error_f, d_cc, d_dc, d_con;
772
773 auto const d_rcut = r_cut_iL * box_size;
774 auto const d_rcut2 = Utils::sqr(d_rcut);
775 auto const d_rcut4 = Utils::sqr(d_rcut2);
776
777 auto const d_a2 = Utils::sqr(alpha_L) / Utils::sqr(box_size);
778
779 auto const d_c = sum_q2 * exp(-d_a2 * d_rcut2);
780
781 d_cc = 4. * Utils::sqr(d_a2) * Utils::sqr(d_rcut2) + 6. * d_a2 * d_rcut2 + 3.;
782
783 d_dc = 8. * Utils::int_pow<3>(d_a2) * Utils::int_pow<3>(d_rcut2) +
784 20. * Utils::sqr(d_a2) * d_rcut4 + 30. * d_a2 * d_rcut2 + 15.;
785
786 d_con = 1. / sqrt(Utils::int_pow<3>(box_size) * Utils::sqr(d_a2) * d_rcut *
787 Utils::sqr(d_rcut4) * static_cast<double>(n_c_part));
788
789 d_error_f = d_c * d_con *
790 sqrt((13. / 6.) * Utils::sqr(d_cc) +
791 (2. / 15.) * Utils::sqr(d_dc) - (13. / 15.) * d_cc * d_dc);
792
793 return d_error_f;
794}
795
796/** Using bisection, find the root of a function "func-tuned_accuracy/sqrt(2.)"
797 * known to lie between x1 and x2. The root, returned as rtbis, will be
798 * refined until its accuracy is \f$\pm\f$ @p xacc.
799 */
800double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part,
801 double sum_q2, double x1, double x2, double xacc,
802 double tuned_accuracy) {
803 constexpr int JJ_RTBIS_MAX = 40;
804
805 auto const constant = tuned_accuracy / std::numbers::sqrt2;
806
807 auto const f1 =
808 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, x1) -
809 constant;
810 auto const f2 =
811 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, x2) -
812 constant;
813 if (f1 * f2 >= 0.0) {
814 throw std::runtime_error(
815 "Root must be bracketed for bisection in dp3m_rtbisection");
816 }
817 // Orient the search dx, and set rtb to x1 or x2 ...
818 double dx;
819 double rtb = f1 < 0.0 ? (dx = x2 - x1, x1) : (dx = x1 - x2, x2);
820 for (int j = 1; j <= JJ_RTBIS_MAX; j++) {
821 auto const xmid = rtb + (dx *= 0.5);
822 auto const fmid =
823 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, xmid) -
824 constant;
825 if (fmid <= 0.0)
826 rtb = xmid;
827 if (fabs(dx) < xacc || fmid == 0.0)
828 return rtb;
829 }
830 throw std::runtime_error("Too many bisections in dp3m_rtbisection");
831}
832
834 auto const &system = get_system();
835 auto const &box_geo = *system.box_geo;
836 auto const &local_geo = *system.local_geo;
837 for (auto i = 0u; i < 3u; i++) {
838 /* check k-space cutoff */
839 if (dp3m_params.cao_cut[i] >= box_geo.length_half()[i]) {
840 std::stringstream msg;
841 msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i]
842 << " is larger than half of box dimension " << box_geo.length()[i];
843 throw std::runtime_error(msg.str());
844 }
845 if (dp3m_params.cao_cut[i] >= local_geo.length()[i]) {
846 std::stringstream msg;
847 msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i]
848 << " is larger than local box dimension " << local_geo.length()[i];
849 throw std::runtime_error(msg.str());
850 }
851 }
852
853 if ((box_geo.length()[0] != box_geo.length()[1]) or
854 (box_geo.length()[1] != box_geo.length()[2])) {
855 throw std::runtime_error("DipolarP3M: requires a cubic box");
856 }
857}
858
860 auto const &box_geo = *get_system().box_geo;
861 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
862 throw std::runtime_error(
863 "DipolarP3M: requires periodicity (True, True, True)");
864 }
865}
866
868 auto const &local_geo = *get_system().local_geo;
869 if (local_geo.cell_structure_type() != CellStructureType::REGULAR and
870 local_geo.cell_structure_type() != CellStructureType::HYBRID) {
871 throw std::runtime_error(
872 "DipolarP3M: requires the regular or hybrid decomposition cell system");
873 }
874 if (::communicator.size > 1 and
875 local_geo.cell_structure_type() == CellStructureType::HYBRID) {
876 throw std::runtime_error(
877 "DipolarP3M: does not work with the hybrid decomposition cell system, "
878 "if using more than one MPI node");
879 }
880}
881
883 auto const &node_grid = ::communicator.node_grid;
884 if (node_grid[0] < node_grid[1] or node_grid[1] < node_grid[2]) {
885 throw std::runtime_error(
886 "DipolarP3M: node grid must be sorted, largest first");
887 }
888}
889
890template <typename FloatType, Arch Architecture>
892 auto const &box_geo = *get_system().box_geo;
893 dp3m.params.r_cut = dp3m.params.r_cut_iL * box_geo.length()[0];
894 dp3m.params.alpha = dp3m.params.alpha_L * box_geo.length_inv()[0];
895 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
897 sanity_checks_boxl();
898 calc_influence_function_force();
899 calc_influence_function_energy();
900 dp3m.energy_correction = 0.;
901}
902
903template <typename FloatType, Arch Architecture>
905 auto const &box_geo = *get_system().box_geo;
906 auto const Ukp3m = calc_average_self_energy_k_space() * box_geo.volume();
907 auto const Ewald_volume = Utils::int_pow<3>(dp3m.params.alpha_L);
908 auto const Eself = -2. * Ewald_volume * std::numbers::inv_sqrtpi / 3.;
909 dp3m.energy_correction =
910 -dp3m.sum_mu2 * (Ukp3m + Eself + 2. * std::numbers::pi / 3.);
911}
912
913#ifdef NPT
917#endif // NPT
918
921
922#endif // DP3M
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
@ INTEG_METHOD_NPT_ISO
Vector implementation and trait types for boost qvm interoperability.
void on_solver_change() const override
Definition dp3m.cpp:550
void setup_logger(bool verbose) override
Definition dp3m.cpp:557
TuningAlgorithm::Parameters get_time() override
Definition dp3m.cpp:623
P3MParameters & get_params() override
Definition dp3m.cpp:548
DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
Definition dp3m.cpp:542
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
Definition dp3m.cpp:568
std::optional< std::string > layer_correction_veto_r_cut(double) const override
Definition dp3m.cpp:553
void determine_mesh_limits() override
Definition dp3m.cpp:601
A range of particles.
base_type::size_type size() const
Main system class.
std::shared_ptr< LocalBox > local_geo
std::shared_ptr< CellStructure > cell_structure
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
T norm() const
Definition Vector.hpp:138
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value)
Create a vector that has all entries set to the same value.
Definition Vector.hpp:110
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
This file contains the defaults for ESPResSo.
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
Definition config.hpp:66
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
Definition config.hpp:53
__device__ void vector_product(float const *a, float const *b, float *out)
static int count_magnetic_particles(System::System const &system)
Definition dlc.cpp:399
static auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, double mesh_i, int cao, double alpha_L_i)
Tuning dipolar-P3M.
Definition dp3m.cpp:695
static double dp3m_k_space_error(double box_size, int mesh, int cao, int n_c_part, double sum_q2, double alpha_L)
Calculate the k-space error of dipolar-P3M.
Definition dp3m.cpp:725
void npt_add_virial_magnetic_contribution(double energy)
Update the NpT virial.
Definition dp3m.cpp:914
double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy)
Compute the value of alpha through a bisection method.
Definition dp3m.cpp:800
static double dp3m_real_space_error(double box_size, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L)
Calculate the value of the errors for the REAL part of the force in terms of the splitting parameter ...
Definition dp3m.cpp:768
P3M algorithm for long-range magnetic dipole-dipole interaction.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
Routines, row decomposition, data structures and communication for the 3D-FFT.
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, InterpolationWeights< cao > const &weights, Kernel kernel)
P3M grid interpolation.
T product(Vector< T, N > const &v)
Definition Vector.hpp:374
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
Definition sqr.hpp:28
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
Definition math.hpp:53
auto get_analytic_cotangent_sum_kernel(int cao)
Definition math.hpp:128
void npt_add_virial_contribution(double energy)
Definition npt.cpp:141
Exports for the NpT code.
Common functions for dipolar and charge P3M.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
Utils::Vector3i node_grid
void calc_energy_correction() override
Definition dp3m.cpp:904
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
Definition dp3m.cpp:249
double calc_surface_term(bool force_flag, bool energy_flag, ParticleRange const &particles) override
Definition dp3m.cpp:454
void dipole_assign(ParticleRange const &particles) override
Definition dp3m.cpp:179
void init_cpu_kernels()
Definition dp3m.cpp:130
void calc_influence_function_energy() override
Definition dp3m.cpp:529
void count_magnetic_particles() override
Definition dp3m.cpp:85
void calc_influence_function_force() override
Definition dp3m.cpp:522
void tune() override
Definition dp3m.cpp:659
void scaleby_box_l() override
Definition dp3m.cpp:891
double calc_average_self_energy_k_space() const override
Definition dp3m.cpp:116
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
Definition dp3m.cpp:833
void sanity_checks_cell_structure() const
Definition dp3m.cpp:867
P3MParameters const & dp3m_params
Definition dp3m.hpp:60
void sanity_checks_periodicity() const
Definition dp3m.cpp:859
void sanity_checks_node_grid() const
Definition dp3m.cpp:882
void recalc_ld_pos(P3MParameters const &params)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
void operator()(auto &dp3m, Utils::Vector3d const &real_pos, Utils::Vector3d const &dip) const
Definition dp3m.cpp:160
void operator()(auto &dp3m, double prefac, int d_rs, ParticleRange const &particles) const
Definition dp3m.cpp:222
void operator()(auto &dp3m, double prefac, int d_rs, ParticleRange const &particles) const
Definition dp3m.cpp:198
double energy_correction
cached k-space self-energy correction
Definition dp3m.impl.hpp:58
int sum_dip_part
number of dipolar particles (only on head node).
Definition dp3m.impl.hpp:50
double sum_mu2
Sum of square of magnetic dipoles (only on head node).
Definition dp3m.impl.hpp:52
P3MLocalMesh local_mesh
Local mesh properties.
P3MParameters params
P3M base parameters.