ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
dp3m.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2024 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22/** @file
23 * P3M algorithm for long-range magnetic dipole-dipole interaction.
24 *
25 * By default the magnetic epsilon is metallic = 0.
26 */
27
28#include "config/config.hpp"
29
30#ifdef DP3M
31
34
35#include "fft/fft.hpp"
37#include "p3m/TuningLogger.hpp"
38#include "p3m/common.hpp"
40#include "p3m/interpolation.hpp"
41#include "p3m/math.hpp"
42
43#include "BoxGeometry.hpp"
44#include "LocalBox.hpp"
45#include "Particle.hpp"
46#include "ParticleRange.hpp"
47#include "PropagationMode.hpp"
50#include "communication.hpp"
51#include "errorhandling.hpp"
53#include "npt.hpp"
54#include "system/System.hpp"
55#include "tuning.hpp"
56
57#include <utils/Vector.hpp>
60#include <utils/math/sqr.hpp>
61
62#include <boost/mpi/collectives/all_reduce.hpp>
63#include <boost/mpi/collectives/reduce.hpp>
64
65#include <algorithm>
66#include <array>
67#include <cstddef>
68#include <cstdio>
69#include <functional>
70#include <iterator>
71#include <numbers>
72#include <optional>
73#include <span>
74#include <sstream>
75#include <stdexcept>
76#include <tuple>
77#include <vector>
78
79#ifdef FFTW3_H
80#error "The FFTW3 library shouldn't be visible in this translation unit"
81#endif
82
83template <typename FloatType, Arch Architecture>
85 int local_n = 0;
86 double local_mu2 = 0.;
87
88 for (auto const &p : get_system().cell_structure->local_particles()) {
89 if (p.dipm() != 0.) {
90 local_mu2 += p.calc_dip().norm2();
91 local_n++;
92 }
93 }
94
95 boost::mpi::all_reduce(comm_cart, local_mu2, dp3m.sum_mu2, std::plus<>());
96 boost::mpi::all_reduce(comm_cart, local_n, dp3m.sum_dip_part, std::plus<>());
97}
98
99static double dp3m_k_space_error(double box_size, int mesh, int cao,
100 int n_c_part, double sum_q2, double alpha_L);
101
102static double dp3m_real_space_error(double box_size, double r_cut_iL,
103 int n_c_part, double sum_q2,
104 double alpha_L);
105
106/** Compute the value of alpha through a bisection method.
107 * Based on eq. (33) @cite wang01a.
108 */
109double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part,
110 double sum_q2, double x1, double x2, double xacc,
111 double tuned_accuracy);
112
113template <typename FloatType, Arch Architecture>
114double
116 const {
117 auto const &box_geo = *get_system().box_geo;
118 auto const node_phi = grid_influence_function_self_energy(
119 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
120
121 double phi = 0.;
122 boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0);
123 phi /= 3. * box_geo.length()[0] * Utils::int_pow<3>(dp3m.params.mesh[0]);
124 return phi * std::numbers::pi;
125}
126
127template <typename FloatType, Arch Architecture>
129 assert(dp3m.params.mesh >= Utils::Vector3i::broadcast(1));
130 assert(dp3m.params.cao >= 1 and dp3m.params.cao <= 7);
131 assert(dp3m.params.alpha > 0.);
132
133 auto const &system = get_system();
134 auto const &box_geo = *system.box_geo;
135 auto const &local_geo = *system.local_geo;
136 auto const verlet_skin = system.cell_structure->get_verlet_skin();
137
138 dp3m.params.cao3 = Utils::int_pow<3>(dp3m.params.cao);
139 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
140
141 assert(dp3m.fft);
142 dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo, verlet_skin, 0.);
143 dp3m.fft_buffers->init_halo();
144 dp3m.fft->init(dp3m.params);
145 dp3m.mesh.ks_pnum = dp3m.fft->get_ks_pnum();
146 dp3m.fft_buffers->init_meshes(dp3m.fft->get_ca_mesh_size());
147 dp3m.update_mesh_views();
148 dp3m.calc_differential_operator();
149
150 /* fix box length dependent constants */
151 scaleby_box_l();
152
154}
155
156namespace {
157template <int cao> struct AssignDipole {
158 void operator()(auto &dp3m, Utils::Vector3d const &real_pos,
159 Utils::Vector3d const &dip) const {
160 using value_type =
161 typename std::remove_reference_t<decltype(dp3m)>::value_type;
162 auto const weights = p3m_calculate_interpolation_weights<cao>(
163 real_pos, dp3m.params.ai, dp3m.local_mesh);
164 p3m_interpolate<cao>(
165 dp3m.local_mesh, weights, [&dip, &dp3m](int ind, double w) {
166 dp3m.mesh.rs_fields[0u][ind] += value_type(w * dip[0u]);
167 dp3m.mesh.rs_fields[1u][ind] += value_type(w * dip[1u]);
168 dp3m.mesh.rs_fields[2u][ind] += value_type(w * dip[2u]);
169 });
170
171 dp3m.inter_weights.template store<cao>(weights);
172 }
173};
174} // namespace
175
176template <typename FloatType, Arch Architecture>
178 ParticleRange const &particles) {
179 dp3m.inter_weights.reset(dp3m.params.cao);
180
181 /* prepare local FFT mesh */
182 for (auto &rs_mesh_field : dp3m.mesh.rs_fields)
183 for (int j = 0; j < dp3m.local_mesh.size; j++)
184 rs_mesh_field[j] = 0.;
185
186 for (auto const &p : particles) {
187 if (p.dipm() != 0.) {
188 Utils::integral_parameter<int, AssignDipole, 1, 7>(dp3m.params.cao, dp3m,
189 p.pos(), p.calc_dip());
190 }
191 }
192}
193
194namespace {
195template <int cao> struct AssignTorques {
196 void operator()(auto &dp3m, double prefac, int d_rs,
197 ParticleRange const &particles) const {
198
199 /* magnetic particle index */
200 auto p_index = std::size_t{0ul};
201
202 for (auto &p : particles) {
203 if (p.dipm() != 0.) {
204 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
205
206 Utils::Vector3d E{};
207 p3m_interpolate(dp3m.local_mesh, weights,
208 [&E, &dp3m, d_rs](int ind, double w) {
209 E[d_rs] += w * double(dp3m.mesh.rs_scalar[ind]);
210 });
211
212 p.torque() -= vector_product(p.calc_dip(), prefac * E);
213 ++p_index;
214 }
215 }
216 }
217};
218
219template <int cao> struct AssignForces {
220 void operator()(auto &dp3m, double prefac, int d_rs,
221 ParticleRange const &particles) const {
222
223 /* magnetic particle index */
224 auto p_index = std::size_t{0ul};
225
226 for (auto &p : particles) {
227 if (p.dipm() != 0.) {
228 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
229
230 Utils::Vector3d E{};
231 p3m_interpolate(dp3m.local_mesh, weights,
232 [&E, &dp3m](int ind, double w) {
233 E[0u] += w * double(dp3m.mesh.rs_fields[0u][ind]);
234 E[1u] += w * double(dp3m.mesh.rs_fields[1u][ind]);
235 E[2u] += w * double(dp3m.mesh.rs_fields[2u][ind]);
236 });
237
238 p.force()[d_rs] += p.calc_dip() * prefac * E;
239 ++p_index;
240 }
241 }
242 }
243};
244} // namespace
245
246template <typename FloatType, Arch Architecture>
248 bool force_flag, bool energy_flag, ParticleRange const &particles) {
249 /* k-space energy */
250 double energy = 0.;
251 auto const &system = get_system();
252 auto const &box_geo = *system.box_geo;
253 auto const dipole_prefac = prefactor / Utils::int_pow<3>(dp3m.params.mesh[0]);
254#ifdef NPT
255 auto const npt_flag =
256 force_flag and (system.propagation->integ_switch == INTEG_METHOD_NPT_ISO);
257#else
258 auto constexpr npt_flag = false;
259#endif
260
261 if (dp3m.sum_mu2 > 0.) {
262 dipole_assign(particles);
263 dp3m.fft_buffers->perform_vector_halo_gather();
264 for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
265 dp3m.fft->forward_fft(rs_mesh);
266 }
267 dp3m.update_mesh_views();
268 }
269
270 /* === k-space energy calculation === */
271 if (energy_flag or npt_flag) {
272 /*********************
273 Dipolar energy
274 **********************/
275 if (dp3m.sum_mu2 > 0.) {
276 /* i*k differentiation for dipolar gradients:
277 * |(\Fourier{\vect{mu}}(k)\cdot \vect{k})|^2 */
278
279 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
280 auto const &offset = dp3m.mesh.start;
281 auto const &d_op = dp3m.d_op[0u];
282 auto const &mesh_dip = dp3m.mesh.rs_fields;
283 auto const [KX, KY, KZ] = dp3m.fft->get_permutations();
284 auto indices = Utils::Vector3i{};
285 auto index = std::size_t(0u);
286 auto it_energy = dp3m.g_energy.begin();
287 auto node_energy = 0.;
288 for_each_3d(mesh_start, dp3m.mesh.size, indices, [&]() {
289 auto const shift = indices + offset;
290 // Re(mu)*k
291 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
292 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
293 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
294 ++index;
295 // Im(mu)*k
296 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
297 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
298 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
299 ++index;
300 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
301 std::advance(it_energy, 1);
302 });
303
304 node_energy *= dipole_prefac * std::numbers::pi * box_geo.length_inv()[0];
305 boost::mpi::reduce(comm_cart, node_energy, energy, std::plus<>(), 0);
306
307 if (dp3m.energy_correction == 0.)
308 calc_energy_correction();
309
310 if (this_node == 0) {
311 /* self energy correction */
312 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
313 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
314
315 /* dipolar energy correction due to systematic Madelung-self effects */
316 energy += prefactor * dp3m.energy_correction / box_geo.volume();
317 }
318 }
319 } // if (energy_flag)
320
321 /* === k-space force calculation === */
322 if (force_flag) {
323 /****************************
324 * DIPOLAR TORQUES (k-space)
325 ****************************/
326 if (dp3m.sum_mu2 > 0.) {
327 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u];
328 auto constexpr mesh_start = Utils::Vector3i::broadcast(0);
329 auto const &mesh_stop = dp3m.mesh.size;
330 auto const offset = dp3m.mesh.start;
331 auto const &d_op = dp3m.d_op[0u];
332 auto const [KX, KY, KZ] = dp3m.fft->get_permutations();
333 auto &mesh_dip = dp3m.mesh.rs_fields;
334 auto indices = Utils::Vector3i{};
335 auto index = std::size_t(0u);
336 dp3m.ks_scalar.resize(dp3m.mesh.rs_scalar.size());
337
338 /* fill in ks_scalar array for torque calculation */
339 auto it_energy = dp3m.g_energy.begin();
340 index = 0u;
341 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
342 auto const shift = indices + offset;
343 // Re(mu)*k
344 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
345 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
346 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
347 dp3m.ks_scalar[index] = *it_energy * re;
348 ++index;
349 // Im(mu)*k
350 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
351 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
352 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
353 dp3m.ks_scalar[index] = *it_energy * im;
354 ++index;
355 std::advance(it_energy, 1);
356 });
357
358 /* Force component loop */
359 for (int d = 0; d < 3; d++) {
360 index = 0u;
361 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
362 auto const d_op_val = FloatType(d_op[indices[d] + offset[d]]);
363 dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.ks_scalar[index];
364 ++index;
365 dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.ks_scalar[index];
366 ++index;
367 });
368 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
369 dp3m.fft_buffers->perform_scalar_halo_spread();
370 /* Assign force component from mesh to particle */
371 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
372 Utils::integral_parameter<int, AssignTorques, 1, 7>(
373 dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs, particles);
374 }
375
376 /***************************
377 DIPOLAR FORCES (k-space)
378 ****************************/
379 // Compute forces after torques because the algorithm below overwrites the
380 // grids dp3m.mesh.rs_fields !
381 // Note: I'll do here 9 inverse FFTs. By symmetry, we can reduce this
382 // number to 6 !
383 /* fill in ks_scalar array for force calculation */
384 auto it_force = dp3m.g_force.begin();
385 index = 0u;
386 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
387 auto const shift = indices + offset;
388 // Re(mu)*k
389 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
390 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
391 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
392 ++index;
393 // Im(mu)*k
394 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
395 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
396 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
397 ++index;
398 dp3m.ks_scalar[index - 2] = *it_force * im;
399 dp3m.ks_scalar[index - 1] = *it_force * (-re);
400 std::advance(it_force, 1);
401 });
402
403 /* Force component loop */
404 for (int d = 0; d < 3; d++) {
405 index = 0u;
406 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
407 auto const d_op_val = FloatType(d_op[indices[d] + offset[d]]);
408 auto const shift = indices + offset;
409 auto const f1 = d_op_val * dp3m.ks_scalar[index];
410 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f1;
411 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f1;
412 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f1;
413 ++index;
414 auto const f2 = d_op_val * dp3m.ks_scalar[index];
415 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f2;
416 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f2;
417 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f2;
418 ++index;
419 });
420 for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
421 dp3m.fft->backward_fft(rs_mesh);
422 }
423 dp3m.fft_buffers->perform_vector_halo_spread();
424 /* Assign force component from mesh to particle */
425 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
426 Utils::integral_parameter<int, AssignForces, 1, 7>(
427 dp3m.params.cao, dp3m, dipole_prefac * Utils::sqr(wavenumber), d_rs,
428 particles);
429 }
430 } /* if (dp3m.sum_mu2 > 0) */
431 } /* if (force_flag) */
432
433 if (dp3m.params.epsilon != P3M_EPSILON_METALLIC) {
434 auto const surface_term =
435 calc_surface_term(force_flag, energy_flag or npt_flag, particles);
436 if (this_node == 0) {
437 energy += surface_term;
438 }
439 }
440 if (npt_flag) {
442 fprintf(stderr, "dipolar_P3M at this moment is added to p_vir[0]\n");
443 }
444 if (not energy_flag) {
445 energy = 0.;
446 }
447
448 return energy;
449}
450
451template <typename FloatType, Arch Architecture>
453 bool force_flag, bool energy_flag, ParticleRange const &particles) {
454 auto const &box_geo = *get_system().box_geo;
455 auto const pref = prefactor * 4. * std::numbers::pi / box_geo.volume() /
456 (2. * dp3m.params.epsilon + 1.);
457 auto const n_local_part = particles.size();
458
459 // We put all the dipolar momenta in a the arrays mx,my,mz according to the
460 // id-number of the particles
461 std::vector<double> mx(n_local_part);
462 std::vector<double> my(n_local_part);
463 std::vector<double> mz(n_local_part);
464
465 std::size_t ip = 0u;
466 for (auto const &p : particles) {
467 auto const dip = p.calc_dip();
468 mx[ip] = dip[0u];
469 my[ip] = dip[1u];
470 mz[ip] = dip[2u];
471 ip++;
472 }
473
474 // we will need the sum of all dipolar momenta vectors
475 auto local_dip = Utils::Vector3d{};
476 for (std::size_t i = 0u; i < n_local_part; i++) {
477 local_dip[0u] += mx[i];
478 local_dip[1u] += my[i];
479 local_dip[2u] += mz[i];
480 }
481 auto const box_dip =
482 boost::mpi::all_reduce(comm_cart, local_dip, std::plus<>());
483
484 double energy = 0.;
485 if (energy_flag) {
486 double sum_e = 0.;
487 for (std::size_t i = 0u; i < n_local_part; i++) {
488 sum_e += mx[i] * box_dip[0] + my[i] * box_dip[1] + mz[i] * box_dip[2];
489 }
490 energy =
491 0.5 * pref * boost::mpi::all_reduce(comm_cart, sum_e, std::plus<>());
492 }
493
494 if (force_flag) {
495
496 std::vector<double> sumix(n_local_part);
497 std::vector<double> sumiy(n_local_part);
498 std::vector<double> sumiz(n_local_part);
499
500 for (std::size_t i = 0u; i < n_local_part; i++) {
501 sumix[i] = my[i] * box_dip[2u] - mz[i] * box_dip[1u];
502 sumiy[i] = mz[i] * box_dip[0u] - mx[i] * box_dip[2u];
503 sumiz[i] = mx[i] * box_dip[1u] - my[i] * box_dip[0u];
504 }
505
506 ip = 0u;
507 for (auto &p : particles) {
508 auto &torque = p.torque();
509 torque[0u] -= pref * sumix[ip];
510 torque[1u] -= pref * sumiy[ip];
511 torque[2u] -= pref * sumiz[ip];
512 ip++;
513 }
514 }
515
516 return energy;
517}
518
519template <typename FloatType, Arch Architecture>
521 dp3m.g_force = grid_influence_function<FloatType, 3>(
522 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
523 get_system().box_geo->length_inv());
524}
525
526template <typename FloatType, Arch Architecture>
528 dp3m.g_energy = grid_influence_function<FloatType, 2>(
529 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
530 get_system().box_geo->length_inv());
531}
532
533template <typename FloatType, Arch Architecture>
536 int m_mesh_max = -1, m_mesh_min = -1;
537
538public:
539 DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m,
540 double prefactor, int timings)
541 : TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m} {}
542
543 P3MParameters &get_params() override { return dp3m.params; }
544
545 void on_solver_change() const override { m_system.on_dipoles_change(); }
546
547 std::optional<std::string>
548 layer_correction_veto_r_cut(double) const override {
549 return {};
550 }
551
552 void setup_logger(bool verbose) override {
553 auto const &box_geo = *m_system.box_geo;
554 m_logger = std::make_unique<TuningLogger>(
555 verbose and this_node == 0, "DipolarP3M", TuningLogger::Mode::Dipolar);
556 m_logger->tuning_goals(dp3m.params.accuracy, m_prefactor,
557 box_geo.length()[0], dp3m.sum_dip_part,
558 dp3m.sum_mu2);
559 m_logger->log_tuning_start();
560 }
561
562 std::tuple<double, double, double, double>
564 double r_cut_iL) const override {
565
566 double alpha_L, rs_err, ks_err;
567 auto const &box_geo = *m_system.box_geo;
568
569 /* calc maximal real space error for setting */
570 rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL,
571 dp3m.sum_dip_part, dp3m.sum_mu2, 0.001);
572 // alpha cannot be zero for dipoles because real-space formula breaks down
573
574 if (std::numbers::sqrt2 * rs_err > dp3m.params.accuracy) {
575 /* assume rs_err = ks_err -> rs_err = accuracy/sqrt(2.0) -> alpha_L */
576 alpha_L = dp3m_rtbisection(
577 box_geo.length()[0], r_cut_iL, dp3m.sum_dip_part, dp3m.sum_mu2,
578 0.0001 * box_geo.length()[0], 5. * box_geo.length()[0], 0.0001,
579 dp3m.params.accuracy);
580 } else {
581 /* even alpha=0 is ok, however, we cannot choose it since it kills the
582 k-space error formula.
583 Anyways, this very likely NOT the optimal solution */
584 alpha_L = 0.1;
585 }
586
587 /* calculate real-space and k-space error for this alpha_L */
588 rs_err = dp3m_real_space_error(box_geo.length()[0], r_cut_iL,
589 dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L);
590 ks_err = dp3m_k_space_error(box_geo.length()[0], mesh[0], cao,
591 dp3m.sum_dip_part, dp3m.sum_mu2, alpha_L);
592
593 return {Utils::Vector2d{rs_err, ks_err}.norm(), rs_err, ks_err, alpha_L};
594 }
595
596 void determine_mesh_limits() override {
597 if (dp3m.params.mesh[0] == -1) {
598 /* simple heuristic to limit the tried meshes if the accuracy cannot
599 be obtained with smaller meshes, but normally not all these
600 meshes have to be tested */
601 auto const expo = std::log(std::cbrt(dp3m.sum_dip_part)) / std::log(2.);
602 /* Medium-educated guess for the minimal mesh */
603 m_mesh_min = static_cast<int>(std::round(std::pow(2., std::floor(expo))));
604 /* avoid using more than 1 GB of FFT arrays */
605 m_mesh_max = 128;
606 } else {
607 m_mesh_min = m_mesh_max = dp3m.params.mesh[0];
608 m_logger->report_fixed_mesh(dp3m.params.mesh);
609 }
610 }
611
613 auto tuned_params = TuningAlgorithm::Parameters{};
614 auto time_best = time_sentinel;
615 for (auto tmp_mesh = m_mesh_min; tmp_mesh <= m_mesh_max; tmp_mesh += 2) {
616 auto trial_params = TuningAlgorithm::Parameters{};
617 trial_params.mesh = Utils::Vector3i::broadcast(tmp_mesh);
618 trial_params.cao = cao_best;
619
620 auto const trial_time =
621 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
622 trial_params.alpha_L, trial_params.accuracy);
623
624 /* this mesh does not work at all */
625 if (trial_time < 0.)
626 continue;
627
628 /* the optimum r_cut for this mesh is the upper limit for higher meshes,
629 everything else is slower */
630 m_r_cut_iL_max = trial_params.r_cut_iL;
631
632 if (trial_time < time_best) {
633 /* new optimum */
635 tuned_params = trial_params;
636 time_best = tuned_params.time = trial_time;
637 } else if (trial_time > time_best + time_granularity or
639 /* no hope of further optimisation */
640 break;
641 }
642 }
643 return tuned_params;
644 }
645};
646
647template <typename FloatType, Arch Architecture>
649 auto &system = get_system();
650 auto const &box_geo = *system.box_geo;
651 if (dp3m.params.alpha_L == 0. and dp3m.params.alpha != 0.) {
652 dp3m.params.alpha_L = dp3m.params.alpha * box_geo.length()[0];
653 }
654 if (dp3m.params.r_cut_iL == 0. and dp3m.params.r_cut != 0.) {
655 dp3m.params.r_cut_iL = dp3m.params.r_cut * box_geo.length_inv()[0];
656 }
657 if (not is_tuned()) {
659 if (dp3m.sum_dip_part == 0) {
660 throw std::runtime_error(
661 "DipolarP3M: no dipolar particles in the system");
662 }
663 try {
665 system, dp3m, prefactor, tune_timings);
666 parameters.setup_logger(tune_verbose);
667 // parameter ranges
668 parameters.determine_mesh_limits();
669 parameters.determine_r_cut_limits();
670 parameters.determine_cao_limits(3);
671 // run tuning algorithm
672 parameters.tune();
673 m_is_tuned = true;
674 system.on_dipoles_change();
675 } catch (...) {
676 dp3m.params.tuning = false;
677 throw;
678 }
679 }
680 init();
681}
682
683/** Tuning dipolar-P3M */
684static auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh,
685 double mesh_i, int cao, double alpha_L_i) {
686
687 auto constexpr mesh_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN);
688 auto constexpr mesh_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1);
689 auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i);
690 auto alias1 = 0.;
691 auto alias2 = 0.;
692
693 Utils::Vector3i indices{};
694 Utils::Vector3i nm{};
695 Utils::Vector3d fnm{};
697 mesh_start, mesh_stop, indices,
698 [&]() {
699 auto const norm_sq = nm.norm2();
700 auto const ex = std::exp(-factor1 * norm_sq);
701 auto const U2 = std::pow(Utils::product(fnm), 2 * cao);
702 alias1 += Utils::sqr(ex) * norm_sq;
703 alias2 += U2 * ex * std::pow(shift * nm, 3) / norm_sq;
704 },
705 [&](unsigned dim, int n) {
706 nm[dim] = shift[dim] + n * mesh;
707 fnm[dim] = math::sinc(nm[dim] * mesh_i);
708 });
709
710 return std::make_pair(alias1, alias2);
711}
712
713/** Calculate the k-space error of dipolar-P3M */
714static double dp3m_k_space_error(double box_size, int mesh, int cao,
715 int n_c_part, double sum_q2, double alpha_L) {
716
717 auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao);
718 auto const mesh_i = 1. / static_cast<double>(mesh);
719 auto const alpha_L_i = 1. / alpha_L;
720 auto const mesh_stop = Utils::Vector3i::broadcast(mesh / 2);
721 auto const mesh_start = -mesh_stop;
722 auto indices = Utils::Vector3i{};
723 auto values = Utils::Vector3d{};
724 auto he_q = 0.;
725
727 mesh_start, mesh_stop, indices,
728 [&]() {
729 if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
730 auto const n2 = indices.norm2();
731 auto const cs = Utils::product(values);
732 auto const [alias1, alias2] =
733 dp3m_tune_aliasing_sums(indices, mesh, mesh_i, cao, alpha_L_i);
734 auto const d =
735 alias1 - Utils::sqr(alias2 / cs) /
736 Utils::int_pow<3>(static_cast<double>(n2));
737 /* at high precision, d can become negative due to extinction;
738 also, don't take values that have no significant digits left*/
739 if (d > 0. and std::fabs(d / alias1) > ROUND_ERROR_PREC)
740 he_q += d;
741 }
742 },
743 [&values, &mesh_i, cotangent_sum](unsigned dim, int n) {
744 values[dim] = cotangent_sum(n, mesh_i);
745 });
746
747 return 8. * Utils::sqr(std::numbers::pi) / 3. * sum_q2 *
748 sqrt(he_q / n_c_part) / Utils::int_pow<4>(box_size);
749}
750
751/** Calculate the value of the errors for the REAL part of the force in terms
752 * of the splitting parameter alpha of Ewald. Based on eq. (33) @cite wang01a.
753 *
754 * Please note that in this more refined approach we don't use
755 * eq. (37), but eq. (33) which maintains all the powers in alpha.
756 */
757static double dp3m_real_space_error(double box_size, double r_cut_iL,
758 int n_c_part, double sum_q2,
759 double alpha_L) {
760 double d_error_f, d_cc, d_dc, d_con;
761
762 auto const d_rcut = r_cut_iL * box_size;
763 auto const d_rcut2 = Utils::sqr(d_rcut);
764 auto const d_rcut4 = Utils::sqr(d_rcut2);
765
766 auto const d_a2 = Utils::sqr(alpha_L) / Utils::sqr(box_size);
767
768 auto const d_c = sum_q2 * exp(-d_a2 * d_rcut2);
769
770 d_cc = 4. * Utils::sqr(d_a2) * Utils::sqr(d_rcut2) + 6. * d_a2 * d_rcut2 + 3.;
771
772 d_dc = 8. * Utils::int_pow<3>(d_a2) * Utils::int_pow<3>(d_rcut2) +
773 20. * Utils::sqr(d_a2) * d_rcut4 + 30. * d_a2 * d_rcut2 + 15.;
774
775 d_con = 1. / sqrt(Utils::int_pow<3>(box_size) * Utils::sqr(d_a2) * d_rcut *
776 Utils::sqr(d_rcut4) * static_cast<double>(n_c_part));
777
778 d_error_f = d_c * d_con *
779 sqrt((13. / 6.) * Utils::sqr(d_cc) +
780 (2. / 15.) * Utils::sqr(d_dc) - (13. / 15.) * d_cc * d_dc);
781
782 return d_error_f;
783}
784
785/** Using bisection, find the root of a function "func-tuned_accuracy/sqrt(2.)"
786 * known to lie between x1 and x2. The root, returned as rtbis, will be
787 * refined until its accuracy is \f$\pm\f$ @p xacc.
788 */
789double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part,
790 double sum_q2, double x1, double x2, double xacc,
791 double tuned_accuracy) {
792 constexpr int JJ_RTBIS_MAX = 40;
793
794 auto const constant = tuned_accuracy / std::numbers::sqrt2;
795
796 auto const f1 =
797 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, x1) -
798 constant;
799 auto const f2 =
800 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, x2) -
801 constant;
802 if (f1 * f2 >= 0.0) {
803 throw std::runtime_error(
804 "Root must be bracketed for bisection in dp3m_rtbisection");
805 }
806 // Orient the search dx, and set rtb to x1 or x2 ...
807 double dx;
808 double rtb = f1 < 0.0 ? (dx = x2 - x1, x1) : (dx = x1 - x2, x2);
809 for (int j = 1; j <= JJ_RTBIS_MAX; j++) {
810 auto const xmid = rtb + (dx *= 0.5);
811 auto const fmid =
812 dp3m_real_space_error(box_size, r_cut_iL, n_c_part, sum_q2, xmid) -
813 constant;
814 if (fmid <= 0.0)
815 rtb = xmid;
816 if (fabs(dx) < xacc || fmid == 0.0)
817 return rtb;
818 }
819 throw std::runtime_error("Too many bisections in dp3m_rtbisection");
820}
821
823 auto const &system = get_system();
824 auto const &box_geo = *system.box_geo;
825 auto const &local_geo = *system.local_geo;
826 for (auto i = 0u; i < 3u; i++) {
827 /* check k-space cutoff */
828 if (dp3m_params.cao_cut[i] >= box_geo.length_half()[i]) {
829 std::stringstream msg;
830 msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i]
831 << " is larger than half of box dimension " << box_geo.length()[i];
832 throw std::runtime_error(msg.str());
833 }
834 if (dp3m_params.cao_cut[i] >= local_geo.length()[i]) {
835 std::stringstream msg;
836 msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i]
837 << " is larger than local box dimension " << local_geo.length()[i];
838 throw std::runtime_error(msg.str());
839 }
840 }
841
842 if ((box_geo.length()[0] != box_geo.length()[1]) or
843 (box_geo.length()[1] != box_geo.length()[2])) {
844 throw std::runtime_error("DipolarP3M: requires a cubic box");
845 }
846}
847
849 auto const &box_geo = *get_system().box_geo;
850 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
851 throw std::runtime_error(
852 "DipolarP3M: requires periodicity (True, True, True)");
853 }
854}
855
857 auto const &local_geo = *get_system().local_geo;
858 if (local_geo.cell_structure_type() != CellStructureType::REGULAR and
859 local_geo.cell_structure_type() != CellStructureType::HYBRID) {
860 throw std::runtime_error(
861 "DipolarP3M: requires the regular or hybrid decomposition cell system");
862 }
863 if (::communicator.size > 1 and
864 local_geo.cell_structure_type() == CellStructureType::HYBRID) {
865 throw std::runtime_error(
866 "DipolarP3M: does not work with the hybrid decomposition cell system, "
867 "if using more than one MPI node");
868 }
869}
870
872 auto const &node_grid = ::communicator.node_grid;
873 if (node_grid[0] < node_grid[1] or node_grid[1] < node_grid[2]) {
874 throw std::runtime_error(
875 "DipolarP3M: node grid must be sorted, largest first");
876 }
877}
878
879template <typename FloatType, Arch Architecture>
881 auto const &box_geo = *get_system().box_geo;
882 dp3m.params.r_cut = dp3m.params.r_cut_iL * box_geo.length()[0];
883 dp3m.params.alpha = dp3m.params.alpha_L * box_geo.length_inv()[0];
884 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
886 sanity_checks_boxl();
887 calc_influence_function_force();
888 calc_influence_function_energy();
889 dp3m.energy_correction = 0.;
890}
891
892template <typename FloatType, Arch Architecture>
894 auto const &box_geo = *get_system().box_geo;
895 auto const Ukp3m = calc_average_self_energy_k_space() * box_geo.volume();
896 auto const Ewald_volume = Utils::int_pow<3>(dp3m.params.alpha_L);
897 auto const Eself = -2. * Ewald_volume * std::numbers::inv_sqrtpi / 3.;
898 dp3m.energy_correction =
899 -dp3m.sum_mu2 * (Ukp3m + Eself + 2. * std::numbers::pi / 3.);
900}
901
902#ifdef NPT
906#endif // NPT
907
910
911#endif // DP3M
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
@ INTEG_METHOD_NPT_ISO
Vector implementation and trait types for boost qvm interoperability.
void on_solver_change() const override
Definition dp3m.cpp:545
DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m, double prefactor, int timings)
Definition dp3m.cpp:539
void setup_logger(bool verbose) override
Definition dp3m.cpp:552
TuningAlgorithm::Parameters get_time() override
Definition dp3m.cpp:612
P3MParameters & get_params() override
Definition dp3m.cpp:543
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
Definition dp3m.cpp:563
std::optional< std::string > layer_correction_veto_r_cut(double) const override
Definition dp3m.cpp:548
void determine_mesh_limits() override
Definition dp3m.cpp:596
A range of particles.
base_type::size_type size() const
Main system class.
std::shared_ptr< CellStructure > cell_structure
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
T norm() const
Definition Vector.hpp:138
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value)
Create a vector that has all entries set to the same value.
Definition Vector.hpp:110
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
This file contains the defaults for ESPResSo.
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
Definition config.hpp:66
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
Definition config.hpp:53
__device__ void vector_product(float const *a, float const *b, float *out)
static int count_magnetic_particles(System::System const &system)
Definition dlc.cpp:398
static auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, double mesh_i, int cao, double alpha_L_i)
Tuning dipolar-P3M.
Definition dp3m.cpp:684
static double dp3m_k_space_error(double box_size, int mesh, int cao, int n_c_part, double sum_q2, double alpha_L)
Calculate the k-space error of dipolar-P3M.
Definition dp3m.cpp:714
void npt_add_virial_magnetic_contribution(double energy)
Update the NpT virial.
Definition dp3m.cpp:903
double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy)
Compute the value of alpha through a bisection method.
Definition dp3m.cpp:789
static double dp3m_real_space_error(double box_size, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L)
Calculate the value of the errors for the REAL part of the force in terms of the splitting parameter ...
Definition dp3m.cpp:757
P3M algorithm for long-range magnetic dipole-dipole interaction.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
Routines, row decomposition, data structures and communication for the 3D-FFT.
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
double grid_influence_function_self_energy(P3MParameters const &params, Utils::Vector3i const &n_start, Utils::Vector3i const &n_stop, std::vector< FloatType > const &g)
Calculate self-energy of the influence function.
void p3m_interpolate(P3MLocalMesh const &local_mesh, InterpolationWeights< cao > const &weights, Kernel kernel)
P3M grid interpolation.
T product(Vector< T, N > const &v)
Definition Vector.hpp:374
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
Definition sqr.hpp:28
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
Definition math.hpp:53
auto get_analytic_cotangent_sum_kernel(int cao)
Definition math.hpp:128
void npt_add_virial_contribution(double energy)
Definition npt.cpp:141
Exports for the NpT code.
Common functions for dipolar and charge P3M.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
Utils::Vector3i node_grid
void calc_energy_correction() override
Definition dp3m.cpp:893
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
Definition dp3m.cpp:247
double calc_surface_term(bool force_flag, bool energy_flag, ParticleRange const &particles) override
Definition dp3m.cpp:452
void dipole_assign(ParticleRange const &particles) override
Definition dp3m.cpp:177
void init_cpu_kernels()
Definition dp3m.cpp:128
void calc_influence_function_energy() override
Definition dp3m.cpp:527
void count_magnetic_particles() override
Definition dp3m.cpp:84
void calc_influence_function_force() override
Definition dp3m.cpp:520
void tune() override
Definition dp3m.cpp:648
void scaleby_box_l() override
Definition dp3m.cpp:880
double calc_average_self_energy_k_space() const override
Definition dp3m.cpp:115
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
Definition dp3m.cpp:822
void sanity_checks_cell_structure() const
Definition dp3m.cpp:856
P3MParameters const & dp3m_params
Definition dp3m.hpp:60
void sanity_checks_periodicity() const
Definition dp3m.cpp:848
void sanity_checks_node_grid() const
Definition dp3m.cpp:871
void recalc_ld_pos(P3MParameters const &params)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0).
void operator()(auto &dp3m, Utils::Vector3d const &real_pos, Utils::Vector3d const &dip) const
Definition dp3m.cpp:158
void operator()(auto &dp3m, double prefac, int d_rs, ParticleRange const &particles) const
Definition dp3m.cpp:220
void operator()(auto &dp3m, double prefac, int d_rs, ParticleRange const &particles) const
Definition dp3m.cpp:196
double energy_correction
cached k-space self-energy correction
Definition dp3m.impl.hpp:57
int sum_dip_part
number of dipolar particles (only on head node).
Definition dp3m.impl.hpp:49
double sum_mu2
Sum of square of magnetic dipoles (only on head node).
Definition dp3m.impl.hpp:51
P3MLocalMesh local_mesh
Local mesh properties.
P3MParameters params
P3M base parameters.