50#include "communication.hpp"
54#include "system/System.hpp"
62#include <boost/mpi/collectives/all_reduce.hpp>
63#include <boost/mpi/collectives/reduce.hpp>
81#error "The FFTW3 library shouldn't be visible in this translation unit"
84template <
typename FloatType, Arch Architecture>
86 auto local_n = std::size_t{0u};
87 double local_mu2 = 0.;
89 for (
auto const &p : get_system().
cell_structure->local_particles()) {
91 local_mu2 += p.calc_dip().norm2();
96 boost::mpi::all_reduce(
comm_cart, local_mu2, dp3m.sum_mu2, std::plus<>());
97 boost::mpi::all_reduce(
comm_cart, local_n, dp3m.sum_dip_part, std::plus<>());
101 int n_c_part,
double sum_q2,
double alpha_L);
104 int n_c_part,
double sum_q2,
111 double sum_q2,
double x1,
double x2,
double xacc,
112 double tuned_accuracy);
114template <
typename FloatType, Arch Architecture>
118 auto const &box_geo = *get_system().
box_geo;
119 auto const node_phi =
120 grid_influence_function_self_energy<FloatType, P3M_BRILLOUIN>(
121 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
124 boost::mpi::reduce(
comm_cart, node_phi, phi, std::plus<>(), 0);
125 phi /= 3. * box_geo.length()[0] * Utils::int_pow<3>(dp3m.params.mesh[0]);
126 return phi * std::numbers::pi;
129template <
typename FloatType, Arch Architecture>
132 assert(dp3m.params.cao >= 1 and dp3m.params.cao <= 7);
133 assert(dp3m.params.alpha > 0.);
135 auto const &system = get_system();
136 auto const &box_geo = *system.box_geo;
137 auto const &local_geo = *system.local_geo;
138 auto const verlet_skin = system.cell_structure->get_verlet_skin();
140 dp3m.params.cao3 = Utils::int_pow<3>(dp3m.params.cao);
141 dp3m.params.recalc_a_ai_cao_cut(box_geo.length());
144 dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo, verlet_skin, 0.);
145 dp3m.fft_buffers->init_halo();
146 dp3m.fft->init(dp3m.params);
147 dp3m.mesh.ks_pnum = dp3m.fft->get_ks_pnum();
148 dp3m.fft_buffers->init_meshes(dp3m.fft->get_ca_mesh_size());
149 dp3m.update_mesh_views();
150 dp3m.calc_differential_operator();
163 typename std::remove_reference_t<
decltype(dp3m)>::value_type;
165 auto const weights = p3m_calculate_interpolation_weights<cao, memory_order>(
166 real_pos, dp3m.params.ai, dp3m.local_mesh);
167 p3m_interpolate<cao>(
168 dp3m.local_mesh, weights, [&dip, &dp3m](
int ind,
double w) {
169 dp3m.mesh.rs_fields[0u][ind] += value_type(w * dip[0u]);
170 dp3m.mesh.rs_fields[1u][ind] += value_type(w * dip[1u]);
171 dp3m.mesh.rs_fields[2u][ind] += value_type(w * dip[2u]);
174 dp3m.inter_weights.template store<cao>(weights);
179template <
typename FloatType, Arch Architecture>
182 dp3m.inter_weights.reset(dp3m.params.cao);
185 for (
auto &rs_mesh_field : dp3m.mesh.rs_fields)
186 for (
int j = 0; j < dp3m.local_mesh.size; j++)
187 rs_mesh_field[j] = 0.;
189 for (
auto const &p : particles) {
190 if (p.dipm() != 0.) {
191 Utils::integral_parameter<int, AssignDipole, 1, 7>(dp3m.params.cao, dp3m,
192 p.pos(), p.calc_dip());
203 auto p_index = std::size_t{0ul};
205 for (
auto &p : particles) {
206 if (p.dipm() != 0.) {
207 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
211 [&E, &dp3m, d_rs](
int ind,
double w) {
212 E[d_rs] += w * double(dp3m.mesh.rs_scalar[ind]);
227 auto p_index = std::size_t{0ul};
229 for (
auto &p : particles) {
230 if (p.dipm() != 0.) {
231 auto const weights = dp3m.inter_weights.template load<cao>(p_index);
235 [&E, &dp3m](
int ind,
double w) {
236 E[0u] += w * double(dp3m.mesh.rs_fields[0u][ind]);
237 E[1u] += w * double(dp3m.mesh.rs_fields[1u][ind]);
238 E[2u] += w * double(dp3m.mesh.rs_fields[2u][ind]);
241 p.force()[d_rs] += p.calc_dip() * prefac * E;
249template <
typename FloatType, Arch Architecture>
251 bool force_flag,
bool energy_flag,
ParticleRange const &particles) {
254 auto const &system = get_system();
255 auto const &box_geo = *system.box_geo;
256 auto const dipole_prefac = prefactor / Utils::int_pow<3>(dp3m.params.mesh[0]);
258 auto const npt_flag =
261 auto constexpr npt_flag =
false;
264 if (dp3m.sum_mu2 > 0.) {
265 dipole_assign(particles);
266 dp3m.fft_buffers->perform_vector_halo_gather();
267 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
268 dp3m.fft->forward_fft(rs_mesh);
270 dp3m.update_mesh_views();
274 if (energy_flag or npt_flag) {
278 if (dp3m.sum_mu2 > 0.) {
283 auto const &offset = dp3m.mesh.start;
284 auto const &d_op = dp3m.d_op[0u];
285 auto const &mesh_dip = dp3m.mesh.rs_fields;
286 auto const permutations = dp3m.fft->get_permutations();
288 auto index = std::size_t(0u);
289 auto it_energy = dp3m.g_energy.begin();
290 auto node_energy = 0.;
291 for_each_3d(mesh_start, dp3m.mesh.size, indices, [&]() {
292 auto const [KX, KY, KZ] = permutations;
293 auto const shift = indices + offset;
295 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
296 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
297 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
300 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
301 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
302 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
304 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
305 std::advance(it_energy, 1);
308 node_energy *= dipole_prefac * std::numbers::pi * box_geo.length_inv()[0];
309 boost::mpi::reduce(
comm_cart, node_energy, energy, std::plus<>(), 0);
311 if (dp3m.energy_correction == 0.)
312 calc_energy_correction();
316 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
317 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
320 energy += prefactor * dp3m.energy_correction / box_geo.volume();
330 if (dp3m.sum_mu2 > 0.) {
331 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u];
333 auto const &mesh_stop = dp3m.mesh.size;
334 auto const offset = dp3m.mesh.start;
335 auto const &d_op = dp3m.d_op[0u];
336 auto const permutations = dp3m.fft->get_permutations();
337 auto &mesh_dip = dp3m.mesh.rs_fields;
339 auto index = std::size_t(0u);
340 dp3m.ks_scalar.resize(dp3m.mesh.rs_scalar.size());
343 auto it_energy = dp3m.g_energy.begin();
345 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
346 auto const [KX, KY, KZ] = permutations;
347 auto const shift = indices + offset;
349 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
350 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
351 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
352 dp3m.ks_scalar[index] = *it_energy * re;
355 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
356 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
357 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
358 dp3m.ks_scalar[index] = *it_energy * im;
360 std::advance(it_energy, 1);
364 for (
int d = 0; d < 3; d++) {
366 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
367 auto const d_op_val = FloatType(d_op[indices[d] + offset[d]]);
368 dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.ks_scalar[index];
370 dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.ks_scalar[index];
373 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
374 dp3m.fft_buffers->perform_scalar_halo_spread();
376 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
377 Utils::integral_parameter<int, AssignTorques, 1, 7>(
378 dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs, particles);
389 auto it_force = dp3m.g_force.begin();
391 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
392 auto const [KX, KY, KZ] = permutations;
393 auto const shift = indices + offset;
395 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
396 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
397 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
400 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
401 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
402 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
404 dp3m.ks_scalar[index - 2] = *it_force * im;
405 dp3m.ks_scalar[index - 1] = *it_force * (-re);
406 std::advance(it_force, 1);
410 for (
int d = 0; d < 3; d++) {
412 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
413 auto const [KX, KY, KZ] = permutations;
414 auto const d_op_val = FloatType(d_op[indices[d] + offset[d]]);
415 auto const shift = indices + offset;
416 auto const f1 = d_op_val * dp3m.ks_scalar[index];
417 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f1;
418 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f1;
419 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f1;
421 auto const f2 = d_op_val * dp3m.ks_scalar[index];
422 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f2;
423 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f2;
424 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f2;
427 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
428 dp3m.fft->backward_fft(rs_mesh);
430 dp3m.fft_buffers->perform_vector_halo_spread();
432 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
433 Utils::integral_parameter<int, AssignForces, 1, 7>(
434 dp3m.params.cao, dp3m, dipole_prefac *
Utils::sqr(wavenumber), d_rs,
441 auto const surface_term =
442 calc_surface_term(force_flag, energy_flag or npt_flag, particles);
444 energy += surface_term;
452 if (not energy_flag) {
459template <
typename FloatType, Arch Architecture>
461 bool force_flag,
bool energy_flag,
ParticleRange const &particles) {
462 auto const &box_geo = *get_system().
box_geo;
463 auto const pref = prefactor * 4. * std::numbers::pi / box_geo.volume() /
464 (2. * dp3m.params.epsilon + 1.);
465 auto const n_local_part = particles.
size();
469 std::vector<double> mx(n_local_part);
470 std::vector<double> my(n_local_part);
471 std::vector<double> mz(n_local_part);
474 for (
auto const &p : particles) {
475 auto const dip = p.calc_dip();
484 for (std::size_t i = 0u; i < n_local_part; i++) {
485 local_dip[0u] += mx[i];
486 local_dip[1u] += my[i];
487 local_dip[2u] += mz[i];
490 boost::mpi::all_reduce(
comm_cart, local_dip, std::plus<>());
495 for (std::size_t i = 0u; i < n_local_part; i++) {
496 sum_e += mx[i] * box_dip[0] + my[i] * box_dip[1] + mz[i] * box_dip[2];
499 0.5 * pref * boost::mpi::all_reduce(
comm_cart, sum_e, std::plus<>());
504 std::vector<double> sumix(n_local_part);
505 std::vector<double> sumiy(n_local_part);
506 std::vector<double> sumiz(n_local_part);
508 for (std::size_t i = 0u; i < n_local_part; i++) {
509 sumix[i] = my[i] * box_dip[2u] - mz[i] * box_dip[1u];
510 sumiy[i] = mz[i] * box_dip[0u] - mx[i] * box_dip[2u];
511 sumiz[i] = mx[i] * box_dip[1u] - my[i] * box_dip[0u];
515 for (
auto &p : particles) {
516 auto &torque = p.torque();
517 torque[0u] -= pref * sumix[ip];
518 torque[1u] -= pref * sumiy[ip];
519 torque[2u] -= pref * sumiz[ip];
527template <
typename FloatType, Arch Architecture>
529 dp3m.g_force = grid_influence_function<FloatType, 3, P3M_BRILLOUIN>(
530 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
531 get_system().box_geo->length_inv());
534template <
typename FloatType, Arch Architecture>
536 dp3m.g_energy = grid_influence_function<FloatType, 2, P3M_BRILLOUIN>(
537 dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
538 get_system().box_geo->length_inv());
541template <
typename FloatType, Arch Architecture>
544 int m_mesh_max = -1, m_mesh_min = -1;
545 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
549 double prefactor,
int timings,
550 decltype(m_tune_limits) tune_limits)
552 m_tune_limits{std::move(tune_limits)} {}
558 std::optional<std::string>
565 m_logger = std::make_unique<TuningLogger>(
573 std::tuple<double, double, double, double>
575 double r_cut_iL)
const override {
577 double alpha_L, rs_err, ks_err;
589 0.0001 * box_geo.length()[0], 5. * box_geo.length()[0], 0.0001,
612 auto const expo = std::log(std::cbrt(dp3m.
sum_dip_part)) / std::log(2.);
614 m_mesh_min =
static_cast<int>(std::round(std::pow(2., std::floor(expo))));
617 if (m_tune_limits.first) {
618 m_mesh_min = *m_tune_limits.first;
620 if (m_tune_limits.second) {
621 m_mesh_max = *m_tune_limits.second;
624 m_mesh_min = m_mesh_max = dp3m.
params.
mesh[0];
632 for (
auto tmp_mesh = m_mesh_min; tmp_mesh <= m_mesh_max; tmp_mesh += 2) {
637 auto const trial_time =
638 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
639 trial_params.alpha_L, trial_params.accuracy);
649 if (trial_time < time_best) {
652 tuned_params = trial_params;
653 time_best = tuned_params.time = trial_time;
664template <
typename FloatType, Arch Architecture>
666 auto &system = get_system();
667 auto const &box_geo = *system.
box_geo;
674 if (not is_tuned()) {
677 throw std::runtime_error(
678 "DipolarP3M: no dipolar particles in the system");
682 system, dp3m, prefactor, tune_timings, tune_limits);
702 double mesh_i,
int cao,
double alpha_L_i) {
706 auto const factor1 =
Utils::sqr(std::numbers::pi * alpha_L_i);
714 mesh_start, mesh_stop, indices,
716 auto const norm_sq = nm.norm2();
717 auto const ex = std::exp(-factor1 * norm_sq);
720 alias2 += U2 * ex * std::pow(shift * nm, 3) / norm_sq;
722 [&](
unsigned dim,
int n) {
723 nm[dim] = shift[dim] + n * mesh;
727 return std::make_pair(alias1, alias2);
732 int n_c_part,
double sum_q2,
double alpha_L) {
735 auto const mesh_i = 1. /
static_cast<double>(mesh);
736 auto const alpha_L_i = 1. / alpha_L;
738 auto const mesh_start = -mesh_stop;
744 mesh_start, mesh_stop, indices,
746 if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
747 auto const n2 = indices.norm2();
749 auto const [alias1, alias2] =
753 Utils::int_pow<3>(
static_cast<double>(n2));
760 [&values, &mesh_i, cotangent_sum](
unsigned dim,
int n) {
761 values[dim] = cotangent_sum(n, mesh_i);
764 return 8. *
Utils::sqr(std::numbers::pi) / 3. * sum_q2 *
765 sqrt(he_q / n_c_part) / Utils::int_pow<4>(box_size);
775 int n_c_part,
double sum_q2,
777 auto constexpr exp_min = -708.4;
778 double d_error_f, d_cc, d_dc, d_con;
780 auto const d_rcut = r_cut_iL * box_size;
785 auto const exponent = -d_a2 * d_rcut2;
786 auto const exp_term = (exponent < exp_min) ? 0. : std::exp(exponent);
787 auto const d_c = sum_q2 * exp_term;
791 d_dc = 8. * Utils::int_pow<3>(d_a2) * Utils::int_pow<3>(d_rcut2) +
792 20. *
Utils::sqr(d_a2) * d_rcut4 + 30. * d_a2 * d_rcut2 + 15.;
794 d_con = 1. / sqrt(Utils::int_pow<3>(box_size) *
Utils::sqr(d_a2) * d_rcut *
795 Utils::sqr(d_rcut4) *
static_cast<double>(n_c_part));
797 d_error_f = d_c * d_con *
799 (2. / 15.) *
Utils::sqr(d_dc) - (13. / 15.) * d_cc * d_dc);
809 double sum_q2,
double x1,
double x2,
double xacc,
810 double tuned_accuracy) {
811 constexpr int JJ_RTBIS_MAX = 40;
813 auto const constant = tuned_accuracy / std::numbers::sqrt2;
821 if (f1 * f2 >= 0.0) {
822 throw std::runtime_error(
823 "Root must be bracketed for bisection in dp3m_rtbisection");
827 double rtb = f1 < 0.0 ? (dx = x2 - x1, x1) : (dx = x1 - x2, x2);
828 for (
int j = 1; j <= JJ_RTBIS_MAX; j++) {
829 auto const xmid = rtb + (dx *= 0.5);
835 if (fabs(dx) < xacc || fmid == 0.0)
838 throw std::runtime_error(
"Too many bisections in dp3m_rtbisection");
843 auto const &box_geo = *system.
box_geo;
844 auto const &local_geo = *system.
local_geo;
845 for (
auto i = 0u; i < 3u; i++) {
848 std::stringstream msg;
850 <<
" is larger than half of box dimension " << box_geo.length()[i];
851 throw std::runtime_error(msg.str());
854 std::stringstream msg;
856 <<
" is larger than local box dimension " << local_geo.length()[i];
857 throw std::runtime_error(msg.str());
861 if ((box_geo.length()[0] != box_geo.length()[1]) or
862 (box_geo.length()[1] != box_geo.length()[2])) {
863 throw std::runtime_error(
"DipolarP3M: requires a cubic box");
869 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
870 throw std::runtime_error(
871 "DipolarP3M: requires periodicity (True, True, True)");
876 auto const &local_geo = *
get_system().local_geo;
879 throw std::runtime_error(
880 "DipolarP3M: requires the regular or hybrid decomposition cell system");
884 throw std::runtime_error(
885 "DipolarP3M: does not work with the hybrid decomposition cell system, "
886 "if using more than one MPI node");
892 if (node_grid[0] < node_grid[1] or node_grid[1] < node_grid[2]) {
893 throw std::runtime_error(
894 "DipolarP3M: node grid must be sorted, largest first");
898template <
typename FloatType, Arch Architecture>
900 auto const &box_geo = *get_system().
box_geo;
905 sanity_checks_boxl();
906 calc_influence_function_force();
907 calc_influence_function_energy();
911template <
typename FloatType, Arch Architecture>
913 auto const &box_geo = *get_system().
box_geo;
914 auto const Ukp3m = calc_average_self_energy_k_space() * box_geo.volume();
915 auto const Ewald_volume = Utils::int_pow<3>(dp3m.
params.
alpha_L);
916 auto const Eself = -2. * Ewald_volume * std::numbers::inv_sqrtpi / 3.;
918 -dp3m.
sum_mu2 * (Ukp3m + Eself + 2. * std::numbers::pi / 3.);
922template <
typename FloatType, Arch Architecture>
924 double energy)
const {
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
Vector implementation and trait types for boost qvm interoperability.
void on_solver_change() const override
void setup_logger(bool verbose) override
TuningAlgorithm::Parameters get_time() override
P3MParameters & get_params() override
DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
std::optional< std::string > layer_correction_veto_r_cut(double) const override
void determine_mesh_limits() override
base_type::size_type size() const
std::shared_ptr< LocalBox > local_geo
void npt_add_virial_contribution(double energy)
std::shared_ptr< CellStructure > cell_structure
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
This file contains the defaults for ESPResSo.
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
__device__ void vector_product(float const *a, float const *b, float *out)
static int count_magnetic_particles(System::System const &system)
static auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, double mesh_i, int cao, double alpha_L_i)
Tuning dipolar-P3M.
static double dp3m_k_space_error(double box_size, int mesh, int cao, int n_c_part, double sum_q2, double alpha_L)
Calculate the k-space error of dipolar-P3M.
double dp3m_rtbisection(double box_size, double r_cut_iL, int n_c_part, double sum_q2, double x1, double x2, double xacc, double tuned_accuracy)
Compute the value of alpha through a bisection method.
static double dp3m_real_space_error(double box_size, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L)
Calculate the value of the errors for the REAL part of the force in terms of the splitting parameter ...
P3M algorithm for long-range magnetic dipole-dipole interaction.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
Routines, row decomposition, data structures and communication for the 3D-FFT.
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, InterpolationWeights< cao > const &weights, Kernel kernel)
P3M grid interpolation.
T product(Vector< T, N > const &v)
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
auto get_analytic_cotangent_sum_kernel(int cao)
Exports for the NpT code.
Common functions for dipolar and charge P3M.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
Utils::Vector3i node_grid
void calc_energy_correction() override
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
double calc_surface_term(bool force_flag, bool energy_flag, ParticleRange const &particles) override
void dipole_assign(ParticleRange const &particles) override
void calc_influence_function_energy() override
void count_magnetic_particles() override
void npt_add_virial_contribution(double energy) const override
void calc_influence_function_force() override
void scaleby_box_l() override
double calc_average_self_energy_k_space() const override
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
void sanity_checks_cell_structure() const
P3MParameters const & dp3m_params
void sanity_checks_periodicity() const
void sanity_checks_node_grid() const
void recalc_ld_pos(P3MParameters const ¶ms)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
P3MParameters params
P3M base parameters.
P3MLocalMesh local_mesh
Local mesh geometry information for this MPI rank.
void operator()(auto &dp3m, Utils::Vector3d const &real_pos, Utils::Vector3d const &dip) const
void operator()(auto &dp3m, double prefac, int d_rs, ParticleRange const &particles) const
void operator()(auto &dp3m, double prefac, int d_rs, ParticleRange const &particles) const
double energy_correction
cached k-space self-energy correction
double sum_mu2
Sum of square of magnetic dipoles.
std::size_t sum_dip_part
number of dipolar particles.