59#include "communication.hpp"
65#include "system/System.hpp"
74#include <boost/mpi/collectives/all_reduce.hpp>
75#include <boost/mpi/collectives/broadcast.hpp>
76#include <boost/mpi/collectives/reduce.hpp>
77#include <boost/mpi/communicator.hpp>
78#include <boost/range/combine.hpp>
79#include <boost/range/numeric.hpp>
87#include <initializer_list>
98template <
typename FloatType>
99std::complex<FloatType>
102 return std::complex<FloatType>(-z.imag() * k, z.real() * k);
105template <
typename FloatType>
106std::complex<FloatType>
109 return std::complex<FloatType>(z.real() * k, z.imag() * k);
112template <
typename FloatType>
117template <Utils::MemoryOrder order_in, Utils::MemoryOrder order_out,
typename T>
121 std::vector<T> flat_array_t(flat_array.size());
125 flat_array_t[index_out] = flat_array[index_in];
132 return mesh[0u] % node_grid[0u] == 0 and mesh[1u] % node_grid[1u] == 0 and
133 mesh[2u] % node_grid[2u] == 0;
137 return std::accumulate(shape.
cbegin(), shape.
cend(), std::size_t{1u},
138 [](std::size_t
const acc,
int const value) {
139 return acc * static_cast<std::size_t>(value);
143template <
typename FloatType, Arch Architecture>
145 auto local_n = std::size_t{0u};
149 for (
auto const &p : get_system().
cell_structure->local_particles()) {
157 boost::mpi::all_reduce(
comm_cart, local_n, p3m.sum_qpart, std::plus<>());
158 boost::mpi::all_reduce(
comm_cart, local_q2, p3m.sum_q2, std::plus<>());
159 boost::mpi::all_reduce(
comm_cart, local_q, p3m.square_sum_q, std::plus<>());
160 p3m.square_sum_q =
Utils::sqr(p3m.square_sum_q);
172template <
typename FloatType, Arch Architecture>
174 p3m.g_force = grid_influence_function<FloatType, 1, P3M_BRILLOUIN>(
175 p3m.params, p3m.fft->ks_local_ld_index(), p3m.fft->ks_local_ur_index(),
176 get_system().box_geo->length_inv());
182template <
typename FloatType, Arch Architecture>
184 p3m.g_energy = grid_influence_function<FloatType, 0, P3M_BRILLOUIN>(
185 p3m.params, p3m.fft->ks_local_ld_index(), p3m.fft->ks_local_ur_index(),
186 get_system().box_geo->length_inv());
197 auto constexpr exp_min = -708.4;
198 auto const factor1 =
Utils::sqr(std::numbers::pi * alpha_L_i);
206 mesh_start, mesh_stop, indices,
208 auto const norm_sq = nm.norm2();
209 auto const exponent = -factor1 * norm_sq;
210 auto const exp_limit = (exp_min + std::log(norm_sq)) / 2.;
211 auto const ex = (exponent < exp_limit) ? 0. : std::exp(exponent);
214 alias2 += energy * ex * (shift * nm) / norm_sq;
216 [&](
unsigned dim,
int n) {
217 nm[dim] = shift[dim] + n * mesh[dim];
221 return std::make_pair(alias1, alias2);
235 double sum_q2,
double alpha_L,
238 return (2. * pref * sum_q2 * exp(-
Utils::sqr(r_cut_iL * alpha_L))) /
239 sqrt(
static_cast<double>(n_c_part) * r_cut_iL * box_l[0] * volume);
256 int cao,
int n_c_part,
double sum_q2,
261 auto const alpha_L_i = 1. / alpha_L;
262 auto const mesh_stop = mesh / 2;
263 auto const mesh_start = -mesh_stop;
269 mesh_start, mesh_stop, indices,
271 if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) {
272 auto const n2 = indices.norm2();
274 auto const [alias1, alias2] =
276 auto const d = alias1 -
Utils::sqr(alias2 / cs) / n2;
284 [&values, &mesh_i, cotangent_sum](
unsigned dim,
int n) {
285 values[dim] = cotangent_sum(n, mesh_i[dim]);
288 return 2. * pref * sum_q2 * sqrt(he_q /
static_cast<double>(n_c_part)) /
289 (box_l[1] * box_l[2]);
292template <
typename FloatType, Arch Architecture>
295 assert(p3m.params.cao >= 1 and p3m.params.cao <= 7);
296 assert(p3m.params.alpha > 0.);
298 auto const &system = get_system();
299 auto const &box_geo = *system.box_geo;
300 auto const &local_geo = *system.local_geo;
301 auto const skin = system.cell_structure->get_verlet_skin();
303 p3m.params.cao3 = Utils::int_pow<3>(p3m.params.cao);
304 p3m.params.recalc_a_ai_cao_cut(box_geo.length());
308 auto const &solver = system.coulomb.impl->solver;
309 double elc_layer = 0.;
310 if (
auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
311 elc_layer = actor->elc.space_layer;
314 p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer);
315 p3m.fft = std::make_shared<P3MFFT<FloatType>>(
316 comm_cart, p3m.params.mesh, p3m.local_mesh.ld_no_halo,
317 p3m.local_mesh.ur_no_halo, get_memory_layout());
318 p3m.fft->set_preferred_kspace_decomposition(
::communicator.node_grid);
320 auto const rs_array_size_no_halo =
323 p3m.rs_charge_density.resize(rs_array_size);
324 p3m.ks_charge_density.resize(fft_mesh_size);
325 for (
auto d : {0u, 1u, 2u}) {
326 p3m.rs_E_fields[d].resize(rs_array_size);
328 p3m.ks_E_fields_storage.resize(3u * fft_mesh_size);
329 p3m.rs_E_fields_no_halo.resize(3u * rs_array_size_no_halo);
330 p3m.calc_differential_operator();
335 count_charged_particles();
342 typename std::remove_reference_t<
decltype(p3m)>::value_type;
344 p3m.rs_charge_density[ind] += value_type(w * q);
350 auto constexpr memory_order =
351 std::remove_reference<
decltype(p3m)>::type::memory_order;
352 auto const w = p3m_calculate_interpolation_weights<cao, memory_order>(
353 real_pos, p3m.params.ai, p3m.local_mesh);
354 inter_weights.
store(w);
355 this->operator()(p3m, q, w);
359 auto constexpr memory_order =
360 std::remove_reference<
decltype(p3m)>::type::memory_order;
361 auto const w = p3m_calculate_interpolation_weights<cao, memory_order>(
362 real_pos, p3m.params.ai, p3m.local_mesh);
363 this->operator()(p3m, q, w);
366 template <
typename combined_ranges>
367 void operator()(
auto &p3m, combined_ranges
const &p_q_pos_range) {
368 for (
auto zipped : p_q_pos_range) {
369 auto const p_q = boost::get<0>(zipped);
370 auto const &p_pos = boost::get<1>(zipped);
372 this->operator()(p3m, p_q, p_pos, p3m.inter_weights);
367 void operator()(
auto &p3m, combined_ranges
const &p_q_pos_range) {
…}
379template <
typename FloatType, Arch Architecture>
382 prepare_fft_mesh(
true);
387 Utils::integral_parameter<int, AssignCharge, 1, 7>(
388 p3m.params.cao, p3m, boost::combine(p_q_range, p_pos_range));
391template <
typename FloatType, Arch Architecture>
395 Utils::integral_parameter<int, AssignCharge, 1, 7>(p3m.params.cao, p3m, q,
398 Utils::integral_parameter<int, AssignCharge, 1, 7>(
399 p3m.params.cao, p3m, q, real_pos, p3m.inter_weights);
404 template <
typename combined_ranges>
406 combined_ranges
const &p_q_force_range)
const {
408 assert(cao == p3m.inter_weights.cao());
411 auto p_index = std::size_t{0ul};
413 for (
auto zipped : p_q_force_range) {
414 auto p_q = boost::get<0>(zipped);
415 auto &p_force = boost::get<1>(zipped);
417 auto const pref = p_q * force_prefac;
418 auto const w = p3m.inter_weights.template load<cao>(p_index);
422 force[0u] += w * double(p3m.rs_E_fields[0u][ind]);
423 force[1u] += w * double(p3m.rs_E_fields[1u][ind]);
424 force[2u] += w * double(p3m.rs_E_fields[2u][ind]);
427 p_force -= pref * force;
434template <
typename combined_ranges>
436 combined_ranges
const &p_q_unfolded_pos_range) {
437 auto const local_dip =
440 auto const p_q = boost::get<0>(q_pos);
441 auto const &p_unfolded_pos = boost::get<1>(q_pos);
442 return dip + p_q * p_unfolded_pos;
444 return boost::mpi::all_reduce(comm, local_dip, std::plus<>());
447template <
typename FloatType, Arch Architecture>
450 p3m.halo_comm.gather_grid(
comm_cart, p3m.rs_charge_density.data(),
455 p3m.rs_charge_density, p3m.local_mesh.dim, p3m.local_mesh.n_halo_ld,
456 p3m.local_mesh.dim - p3m.local_mesh.n_halo_ur,
463 p3m.fft->forward(charge_density_no_halos.data(),
464 p3m.ks_charge_density.data());
467template <
typename FloatType, Arch Architecture>
470 auto const &mesh_stop = p3m.fft->ks_local_size();
475 std::array<std::span<std::complex<FloatType>>, 3> ks_E_fields;
477 for (
auto d : {0u, 1u, 2u}) {
478 auto const offset = d * fft_mesh_length;
479 auto const begin = p3m.ks_E_fields_storage.begin() + offset;
480 ks_E_fields[d] = {begin, fft_mesh_length};
484 auto const wavevector =
488 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
489 auto const global_index = indices + p3m.fft->ks_local_ld_index();
493 p3m.ks_charge_density[local_index], p3m.g_force[local_index]);
495 for (
auto d : {0u, 1u, 2u}) {
497 auto const k = FloatType(p3m.d_op[d][global_index[d]]) * wavevector[d];
504 auto const rs_mesh_size_no_halo =
506 p3m.fft->backward_batch(3, p3m.ks_E_fields_storage.data(),
507 p3m.rs_E_fields_no_halo.data());
510 auto const size = p3m.local_mesh.ur_no_halo - p3m.local_mesh.ld_no_halo;
511 for (
auto d : {0u, 1u, 2u}) {
512 auto const offset = d * rs_mesh_size_no_halo;
513 auto const begin = p3m.rs_E_fields_no_halo.begin() + offset;
514 auto f = std::span<std::complex<FloatType>>(begin, rs_mesh_size_no_halo);
518 std::span<std::complex<FloatType>>(f_t), p3m.local_mesh.dim_no_halo,
519 p3m.local_mesh.n_halo_ld, p3m.local_mesh.n_halo_ur);
523 auto field_pointers = std::vector<FloatType *>{p3m.rs_E_fields[0u].data(),
524 p3m.rs_E_fields[1u].data(),
525 p3m.rs_E_fields[2u].data()};
526 p3m.halo_comm.spread_grid(
comm_cart, field_pointers, p3m.local_mesh.dim);
534template <
typename FloatType, Arch Architecture>
537 auto const &box_geo = *get_system().
box_geo;
540 if (p3m.sum_q2 > 0.) {
542 kernel_ks_charge_density();
545 auto const &mesh_stop = p3m.fft->ks_local_size();
546 auto const &k_size = p3m.fft->ks_local_size();
547 auto const half_alpha_inv_sq =
Utils::sqr(1. / 2. / p3m.params.alpha);
548 auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv();
552 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
553 auto const global_index = indices + p3m.fft->ks_local_ld_index();
554 auto const kx = p3m.d_op[0u][global_index[0u]] * wavevector[0u];
555 auto const ky = p3m.d_op[1u][global_index[1u]] * wavevector[1u];
556 auto const kz = p3m.d_op[2u][global_index[2u]] * wavevector[2u];
562 auto const node_k_space_energy =
static_cast<double>(
563 p3m.g_energy[index] *
complex_norm2(p3m.ks_charge_density[index]));
564 auto const vterm = -2. * (1. / norm_sq + half_alpha_inv_sq);
565 auto const pref = node_k_space_energy * vterm;
566 node_k_space_pressure_tensor[0u] += pref * kx * kx;
567 node_k_space_pressure_tensor[1u] += pref * kx * ky;
568 node_k_space_pressure_tensor[2u] += pref * kx * kz;
569 node_k_space_pressure_tensor[4u] += pref * ky * ky;
570 node_k_space_pressure_tensor[5u] += pref * ky * kz;
571 node_k_space_pressure_tensor[8u] += pref * kz * kz;
572 diagonal += node_k_space_energy;
576 node_k_space_pressure_tensor[0u] += diagonal;
577 node_k_space_pressure_tensor[4u] += diagonal;
578 node_k_space_pressure_tensor[8u] += diagonal;
579 node_k_space_pressure_tensor[3u] = node_k_space_pressure_tensor[1u];
580 node_k_space_pressure_tensor[6u] = node_k_space_pressure_tensor[2u];
581 node_k_space_pressure_tensor[7u] = node_k_space_pressure_tensor[5u];
584 return node_k_space_pressure_tensor * prefactor / (2. * box_geo.volume());
587template <
typename FloatType, Arch Architecture>
589 bool force_flag,
bool energy_flag,
ParticleRange const &particles) {
591 auto const &system = get_system();
592 auto const &box_geo = *system.box_geo;
594 auto const npt_flag =
597 auto constexpr npt_flag =
false;
599 if (p3m.sum_qpart == 0u) {
603 if (not has_actor_of_type<ElectrostaticLayerCorrection>(
604 system.coulomb.impl->solver)) {
608 kernel_ks_charge_density();
613 auto p_unfolded_pos_range =
617 auto const box_dipole =
620 comm_cart, boost::combine(p_q_range, p_unfolded_pos_range)))
622 auto const volume = box_geo.volume();
624 4. * std::numbers::pi / volume / (2. * p3m.params.epsilon + 1.);
629 kernel_rs_electric_field();
632 auto const force_prefac = prefactor / volume;
633 Utils::integral_parameter<int, AssignForces, 1, 7>(
634 p3m.params.cao, p3m, force_prefac,
635 boost::combine(p_q_range, p_force_range));
640 auto const dm = prefactor * pref * box_dipole.value();
641 for (
auto zipped : boost::combine(p_q_range, p_force_range)) {
642 auto p_q = boost::get<0>(zipped);
643 auto &p_force = boost::get<1>(zipped);
650 if (energy_flag or npt_flag) {
652 auto const &mesh_stop = p3m.fft->ks_local_size();
654 auto node_energy = 0.;
655 for_each_3d(mesh_start, mesh_stop, indices, [&]() {
660 node_energy +=
static_cast<double>(
661 p3m.g_energy[index] *
complex_norm2(p3m.ks_charge_density[index]));
663 node_energy /= 2. * volume;
666 boost::mpi::reduce(
comm_cart, node_energy, energy, std::plus<>(), 0);
670 energy -= p3m.sum_q2 * p3m.params.alpha * std::numbers::inv_sqrtpi;
673 energy -= p3m.square_sum_q * std::numbers::pi /
678 energy += pref * box_dipole.value().norm2();
687 if (not energy_flag) {
695template <
typename FloatType, Arch Architecture>
698 double m_mesh_density_min = -1., m_mesh_density_max = -1.;
700 bool m_tune_mesh =
false;
701 std::pair<std::optional<int>, std::optional<int>> m_tune_limits;
708 double prefactor,
int timings,
709 decltype(m_tune_limits) tune_limits)
711 m_tune_limits{std::move(tune_limits)} {}
718 auto const on_gpu = Architecture ==
Arch::GPU;
720 auto const on_gpu =
false;
722 m_logger = std::make_unique<TuningLogger>(
723 verbose and
this_node == 0, (on_gpu) ?
"CoulombP3MGPU" :
"CoulombP3M",
730 std::optional<std::string>
733 if (
auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
734 return actor->veto_r_cut(r_cut);
742 if constexpr (Architecture ==
Arch::GPU) {
746 auto const [KX, KY, KZ] = p3m.
fft->get_memory_layout();
747 auto valid_decomposition =
false;
752 return Utils::Vector3i{{std::max(lhs[0u], rhs[0u]),
753 std::max(lhs[1u], rhs[1u]),
754 std::max(lhs[2u], rhs[2u])}};
759 valid_decomposition =
760 (mesh_size_r_space[0u] == mesh_size_k_space[KX] and
761 mesh_size_r_space[1u] == mesh_size_k_space[KY] and
762 mesh_size_r_space[2u] == mesh_size_k_space[KZ] and
765 boost::mpi::broadcast(
::comm_cart, valid_decomposition, 0);
766 std::optional<std::string> retval{
"conflict with FFT domain decomposition"};
767 if (valid_decomposition) {
768 retval = std::nullopt;
773 std::tuple<double, double, double, double>
775 double r_cut_iL)
const override {
777 auto const &box_geo = *m_system.box_geo;
778 double alpha_L, rs_err, ks_err;
782 p3m.
sum_q2, 0., box_geo.length());
786 alpha_L = sqrt(log(std::numbers::sqrt2 * rs_err / p3m.
params.
accuracy)) /
797 p3m.
sum_q2, alpha_L, box_geo.length());
799 if constexpr (Architecture ==
Arch::GPU) {
803 p3m.
sum_q2, alpha_L, box_geo.length().data());
805 boost::mpi::broadcast(
comm_cart, ks_err, 0);
809 p3m.
sum_q2, alpha_L, box_geo.length());
815 auto const &box_geo = *m_system.box_geo;
816 auto const mesh_density =
817 static_cast<double>(p3m.
params.
mesh[0]) * box_geo.length_inv()[0];
821 auto const normalized_box_dim = std::cbrt(box_geo.volume());
822 auto const max_npart_per_dim = 512.;
826 auto const min_npart_per_dim = std::min(
827 max_npart_per_dim, std::cbrt(
static_cast<double>(p3m.
sum_qpart)));
828 m_mesh_density_min = min_npart_per_dim / normalized_box_dim;
829 m_mesh_density_max = max_npart_per_dim / normalized_box_dim;
830 if (m_tune_limits.first or m_tune_limits.second) {
831 auto const &box_l = box_geo.length();
832 auto const dim = std::max({box_l[0], box_l[1], box_l[2]});
833 if (m_tune_limits.first) {
834 m_mesh_density_min =
static_cast<double>(*m_tune_limits.first) / dim;
836 if (m_tune_limits.second) {
837 m_mesh_density_max =
static_cast<double>(*m_tune_limits.second) / dim;
842 m_mesh_density_min = m_mesh_density_max = mesh_density;
846 for (
auto i : {1u, 2u}) {
848 static_cast<int>(std::round(mesh_density * box_geo.length()[i]));
858 auto const &box_geo = *m_system.box_geo;
859 auto const &solver = m_system.coulomb.impl->solver;
861 auto time_best = time_sentinel;
862 auto mesh_density = m_mesh_density_min;
865 for (
auto i : {0u, 1u, 2u}) {
867 static_cast<int>(std::round(box_geo.length()[i] * mesh_density));
869 current_mesh[i] += current_mesh[i] % 2;
873 while (mesh_density <= m_mesh_density_max) {
875 trial_params.
mesh = current_mesh;
876 trial_params.cao = cao_best;
877 trial_params.cao = cao_best;
879 auto const trial_time =
880 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
881 trial_params.alpha_L, trial_params.accuracy);
883 if (trial_time >= 0.) {
886 if (has_actor_of_type<CoulombP3M>(solver)) {
887 m_r_cut_iL_max = trial_params.r_cut_iL;
890 if (trial_time < time_best) {
893 tuned_params = trial_params;
894 time_best = tuned_params.time = trial_time;
895 }
else if (trial_time > time_best + time_granularity or
896 get_n_trials() > max_n_consecutive_trials) {
903 mesh_density = current_mesh[0] / box_geo.length()[0];
912template <
typename FloatType, Arch Architecture>
914 auto &system = get_system();
915 auto const &box_geo = *system.
box_geo;
922 if (not is_tuned()) {
923 count_charged_particles();
925 throw std::runtime_error(
926 "CoulombP3M: no charged particles in the system");
930 system, p3m, prefactor, tune_timings, tune_limits);
950 auto const &box_geo = *system.
box_geo;
951 auto const &local_geo = *system.
local_geo;
952 for (
auto i = 0u; i < 3u; i++) {
955 std::stringstream msg;
957 <<
" is larger than half of box dimension " << box_geo.length()[i];
958 throw std::runtime_error(msg.str());
961 std::stringstream msg;
963 <<
" is larger than local box dimension " << local_geo.length()[i];
964 throw std::runtime_error(msg.str());
969 if ((box_geo.length()[0] != box_geo.length()[1]) or
970 (box_geo.length()[1] != box_geo.length()[2]) or
973 throw std::runtime_error(
974 "CoulombP3M: non-metallic epsilon requires cubic box");
981 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
982 throw std::runtime_error(
983 "CoulombP3M: requires periodicity (True, True, True)");
988 auto const &local_geo = *
get_system().local_geo;
991 throw std::runtime_error(
992 "CoulombP3M: requires the regular or hybrid decomposition cell system");
996 throw std::runtime_error(
997 "CoulombP3M: does not work with the hybrid decomposition cell system, "
998 "if using more than one MPI node");
1002template <
typename FloatType, Arch Architecture>
1004 auto const &box_geo = *get_system().
box_geo;
1009 sanity_checks_boxl();
1010 calc_influence_function_force();
1011 calc_influence_function_energy();
1016template <
typename FloatType, Arch Architecture>
1019 if constexpr (Architecture ==
Arch::GPU) {
1025 static_cast<void>(particles);
1028 auto &gpu = get_system().
gpu;
1040template <
typename FloatType, Arch Architecture>
1042 if constexpr (Architecture ==
Arch::GPU) {
1043 auto &system = get_system();
1044 if (has_actor_of_type<ElectrostaticLayerCorrection>(
1053template <
typename FloatType, Arch Architecture>
1055 if constexpr (Architecture ==
Arch::GPU) {
1056 auto &gpu_particle_data = get_system().
gpu;
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
Vector implementation and trait types for boost qvm interoperability.
TuningAlgorithm::Parameters get_time() override
P3MParameters & get_params() override
std::optional< std::string > fft_decomposition_veto(Utils::Vector3i const &mesh_size_r_space) const override
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
CoulombTuningAlgorithm(System::System &system, auto &input_p3m, double prefactor, int timings, decltype(m_tune_limits) tune_limits)
void setup_logger(bool verbose) override
void on_solver_change() const override
std::optional< std::string > layer_correction_veto_r_cut(double r_cut) const override
void determine_mesh_limits() override
void enable_property(std::size_t property)
std::size_t n_particles() const
std::shared_ptr< LocalBox > local_geo
void npt_add_virial_contribution(double energy)
std::shared_ptr< Propagation > propagation
std::shared_ptr< CellStructure > cell_structure
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
Cache for interpolation weights.
void store(const InterpolationWeights< cao > &w)
Push back weights for one point.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
This file contains the defaults for ESPResSo.
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
void charge_assign(elc_data const &elc, CoulombP3M &solver, combined_ranges const &p_q_pos_range)
ELC algorithm for long-range Coulomb interactions.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
auto pad_with_zeros_discard_imag(std::span< T > cropped_array, Utils::Vector3i cropped_dim, Utils::Vector3i pad_left, Utils::Vector3i pad_right)
auto extract_block(Container const &in_array, Utils::Vector3i const &dimensions, Utils::Vector3i const &start, Utils::Vector3i const &stop, Utils::MemoryOrder memory_order, Utils::MemoryOrder output_memory_order)
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void p3m_interpolate(P3MLocalMesh const &local_mesh, InterpolationWeights< cao > const &weights, Kernel kernel)
P3M grid interpolation.
auto charge_range(ParticleRange const &particles)
auto pos_range(ParticleRange const &particles)
auto force_range(ParticleRange const &particles)
auto unfolded_pos_range(ParticleRange const &particles, BoxGeometry const &box)
T product(Vector< T, N > const &v)
int get_linear_index(int a, int b, int c, const Vector3i &adim, MemoryOrder memory_order=MemoryOrder::COLUMN_MAJOR)
get the linear index from the position (a,b,c) in a 3D grid of dimensions adim.
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function .
auto get_analytic_cotangent_sum_kernel(int cao)
Exports for the NpT code.
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
static auto get_size_from_shape(Utils::Vector3i const &shape)
auto transpose(std::span< T > const &flat_array, Utils::Vector3i const &shape)
static auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift, Utils::Vector3i const &mesh, Utils::Vector3d const &mesh_i, int cao, double alpha_L_i)
Aliasing sum used by p3m_k_space_error.
std::complex< FloatType > multiply_complex_by_real(std::complex< FloatType > const &z, FloatType k)
FloatType complex_norm2(std::complex< FloatType > const &z)
static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh, int cao, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the analytic expression of the error estimate for the P3M method in (eq.
static bool is_node_grid_compatible_with_mesh(Utils::Vector3i const &node_grid, Utils::Vector3i const &mesh)
static double p3m_real_space_error(double pref, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the real space contribution to the rms error in the force (as described by Kolafa and Perra...
std::complex< FloatType > multiply_complex_by_imaginary(std::complex< FloatType > const &z, FloatType k)
static auto calc_dipole_moment(boost::mpi::communicator const &comm, combined_ranges const &p_q_unfolded_pos_range)
P3M algorithm for long-range Coulomb interaction.
void p3m_gpu_add_farfield_force(P3MGpuParams &data, GpuParticleData &gpu, double prefactor, std::size_t n_part)
The long-range part of the P3M algorithm.
void p3m_gpu_init(std::shared_ptr< P3MGpuParams > &data, int cao, Utils::Vector3i const &mesh, double alpha, Utils::Vector3d const &box_l, std::size_t n_part)
Initialize the internal data structure of the P3M GPU.
P3M electrostatics on GPU.
double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao, int npart, double sum_q2, double alpha_L, const double *box)
void operator()(auto &p3m, double force_prefac, combined_ranges const &p_q_force_range) const
Utils::Vector3i node_grid
void charge_assign(ParticleRange const &particles) override
void assign_charge(double q, Utils::Vector3d const &real_pos, bool skip_cache) override
void calc_influence_function_energy() override
Calculate the influence function optimized for the energy and the self energy correction.
void scaleby_box_l() override
void calc_influence_function_force() override
Calculate the optimal influence function of .
Utils::Vector9d long_range_pressure(ParticleRange const &particles) override
void count_charged_particles() override
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
void add_long_range_forces_gpu(ParticleRange const &particles)
p3m_send_mesh< FloatType > halo_comm
std::shared_ptr< P3MFFT< FloatType > > fft
std::size_t sum_qpart
number of charged particles.
double sum_q2
Sum of square of charges.
void sanity_checks_periodicity() const
void sanity_checks_boxl() const
Checks for correctness of the k-space cutoff.
void sanity_checks_cell_structure() const
P3MParameters const & p3m_params
std::unique_ptr< Implementation > impl
Pointer-to-implementation.
static constexpr std::size_t force
static constexpr std::size_t pos
static constexpr std::size_t q
Interpolation weights for one point.
void recalc_ld_pos(P3MParameters const ¶ms)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
int cao
charge assignment order ([0,7]).
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0), in real space.
double epsilon
epsilon of the "surrounding dielectric".
P3MParameters params
P3M base parameters.
P3MLocalMesh local_mesh
Local mesh geometry information for this MPI rank.
DEVICE_QUALIFIER constexpr pointer data() noexcept
DEVICE_QUALIFIER constexpr const_iterator cbegin() const noexcept
DEVICE_QUALIFIER constexpr const_iterator cend() const noexcept
void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos, p3m_interpolation_cache &inter_weights)
void operator()(auto &p3m, double q, InterpolationWeights< cao > const &w)
void operator()(auto &p3m, combined_ranges const &p_q_pos_range)
void operator()(auto &p3m, double q, Utils::Vector3d const &real_pos)