50#include "communication.hpp"
54#include "system/System.hpp"
65#include <boost/mpi/collectives/all_reduce.hpp>
66#include <boost/mpi/collectives/broadcast.hpp>
67#include <boost/mpi/collectives/reduce.hpp>
68#include <boost/mpi/communicator.hpp>
69#include <boost/range/combine.hpp>
70#include <boost/range/numeric.hpp>
89 for (
auto const &p :
get_system().cell_structure->local_particles()) {
113void CoulombP3M::calc_influence_function_force() {
124void CoulombP3M::calc_influence_function_energy() {
144 auto const nmx = nx + mx * mesh[0];
145 auto const fnmx = mesh_i[0] * nmx;
147 auto const nmy = ny + my * mesh[1];
148 auto const fnmy = mesh_i[1] * nmy;
150 auto const nmz = nz + mz * mesh[2];
151 auto const fnmz = mesh_i[2] * nmz;
154 auto const ex = exp(-factor1 * nm2);
157 auto const U2 = pow(sinc(fnmx) * sinc(fnmy) * sinc(fnmz), 2.0 * cao);
160 alias2 += U2 * ex * (nx * nmx + ny * nmy + nz * nmz) / nm2;
164 return std::make_pair(alias1, alias2);
178 double sum_q2,
double alpha_L,
181 return (2. * pref * sum_q2 * exp(-
Utils::sqr(r_cut_iL * alpha_L))) /
182 sqrt(
static_cast<double>(n_c_part) * r_cut_iL * box_l[0] * volume);
199 int cao,
int n_c_part,
double sum_q2,
203 auto const alpha_L_i = 1. / alpha_L;
206 for (
int nx = -mesh[0] / 2; nx < mesh[0] / 2; nx++) {
208 for (
int ny = -mesh[1] / 2; ny < mesh[1] / 2; ny++) {
211 for (
int nz = -mesh[2] / 2; nz < mesh[2] / 2; nz++) {
212 if ((nx != 0) || (ny != 0) || (nz != 0)) {
216 auto const [alias1, alias2] =
218 auto const d = alias1 -
Utils::sqr(alias2 / cs) / n2;
227 return 2. * pref * sum_q2 * sqrt(he_q /
static_cast<double>(n_c_part)) /
228 (box_l[1] * box_l[2]);
234 int npart,
double sum_q2,
double alpha_L,
239 alpha_L, box_l.
data());
241 boost::mpi::broadcast(
comm_cart, ks_err, 0);
252 auto const &box_geo = *system.box_geo;
253 auto const &local_geo = *system.local_geo;
254 auto const skin = system.cell_structure->get_verlet_skin();
261 auto const &solver = system.coulomb.impl->solver;
262 double elc_layer = 0.;
263 if (
auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
264 elc_layer = actor->elc.space_layer;
276 e.resize(ca_mesh_size);
288 int tune_timings,
bool tune_verbose,
289 bool check_complex_residuals)
290 : p3m{std::move(parameters)}, tune_timings{tune_timings},
291 tune_verbose{tune_verbose},
292 check_complex_residuals{check_complex_residuals} {
295 throw std::domain_error(
"Parameter 'timings' must be > 0");
307 auto const w = p3m_calculate_interpolation_weights<cao>(
310 inter_weights.
store(w);
313 p3m.rs_mesh[ind] += w * q;
321 p3m_calculate_interpolation_weights<cao>(real_pos, p3m.
params.
ai,
323 [q, &p3m](
int ind,
double w) { p3m.rs_mesh[ind] += w * q; });
326 template <
typename combined_ranges>
328 for (
auto zipped : p_q_pos_range) {
329 auto const p_q = boost::get<0>(zipped);
330 auto const &p_pos = boost::get<1>(zipped);
349 Utils::integral_parameter<int, AssignCharge, 1, 7>(
355 Utils::integral_parameter<int, AssignCharge, 1, 7>(
p3m.
params.
cao,
p3m, q,
356 real_pos, inter_weights);
360 Utils::integral_parameter<int, AssignCharge, 1, 7>(
p3m.
params.
cao,
p3m, q,
365 template <
typename combined_ranges>
367 combined_ranges
const &p_q_force_range)
const {
375 auto p_index = std::size_t{0ul};
377 for (
auto zipped : p_q_force_range) {
378 auto p_q = boost::get<0>(zipped);
379 auto &p_force = boost::get<1>(zipped);
381 auto const pref = p_q * force_prefac;
386 force += w * Utils::Vector3d{p3m.E_mesh[0][ind], p3m.E_mesh[1][ind],
390 p_force -= pref *
force;
397template <
typename combined_ranges>
399 combined_ranges
const &p_q_unfolded_pos_range) {
400 auto const local_dip =
403 auto const p_q = boost::get<0>(q_pos);
404 auto const &p_unfolded_pos = boost::get<1>(q_pos);
405 return dip + p_q * p_unfolded_pos;
407 return boost::mpi::all_reduce(comm, local_dip, std::plus<>());
417 using namespace detail::FFT_indexing;
437 box_geo.length_inv()[RX];
440 box_geo.length_inv()[RY];
443 box_geo.length_inv()[RZ];
447 auto const node_k_space_energy =
450 auto const vterm = -2. * (1. / sqk + half_alpha_inv_sq);
451 auto const pref = node_k_space_energy * vterm;
452 node_k_space_pressure_tensor[0] += pref * kx * kx;
453 node_k_space_pressure_tensor[1] += pref * kx * ky;
454 node_k_space_pressure_tensor[2] += pref * kx * kz;
455 node_k_space_pressure_tensor[3] += pref * ky * kx;
456 node_k_space_pressure_tensor[4] += pref * ky * ky;
457 node_k_space_pressure_tensor[5] += pref * ky * kz;
458 node_k_space_pressure_tensor[6] += pref * kz * kx;
459 node_k_space_pressure_tensor[7] += pref * kz * ky;
460 node_k_space_pressure_tensor[8] += pref * kz * kz;
461 diagonal += node_k_space_energy;
467 node_k_space_pressure_tensor[0] += diagonal;
468 node_k_space_pressure_tensor[4] += diagonal;
469 node_k_space_pressure_tensor[8] += diagonal;
472 return node_k_space_pressure_tensor *
prefactor / (2. * box_geo.volume());
478 auto const &box_geo = *system.box_geo;
480 auto const npt_flag =
483 auto constexpr npt_flag =
false;
487 if (not has_actor_of_type<ElectrostaticLayerCorrection>(
488 system.coulomb.impl->solver)) {
499 auto p_unfolded_pos_range =
505 auto const box_dipole =
508 comm_cart, boost::combine(p_q_range, p_unfolded_pos_range)))
510 auto const volume = box_geo.volume();
523 auto const rho_hat = std::complex<double>(
p3m.
rs_mesh[2 * ind + 0],
525 auto const phi_hat =
p3m.
g_force[ind] * rho_hat;
527 for (
int d = 0; d < 3; d++) {
533 box_geo.length_inv()[d_rs];
536 p3m.
E_mesh[d_rs][2 * ind + 0] = -k * phi_hat.imag();
537 p3m.
E_mesh[d_rs][2 * ind + 1] = +k * phi_hat.real();
547 for (
int d = 0; d < 3; d++) {
552 std::array<double *, 3> E_fields = {
557 auto const force_prefac =
prefactor / volume;
558 Utils::integral_parameter<int, AssignForces, 1, 7>(
560 boost::combine(p_q_range, p_force_range));
565 auto const dm =
prefactor * pref * box_dipole.value();
566 for (
auto zipped : boost::combine(p_q_range, p_force_range)) {
567 auto p_q = boost::get<0>(zipped);
568 auto &p_force = boost::get<1>(zipped);
575 if (energy_flag or npt_flag) {
576 auto node_energy = 0.;
583 node_energy /= 2. * volume;
586 boost::mpi::reduce(
comm_cart, node_energy, energy, std::plus<>(), 0);
598 energy += pref * box_dipole.value().norm2();
607 if (not energy_flag) {
618 double m_mesh_density_min = -1., m_mesh_density_max = -1.;
620 bool m_tune_mesh =
false;
627 double prefactor,
int timings)
636 auto const on_gpu = has_actor_of_type<CoulombP3MGPU>(solver);
638 auto const on_gpu =
false;
640 m_logger = std::make_unique<TuningLogger>(
641 verbose and
this_node == 0, (on_gpu) ?
"CoulombP3MGPU" :
"CoulombP3M",
648 std::optional<std::string>
651 if (
auto actor = get_actor_by_type<ElectrostaticLayerCorrection>(solver)) {
652 return actor->veto_r_cut(r_cut);
657 std::tuple<double, double, double, double>
659 double r_cut_iL)
const override {
662 double alpha_L, rs_err, ks_err;
666 p3m.
sum_q2, 0., box_geo.length());
681 p3m.
sum_q2, alpha_L, box_geo.length());
684 if (has_actor_of_type<CoulombP3MGPU>(solver)) {
686 p3m.
sum_q2, alpha_L, box_geo.length());
690 p3m.
sum_q2, alpha_L, box_geo.length());
697 auto const mesh_density =
698 static_cast<double>(p3m.
params.
mesh[0]) * box_geo.length_inv()[0];
702 auto const normalized_box_dim = std::cbrt(box_geo.volume());
703 auto const max_npart_per_dim = 512.;
707 auto const min_npart_per_dim = std::min(
708 max_npart_per_dim, std::cbrt(
static_cast<double>(p3m.
sum_qpart)));
709 m_mesh_density_min = min_npart_per_dim / normalized_box_dim;
710 m_mesh_density_max = max_npart_per_dim / normalized_box_dim;
713 m_mesh_density_min = m_mesh_density_max = mesh_density;
717 for (
int i : {1, 2}) {
719 static_cast<int>(std::round(mesh_density * box_geo.length()[i]));
733 auto mesh_density = m_mesh_density_min;
734 while (mesh_density <= m_mesh_density_max) {
737 for (
int i : {0, 1, 2}) {
738 trial_params.
mesh[i] =
739 static_cast<int>(std::round(box_geo.length()[i] * mesh_density));
741 trial_params.mesh[i] += trial_params.mesh[i] % 2;
748 auto const trial_time =
749 get_m_time(trial_params.mesh, trial_params.cao, trial_params.r_cut_iL,
750 trial_params.alpha_L, trial_params.accuracy);
752 if (trial_time >= 0.) {
755 if (has_actor_of_type<CoulombP3M>(solver)) {
759 if (trial_time < time_best) {
762 tuned_params = trial_params;
763 time_best = tuned_params.time = trial_time;
778 auto const &box_geo = *system.box_geo;
788 throw std::runtime_error(
789 "CoulombP3M: no charged particles in the system");
801 system.on_coulomb_change();
810void CoulombP3M::sanity_checks_boxl()
const {
812 auto const &box_geo = *system.box_geo;
813 auto const &local_geo = *system.local_geo;
814 for (
unsigned int i = 0
u; i < 3u; i++) {
817 std::stringstream msg;
819 <<
" is larger than half of box dimension " << box_geo.length()[i];
820 throw std::runtime_error(msg.str());
823 std::stringstream msg;
825 <<
" is larger than local box dimension " << local_geo.length()[i];
826 throw std::runtime_error(msg.str());
831 if ((box_geo.length()[0] != box_geo.length()[1]) or
832 (box_geo.length()[1] != box_geo.length()[2]) or
835 throw std::runtime_error(
836 "CoulombP3M: non-metallic epsilon requires cubic box");
841void CoulombP3M::sanity_checks_periodicity()
const {
843 if (!box_geo.periodic(0) or !box_geo.periodic(1) or !box_geo.periodic(2)) {
844 throw std::runtime_error(
845 "CoulombP3M: requires periodicity (True, True, True)");
849void CoulombP3M::sanity_checks_cell_structure()
const {
850 auto const &local_geo = *
get_system().local_geo;
853 throw std::runtime_error(
854 "CoulombP3M: requires the regular or hybrid decomposition cell system");
858 throw std::runtime_error(
859 "CoulombP3M: does not work with the hybrid decomposition cell system, "
860 "if using more than one MPI node");
864void CoulombP3M::sanity_checks_node_grid()
const {
866 if (node_grid[0] < node_grid[1] || node_grid[1] < node_grid[2]) {
867 throw std::runtime_error(
868 "CoulombP3M: node grid must be sorted, largest first");
872void CoulombP3M::scaleby_box_l() {
878 sanity_checks_boxl();
879 calc_influence_function_force();
880 calc_influence_function_energy();
@ HYBRID
Hybrid decomposition.
@ REGULAR
Regular decomposition.
Vector implementation and trait types for boost qvm interoperability.
std::optional< std::string > layer_correction_veto_r_cut(double r_cut) const override
std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const override
void setup_logger(bool verbose) override
void determine_mesh_limits() override
TuningAlgorithm::Parameters get_time() override
P3MParameters & get_params() override
void on_solver_change() const override
CoulombTuningAlgorithm(System::System &system, p3m_data_struct &input_p3m, double prefactor, int timings)
void set_prefactor(double new_prefactor)
double prefactor
Electrostatics prefactor.
std::shared_ptr< BoxGeometry > box_geo
Tuning algorithm for P3M.
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
static auto constexpr time_sentinel
Value for invalid time measurements.
static auto constexpr max_n_consecutive_trials
Maximal number of consecutive trials that don't improve runtime.
System::System & m_system
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
A stripped-down version of std::span from C++17.
static Vector< T, N > broadcast(T const &s)
Create a vector that has all entries set to one value.
Cache for interpolation weights.
void store(const InterpolationWeights< cao > &w)
Push back weights for one point.
InterpolationWeights< cao > load(std::size_t i) const
Load entry from the cache.
auto cao() const
Charge assignment order the weights are for.
void reset(int cao)
Reset the cache.
void spread_grid(Utils::Span< double * > meshes, const boost::mpi::communicator &comm, const Utils::Vector3i &dim)
void resize(const boost::mpi::communicator &comm, const P3MLocalMesh &local_mesh)
void gather_grid(Utils::Span< double * > meshes, const boost::mpi::communicator &comm, const Utils::Vector3i &dim)
auto constexpr P3M_EPSILON_METALLIC
This value indicates metallic boundary conditions.
Communicator communicator
boost::mpi::communicator comm_cart
The communicator.
int this_node
The number of this node.
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
#define P3M_BRILLOUIN
P3M: Number of Brillouin zones taken into account in the calculation of the optimal influence functio...
ELC algorithm for long-range Coulomb interactions.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
void fft_perform_forw(double *data, fft_data_struct &fft, const boost::mpi::communicator &comm)
Perform an in-place forward 3D FFT.
void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft, const boost::mpi::communicator &comm)
Perform an in-place backward 3D FFT.
int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, Utils::Vector3i const &global_mesh_dim, Utils::Vector3d const &global_mesh_off, int &ks_pnum, fft_data_struct &fft, Utils::Vector3i const &grid, boost::mpi::communicator const &comm)
Initialize everything connected to the 3D-FFT.
Routines, row decomposition, data structures and communication for the 3D-FFT.
void p3m_interpolate(P3MLocalMesh const &local_mesh, InterpolationWeights< cao > const &weights, Kernel kernel)
P3M grid interpolation.
auto charge_range(ParticleRange const &particles)
auto pos_range(ParticleRange const &particles)
auto force_range(ParticleRange const &particles)
auto unfolded_pos_range(ParticleRange const &particles, BoxGeometry const &box)
T product(Vector< T, N > const &v)
DEVICE_QUALIFIER constexpr T pi()
Ratio of diameter and circumference of a circle.
DEVICE_QUALIFIER T sinc(T d)
Calculates the sinc-function as sin(PI*x)/(PI*x).
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
DEVICE_QUALIFIER constexpr Span< T > make_span(T *p, std::size_t N)
DEVICE_QUALIFIER constexpr T sqrt_2()
Square root of 2.
auto hadamard_division(Vector< T, N > const &a, Vector< U, N > const &b)
DEVICE_QUALIFIER constexpr Span< std::add_const_t< T > > make_const_span(T *p, std::size_t N)
DEVICE_QUALIFIER constexpr T sqrt_pi_i()
One over square root of pi.
void npt_add_virial_contribution(double energy)
Exports for the NpT code.
static auto p3m_tune_aliasing_sums(int nx, int ny, int nz, Utils::Vector3i const &mesh, Utils::Vector3d const &mesh_i, int cao, double alpha_L_i)
Aliasing sum used by p3m_k_space_error.
static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh, int cao, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the analytic expression of the error estimate for the P3M method in (eq.
static double p3m_real_space_error(double pref, double r_cut_iL, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
Calculate the real space contribution to the rms error in the force (as described by Kolafa and Perra...
static auto calc_dipole_moment(boost::mpi::communicator const &comm, combined_ranges const &p_q_unfolded_pos_range)
static double p3mgpu_k_space_error(double prefactor, Utils::Vector3i const &mesh, int cao, int npart, double sum_q2, double alpha_L, Utils::Vector3d const &box_l)
P3M algorithm for long-range Coulomb interaction.
P3M electrostatics on GPU.
P3M electrostatics on GPU.
double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao, int npart, double sum_q2, double alpha_L, const double *box)
static __device__ double p3m_analytic_cotangent_sum(int n, double mesh_i)
void operator()(p3m_data_struct &p3m, double force_prefac, combined_ranges const &p_q_force_range) const
Utils::Vector3i node_grid
void charge_assign(ParticleRange const &particles)
Assign the physical charges using the tabulated charge assignment function.
bool check_complex_residuals
void assign_charge(double q, Utils::Vector3d const &real_pos, p3m_interpolation_cache &inter_weights)
Assign a single charge into the current charge grid.
double long_range_kernel(bool force_flag, bool energy_flag, ParticleRange const &particles)
Compute the k-space part of forces and energies.
CoulombP3M(P3MParameters &¶meters, double prefactor, int tune_timings, bool tune_verbose, bool check_complex_residuals)
Utils::Vector9d long_range_pressure(ParticleRange const &particles)
Compute the k-space part of the pressure tensor.
void init()
Recalculate all derived parameters.
void sanity_checks() const
void count_charged_particles()
Count the number of charged particles and calculate the sum of the squared charges.
p3m_data_struct p3m
P3M parameters.
void tune()
Tune P3M parameters to desired accuracy.
std::unique_ptr< Implementation > impl
Pointer-to-implementation.
Utils::Vector3i dim
dimension (size) of local mesh.
int size
number of local mesh points.
void recalc_ld_pos(P3MParameters const ¶ms)
Recalculate quantities derived from the mesh and box length: ld_pos (position of the left down mesh).
void calc_local_ca_mesh(P3MParameters const ¶ms, LocalBox const &local_geo, double skin, double space_layer)
Calculate properties of the local FFT mesh for the charge assignment process.
int margin[6]
number of margin mesh points.
Structure to hold P3M parameters and some dependent variables.
Utils::Vector3d cao_cut
cutoff for charge assignment.
double alpha
unscaled alpha_L for use with fast inline functions only
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
int cao
charge assignment order ([0,7]).
double accuracy
accuracy of the actual parameter set.
double alpha_L
Ewald splitting parameter (0.
int cao3
number of points unto which a single charge is interpolated, i.e.
Utils::Vector3d mesh_off
offset of the first mesh point (lower left corner) from the coordinate origin ([0,...
Utils::Vector3d ai
inverse mesh constant.
double r_cut
unscaled r_cut_iL for use with fast inline functions only
void recalc_a_ai_cao_cut(Utils::Vector3d const &box_l)
Recalculate quantities derived from the mesh and box length: a, ai and cao_cut.
bool tuning
tuning or production?
Utils::Vector3i mesh
number of mesh points per coordinate direction (>0).
double epsilon
epsilon of the "surrounding dielectric".
DEVICE_QUALIFIER constexpr pointer data() noexcept
void operator()(p3m_data_struct &p3m, double q, Utils::Vector3d const &real_pos)
void operator()(p3m_data_struct &p3m, double q, Utils::Vector3d const &real_pos, p3m_interpolation_cache &inter_weights)
void operator()(p3m_data_struct &p3m, combined_ranges const &p_q_pos_range)
fft_forw_plan plan[4]
Information for forward FFTs.
int start[3]
lower left point of local FFT mesh in global FFT mesh coordinates.
int new_mesh[3]
size of local mesh after communication, also used for actual FFT.
int new_size
size of new mesh (number of mesh points).
std::vector< double > g_energy
Energy optimised influence function (k-space)
void calc_differential_operator()
Calculate the Fourier transformed differential operator.
std::array< std::vector< int >, 3 > d_op
Spatial differential operator in k-space.
std::vector< double > g_force
Force optimised influence function (k-space)
int ks_pnum
number of permutations in k_space
P3MLocalMesh local_mesh
local mesh.
p3m_send_mesh sm
send/recv mesh sizes
double sum_q2
Sum of square of charges (only on head node).
fft_vector< double > rs_mesh
real space mesh (local) for CA/FFT.
double square_sum_q
square of sum of charges (only on head node).
int sum_qpart
number of charged particles (only on head node).
p3m_interpolation_cache inter_weights
std::array< fft_vector< double >, 3 > E_mesh
mesh (local) for the electric field.