34#include <thrust/device_vector.h>
35#include <thrust/reduce.h>
43#if defined(OMPI_MPI_H) || defined(_MPI_H)
44#error CU-file includes mpi.h! This should not happen!
49 double alpha_L,
double *
he_q) {
60 if ((
nx >= mesh.x / 2) || (
ny >= mesh.y / 2) || (
nz >= mesh.z / 2))
63 const int lind = (
nx + mesh.x / 2) * mesh.y * mesh.z +
64 (
ny + mesh.y / 2) * mesh.z + (
nz + mesh.z / 2);
66 if ((
nx != 0) || (
ny != 0) || (
nz != 0)) {
68 const double n2 = sqr(
nx) + sqr(
ny) + sqr(
nz);
69 const double cs = math::analytic_cotangent_sum<cao>(
nz,
meshi.z) *
70 math::analytic_cotangent_sum<cao>(
nx,
meshi.x) *
71 math::analytic_cotangent_sum<cao>(
ny,
meshi.y);
73 const double ex2 = sqr(
ex);
88 int npart,
double sum_q2,
double alpha_L,
90 static thrust::device_vector<double>
he_q;
91 he_q.resize(
static_cast<unsigned>(mesh[0] * mesh[1] * mesh[2]));
93 dim3 grid(std::max<unsigned>(1u,
static_cast<unsigned>(mesh[0] / 8 + 1)),
94 std::max<unsigned>(1u,
static_cast<unsigned>(mesh[1] / 8 + 1)),
95 std::max<unsigned>(1u,
static_cast<unsigned>(mesh[2] / 8 + 1)));
105 meshi.x = 1. / mesh[0];
106 meshi.y = 1. / mesh[1];
107 meshi.z = 1. / mesh[2];
112 alpha_L, thrust::raw_pointer_cast(
he_q.data()));
116 alpha_L, thrust::raw_pointer_cast(
he_q.data()));
120 alpha_L, thrust::raw_pointer_cast(
he_q.data()));
124 alpha_L, thrust::raw_pointer_cast(
he_q.data()));
128 alpha_L, thrust::raw_pointer_cast(
he_q.data()));
132 alpha_L, thrust::raw_pointer_cast(
he_q.data()));
136 alpha_L, thrust::raw_pointer_cast(
he_q.data()));
142 return 2. * prefactor * sum_q2 * sqrt(
he_q_final /
npart) / (box[1] * box[2]);
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
constexpr auto round_error_prec
Precision below which a double-precision float is assumed to be zero.
static double * block(double *p, std::size_t index, std::size_t size)
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
DEVICE_QUALIFIER constexpr T int_pow(T x)
Calculate integer powers.
DEVICE_QUALIFIER auto sinc(T x)
Calculate the function $\mathrm{sinc}(x) = \sin(\pi x)/(\pi x)$.
P3M electrostatics on GPU.
double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao, int npart, double sum_q2, double alpha_L, const double *box)
__global__ void p3m_k_space_error_gpu_kernel_ik(int3 mesh, double3 meshi, double alpha_L, double *he_q)
#define KERNELCALL(_function, _grid, _block,...)