49 double alpha_L,
double *he_q) {
54 -mesh.x / 2 +
static_cast<int>(blockDim.x * blockIdx.x + threadIdx.x);
56 -mesh.y / 2 +
static_cast<int>(blockDim.y * blockIdx.y + threadIdx.y);
58 -mesh.z / 2 +
static_cast<int>(blockDim.z * blockIdx.z + threadIdx.z);
60 if ((nx >= mesh.x / 2) || (ny >= mesh.y / 2) || (nz >= mesh.z / 2))
63 const int lind = (nx + mesh.x / 2) * mesh.y * mesh.z +
64 (ny + mesh.y / 2) * mesh.z + (nz + mesh.z / 2);
66 if ((nx != 0) || (ny != 0) || (nz != 0)) {
67 const double alpha_L_i = 1. / alpha_L;
68 const double n2 = sqr(nx) + sqr(ny) + sqr(nz);
69 const double cs = math::analytic_cotangent_sum<cao>(nz, meshi.z) *
70 math::analytic_cotangent_sum<cao>(nx, meshi.x) *
71 math::analytic_cotangent_sum<cao>(ny, meshi.y);
72 const double ex = exp(-sqr(std::numbers::pi * alpha_L_i) * n2);
73 const double ex2 = sqr(ex);
77 auto const alias1 = ex2 / n2;
78 auto const d = alias1 - sqr(U2 * ex / cs) / n2;
88 int npart,
double sum_q2,
double alpha_L,
90 static thrust::device_vector<double> he_q;
91 he_q.resize(
static_cast<unsigned>(mesh[0] * mesh[1] * mesh[2]));
93 dim3 grid(std::max<unsigned>(1u,
static_cast<unsigned>(mesh[0] / 8 + 1)),
94 std::max<unsigned>(1u,
static_cast<unsigned>(mesh[1] / 8 + 1)),
95 std::max<unsigned>(1u,
static_cast<unsigned>(mesh[2] / 8 + 1)));
105 meshi.x = 1. / mesh[0];
106 meshi.y = 1. / mesh[1];
107 meshi.z = 1. / mesh[2];
111 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<1>, grid,
block, mesh3, meshi,
112 alpha_L, thrust::raw_pointer_cast(he_q.data()));
115 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<2>, grid,
block, mesh3, meshi,
116 alpha_L, thrust::raw_pointer_cast(he_q.data()));
119 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<3>, grid,
block, mesh3, meshi,
120 alpha_L, thrust::raw_pointer_cast(he_q.data()));
123 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<4>, grid,
block, mesh3, meshi,
124 alpha_L, thrust::raw_pointer_cast(he_q.data()));
127 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<5>, grid,
block, mesh3, meshi,
128 alpha_L, thrust::raw_pointer_cast(he_q.data()));
131 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<6>, grid,
block, mesh3, meshi,
132 alpha_L, thrust::raw_pointer_cast(he_q.data()));
135 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<7>, grid,
block, mesh3, meshi,
136 alpha_L, thrust::raw_pointer_cast(he_q.data()));
140 auto const he_q_final = thrust::reduce(he_q.begin(), he_q.end());
142 return 2. * prefactor * sum_q2 * sqrt(he_q_final / npart) / (box[1] * box[2]);
double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao, int npart, double sum_q2, double alpha_L, const double *box)