ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
p3m_gpu_error_cuda.cu
Go to the documentation of this file.
1/*
2 * Copyright (C) 2015-2022 The ESPResSo project
3 *
4 * This file is part of ESPResSo.
5 *
6 * ESPResSo is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * ESPResSo is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19/** @file
20 *
21 * P3M electrostatics on GPU.
22 *
23 * The corresponding header file is p3m_gpu_error.hpp.
24 */
25
26#include "p3m_gpu_error.hpp"
27
28#include "cuda/utils.cuh"
29
31#include <utils/math/sinc.hpp>
32#include <utils/math/sqr.hpp>
33
34#include <thrust/device_vector.h>
35#include <thrust/reduce.h>
36
37#include <cuda.h>
38
39#include <algorithm>
40#include <cstddef>
41
42#if defined(OMPI_MPI_H) || defined(_MPI_H)
43#error CU-file includes mpi.h! This should not happen!
44#endif
45
46using Utils::int_pow;
47using Utils::sqr;
48
49/** @todo Extend to higher order. This comes from some 1/sin expansion in
50 * @cite hockney88a
51 */
52
53template <int cao>
54__device__ static double p3m_analytic_cotangent_sum(int n, double mesh_i) {
55 const double c = sqr(cos(Utils::pi() * mesh_i * n));
56
57 switch (cao) {
58 case 1:
59 return 1;
60 case 2:
61 return (1.0 + c * 2.0) / 3.0;
62 case 3:
63 return (2.0 + c * (11.0 + c * 2.0)) / 15.0;
64 case 4:
65 return (17.0 + c * (180.0 + c * (114.0 + c * 4.0))) / 315.0;
66 case 5:
67 return (62.0 + c * (1072.0 + c * (1452.0 + c * (247.0 + c * 2.0)))) /
68 2835.0;
69 case 6:
70 return (1382.0 +
71 c * (35396.0 +
72 c * (83021.0 + c * (34096.0 + c * (2026.0 + c * 4.0))))) /
73 155925.0;
74 case 7:
75 return (21844.0 +
76 c * (776661.0 +
77 c * (2801040.0 +
78 c * (2123860.0 +
79 c * (349500.0 + c * (8166.0 + c * 4.0)))))) /
80 6081075.0;
81 }
82
83 return 0.0;
84}
85
86template <int cao>
87__global__ void p3m_k_space_error_gpu_kernel_ik(int3 mesh, double3 meshi,
88 double alpha_L, double *he_q) {
89 const int nx =
90 -mesh.x / 2 + static_cast<int>(blockDim.x * blockIdx.x + threadIdx.x);
91 const int ny =
92 -mesh.y / 2 + static_cast<int>(blockDim.y * blockIdx.y + threadIdx.y);
93 const int nz =
94 -mesh.z / 2 + static_cast<int>(blockDim.z * blockIdx.z + threadIdx.z);
95
96 if ((nx >= mesh.x / 2) || (ny >= mesh.y / 2) || (nz >= mesh.z / 2))
97 return;
98
99 const int lind = (nx + mesh.x / 2) * mesh.y * mesh.z +
100 (ny + mesh.y / 2) * mesh.z + (nz + mesh.z / 2);
101
102 if ((nx != 0) || (ny != 0) || (nz != 0)) {
103 const double alpha_L_i = 1. / alpha_L;
104 const double n2 = sqr(nx) + sqr(ny) + sqr(nz);
105 const double cs = p3m_analytic_cotangent_sum<cao>(nz, meshi.z) *
106 p3m_analytic_cotangent_sum<cao>(nx, meshi.x) *
107 p3m_analytic_cotangent_sum<cao>(ny, meshi.y);
108 const double ex = exp(-sqr(Utils::pi() * alpha_L_i) * n2);
109 const double ex2 = sqr(ex);
110 const double U2 =
111 int_pow<2 * cao>(Utils::sinc(meshi.x * nx) * Utils::sinc(meshi.y * ny) *
112 Utils::sinc(meshi.z * nz));
113 auto const alias1 = ex2 / n2;
114 auto const d = alias1 - sqr(U2 * ex / cs) / n2;
115
116 if (d > 0 && (d / alias1 > ROUND_ERROR_PREC))
117 he_q[lind] = d;
118 } else {
119 he_q[lind] = 0;
120 }
121}
122
123double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao,
124 int npart, double sum_q2, double alpha_L,
125 const double *box) {
126 static thrust::device_vector<double> he_q;
127 he_q.resize(static_cast<unsigned>(mesh[0] * mesh[1] * mesh[2]));
128
129 dim3 grid(std::max<unsigned>(1u, static_cast<unsigned>(mesh[0] / 8 + 1)),
130 std::max<unsigned>(1u, static_cast<unsigned>(mesh[1] / 8 + 1)),
131 std::max<unsigned>(1u, static_cast<unsigned>(mesh[2] / 8 + 1)));
132
133 dim3 block(8, 8, 8);
134
135 int3 mesh3;
136 mesh3.x = mesh[0];
137 mesh3.y = mesh[1];
138 mesh3.z = mesh[2];
139
140 double3 meshi;
141 meshi.x = 1. / mesh[0];
142 meshi.y = 1. / mesh[1];
143 meshi.z = 1. / mesh[2];
144
145 switch (cao) {
146 case 1:
147 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<1>, grid, block, mesh3, meshi,
148 alpha_L, thrust::raw_pointer_cast(he_q.data()));
149 break;
150 case 2:
151 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<2>, grid, block, mesh3, meshi,
152 alpha_L, thrust::raw_pointer_cast(he_q.data()));
153 break;
154 case 3:
155 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<3>, grid, block, mesh3, meshi,
156 alpha_L, thrust::raw_pointer_cast(he_q.data()));
157 break;
158 case 4:
159 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<4>, grid, block, mesh3, meshi,
160 alpha_L, thrust::raw_pointer_cast(he_q.data()));
161 break;
162 case 5:
163 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<5>, grid, block, mesh3, meshi,
164 alpha_L, thrust::raw_pointer_cast(he_q.data()));
165 break;
166 case 6:
167 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<6>, grid, block, mesh3, meshi,
168 alpha_L, thrust::raw_pointer_cast(he_q.data()));
169 break;
170 case 7:
171 KERNELCALL(p3m_k_space_error_gpu_kernel_ik<7>, grid, block, mesh3, meshi,
172 alpha_L, thrust::raw_pointer_cast(he_q.data()));
173 break;
174 }
175
176 auto const he_q_final = thrust::reduce(he_q.begin(), he_q.end());
177
178 return 2 * prefactor * sum_q2 * sqrt(he_q_final / npart) / (box[1] * box[2]);
179}
#define ROUND_ERROR_PREC
Precision for capture of round off errors.
Definition config.hpp:66
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:174
DEVICE_QUALIFIER constexpr T pi()
Ratio of diameter and circumference of a circle.
Definition constants.hpp:36
DEVICE_QUALIFIER T sinc(T d)
Calculates the sinc-function as sin(PI*x)/(PI*x).
Definition sinc.hpp:45
DEVICE_QUALIFIER constexpr T sqr(T x)
Calculates the SQuaRe of x.
Definition sqr.hpp:26
DEVICE_QUALIFIER constexpr T int_pow(T x)
Calculate integer powers.
Definition int_pow.hpp:61
P3M electrostatics on GPU.
double p3m_k_space_error_gpu(double prefactor, const int *mesh, int cao, int npart, double sum_q2, double alpha_L, const double *box)
__global__ void p3m_k_space_error_gpu_kernel_ik(int3 mesh, double3 meshi, double alpha_L, double *he_q)
static __device__ double p3m_analytic_cotangent_sum(int n, double mesh_i)
#define KERNELCALL(_function, _grid, _block,...)
Definition utils.cuh:77