ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
GpuParticleData_cuda.cu
/*
 * Copyright (C) 2014-2022 The ESPResSo project
 *
 * This file is part of ESPResSo.
 *
 * ESPResSo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * ESPResSo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
/**
 * @file
 * CUDA kernels to convert the particles AoS to a SoA on the device.
 */
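/*
 * Overview (descriptive sketch, not taken verbatim from the original
 * comments): the GpuParticle buffer gathered on the head node is an
 * array-of-structures (AoS); the split kernels below scatter it into
 * structure-of-arrays (SoA) buffers, one flat float array per property,
 * roughly:
 *
 *   AoS: particles[i].p = {x_i, y_i, z_i}, particles[i].q = q_i
 *   SoA: r = {x_0, y_0, z_0, x_1, y_1, z_1, ...}, q = {q_0, q_1, ...}
 *
 * The SoA buffers are what the GPU methods consume.
 */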
#include "config/config.hpp"

#include "GpuParticleData.hpp"
#include "ResourceCleanup.hpp"
#include "System.hpp"

#include "ParticleRange.hpp"
#include "errorhandling.hpp"

#include "cuda/init.hpp"
#include "cuda/utils.cuh"

#include <thrust/copy.h>
#include <thrust/device_vector.h>

#include <cuda.h>

#include <cstddef>
#include <cstdio>
#include <memory>
#include <span>

#if defined(OMPI_MPI_H) || defined(_MPI_H)
#error CU-file includes mpi.h! This should not happen!
#endif

template <class T> T *raw_data_pointer(thrust::device_vector<T> &vec) {
  return thrust::raw_pointer_cast(vec.data());
}

template <class SpanLike> std::size_t byte_size(SpanLike const &v) {
  return v.size() * sizeof(typename SpanLike::value_type);
}

/**
 * @brief Resize a @c thrust::device_vector.
 *
 * Due to a bug in thrust (https://github.com/thrust/thrust/issues/939),
 * resizing or appending to default constructed containers causes undefined
 * behavior by dereferencing a null-pointer for certain types. This
 * function is used instead of the resize member function to side-step
 * the problem. This is done by replacing the existing vector by a new
 * one constructed with the desired size if resizing from capacity zero.
 * Behaves as-if @c vec.resize(n) was called.
 * This is fixed in Thrust 1.11, shipped in CUDA 11.3
 * (https://github.com/NVIDIA/thrust/commit/1c4f25d9).
 *
 * @tparam T Type contained in the vector.
 * @param vec Vector to resize.
 * @param n Desired new size of the vector.
 */
template <class T>
void resize_or_replace(thrust::device_vector<T> &vec, std::size_t n) {
  if (vec.capacity() == 0) {
    vec = thrust::device_vector<T>(n);
  } else {
    vec.resize(n);
  }
}
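/*
 * Usage sketch (illustrative only): growing a default-constructed device
 * vector through this helper avoids the Thrust null-pointer dereference
 * described above, e.g.
 *
 *   thrust::device_vector<float> buf;    // capacity() == 0
 *   resize_or_replace(buf, 3ul * n);     // replaces buf, no resize() call
 *   resize_or_replace(buf, 6ul * n);     // now falls back to buf.resize()
 */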
template <typename T> void free_device_vector(thrust::device_vector<T> &vec) {
  vec.clear();
  // clear() alone keeps the allocation; swapping with an empty temporary
  // releases the device memory immediately
  thrust::device_vector<T>().swap(vec);
}

/** @brief Host and device containers for particle data. */
class GpuParticleData::Storage {
  void free_device_memory();
  using DeviceMemory = ResourceCleanup::Attorney<&Storage::free_device_memory>;
  friend DeviceMemory;

public:
  /** @brief Which particle properties are needed by GPU methods. */
  GpuParticleData::prop::bitset m_need;
  GpuParticleData::GpuEnergy *energy_device = nullptr;
  std::size_t current_size = 0ul;
  pinned_vector<GpuParticle> particle_data_host;
  thrust::device_vector<GpuParticle> particle_data_device;
  pinned_vector<float> particle_forces_host;
  thrust::device_vector<float> particle_forces_device;
#ifdef ROTATION
  pinned_vector<float> particle_torques_host;
  thrust::device_vector<float> particle_torques_device;
#endif
  float *particle_pos_device = nullptr;
#ifdef DIPOLES
  float *particle_dip_device = nullptr;
#endif
#ifdef ELECTROSTATICS
  float *particle_q_device = nullptr;
#endif

  static auto make_shared(ResourceCleanup &cleanup_queue) {
    auto obj = std::make_shared<GpuParticleData::Storage>();
    cleanup_queue.push<DeviceMemory>(obj);
    return obj;
  }

  ~Storage() { free_device_memory(); }
  void realloc_device_memory();
  void split_particle_struct();
  void copy_particles_to_device();
  void copy_particle_forces_to_host() {
    if (not particle_forces_device.empty()) {
      thrust::copy(particle_forces_device.begin(), particle_forces_device.end(),
                   particle_forces_host.begin());
    }
  }
#ifdef ROTATION
  void copy_particle_torques_to_host() {
    if (not particle_torques_device.empty()) {
      thrust::copy(particle_torques_device.begin(),
                   particle_torques_device.end(),
                   particle_torques_host.begin());
    }
  }
#endif
  std::span<float> get_particle_forces_host_span() {
    return {particle_forces_host.data(), particle_forces_host.size()};
  }
#ifdef ROTATION
  std::span<float> get_particle_torques_host_span() {
    return {particle_torques_host.data(), particle_torques_host.size()};
  }
#endif
};
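/*
 * Note (illustrative, based on the code above): make_shared() registers the
 * storage object with the ResourceCleanup queue, so the device buffers are
 * released through the DeviceMemory attorney before the CUDA runtime shuts
 * down, roughly:
 *
 *   auto storage = GpuParticleData::Storage::make_shared(cleanup_queue);
 *   // ... use the device buffers ...
 *   // cleanup_queue later invokes Storage::free_device_memory()
 */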
std::size_t GpuParticleData::n_particles() const {
  return m_data->particle_data_device.size();
}

float *GpuParticleData::get_particle_positions_device() const {
  return m_data->particle_pos_device;
}

float *GpuParticleData::get_particle_forces_device() const {
  return raw_data_pointer(m_data->particle_forces_device);
}

#ifdef ROTATION
float *GpuParticleData::get_particle_torques_device() const {
  return raw_data_pointer(m_data->particle_torques_device);
}
#endif

#ifdef DIPOLES
float *GpuParticleData::get_particle_dipoles_device() const {
  return m_data->particle_dip_device;
}
#endif

#ifdef ELECTROSTATICS
float *GpuParticleData::get_particle_charges_device() const {
  return m_data->particle_q_device;
}
#endif

GpuParticleData::GpuEnergy *GpuParticleData::get_energy_device() const {
  return m_data->energy_device;
}

void GpuParticleData::enable_property(std::size_t property) {
  m_need_particles_update = true;
  m_data->m_need[property] = true;
  if (property != prop::force and property != prop::torque) {
    m_split_particle_struct = true;
  }
  enable_particle_transfer();
}

bool GpuParticleData::has_compatible_device_impl() const {
  auto result = false;
  invoke_skip_cuda_exceptions([&result]() {
    cuda_check_device();
    result = true;
  });
  return result;
}

/**
 * @brief Setup and call particle reallocation from the host.
 */
void GpuParticleData::gpu_init_particle_comm() {
  try {
    cuda_check_device();
  } catch (cuda_runtime_error const &err) {
    throw cuda_fatal_error(err.what());
  }
  m_data->realloc_device_memory();
}

void GpuParticleData::Storage::copy_particles_to_device() {
  // resize buffers
  auto const n_part = particle_data_host.size();
  resize_or_replace(particle_data_device, n_part);
  particle_forces_host.resize(3ul * n_part);
  resize_or_replace(particle_forces_device, 3ul * n_part);
#ifdef ROTATION
  particle_torques_host.resize(3ul * n_part);
  resize_or_replace(particle_torques_device, 3ul * n_part);
#endif

  // zero out device memory for forces and torques
  cudaMemsetAsync(raw_data_pointer(particle_forces_device), 0x0,
                  byte_size(particle_forces_device), stream[0]);
#ifdef ROTATION
  cudaMemsetAsync(raw_data_pointer(particle_torques_device), 0x0,
                  byte_size(particle_torques_device), stream[0]);
#endif

  // copy particles to device
  cudaMemcpyAsync(raw_data_pointer(particle_data_device),
                  particle_data_host.data(), byte_size(particle_data_host),
                  cudaMemcpyHostToDevice, stream[0]);
}

void GpuParticleData::copy_particles_to_device(ParticleRange const &particles,
                                               int this_node) {
  if (m_communication_enabled) {
    gather_particle_data(particles, m_data->particle_data_host, this_node);
    if (this_node == 0) {
      m_data->copy_particles_to_device();
      if (m_split_particle_struct) {
        m_data->realloc_device_memory();
        m_data->split_particle_struct();
      }
    }
  }
}

void GpuParticleData::copy_forces_to_host(ParticleRange const &particles,
                                          int this_node) {
  if (m_communication_enabled) {
    // copy results from device memory to host memory
    if (this_node == 0) {
      m_data->copy_particle_forces_to_host();
#ifdef ROTATION
      m_data->copy_particle_torques_to_host();
#endif
    }

    auto forces_buffer = m_data->get_particle_forces_host_span();
#ifdef ROTATION
    auto torques_buffer = m_data->get_particle_torques_host_span();
#else
    auto torques_buffer = std::span<float>();
#endif

    // add forces and torques to the particles
    particles_scatter_forces(particles, forces_buffer, torques_buffer);
  }
}
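/*
 * Note (illustrative summary of the functions above): in a time step,
 * gather_particle_data() collects the AoS particle buffer on the head node,
 * copy_particles_to_device() uploads it asynchronously on stream[0] and
 * split_particle_struct() produces the SoA arrays; after the GPU kernels
 * have run, copy_forces_to_host() downloads forces/torques and
 * particles_scatter_forces() adds them back to the particles.
 */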
void GpuParticleData::clear_energy_on_device() {
  if (m_communication_enabled) {
    if (m_data->energy_device == nullptr) {
      cuda_safe_mem(cudaMalloc(&m_data->energy_device, sizeof(GpuEnergy)));
    }
    cuda_safe_mem(cudaMemset(m_data->energy_device, 0, sizeof(GpuEnergy)));
  }
}

GpuParticleData::GpuEnergy GpuParticleData::copy_energy_to_host() const {
  GpuEnergy energy_host{};
  if (m_communication_enabled) {
    cuda_safe_mem(cudaMemcpy(&energy_host, m_data->energy_device,
                             sizeof(GpuEnergy), cudaMemcpyDeviceToHost));
  }
  return energy_host;
}
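/*
 * Usage sketch (assumption, the enclosing driver code is not shown in this
 * file): a GPU energy calculation typically clears the device-side
 * accumulator, launches kernels that add to it, then copies it back:
 *
 *   gpu_particle_data.clear_energy_on_device();
 *   // ... kernels accumulate into get_energy_device() ...
 *   auto const energy = gpu_particle_data.copy_energy_to_host();
 */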
// Position only
__global__ void split_kernel_r(GpuParticleData::GpuParticle *particles,
                               float *r, std::size_t n) {
  auto idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= n)
    return;

  auto const &p = particles[idx];
  idx *= 3u;
  r[idx + 0u] = p.p[0u];
  r[idx + 1u] = p.p[1u];
  r[idx + 2u] = p.p[2u];
}

#ifdef ELECTROSTATICS
// Position and charge
__global__ void split_kernel_rq(GpuParticleData::GpuParticle *particles,
                                float *r, float *q, std::size_t n) {
  auto const idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= n)
    return;

  auto const &p = particles[idx];
  r[3u * idx + 0u] = p.p[0u];
  r[3u * idx + 1u] = p.p[1u];
  r[3u * idx + 2u] = p.p[2u];
  q[idx] = p.q;
}

// Charge only
__global__ void split_kernel_q(GpuParticleData::GpuParticle *particles,
                               float *q, std::size_t n) {
  auto const idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= n)
    return;

  auto const &p = particles[idx];
  q[idx] = p.q;
}
#endif

#ifdef DIPOLES
// Dipole moment
__global__ void split_kernel_dip(GpuParticleData::GpuParticle *particles,
                                 float *dip, std::size_t n) {
  auto idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= n)
    return;

  auto const &p = particles[idx];

  idx *= 3u;

  dip[idx + 0u] = p.dip[0u];
  dip[idx + 1u] = p.dip[1u];
  dip[idx + 2u] = p.dip[2u];
}
#endif

void GpuParticleData::Storage::split_particle_struct() {
  auto const n_part = particle_data_device.size();
  if (n_part == 0ul)
    return;

  dim3 const threadsPerBlock{512u, 1u, 1u};
  dim3 const numBlocks{static_cast<unsigned>(n_part / threadsPerBlock.x + 1ul)};

#ifdef ELECTROSTATICS
  if (m_need[prop::q] and m_need[prop::pos]) {
    split_kernel_rq<<<numBlocks, threadsPerBlock, 0, nullptr>>>(
        raw_data_pointer(particle_data_device), particle_pos_device,
        particle_q_device, n_part);
  } else if (m_need[prop::q]) {
    split_kernel_q<<<numBlocks, threadsPerBlock, 0, nullptr>>>(
        raw_data_pointer(particle_data_device), particle_q_device, n_part);
  } else
#endif
      if (m_need[prop::pos]) {
    split_kernel_r<<<numBlocks, threadsPerBlock, 0, nullptr>>>(
        raw_data_pointer(particle_data_device), particle_pos_device, n_part);
  }
#ifdef DIPOLES
  if (m_need[prop::dip]) {
    split_kernel_dip<<<numBlocks, threadsPerBlock, 0, nullptr>>>(
        raw_data_pointer(particle_data_device), particle_dip_device, n_part);
  }
#endif
}
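/*
 * Launch configuration note: blocks of 512 threads are used and the grid
 * size n_part / 512 + 1 rounds up, so at least n_part threads are launched;
 * surplus threads return early through the idx >= n guard in each kernel.
 */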
void GpuParticleData::Storage::realloc_device_memory() {
  auto const new_size = particle_data_device.size();
  auto const resize_needed = new_size != current_size;
  if (m_need[prop::pos] and (resize_needed or particle_pos_device == nullptr)) {
    if (particle_pos_device != nullptr) {
      cuda_safe_mem(cudaFree(particle_pos_device));
    }
    cuda_safe_mem(
        cudaMalloc(&particle_pos_device, 3ul * new_size * sizeof(float)));
  }
#ifdef DIPOLES
  if (m_need[prop::dip] and (resize_needed or particle_dip_device == nullptr)) {
    if (particle_dip_device != nullptr) {
      cuda_safe_mem(cudaFree(particle_dip_device));
    }
    cuda_safe_mem(
        cudaMalloc(&particle_dip_device, 3ul * new_size * sizeof(float)));
  }
#endif
#ifdef ELECTROSTATICS
  if (m_need[prop::q] and (resize_needed or particle_q_device == nullptr)) {
    if (particle_q_device != nullptr) {
      cuda_safe_mem(cudaFree(particle_q_device));
    }
    cuda_safe_mem(cudaMalloc(&particle_q_device, new_size * sizeof(float)));
  }
#endif
  current_size = new_size;
}

void GpuParticleData::Storage::free_device_memory() {
  auto const free_device_pointer = [](auto *&ptr) {
    if (ptr != nullptr) {
      cuda_safe_mem(cudaFree(reinterpret_cast<void *>(ptr)));
      ptr = nullptr;
    }
  };
  free_device_vector(particle_data_device);
  free_device_vector(particle_forces_device);
#ifdef ROTATION
  free_device_vector(particle_torques_device);
#endif
  free_device_pointer(particle_pos_device);
#ifdef DIPOLES
  free_device_pointer(particle_dip_device);
#endif
#ifdef ELECTROSTATICS
  free_device_pointer(particle_q_device);
#endif
  free_device_pointer(energy_device);
}