35#include <thrust/copy.h>
36#include <thrust/device_vector.h>
45#if defined(OMPI_MPI_H) || defined(_MPI_H)
46#error CU-file includes mpi.h! This should not happen!
50 return thrust::raw_pointer_cast(
vec.data());
54 return v.size() *
sizeof(
typename SpanLike::value_type);
77 if (
vec.capacity() == 0) {
78 vec = thrust::device_vector<T>(n);
86 thrust::device_vector<T>().swap(
vec);
100#ifdef ESPRESSO_DIPOLE_FIELD_TRACKING
104#ifdef ESPRESSO_ROTATION
109#ifdef ESPRESSO_DIPOLES
112#ifdef ESPRESSO_ELECTROSTATICS
127#ifdef ESPRESSO_DIPOLE_FIELD_TRACKING
137#ifdef ESPRESSO_ROTATION
150#ifdef ESPRESSO_DIPOLE_FIELD_TRACKING
156#ifdef ESPRESSO_ROTATION
169 m_data = std::make_unique<GpuParticleData::Storage>();
173void GpuParticleData::deinitialize()
noexcept { m_data.reset(); }
176 return m_data->particle_data_device.size();
180 return m_data->particle_pos_device;
186#ifdef ESPRESSO_DIPOLE_FIELD_TRACKING
192#ifdef ESPRESSO_ROTATION
198#ifdef ESPRESSO_DIPOLES
200 return m_data->particle_dip_device;
204#ifdef ESPRESSO_ELECTROSTATICS
206 return m_data->particle_q_device;
211 return m_data->energy_device;
215 m_need_particles_update =
true;
217#ifdef ESPRESSO_DIPOLE_FIELD_TRACKING
220 m_split_particle_struct =
true;
224 m_split_particle_struct =
true;
227 enable_particle_transfer();
230bool GpuParticleData::has_compatible_device_impl()
const {
242void GpuParticleData::gpu_init_particle_comm() {
248 m_data->realloc_device_memory();
258#ifdef ESPRESSO_DIPOLE_FIELD_TRACKING
263#ifdef ESPRESSO_ROTATION
272#ifdef ESPRESSO_DIPOLE_FIELD_TRACKING
277#ifdef ESPRESSO_ROTATION
288void GpuParticleData::copy_particles_to_device(
ParticleRange const &particles,
290 if (m_communication_enabled) {
291 gather_particle_data(particles, m_data->particle_data_host,
this_node);
293 m_data->copy_particles_to_device();
294 if (m_split_particle_struct) {
295 m_data->realloc_device_memory();
296 m_data->split_particle_struct();
304 if (m_communication_enabled) {
307 m_data->copy_particle_forces_to_host();
308#ifdef ESPRESSO_ROTATION
309 m_data->copy_particle_torques_to_host();
313 auto forces_buffer = m_data->get_particle_forces_host_span();
314#ifdef ESPRESSO_ROTATION
324#ifdef ESPRESSO_DIPOLE_FIELD_TRACKING
327 if (m_communication_enabled) {
330 m_data->copy_particle_dip_fld_to_host();
342 if (m_communication_enabled) {
343 if (m_data->energy_device ==
nullptr) {
352 if (m_communication_enabled) {
361 float *r, std::size_t n) {
366 auto const &p = particles[
idx];
368 r[
idx + 0
u] = p.p[0
u];
369 r[
idx + 1u] = p.p[1u];
370 r[
idx + 2u] = p.p[2u];
373#ifdef ESPRESSO_ELECTROSTATICS
376 float *r,
float *q, std::size_t n) {
381 auto const &p = particles[
idx];
382 r[3u *
idx + 0
u] = p.p[0
u];
383 r[3u *
idx + 1u] = p.p[1u];
384 r[3u *
idx + 2u] = p.p[2u];
390 float *q, std::size_t n) {
395 auto const &p = particles[
idx];
400#ifdef ESPRESSO_DIPOLES
403 float *dip, std::size_t n) {
408 auto const &p = particles[
idx];
412 dip[
idx + 0
u] = p.dip[0
u];
413 dip[
idx + 1u] = p.dip[1u];
414 dip[
idx + 2u] = p.dip[2u];
419 auto const n_part = particle_data_device.
size();
427#ifdef ESPRESSO_ELECTROSTATICS
431 particle_q_device, n_part);
441#ifdef ESPRESSO_DIPOLES
451 auto const new_size = particle_data_device.size();
454 if (particle_pos_device !=
nullptr) {
458 cudaMalloc(&particle_pos_device, 3ul * new_size *
sizeof(
float)));
460#ifdef ESPRESSO_DIPOLES
462 if (particle_dip_device !=
nullptr) {
466 cudaMalloc(&particle_dip_device, 3ul * new_size *
sizeof(
float)));
469#ifdef ESPRESSO_ELECTROSTATICS
471 if (particle_q_device !=
nullptr) {
477 current_size = new_size;
482 if (ptr !=
nullptr) {
483 cudaFree(
reinterpret_cast<void *
>(ptr));
488#ifdef ESPRESSO_DIPOLES
491#ifdef ESPRESSO_ELECTROSTATICS
std::vector< T, CudaHostAllocator< T > > pinned_vector
void free_device_vector(thrust::device_vector< T > &vec)
void resize_or_replace(thrust::device_vector< T > &vec, std::size_t n)
Resize a thrust::device_vector.
T * raw_data_pointer(thrust::device_vector< T > &vec)
std::size_t byte_size(SpanLike const &v)
__global__ void split_kernel_rq(GpuParticleData::GpuParticle *particles, float *r, float *q, std::size_t n)
__global__ void split_kernel_dip(GpuParticleData::GpuParticle *particles, float *dip, std::size_t n)
__global__ void split_kernel_r(GpuParticleData::GpuParticle *particles, float *r, std::size_t n)
__global__ void split_kernel_q(GpuParticleData::GpuParticle *particles, float *q, std::size_t n)
Host and device containers for particle data.
thrust::device_vector< float > particle_torques_device
float * particle_q_device
thrust::device_vector< float > particle_dip_fld_device
void split_particle_struct()
std::span< float > get_particle_forces_host_span()
pinned_vector< float > particle_torques_host
thrust::device_vector< GpuParticle > particle_data_device
void copy_particle_torques_to_host()
float * particle_pos_device
thrust::device_vector< float > particle_forces_device
void copy_particle_dip_fld_to_host()
GpuParticleData::prop::bitset m_need
Which particle properties are needed by GPU methods.
void copy_particle_forces_to_host()
pinned_vector< GpuParticle > particle_data_host
std::span< float > get_particle_dip_fld_host_span()
pinned_vector< float > particle_forces_host
GpuParticleData::GpuEnergy * energy_device
float * particle_dip_device
pinned_vector< float > particle_dip_fld_host
std::span< float > get_particle_torques_host_span()
void copy_particles_to_device()
void realloc_device_memory()
float * get_particle_dip_fld_device() const
float * get_particle_torques_device() const
float * get_particle_charges_device() const
void copy_forces_to_host(ParticleRange const &particles, int this_node)
GpuEnergy * get_energy_device() const
void copy_dip_fld_to_host(ParticleRange const &particles, int this_node)
float * get_particle_dipoles_device() const
float * get_particle_forces_device() const
GpuEnergy copy_energy_to_host() const
void clear_energy_on_device()
void enable_property(std::size_t property)
std::size_t n_particles() const
float * get_particle_positions_device() const
base_type::size_type size() const
Attorney for a resource deallocator.
Wrapper for CUDA runtime exceptions.
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
int this_node
The number of this node.
void invoke_skip_cuda_exceptions(F &&f, Args &&...args)
Invoke a function and silently ignore any thrown cuda_runtime_error error.
void cuda_check_device()
Check that a device is available, that its compute capability is sufficient for ESPResSo,...
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
Energies that are retrieved from the GPU.
Subset of Particle which is copied to the GPU.
Particle properties that need to be communicated to the GPU.
static constexpr std::size_t force
static constexpr std::size_t dip_fld
static constexpr std::size_t torque
static constexpr std::size_t pos
static constexpr std::size_t dip
static constexpr std::size_t q