ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
barnes_hut_gpu_cuda.cuh
Go to the documentation of this file.
1/*
2 * Copyright (C) 2016-2022 The ESPResSo project
3 * Copyright (C) 2012 Alexander (Polyakov) Peletskyi
4 *
5 * This file is part of ESPResSo.
6 *
7 * ESPResSo is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * ESPResSo is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21#pragma once
22
23#include "config/config.hpp"
24
25#ifdef DIPOLAR_BARNES_HUT
26
27struct BHData {
28 /// CUDA blocks
29 int blocks;
30 /// Each node corresponds to a split of the cubic box in 3D space into equal
31 /// cubic boxes; hence, 8 octant nodes per particle is the theoretical octree
32 /// limit. The maximal number of octree nodes is "nnodesd" and the number of
33 /// particles is "nbodiesd".
34 int nbodies;
35 int nnodes;
36 /// particle positions on the device:
37 float *r;
38 /// particle dipole moments on the device:
39 float *u;
40 /// Not a real mass. Just a node weight coefficient.
41 float *mass;
42 /// min positions' coordinates of the Barnes-Hut box.
43 float *minp;
44 /// max positions' coordinates of the Barnes-Hut box.
45 float *maxp;
46 /// Error report.
47 int *err;
48 /// Indices of particles sorted according to the tree linear representation.
49 int *sort;
50 /// The tree linear representation.
51 int *child;
52 /// Supplementary array: counting of tree nodes (division octant
53 /// cells/particles inside).
54 int *count;
55 /// Start indices for the per-cell sorting.
56 int *start;
57 /// Traces the maximum number of loops for the threads' synchronization.
58 int *max_lps;
59};
60
61/// @name Barnes-Hut thread count for different kernels.
62/// @{
63#define THREADS1 512
64#define THREADS2 1024
65#define THREADS3 1024
66#define THREADS4 1024
67#define THREADS5 256
68/// @}
69
70/// @name Barnes-Hut block factor for different kernels.
71/// block count = factor * number of blocks
72/// @{
73#define FACTOR1 2
74#define FACTOR2 1
75#define FACTOR3 1 /* must all be resident at the same time */
76#define FACTOR4 1 /* must all be resident at the same time */
77#define FACTOR5 4
78/// @}
79
80/// Barnes-Hut warp size.
81#define WARPSIZE 32
82/// Maximal depth of the Barnes-Hut tree branching.
83#define MAXDEPTH 32
84
85/// Barnes-Hut parameters setter.
86void setBHPrecision(float epssq, float itolsq);
87
88/// An allocation of the GPU device memory and an initialization where it is
89/// needed.
90void allocBHmemCopy(int nbodies, BHData *bh_data);
91
92/// A deallocation of the GPU device memory.
93void deallocBH(BHData *bh_data);
94
95/// Copy Barnes-Hut data to @ref bhpara and copy particle data.
96/// @param r device particle positions to copy
97/// @param dip device particle dipoles to copy
98/// @param bh_data Barnes-Hut container
99void fill_bh_data(float const *r, float const *dip, BHData const *bh_data);
100
101/// Barnes-Hut CUDA initialization.
102void initBHgpu(int blocks);
103
104/// Building Barnes-Hut spatial min/max position box
105void buildBoxBH(int blocks);
106
107/// Building Barnes-Hut tree in a linear child array representation
108/// of octant cells and particles inside.
109void buildTreeBH(int blocks);
110
111/// Calculate octant cells masses and cell index counts.
112/// Determine cells centers of mass and total dipole moments
113/// on all possible levels of the Barnes-Hut tree.
114void summarizeBH(int blocks);
115
116/// Sort particle indexes according to the Barnes-Hut tree representation.
117/// Crucial for the per-warp performance tuning of @c forceCalculationKernel
118/// and @c energyCalculationKernel.
119void sortBH(int blocks);
120
121/// Barnes-Hut force calculation.
122void forceBH(BHData *bh_data, float k, float *f, float *torque);
123
124/// Barnes-Hut energy calculation.
125void energyBH(BHData *bh_data, float k, float *E);
126
127#endif // DIPOLAR_BARNES_HUT
float f[3]
__global__ float float * torque
void buildBoxBH(int blocks)
Building Barnes-Hut spatial min/max position box.
void setBHPrecision(float epssq, float itolsq)
Barnes-Hut parameters setter.
void energyBH(BHData *bh_data, float k, float *E)
Barnes-Hut energy calculation.
void forceBH(BHData *bh_data, float k, float *f, float *torque)
Barnes-Hut force calculation.
void initBHgpu(int blocks)
Barnes-Hut CUDA initialization.
void summarizeBH(int blocks)
Calculate octant cells masses and cell index counts. Determine cells centers of mass and total dipole moments on all possible levels of the Barnes-Hut tree.
void deallocBH(BHData *bh_data)
A deallocation of the GPU device memory.
void allocBHmemCopy(int nbodies, BHData *bh_data)
An allocation of the GPU device memory and an initialization where it is needed.
void sortBH(int blocks)
Sort particle indexes according to the Barnes-Hut tree representation. Crucial for the per-warp performance tuning of forceCalculationKernel and energyCalculationKernel.
void fill_bh_data(float const *r, float const *dip, BHData const *bh_data)
Copy Barnes-Hut data to bhpara and copy particle data.
void buildTreeBH(int blocks)
Building Barnes-Hut tree in a linear child array representation of octant cells and particles inside.
This file contains the defaults for ESPResSo.
float * r
particle positions on the device:
float * mass
Not a real mass. Just a node weight coefficient.
int nbodies
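each node corresponds to a split of the cubic box in 3D space to equal cubic boxes hence, 8 octant nodes per particle is a theoretical octree limit: a maximal number of octree nodes is "nnodesd" and a number of particles "nbodiesd" respectively.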
int * err
Error report.
int * child
The tree linear representation.
float * maxp
max positions' coordinates of the Barnes-Hut box.
int * sort
Indices of particles sorted according to the tree linear representation.
int * count
Supplementary array: a tree nodes (division octant cells/particles inside) counting.
float * u
particle dipole moments on the device:
float * minp
min positions' coordinates of the Barnes-Hut box.
int * max_lps
trace the max loops for a threads' sync
int blocks
CUDA blocks.
int * start
Start indices for the per-cell sorting.