ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
|
The method is based on [9].
#include "config/config.hpp"
#include "magnetostatics/barnes_hut_gpu_cuda.cuh"
#include "cuda/init.hpp"
#include "cuda/utils.cuh"
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
#include <cuda.h>
#include <algorithm>
#include <cstdio>
#include <stdexcept>
Go to the source code of this file.
Functions | |
__device__ void | dds_sumReduction_BH (float *input, float *sum) |
__global__ void | initializationKernel () |
__global__ | __launch_bounds__ (THREADS1, FACTOR1) void boundingBoxKernel() |
__global__ | __launch_bounds__ (THREADS2, FACTOR2) void treeBuildingKernel() |
__global__ | __launch_bounds__ (THREADS3, FACTOR3) void summarizationKernel() |
__global__ | __launch_bounds__ (THREADS4, FACTOR4) void sortKernel() |
__global__ | __launch_bounds__ (THREADS5, FACTOR5) void forceCalculationKernel(float pf |
if (threadIdx.x==0) | |
__syncthreads () | |
if (maxdepthd<=MAXDEPTH) | |
dds_sumReduction_BH (res, &(energySum[blockIdx.x])) | |
void | initBHgpu (int blocks) |
Barnes-Hut CUDA initialization. | |
void | buildBoxBH (int blocks) |
Building Barnes-Hut spatial min/max position box. | |
void | buildTreeBH (int blocks) |
Building Barnes-Hut tree in a linear child array representation of octant cells and particles inside. | |
void | summarizeBH (int blocks) |
Calculate octant cells masses and cell index counts. Determine cells centers of mass and total dipole moments on all possible levels of the Barnes-Hut tree. | |
void | sortBH (int blocks) |
Sort particle indexes according to the Barnes-Hut tree representation. Crucial for the per-warp performance tuning of forceCalculationKernel and energyCalculationKernel. | |
void | forceBH (BHData *bh_data, float k, float *f, float *torque) |
Barnes-Hut force calculation. | |
void | energyBH (BHData *bh_data, float k, float *E) |
Barnes-Hut energy calculation. | |
void | setBHPrecision (float epssq, float itolsq) |
Barnes-Hut parameters setter. | |
void | deallocBH (BHData *bh_data) |
A deallocation of the GPU device memory. | |
void | allocBHmemCopy (int nbodies, BHData *bh_data) |
An allocation of the GPU device memory and an initialization where it is needed. | |
void | fill_bh_data (float const *r, float const *dip, BHData const *bh_data) |
Copy Barnes-Hut data to bhpara and copy particle data. | |
Variables | |
__constant__ float | epssqd [1] |
__constant__ float | itolsqd [1] |
__device__ volatile int | bottomd |
__device__ volatile int | maxdepthd |
__device__ volatile int | blkcntd |
__device__ volatile float | radiusd |
__device__ __constant__ volatile BHData | bhpara [1] |
__global__ float * | force |
__global__ float float * | torque |
float | dr [3] |
float | f [3] |
float | h [3] |
float | u [3] |
float | uc [3] |
float | N [3] |
__shared__ int | pos [MAXDEPTH *THREADS5/WARPSIZE] |
__shared__ int | node [MAXDEPTH *THREADS5/WARPSIZE] |
__shared__ float | dq [MAXDEPTH *THREADS5/WARPSIZE] |
__global__ float * | energySum |
float | sum = 0.0 |
__shared__ float | res [] = sum |
The method is based on [9].
Definition in file barnes_hut_gpu_cuda.cu.
Definition at line 91 of file barnes_hut_gpu_cuda.cu.
References __syncthreads(), bhpara, blkcntd, bottomd, BHData::child, BHData::mass, BHData::maxp, BHData::minp, BHData::nbodies, BHData::nnodes, BHData::r, radiusd, BHData::start, and THREADS1.
Definition at line 206 of file barnes_hut_gpu_cuda.cu.
References bhpara, bottomd, BHData::child, BHData::err, BHData::mass, BHData::max_lps, maxdepthd, BHData::nbodies, BHData::nnodes, pos, BHData::r, radiusd, BHData::start, and THREADS2.
Definition at line 452 of file barnes_hut_gpu_cuda.cu.
References __syncthreads(), bhpara, bottomd, BHData::child, BHData::count, BHData::mass, BHData::max_lps, BHData::nbodies, BHData::nnodes, BHData::r, THREADS3, u, and BHData::u.
Definition at line 630 of file barnes_hut_gpu_cuda.cu.
References bhpara, bottomd, BHData::child, BHData::count, BHData::max_lps, BHData::nbodies, BHData::nnodes, BHData::sort, BHData::start, and THREADS4.
__syncthreads | ( | ) |
void allocBHmemCopy | ( | int | nbodies, |
BHData * | bh_data | ||
) |
An allocation of the GPU device memory and an initialization where it is needed.
Definition at line 1177 of file barnes_hut_gpu_cuda.cu.
References BHData::blocks, BHData::child, BHData::count, cuda_get_device(), cuda_get_device_props(), cuda_safe_mem, BHData::err, FACTOR1, BHData::mass, BHData::max_lps, BHData::maxp, BHData::minp, BHData::nbodies, BHData::nnodes, BHData::r, BHData::sort, BHData::start, and BHData::u.
void buildBoxBH | ( | int | blocks | ) |
Building Barnes-Hut spatial min/max position box.
Definition at line 1040 of file barnes_hut_gpu_cuda.cu.
References block(), cuda_safe_mem, FACTOR1, KERNELCALL, and THREADS1.
void buildTreeBH | ( | int | blocks | ) |
Building Barnes-Hut tree in a linear child array representation of octant cells and particles inside.
Definition at line 1054 of file barnes_hut_gpu_cuda.cu.
References block(), cuda_safe_mem, FACTOR2, KERNELCALL, and THREADS2.
__device__ void dds_sumReduction_BH | ( | float * | input, |
float * | sum | ||
) |
Definition at line 58 of file barnes_hut_gpu_cuda.cu.
References __syncthreads(), and sum.
dds_sumReduction_BH | ( | res | , |
& | energySum[blockIdx.x] | ||
) |
void deallocBH | ( | BHData * | bh_data | ) |
A deallocation of the GPU device memory.
Definition at line 1152 of file barnes_hut_gpu_cuda.cu.
References BHData::child, BHData::count, cuda_safe_mem, BHData::err, BHData::mass, BHData::max_lps, BHData::maxp, BHData::minp, BHData::r, BHData::sort, BHData::start, and BHData::u.
Referenced by DipolarBarnesHutGpu::~DipolarBarnesHutGpu().
void energyBH | ( | BHData * | bh_data, |
float | k, | ||
float * | E | ||
) |
Barnes-Hut energy calculation.
Definition at line 1113 of file barnes_hut_gpu_cuda.cu.
References block(), BHData::blocks, cuda_safe_mem, energySum, BHData::err, FACTOR5, KERNELCALL_shared, and THREADS5.
Referenced by DipolarBarnesHutGpu::long_range_energy().
void fill_bh_data | ( | float const * | r, |
float const * | dip, | ||
BHData const * | bh_data | ||
) |
Copy Barnes-Hut data to bhpara and copy particle data.
r | device particle positions to copy |
dip | device particle dipoles to copy |
bh_data | Barnes-Hut container |
Definition at line 1258 of file barnes_hut_gpu_cuda.cu.
References bhpara, cuda_safe_mem, BHData::nbodies, BHData::r, and BHData::u.
void forceBH | ( | BHData * | bh_data, |
float | k, | ||
float * | f, | ||
float * | torque | ||
) |
Barnes-Hut force calculation.
Definition at line 1094 of file barnes_hut_gpu_cuda.cu.
References block(), BHData::blocks, cuda_safe_mem, BHData::err, f, FACTOR5, KERNELCALL, THREADS5, and torque.
Referenced by DipolarBarnesHutGpu::add_long_range_forces().
if | ( | maxdepthd <= MAXDEPTH | ) |
Definition at line 731 of file barnes_hut_gpu_cuda.cu.
References __syncthreads(), bhpara, BHData::child, dq, dr, f, force, h, MAXDEPTH, N, BHData::nbodies, BHData::nnodes, node, pos, BHData::r, BHData::sort, torque, u, BHData::u, uc, and WARPSIZE.
if | ( | threadIdx.x == 0 | ) |
Definition at line 701 of file barnes_hut_gpu_cuda.cu.
References bhpara, dq, epssqd, BHData::err, itolsqd, MAXDEPTH, maxdepthd, and radiusd.
void initBHgpu | ( | int | blocks | ) |
Barnes-Hut CUDA initialization.
Definition at line 1019 of file barnes_hut_gpu_cuda.cu.
References block(), FACTOR5, initializationKernel(), KERNELCALL, and THREADS5.
__global__ void initializationKernel | ( | ) |
Definition at line 77 of file barnes_hut_gpu_cuda.cu.
References bhpara, blkcntd, BHData::err, BHData::max_lps, and maxdepthd.
Referenced by initBHgpu().
void setBHPrecision | ( | float | epssq, |
float | itolsq | ||
) |
Barnes-Hut parameters setter.
Definition at line 1145 of file barnes_hut_gpu_cuda.cu.
References cuda_safe_mem, epssqd, and itolsqd.
Referenced by DipolarBarnesHutGpu::on_activation().
void sortBH | ( | int | blocks | ) |
Sort particle indexes according to the Barnes-Hut tree representation. Crucial for the per-warp performance tuning of forceCalculationKernel and energyCalculationKernel.
Definition at line 1082 of file barnes_hut_gpu_cuda.cu.
References block(), cuda_safe_mem, FACTOR4, KERNELCALL, and THREADS4.
void summarizeBH | ( | int | blocks | ) |
Calculate octant cells masses and cell index counts. Determine cells centers of mass and total dipole moments on all possible levels of the Barnes-Hut tree.
Definition at line 1068 of file barnes_hut_gpu_cuda.cu.
References block(), cuda_safe_mem, FACTOR3, KERNELCALL, and THREADS3.
__device__ __constant__ volatile BHData bhpara[1] |
Definition at line 53 of file barnes_hut_gpu_cuda.cu.
Referenced by __launch_bounds__(), __launch_bounds__(), __launch_bounds__(), __launch_bounds__(), fill_bh_data(), if(), if(), and initializationKernel().
__device__ volatile int blkcntd |
Definition at line 49 of file barnes_hut_gpu_cuda.cu.
Referenced by __launch_bounds__(), and initializationKernel().
__device__ volatile int bottomd |
Definition at line 49 of file barnes_hut_gpu_cuda.cu.
Referenced by __launch_bounds__(), __launch_bounds__(), __launch_bounds__(), and __launch_bounds__().
__shared__ float dq |
Definition at line 698 of file barnes_hut_gpu_cuda.cu.
Referenced by if(), if(), and steepest_descent_step().
float dr |
Definition at line 690 of file barnes_hut_gpu_cuda.cu.
Referenced by OifLocalForcesBond::calc_forces(), Shapes::Cylinder::calculate_dist(), Shapes::SimplePore::calculate_dist(), Shapes::SpheroCylinder::calculate_dist(), dipole_ia_energy(), dipole_ia_force(), FeneBond::energy(), FeneBond::force(), HarmonicBond::force(), QuarticBond::force(), ClusterAnalysis::Cluster::fractal_dimension(), and if().
__global__ float* energySum |
Definition at line 888 of file barnes_hut_gpu_cuda.cu.
Referenced by DipolarDirectSum_kernel_energy(), DipolarDirectSum_kernel_wrapper_energy(), and energyBH().
__constant__ float epssqd[1] |
Definition at line 44 of file barnes_hut_gpu_cuda.cu.
Referenced by if(), and setBHPrecision().
float f[3] |
Definition at line 690 of file barnes_hut_gpu_cuda.cu.
Referenced by OifLocalForcesBond::calc_forces(), dipolar_energy_correction(), dipolar_force_corrections(), DipolarDirectSum_kernel_force(), DipolarDirectSum_kernel_wrapper_force(), ScriptInterface::walberla::LBFluid::do_call_method(), ScriptInterface::walberla::LBFluidNode::do_call_method(), dpd_viscous_stress_local(), Mpiio::dump_info(), walberla::LBWalberlaImpl< FloatType, Architecture >::VelocityVTKWriter< OutputType >::evaluate(), walberla::LBWalberlaImpl< FloatType, Architecture >::PressureTensorVTKWriter< OutputType >::evaluate(), external_force(), Utils::for_each(), Utils::for_each(), for_each_image(), Particle::force(), Particle::force(), Particle::force_and_torque(), Particle::force_and_torque(), forceBH(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_population(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_slice_last_applied_force(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_slice_velocity(), getLinearIndex(), getLinearIndex(), if(), image_sum(), ScriptInterface::BondBreakage::initialize(), ScriptInterface::initialize(), ScriptInterface::Shapes::initialize(), walberla::interpolate_bspline_at_pos(), invoke_skip_cuda_exceptions(), Mpiio::mpiio_dump_array(), Mpiio::mpiio_read_array(), p3m_add_block(), anonymous_namespace{fft.cpp}::pack_block_permute1(), anonymous_namespace{fft.cpp}::pack_block_permute2(), pair_force(), Utils::raster(), Mpiio::read_head(), walberla::pystencils::FrictionCouplingKernel_double_precision::run(), walberla::pystencils::FrictionCouplingKernel_single_precision::run(), walberla::pystencils::FrictionCouplingKernel_double_precision::runOnCellInterval(), walberla::pystencils::FrictionCouplingKernel_single_precision::runOnCellInterval(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_node_population(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_slice_last_applied_force(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_slice_velocity(), steepest_descent_step(), 
Particle::torque(), Particle::torque(), and DipolarScafacosImpl::update_particle_forces().
__global__ float* force |
Definition at line 685 of file barnes_hut_gpu_cuda.cu.
Referenced by LB::LBWalberla::add_force_at_pos(), walberla::LBWalberlaImpl< FloatType, Architecture >::add_force_at_pos(), Constraints::Constraints< ParticleRange, Constraint >::add_forces(), add_non_bonded_pair_force(), add_non_bonded_pair_virials(), add_PoQ_force(), add_PQ_force(), calc_bonded_virial_pressure_tensor(), dpd_pair_force(), force_calc_icc(), forcesKernel(), if(), walberla::lbm::accessor::Velocity::kernel_set(), walberla::lbm::accessor::Velocity::kernel_set(), walberla::lbm::accessor::MomentumDensity::kernel_sum(), walberla::lbm::accessor::MomentumDensity::kernel_sum(), npt_add_virial_contribution(), npt_add_virial_force_contribution(), AssignForces< cao >::operator()(), CoulombMMM1D::pair_force(), DipolarP3M::pair_force(), walberla::pystencils::CollideSweepDoublePrecisionLeesEdwards::run(), walberla::pystencils::CollideSweepDoublePrecisionLeesEdwardsAVX::run(), walberla::pystencils::CollideSweepDoublePrecisionThermalized::run(), walberla::pystencils::CollideSweepDoublePrecisionThermalizedAVX::run(), walberla::pystencils::CollideSweepSinglePrecisionLeesEdwards::run(), walberla::pystencils::CollideSweepSinglePrecisionLeesEdwardsAVX::run(), walberla::pystencils::CollideSweepSinglePrecisionThermalized::run(), walberla::pystencils::CollideSweepSinglePrecisionThermalizedAVX::run(), walberla::pystencils::InitialPDFsSetterDoublePrecision::run(), walberla::pystencils::InitialPDFsSetterSinglePrecision::run(), walberla::pystencils::StreamSweepDoublePrecision::run(), walberla::pystencils::StreamSweepDoublePrecisionAVX::run(), walberla::pystencils::StreamSweepSinglePrecision::run(), walberla::pystencils::StreamSweepSinglePrecisionAVX::run(), walberla::pystencils::CollideSweepDoublePrecisionLeesEdwardsCUDA::run(), walberla::pystencils::CollideSweepDoublePrecisionThermalizedCUDA::run(), walberla::pystencils::CollideSweepSinglePrecisionLeesEdwardsCUDA::run(), walberla::pystencils::CollideSweepSinglePrecisionThermalizedCUDA::run(), 
walberla::pystencils::InitialPDFsSetterDoublePrecisionCUDA::run(), walberla::pystencils::InitialPDFsSetterSinglePrecisionCUDA::run(), walberla::pystencils::StreamSweepDoublePrecisionCUDA::run(), walberla::pystencils::StreamSweepSinglePrecisionCUDA::run(), walberla::pystencils::CollideSweepDoublePrecisionLeesEdwards::runOnCellInterval(), walberla::pystencils::CollideSweepDoublePrecisionLeesEdwardsAVX::runOnCellInterval(), walberla::pystencils::CollideSweepDoublePrecisionThermalized::runOnCellInterval(), walberla::pystencils::CollideSweepDoublePrecisionThermalizedAVX::runOnCellInterval(), walberla::pystencils::CollideSweepSinglePrecisionLeesEdwards::runOnCellInterval(), walberla::pystencils::CollideSweepSinglePrecisionLeesEdwardsAVX::runOnCellInterval(), walberla::pystencils::CollideSweepSinglePrecisionThermalized::runOnCellInterval(), walberla::pystencils::CollideSweepSinglePrecisionThermalizedAVX::runOnCellInterval(), walberla::pystencils::InitialPDFsSetterDoublePrecision::runOnCellInterval(), walberla::pystencils::InitialPDFsSetterSinglePrecision::runOnCellInterval(), walberla::pystencils::StreamSweepDoublePrecision::runOnCellInterval(), walberla::pystencils::StreamSweepDoublePrecisionAVX::runOnCellInterval(), walberla::pystencils::StreamSweepSinglePrecision::runOnCellInterval(), walberla::pystencils::StreamSweepSinglePrecisionAVX::runOnCellInterval(), walberla::pystencils::CollideSweepDoublePrecisionLeesEdwardsCUDA::runOnCellInterval(), walberla::pystencils::CollideSweepDoublePrecisionThermalizedCUDA::runOnCellInterval(), walberla::pystencils::CollideSweepSinglePrecisionLeesEdwardsCUDA::runOnCellInterval(), walberla::pystencils::CollideSweepSinglePrecisionThermalizedCUDA::runOnCellInterval(), walberla::pystencils::InitialPDFsSetterDoublePrecisionCUDA::runOnCellInterval(), walberla::pystencils::InitialPDFsSetterSinglePrecisionCUDA::runOnCellInterval(), walberla::pystencils::StreamSweepDoublePrecisionCUDA::runOnCellInterval(), 
walberla::pystencils::StreamSweepSinglePrecisionCUDA::runOnCellInterval(), serialize_and_reduce(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_node_last_applied_force(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_slice_last_applied_force(), TabulatedBond::TabulatedBond(), and TabulatedPotential::TabulatedPotential().
float h |
Definition at line 690 of file barnes_hut_gpu_cuda.cu.
Referenced by add_oif_global_forces(), OifLocalForcesBond::calc_forces(), cuda_test_device_access(), G_opt(), grid_influence_function(), and if().
__constant__ float itolsqd[1] |
Definition at line 44 of file barnes_hut_gpu_cuda.cu.
Referenced by if(), and setBHPrecision().
__device__ volatile int maxdepthd |
Definition at line 49 of file barnes_hut_gpu_cuda.cu.
Referenced by __launch_bounds__(), if(), and initializationKernel().
float N[3] |
Definition at line 690 of file barnes_hut_gpu_cuda.cu.
Referenced by Utils::Array< T, N >::at(), Utils::Array< T, N >::at(), Utils::Array< T, N >::broadcast(), Utils::Array< T, N >::cend(), Utils::Array< T, N >::end(), Utils::Array< T, N >::end(), energiesKernel(), forcesKernel(), walberla::lbm::accessor::EquilibriumDistribution::get(), walberla::lbm::accessor::EquilibriumDistribution::get(), if(), boost::serialization::load(), Utils::make_const_span(), Utils::make_span(), Utils::Array< T, N >::max_size(), Random::noise_gaussian(), Random::noise_uniform(), Utils::Vector< T, N >::normalize(), Utils::Array< T, N >::operator[](), Utils::Array< T, N >::operator[](), ScriptInterface::walberla::CheckpointFile::read(), scaleAndAddKernel(), Utils::Array< T, N >::size(), sumKernel(), Utils::Histogram< T, N, M, U >::update(), Utils::Vector< T, N >::Vector(), Utils::Vector< T, N >::Vector(), vectorReductionKernel(), and ScriptInterface::walberla::CheckpointFile::write().
__shared__ int node |
Definition at line 697 of file barnes_hut_gpu_cuda.cu.
Referenced by walberla::LBWalberlaImpl< FloatType, Architecture >::add_force_at_pos(), fft_init(), walberla::get_block_and_cell(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_density_at_pos(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_boundary_force(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::get_node_density(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_density(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::get_node_density_at_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::get_node_flux_at_boundary(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_force_to_be_applied(), walberla::EKReactionImplIndexed::get_node_is_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::get_node_is_boundary(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_is_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::get_node_is_density_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::get_node_is_flux_boundary(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_last_applied_force(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_population(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_pressure_tensor(), walberla::BoundaryHandling< T, BoundaryClass >::get_node_value_at_boundary(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_velocity(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_velocity_at_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::get_slice_density_at_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::get_slice_flux_at_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::get_slice_is_boundary(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_slice_is_boundary(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_slice_velocity(), walberla::LBWalberlaImpl< FloatType, Architecture 
>::get_slice_velocity_at_boundary(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_velocity_at_pos(), ghost_communicator(), if(), is_poststorable(), is_prefetchable(), is_recv_op(), is_send_op(), make_new_particle(), LatticeWalberla::node_in_local_domain(), LatticeWalberla::node_in_local_halo(), walberla::BoundaryHandling< T, BoundaryClass >::node_is_boundary(), walberla::field::communication::BoundaryPackInfo< GhostLayerField_T, Boundary_T >::packDataImpl(), walberla::LBWalberlaImpl< FloatType, Architecture >::remove_node_from_boundary(), walberla::BoundaryHandling< T, BoundaryClass >::remove_node_from_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::remove_node_from_density_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::remove_node_from_flux_boundary(), walberla::set_boundary_from_grid(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::set_node_density(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_node_density(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::set_node_density_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::set_node_flux_boundary(), walberla::EKReactionImplIndexed::set_node_is_boundary(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_node_last_applied_force(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_node_population(), walberla::BoundaryHandling< T, BoundaryClass >::set_node_value_at_boundary(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_node_velocity(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_node_velocity_at_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::set_slice_density_boundary(), walberla::EKinWalberlaImpl< FluxCount, FloatType >::set_slice_flux_boundary(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_slice_velocity_at_boundary(), walberla::BoundaryHandling< T, BoundaryClass >::unpack_node(), and walberla::field::communication::BoundaryPackInfo< GhostLayerField_T, Boundary_T 
>::unpackData().
__shared__ int pos |
Definition at line 696 of file barnes_hut_gpu_cuda.cu.
Referenced by __launch_bounds__(), Constraints::Constraints< ParticleRange, Constraint >::add_energy(), LB::LBWalberla::add_force_at_pos(), walberla::LBWalberlaImpl< FloatType, Architecture >::add_force_at_pos(), LB::Solver::add_force_density(), Constraints::Constraints< ParticleRange, Constraint >::add_forces(), LB::LBWalberla::add_forces_at_pos(), LB::Solver::add_forces_at_pos(), walberla::LBWalberlaImpl< FloatType, Architecture >::add_forces_at_pos(), add_oif_global_forces(), angular_momentum(), Utils::Interpolation::bspline_3d(), Utils::Interpolation::bspline_3d_accumulate(), Utils::Interpolation::bspline_3d_gradient(), Utils::Interpolation::bspline_3d_gradient_accumulate(), Constraints::ShapeBasedConstraint::calc_dist(), P3MParameters::calc_grid_pos(), calc_oif_global(), Shapes::Cylinder::calculate_dist(), Shapes::Ellipsoid::calculate_dist(), Shapes::HollowConicalFrustum::calculate_dist(), Shapes::SimplePore::calculate_dist(), Shapes::Slitpore::calculate_dist(), Shapes::SpheroCylinder::calculate_dist(), Shapes::Torus::calculate_dist(), Shapes::Wall::calculate_dist(), Shapes::Union::calculate_dist(), walberla::lbm::accessor::Interpolation::calculate_weights(), walberla::lbm::accessor::Interpolation::calculate_weights(), Utils::Mpi::cart_coords(), Utils::Mpi::cart_rank(), ClusterAnalysis::Cluster::center_of_mass_subcluster(), dipolar_energy_correction(), dipolar_force_corrections(), DipolarDirectSum_kernel_energy(), DipolarDirectSum_kernel_force(), DipolarDirectSum_kernel_wrapper_energy(), DipolarDirectSum_kernel_wrapper_force(), ScriptInterface::MPIIO::MPIIOScript::do_call_method(), ScriptInterface::Analysis::Analysis::do_call_method(), ScriptInterface::Shapes::Shape::do_call_method(), ScriptInterface::walberla::LBFluid::do_call_method(), ScriptInterface::Particles::ParticleHandle::do_construct(), draw_polymer_positions(), BondList::erase(), Observables::CylindricalDensityProfile::evaluate(), Observables::CylindricalFluxDensityProfile::evaluate(), 
Observables::CylindricalLBFluxDensityProfileAtParticlePositions::evaluate(), Observables::CylindricalVelocityProfile::evaluate(), Observables::CylindricalLBVelocityProfileAtParticlePositions::evaluate(), anonymous_namespace{fft.cpp}::find_comm_groups(), BoxGeometry::fold_position(), ClusterAnalysis::Cluster::fractal_dimension(), walberla::lbm::accessor::Interpolation::get(), walberla::lbm::accessor::Interpolation::get(), walberla::get_block(), walberla::get_block_extended(), LB::Solver::get_coupling_interpolated_velocities(), LB::Solver::get_coupling_interpolated_velocity(), LB::LBWalberla::get_density_at_pos(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_density_at_pos(), LB::Solver::get_interpolated_density(), LB::Solver::get_interpolated_velocity(), LatticeWalberla::get_local_grid_range(), LB::LBWalberla::get_velocities_at_pos(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_velocities_at_pos(), LB::LBWalberla::get_velocity_at_pos(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_velocity_at_pos(), glue_to_surface_calc_vs_pos(), gyration_tensor(), handle_collisions(), if(), in_box(), in_local_domain(), in_local_halo(), walberla::interpolate_bspline_at_pos(), Shapes::Shape::is_inside(), Shapes::Union::is_inside(), is_valid_position(), LB::ParticleCoupling::kernel(), walberla::lbm::accessor::Interpolation::kernel_get(), walberla::lbm::accessor::Interpolation::kernel_get(), walberla::lbm::accessor::Interpolation::kernel_set(), walberla::lbm::accessor::Interpolation::kernel_set(), lb_tracers_add_particle_force_to_fluid(), make_new_particle(), maybe_insert_particle(), maybe_move_particle(), moment_of_inertia_matrix(), Mpiio::mpi_mpiio_common_read(), Mpiio::mpi_mpiio_common_write(), nbhood(), Observables::CylindricalLBVelocityProfile::operator()(), Observables::LBVelocityProfile::operator()(), p3m_assign_image_charge(), p3m_calculate_interpolation_weights(), ScriptInterface::Particles::particle_checks(), 
ScriptInterface::Particles::ParticleHandle::ParticleHandle(), place_vs_and_relate_to_particle(), LatticeWalberla::pos_in_local_domain(), LatticeWalberla::pos_in_local_halo(), positions_in_halo(), Shapes::Shape::rasterize(), ReactionMethods::ReactionAlgorithm::restore_old_system_state(), serialize_and_reduce(), walberla::lbm::accessor::Interpolation::set(), walberla::lbm::accessor::Interpolation::set(), set_particle_pos(), structure_factor(), Utils::transform_coordinate_cartesian_to_cylinder(), Utils::transform_coordinate_cartesian_to_cylinder(), Utils::transform_coordinate_cylinder_to_cartesian(), Utils::transform_coordinate_cylinder_to_cartesian(), Utils::transform_vector_cartesian_to_cylinder(), BoxGeometry::unfolded_position(), Utils::Histogram< T, N, M, U >::update(), Utils::Histogram< T, N, M, U >::update(), CoulombScafacosImpl::update_particle_data(), and DipolarScafacosImpl::update_particle_data().
__device__ volatile float radiusd |
Definition at line 51 of file barnes_hut_gpu_cuda.cu.
Referenced by __launch_bounds__(), __launch_bounds__(), and if().
res[threadIdx.x] = sum |
Definition at line 1012 of file barnes_hut_gpu_cuda.cu.
Referenced by Shapes::Union::calculate_dist(), PairCriteria::PairCriterion::decide(), DipolarDirectSum_kernel_energy(), LeesEdwardsBC::distance(), ScriptInterface::ObjectMap< ManagedType, BaseType, KeyType >::do_call_method(), Observables::BondAngles::evaluate(), Observables::BondDihedrals::evaluate(), Observables::ParticleDistances::evaluate(), gather_global_collision_queue(), BondBreakage::gather_global_queue(), Accumulators::Correlator::get_correlation(), LB::Solver::get_coupling_interpolated_velocities(), LB::Solver::get_coupling_interpolated_velocity(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_density_at_pos(), Accumulators::Correlator::get_lag_times(), get_mi_vector_dds(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_velocities_at_pos(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_velocity_at_pos(), Utils::Accumulator::mean(), ParticleObservables::Map< ValueOp >::operator()(), positions_in_halo(), positions_in_halo_impl(), Utils::raster(), and Utils::Accumulator::variance().
float sum = 0.0 |
Definition at line 897 of file barnes_hut_gpu_cuda.cu.
Referenced by calc_transmit_size(), ScriptInterface::walberla::EKReaction::calculate_bulk_conversion_factor(), dds_sumReduction(), dds_sumReduction_BH(), DipolarDirectSum_kernel_energy(), and sumReduction().
__global__ float float* torque |
Definition at line 685 of file barnes_hut_gpu_cuda.cu.
Referenced by convert_torque_to_body_frame_apply_fix(), DipolarDirectSum_kernel_force(), DipolarDirectSum_kernel_wrapper_force(), ScriptInterface::Galilei::Galilei::do_call_method(), forceBH(), if(), Galilei::kill_particle_forces(), DipolarP3M::pair_force(), ScriptInterface::Particles::ParticleHandle::ParticleHandle(), and serialize_and_reduce().
float u |
Definition at line 690 of file barnes_hut_gpu_cuda.cu.
Referenced by __launch_bounds__(), add_forces_and_torques(), ComFixed::apply(), assign_charge_kernel(), assign_charges(), assign_forces(), assign_forces_kernel(), ScriptInterface::Particles::bitfield_from_flag(), LatticeWalberla::calc_grid_dimensions(), calc_sc_cache(), Shapes::HollowConicalFrustum::calculate_dist(), DipolarDirectSum::dipole_field_at_part(), LeesEdwardsBC::distance(), ScriptInterface::walberla::LBFluidNode::do_call_method(), Observables::ParticleObservable< ObsType >::evaluate(), walberla::FFT< FloatType >::FFT(), BoxGeometry::fold_position(), BoxGeometry::folded_position(), ThermalizedBond::forces(), walberla::lbm::accessor::Density::get(), walberla::lbm::accessor::Density::get(), walberla::lbm::accessor::EquilibriumDistribution::get(), walberla::lbm::accessor::EquilibriumDistribution::get(), LatticeWalberla::get_local_grid_range(), ScriptInterface::LatticeIndices::get_mapped_index(), Utils::get_n_triangle(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_node_population(), get_simulation_box(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_slice_last_applied_force(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_slice_pressure_tensor(), walberla::LBWalberlaImpl< FloatType, Architecture >::get_slice_velocity(), gyration_tensor(), if(), GatherParticleTraits< T >::join(), LB::ParticleCoupling::kernel(), walberla::lbm::accessor::Vector::kernel_add(), walberla::lbm::accessor::Vector::kernel_add(), walberla::lbm::accessor::Vector::kernel_add_interval(), walberla::lbm::accessor::Vector::kernel_add_interval(), walberla::lbm::accessor::PressureTensor::kernel_get(), walberla::lbm::accessor::Population::kernel_get(), walberla::lbm::accessor::Vector::kernel_get(), walberla::lbm::accessor::Interpolation::kernel_get(), walberla::lbm::accessor::PressureTensor::kernel_get(), walberla::lbm::accessor::Population::kernel_get(), walberla::lbm::accessor::Vector::kernel_get(), walberla::lbm::accessor::Interpolation::kernel_get(), 
walberla::lbm::accessor::Population::kernel_get_interval(), walberla::lbm::accessor::Vector::kernel_get_interval(), walberla::lbm::accessor::Population::kernel_get_interval(), walberla::lbm::accessor::Vector::kernel_get_interval(), walberla::lbm::accessor::Population::kernel_set(), walberla::lbm::accessor::Velocity::kernel_set(), walberla::lbm::accessor::Vector::kernel_set(), walberla::lbm::accessor::Interpolation::kernel_set(), walberla::lbm::accessor::Population::kernel_set(), walberla::lbm::accessor::Velocity::kernel_set(), walberla::lbm::accessor::Vector::kernel_set(), walberla::lbm::accessor::Interpolation::kernel_set(), walberla::lbm::accessor::Equilibrium::kernel_set_device(), walberla::lbm::accessor::Equilibrium::kernel_set_device(), walberla::lbm::accessor::Population::kernel_set_interval(), walberla::lbm::accessor::Vector::kernel_set_interval(), walberla::lbm::accessor::Population::kernel_set_interval(), walberla::lbm::accessor::Vector::kernel_set_interval(), walberla::lbm::accessor::MomentumDensity::kernel_sum(), walberla::lbm::accessor::MomentumDensity::kernel_sum(), LatticeWalberla::LatticeWalberla(), lb_tracers_propagate(), walberla::LBWalberlaImpl< FloatType, Architecture >::LBWalberlaImpl(), Utils::compact_vector< T >::load(), DipolarDirectSum::long_range_energy(), LocalBox::make_regular_decomposition(), map_data_parts(), RegularDecomposition::max_cutoff(), Mpiio::mpi_mpiio_common_read(), Random::noise_gaussian(), System::System::on_periodicity_change(), Observables::Energy::operator()(), Observables::Pressure::operator()(), Observables::PressureTensor::operator()(), p3m_gpu_add_farfield_force(), pack_particles(), walberla::field::communication::BoundaryPackInfo< GhostLayerField_T, Boundary_T >::packDataImpl(), Random::philox_4_uint64s(), Mpiio::read_head(), walberla::pystencils::AdvectiveFluxKernel_double_precision::run(), walberla::pystencils::AdvectiveFluxKernel_single_precision::run(), 
walberla::pystencils::AdvectiveFluxKernel_double_precision::runOnCellInterval(), walberla::pystencils::AdvectiveFluxKernel_single_precision::runOnCellInterval(), walberla::lbm::accessor::Velocity::set(), walberla::lbm::accessor::Velocity::set(), Particle::set_can_rotate_all_axes(), Particle::set_cannot_rotate_all_axes(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_collision_model(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_node_population(), BaseThermostat::set_rng_counter(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_slice_last_applied_force(), walberla::LBWalberlaImpl< FloatType, Architecture >::set_slice_velocity(), split_kernel_dip(), split_kernel_r(), split_kernel_rq(), walberla::field::communication::BoundaryPackInfo< GhostLayerField_T, Boundary_T >::unpackData(), and BoxGeometry::velocity_difference().
float uc |
Definition at line 690 of file barnes_hut_gpu_cuda.cu.
Referenced by if().