33#include <core/DataTypes.h>
35#include <blockforest/StructuredBlockForest.h>
36#include <core/debug/Debug.h>
37#include <domain_decomposition/BlockDataID.h>
38#include <domain_decomposition/IBlock.h>
39#include <field/FlagField.h>
40#include <gpu/FieldCopy.h>
41#include <gpu/GPUField.h>
42#include <gpu/GPUWrapper.h>
51#pragma clang diagnostic push
52#pragma clang diagnostic ignored "-Wunused-variable"
53#pragma clang diagnostic ignored "-Wunused-parameter"
54#elif defined(__GNUC__) or defined(__GNUG__)
55#pragma GCC diagnostic push
56#pragma GCC diagnostic ignored "-Wunused-variable"
57#pragma GCC diagnostic ignored "-Wunused-parameter"
61#define RESTRICT __restrict__
63#define RESTRICT __restrict
68#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
83 return x ==
o.x &&
y ==
o.y &&
z ==
o.z;
95 return other.cpuVectors_ == cpuVectors_;
99 for (
auto &
gpuVec : gpuVectors_) {
108 return cpuVectors_[t].empty() ?
nullptr : cpuVectors_[t].data();
113 for (
auto &
gpuVec : gpuVectors_) {
119 gpuVectors_.resize(cpuVectors_.size());
122 for (
size_t i = 0; i < cpuVectors_.size(); ++i) {
123 auto &
gpuVec = gpuVectors_[i];
124 auto &
cpuVec = cpuVectors_[i];
137 std::vector<CpuIndexVector> cpuVectors_{
NUM_TYPES};
139 using GpuIndexVector = IndexInfo *;
140 std::vector<GpuIndexVector> gpuVectors_;
144 const std::shared_ptr<StructuredBlockForest> &
blocks,
147 float order_3,
float rate_coefficient,
float stoech_0,
float stoech_1,
148 float stoech_2,
float stoech_3)
154 auto createIdxVector = [](IBlock *
const, StructuredBlockStorage *
const) {
159 "IndexField_ReactionKernelIndexed_4_single_precision_CUDA");
165 float order_2,
float order_3,
float rate_coefficient,
float stoech_0,
166 float stoech_1,
float stoech_2,
float stoech_3)
186 "Boundary condition was not generated including force calculation.")
202 template <
typename FlagField_T>
211 template <
typename FlagField_T>
281#if defined(__clang__)
282#pragma clang diagnostic pop
283#elif defined(__GNUC__) or defined(__GNUG__)
284#pragma GCC diagnostic pop
IndexInfo * pointerCpu(Type t)
auto & indexVector(Type t)
auto const & indexVector(Type t) const
IndexInfo * pointerGpu(Type t)
std::vector< IndexInfo > CpuIndexVector
bool operator==(IndexVectors const &other) const
ReactionKernelIndexed_4_single_precision_CUDA(const std::shared_ptr< StructuredBlockForest > &blocks, BlockDataID rho_0ID_, BlockDataID rho_1ID_, BlockDataID rho_2ID_, BlockDataID rho_3ID_, float order_0, float order_1, float order_2, float order_3, float rate_coefficient, float stoech_0, float stoech_1, float stoech_2, float stoech_3)
void operator()(IBlock *block, gpuStream_t stream=nullptr)
Vector3< double > getForce(IBlock *)
void outer(IBlock *block, gpuStream_t stream=nullptr)
void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
void inner(IBlock *block, gpuStream_t stream=nullptr)
std::function< void(IBlock *)> getInnerSweep(gpuStream_t stream=nullptr)
std::function< void(IBlock *)> getOuterSweep(gpuStream_t stream=nullptr)
void run(IBlock *block, gpuStream_t stream=nullptr)
ReactionKernelIndexed_4_single_precision_CUDA(BlockDataID indexVectorID_, BlockDataID rho_0ID_, BlockDataID rho_1ID_, BlockDataID rho_2ID_, BlockDataID rho_3ID_, float order_0, float order_1, float order_2, float order_3, float rate_coefficient, float stoech_0, float stoech_1, float stoech_2, float stoech_3)
std::function< void(IBlock *)> getSweep(gpuStream_t stream=nullptr)
void fillFromFlagField(const std::shared_ptr< StructuredBlockForest > &blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
\file PackInfoPdfDoublePrecision.cpp \author pystencils
bool operator==(const IndexInfo &o) const
IndexInfo(int32_t x_, int32_t y_, int32_t z_)