25#include "core/DataTypes.h"
26#include "core/logging/Logging.h"
28#include "blockforest/StructuredBlockForest.h"
29#include "core/debug/Debug.h"
30#include "domain_decomposition/BlockDataID.h"
31#include "domain_decomposition/IBlock.h"
32#include "field/FlagField.h"
33#include "gpu/FieldCopy.h"
34#include "gpu/GPUField.h"
35#include "gpu/GPUWrapper.h"
42#define RESTRICT __restrict__
47#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
65 return x ==
o.x &&
y ==
o.y &&
z ==
o.z &&
dir ==
o.dir &&
78 return other.cpuVectors_ == cpuVectors_;
82 for (
auto &
gpuVec : gpuVectors_) {
91 return cpuVectors_[t].empty() ?
nullptr : cpuVectors_[t].data();
96 for (
auto &
gpuVec : gpuVectors_) {
102 gpuVectors_.resize(cpuVectors_.size());
105 for (
size_t i = 0; i < cpuVectors_.size(); ++i) {
106 auto &
gpuVec = gpuVectors_[i];
107 auto &
cpuVec = cpuVectors_[i];
120 std::vector<CpuIndexVector> cpuVectors_{
NUM_TYPES};
122 using GpuIndexVector = IndexInfo *;
123 std::vector<GpuIndexVector> gpuVectors_;
127 const std::shared_ptr<StructuredBlockForest> &
blocks,
129 std::function<
double(
const Cell &,
130 const shared_ptr<StructuredBlockForest> &, IBlock &)>
133 auto createIdxVector = [](IBlock *
const, StructuredBlockStorage *
const) {
153 "Boundary condition was not generated including force calculation.")
169 template <
typename FlagField_T>
178 template <
typename FlagField_T>
235 std::function<
double(
const Cell &,
const shared_ptr<StructuredBlockForest> &,
std::vector< IndexInfo > CpuIndexVector
bool operator==(IndexVectors const &other) const
auto & indexVector(Type t)
IndexInfo * pointerCpu(Type t)
IndexInfo * pointerGpu(Type t)
auto const & indexVector(Type t) const
std::function< void(IBlock *)> getOuterSweep(gpuStream_t stream=nullptr)
void inner(IBlock *block, gpuStream_t stream=nullptr)
std::function< void(IBlock *)> getInnerSweep(gpuStream_t stream=nullptr)
std::function< void(IBlock *)> getSweep(gpuStream_t stream=nullptr)
void outer(IBlock *block, gpuStream_t stream=nullptr)
Vector3< double > getForce(IBlock *)
Dirichlet_double_precision_CUDA(const std::shared_ptr< StructuredBlockForest > &blocks, BlockDataID fieldID_, std::function< double(const Cell &, const shared_ptr< StructuredBlockForest > &, IBlock &)> &dirichletCallback)
void operator()(IBlock *block, gpuStream_t stream=nullptr)
void fillFromFlagField(const std::shared_ptr< StructuredBlockForest > &blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
void run(IBlock *block, gpuStream_t stream=nullptr)
void fillFromFlagField(const shared_ptr< StructuredBlockForest > &blocks, IBlock *block, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
\file PackInfoPdfDoublePrecision.cpp \author pystencils
bool operator==(const IndexInfo &o) const
IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)