ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
Dirichlet_single_precision_CUDA.h
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \file Dirichlet_single_precision_CUDA.h
17//! \author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.4+1.ge851f4e, lbmpy v1.4+1.ge9efe34,
21// sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit
22// 007e77e077ad9d22b5eed6f3d3118240993e553c
23
24#pragma once
25#include "core/DataTypes.h"
26#include "core/logging/Logging.h"
27
28#include "blockforest/StructuredBlockForest.h"
29#include "core/debug/Debug.h"
30#include "domain_decomposition/BlockDataID.h"
31#include "domain_decomposition/IBlock.h"
32#include "field/FlagField.h"
33#include "gpu/FieldCopy.h"
34#include "gpu/GPUField.h"
35#include "gpu/GPUWrapper.h"
36
37#include <functional>
38#include <memory>
39#include <vector>
40
41#ifdef __GNUC__
42#define RESTRICT __restrict__
43#else
44#define RESTRICT
45#endif
46
47#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
48using walberla::half;
49#endif
50
51namespace walberla {
52namespace pystencils {
53
55public:
56 struct IndexInfo {
57 int32_t x;
58 int32_t y;
59 int32_t z;
60 int32_t dir;
61 float value;
62 IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
63 : x(x_), y(y_), z(z_), dir(dir_), value() {}
64 bool operator==(const IndexInfo &o) const {
65 return x == o.x && y == o.y && z == o.z && dir == o.dir &&
66 floatIsEqual(value, o.value);
67 }
68 };
69
71 public:
72 using CpuIndexVector = std::vector<IndexInfo>;
73
74 enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
75
76 IndexVectors() = default;
77 bool operator==(IndexVectors const &other) const {
78 return other.cpuVectors_ == cpuVectors_;
79 }
80
82 for (auto &gpuVec : gpuVectors_) {
83 if (gpuVec) {
84 WALBERLA_GPU_CHECK(gpuFree(gpuVec));
85 }
86 }
87 }
88 auto &indexVector(Type t) { return cpuVectors_[t]; }
89 auto const &indexVector(Type t) const { return cpuVectors_[t]; }
91 return cpuVectors_[t].empty() ? nullptr : cpuVectors_[t].data();
92 }
93
94 IndexInfo *pointerGpu(Type t) { return gpuVectors_[t]; }
95 void syncGPU() {
96 for (auto &gpuVec : gpuVectors_) {
97 if (gpuVec) {
98 WALBERLA_GPU_CHECK(gpuFree(gpuVec));
99 gpuVec = nullptr;
100 }
101 }
102 gpuVectors_.resize(cpuVectors_.size());
103
104 WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
105 for (size_t i = 0; i < cpuVectors_.size(); ++i) {
106 auto &gpuVec = gpuVectors_[i];
107 auto &cpuVec = cpuVectors_[i];
108 if (cpuVec.empty()) {
109 continue;
110 }
111 WALBERLA_GPU_CHECK(
112 gpuMalloc(&gpuVec, sizeof(IndexInfo) * cpuVec.size()));
113 WALBERLA_GPU_CHECK(gpuMemcpy(gpuVec, cpuVec.data(),
114 sizeof(IndexInfo) * cpuVec.size(),
115 gpuMemcpyHostToDevice));
116 }
117 }
118
119 private:
120 std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
121
122 using GpuIndexVector = IndexInfo *;
123 std::vector<GpuIndexVector> gpuVectors_;
124 };
125
127 const std::shared_ptr<StructuredBlockForest> &blocks,
128 BlockDataID fieldID_,
129 std::function<float(const Cell &,
130 const shared_ptr<StructuredBlockForest> &, IBlock &)>
131 &dirichletCallback)
132 : elementInitaliser(dirichletCallback), fieldID(fieldID_) {
133 auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
134 return new IndexVectors();
135 };
136 indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
137 createIdxVector, "IndexField_Dirichlet_single_precision_CUDA");
138 }
139
140 void run(IBlock *block, gpuStream_t stream = nullptr);
141
142 void operator()(IBlock *block, gpuStream_t stream = nullptr) {
143 run(block, stream);
144 }
145
146 void inner(IBlock *block, gpuStream_t stream = nullptr);
147
148 void outer(IBlock *block, gpuStream_t stream = nullptr);
149
150 Vector3<float> getForce(IBlock * /*block*/) {
151
152 WALBERLA_ABORT(
153 "Boundary condition was not generated including force calculation.")
154 return Vector3<float>(float_c(0.0));
155 }
156
157 std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {
158 return [this, stream](IBlock *b) { this->run(b, stream); };
159 }
160
161 std::function<void(IBlock *)> getInnerSweep(gpuStream_t stream = nullptr) {
162 return [this, stream](IBlock *b) { this->inner(b, stream); };
163 }
164
165 std::function<void(IBlock *)> getOuterSweep(gpuStream_t stream = nullptr) {
166 return [this, stream](IBlock *b) { this->outer(b, stream); };
167 }
168
169 template <typename FlagField_T>
170 void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
171 ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
172 FlagUID domainFlagUID) {
173 for (auto &block : *blocks)
174 fillFromFlagField<FlagField_T>(blocks, &block, flagFieldID,
175 boundaryFlagUID, domainFlagUID);
176 }
177
178 template <typename FlagField_T>
179 void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
180 IBlock *block, ConstBlockDataID flagFieldID,
181 FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
182 auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
183 auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
184 auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
185 auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
186
187 auto *flagField = block->getData<FlagField_T>(flagFieldID);
188
189 if (!(flagField->flagExists(boundaryFlagUID) &&
190 flagField->flagExists(domainFlagUID)))
191 return;
192
193 auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
194 auto domainFlag = flagField->getFlag(domainFlagUID);
195
196 auto inner = flagField->xyzSize();
197 inner.expand(cell_idx_t(-1));
198
199 indexVectorAll.clear();
200 indexVectorInner.clear();
201 indexVectorOuter.clear();
202
203 auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
204 for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
205 ++it) {
206
207 if (!isFlagSet(it, boundaryFlag))
208 continue;
209 if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
210 it.y() + cell_idx_c(0),
211 it.z() + cell_idx_c(0)) &&
212 isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
213
214 auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
215 float InitialisatonAdditionalData =
216 elementInitaliser(Cell(it.x(), it.y(), it.z()), blocks, *block);
217 element.value = InitialisatonAdditionalData;
218 indexVectorAll.emplace_back(element);
219 if (inner.contains(it.x(), it.y(), it.z()))
220 indexVectorInner.emplace_back(element);
221 else
222 indexVectorOuter.emplace_back(element);
223 }
224 }
225
226 indexVectors->syncGPU();
227 }
228
229private:
230 void run_impl(IBlock *block, IndexVectors::Type type,
231 gpuStream_t stream = nullptr);
232
233 BlockDataID indexVectorID;
234
235 std::function<float(const Cell &, const shared_ptr<StructuredBlockForest> &,
236 IBlock &)>
237 elementInitaliser;
238
239public:
240 BlockDataID fieldID;
241};
242
243} // namespace pystencils
244} // namespace walberla
Definition Cell.hpp:96
std::function< void(IBlock *)> getOuterSweep(gpuStream_t stream=nullptr)
Dirichlet_single_precision_CUDA(const std::shared_ptr< StructuredBlockForest > &blocks, BlockDataID fieldID_, std::function< float(const Cell &, const shared_ptr< StructuredBlockForest > &, IBlock &)> &dirichletCallback)
std::function< void(IBlock *)> getInnerSweep(gpuStream_t stream=nullptr)
void fillFromFlagField(const std::shared_ptr< StructuredBlockForest > &blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
void operator()(IBlock *block, gpuStream_t stream=nullptr)
void fillFromFlagField(const shared_ptr< StructuredBlockForest > &blocks, IBlock *block, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
std::function< void(IBlock *)> getSweep(gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:176
\file PackInfoPdfDoublePrecision.cpp \author pystencils