ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Dirichlet_double_precision_CUDA.h
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \\file Dirichlet_double_precision_CUDA.h
//! \\author pystencils
//======================================================================================================================

// kernel generated with pystencils v1.3.7+13.gdfd203a, lbmpy
// v1.3.7+10.gd3f6236, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from
// waLBerla commit 191cf58b16b96d1d2f050dcbd9e88443995b2222

#pragma once
#include "core/DataTypes.h"
#include "core/logging/Logging.h"

#include "blockforest/StructuredBlockForest.h"
#include "core/debug/Debug.h"
#include "domain_decomposition/BlockDataID.h"
#include "domain_decomposition/IBlock.h"
#include "field/FlagField.h"
#include "gpu/FieldCopy.h"
#include "gpu/GPUField.h"
#include "gpu/GPUWrapper.h"

#include <functional>
#include <memory>
#include <vector>

#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif

#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
using walberla::half;
#endif

namespace walberla {
namespace pystencils {

class Dirichlet_double_precision_CUDA {
public:
  struct IndexInfo {
    int32_t x;
    int32_t y;
    int32_t z;
    int32_t dir;
    double value;
    IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
        : x(x_), y(y_), z(z_), dir(dir_), value() {}
    bool operator==(const IndexInfo &o) const {
      return x == o.x && y == o.y && z == o.z && dir == o.dir &&
             floatIsEqual(value, o.value);
    }
  };

  class IndexVectors {
  public:
    using CpuIndexVector = std::vector<IndexInfo>;

    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };

    IndexVectors() = default;
    bool operator==(IndexVectors const &other) const {
      return other.cpuVectors_ == cpuVectors_;
    }

    ~IndexVectors() {
      for (auto &gpuVec : gpuVectors_) {
        if (gpuVec) {
          WALBERLA_GPU_CHECK(gpuFree(gpuVec));
        }
      }
    }
    auto &indexVector(Type t) { return cpuVectors_[t]; }
    auto const &indexVector(Type t) const { return cpuVectors_[t]; }
    IndexInfo *pointerCpu(Type t) {
      return cpuVectors_[t].empty() ? nullptr : cpuVectors_[t].data();
    }

    IndexInfo *pointerGpu(Type t) { return gpuVectors_[t]; }
    // Free any previously allocated device copies, then upload the current
    // CPU index vectors to the GPU.
    void syncGPU() {
      for (auto &gpuVec : gpuVectors_)
        WALBERLA_GPU_CHECK(gpuFree(gpuVec));
      gpuVectors_.resize(cpuVectors_.size());

      WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
      for (size_t i = 0; i < cpuVectors_.size(); ++i) {
        auto &gpuVec = gpuVectors_[i];
        auto &cpuVec = cpuVectors_[i];
        if (cpuVec.empty()) {
          continue;
        }
        WALBERLA_GPU_CHECK(
            gpuMalloc(&gpuVec, sizeof(IndexInfo) * cpuVec.size()));
        WALBERLA_GPU_CHECK(gpuMemcpy(gpuVec, cpuVec.data(),
                                     sizeof(IndexInfo) * cpuVec.size(),
                                     gpuMemcpyHostToDevice));
      }
    }

  private:
    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};

    using GpuIndexVector = IndexInfo *;
    std::vector<GpuIndexVector> gpuVectors_;
  };

  Dirichlet_double_precision_CUDA(
      const std::shared_ptr<StructuredBlockForest> &blocks,
      BlockDataID fieldID_,
      std::function<double(const Cell &,
                           const shared_ptr<StructuredBlockForest> &, IBlock &)>
          &dirichletCallback)
      : elementInitaliser(dirichletCallback), fieldID(fieldID_) {
    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
      return new IndexVectors();
    };
    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
        createIdxVector, "IndexField_Dirichlet_double_precision_CUDA");
  }

  void run(IBlock *block, gpuStream_t stream = nullptr);

  void operator()(IBlock *block, gpuStream_t stream = nullptr) {
    run(block, stream);
  }

  void inner(IBlock *block, gpuStream_t stream = nullptr);

  void outer(IBlock *block, gpuStream_t stream = nullptr);

  Vector3<double> getForce(IBlock * /*block*/) {

    WALBERLA_ABORT(
        "Boundary condition was not generated including force calculation.")
    return Vector3<double>(double_c(0.0));
  }

  std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->run(b, stream); };
  }

  std::function<void(IBlock *)> getInnerSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->inner(b, stream); };
  }

  std::function<void(IBlock *)> getOuterSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->outer(b, stream); };
  }

  template <typename FlagField_T>
  void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
                         FlagUID domainFlagUID) {
    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
      fillFromFlagField<FlagField_T>(blocks, &*blockIt, flagFieldID,
                                     boundaryFlagUID, domainFlagUID);
  }

  // Scan the flag field of one block, rebuild the ALL/INNER/OUTER index
  // vectors of boundary cells, and upload them to the GPU.
  template <typename FlagField_T>
  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
                         IBlock *block, ConstBlockDataID flagFieldID,
                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);

    auto *flagField = block->getData<FlagField_T>(flagFieldID);

    if (!(flagField->flagExists(boundaryFlagUID) &&
          flagField->flagExists(domainFlagUID)))
      return;

    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
    auto domainFlag = flagField->getFlag(domainFlagUID);

    auto inner = flagField->xyzSize();
    inner.expand(cell_idx_t(-1));

    indexVectorAll.clear();
    indexVectorInner.clear();
    indexVectorOuter.clear();

    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
         ++it) {

      if (!isFlagSet(it, boundaryFlag))
        continue;
      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
                                   it.y() + cell_idx_c(0),
                                   it.z() + cell_idx_c(0)) &&
          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {

        auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
        double InitialisatonAdditionalData =
            elementInitaliser(Cell(it.x(), it.y(), it.z()), blocks, *block);
        element.value = InitialisatonAdditionalData;
        indexVectorAll.emplace_back(element);
        if (inner.contains(it.x(), it.y(), it.z()))
          indexVectorInner.emplace_back(element);
        else
          indexVectorOuter.emplace_back(element);
      }
    }

    indexVectors->syncGPU();
  }

private:
  void run_impl(IBlock *block, IndexVectors::Type type,
                gpuStream_t stream = nullptr);

  BlockDataID indexVectorID;

  std::function<double(const Cell &, const shared_ptr<StructuredBlockForest> &,
                       IBlock &)>
      elementInitaliser;

public:
  BlockDataID fieldID;
};

} // namespace pystencils
} // namespace walberla
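
Usage sketch (not part of the generated header). The snippet below illustrates, under assumed names, how this boundary class is typically wired up in a GPU-enabled build: construct it with a block forest, the ID of the target GPU field and a value callback, populate the index vectors from a flag field, then apply it block by block. The block forest, field IDs, flag UIDs and the constant-value callback are hypothetical placeholders; only the constructor, fillFromFlagField() and operator()/getSweep() come from the class above.

#include "Dirichlet_double_precision_CUDA.h"

#include <functional>
#include <memory>

namespace example {

using walberla::BlockDataID;
using walberla::Cell;
using walberla::ConstBlockDataID;
using walberla::FlagUID;
using walberla::IBlock;
using walberla::StructuredBlockForest;
using FlagField_T = walberla::FlagField<walberla::uint8_t>; // assumed flag field type

void setup_dirichlet_boundary(
    const std::shared_ptr<StructuredBlockForest> &blocks, // assumed to exist already
    BlockDataID potentialFieldID,   // hypothetical ID of the GPU field the kernel writes to
    ConstBlockDataID flagFieldID) { // hypothetical ID of the flag field
  // Callback evaluated once per boundary cell; here it prescribes a constant value.
  std::function<double(const Cell &, const std::shared_ptr<StructuredBlockForest> &,
                       IBlock &)>
      dirichletValue = [](const Cell &, const std::shared_ptr<StructuredBlockForest> &,
                          IBlock &) { return 1.0; };

  // Registers the per-block IndexVectors block data and stores the target field ID.
  walberla::pystencils::Dirichlet_double_precision_CUDA boundary(
      blocks, potentialFieldID, dirichletValue);

  // Collect boundary-flagged cells from the flag field and upload the index
  // vectors to the GPU.
  boundary.fillFromFlagField<FlagField_T>(blocks, flagFieldID, FlagUID("Dirichlet"),
                                          FlagUID("Domain"));

  // Apply the boundary kernel block by block; getSweep() returns an equivalent
  // functor that can be registered with a time loop instead.
  for (auto &block : *blocks)
    boundary(&block);
}

} // namespace example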