ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
ReactionKernelIndexed_5_double_precision_CUDA.h
/*
 * Copyright (C) 2022-2025 The ESPResSo project
 * Copyright (C) 2020-2025 The waLBerla project
 *
 * This file is part of ESPResSo.
 *
 * ESPResSo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * ESPResSo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

// kernel generated with pystencils v1.4+1.ge851f4e, lbmpy v1.4+1.ge9efe34,
// sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit
// 007e77e077ad9d22b5eed6f3d3118240993e553c

/*
 * Boundary class.
 * Adapted from the waLBerla source file
 * https://i10git.cs.fau.de/walberla/walberla/-/blob/3e54d4f2336e47168ad87e3caaf7b3b082d86ca7/python/pystencils_walberla/templates/Boundary.tmpl.h
 */

#pragma once

#include <core/DataTypes.h>

#include <blockforest/StructuredBlockForest.h>
#include <core/debug/Debug.h>
#include <domain_decomposition/BlockDataID.h>
#include <domain_decomposition/IBlock.h>
#include <field/FlagField.h>
#include <gpu/FieldCopy.h>
#include <gpu/GPUField.h>
#include <gpu/GPUWrapper.h>

#include <array>
#include <cassert>
#include <functional>
#include <memory>
#include <vector>

#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wunused-parameter"
#elif defined(__GNUC__) or defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif

#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
using walberla::half;
#endif

namespace walberla {
namespace pystencils {

class ReactionKernelIndexed_5_double_precision_CUDA {
public:
  struct IndexInfo {
    int32_t x;
    int32_t y;
    int32_t z;
    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
    bool operator==(const IndexInfo &o) const {
      return x == o.x && y == o.y && z == o.z;
    }
  };

  class IndexVectors {
  public:
    using CpuIndexVector = std::vector<IndexInfo>;

    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };

    IndexVectors() = default;
    bool operator==(IndexVectors const &other) const {
      return other.cpuVectors_ == cpuVectors_;
    }

    ~IndexVectors() {
      for (auto &gpuVec : gpuVectors_) {
        if (gpuVec) {
          WALBERLA_GPU_CHECK(gpuFree(gpuVec));
        }
      }
    }

    auto &indexVector(Type t) { return cpuVectors_[t]; }
    auto const &indexVector(Type t) const { return cpuVectors_[t]; }
    IndexInfo *pointerCpu(Type t) {
      return cpuVectors_[t].empty() ? nullptr : cpuVectors_[t].data();
    }

    IndexInfo *pointerGpu(Type t) { return gpuVectors_[t]; }
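    // Free any stale device buffers and re-upload the CPU-side index
    // vectors to GPU memory; called after the vectors change, e.g. by
    // fillFromFlagField().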
    void syncGPU() {
      for (auto &gpuVec : gpuVectors_) {
        if (gpuVec) {
          WALBERLA_GPU_CHECK(gpuFree(gpuVec));
          gpuVec = nullptr;
        }
      }
      gpuVectors_.resize(cpuVectors_.size());

      WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
      for (size_t i = 0; i < cpuVectors_.size(); ++i) {
        auto &gpuVec = gpuVectors_[i];
        auto &cpuVec = cpuVectors_[i];
        if (cpuVec.empty()) {
          continue;
        }
        WALBERLA_GPU_CHECK(
            gpuMalloc(&gpuVec, sizeof(IndexInfo) * cpuVec.size()));
        WALBERLA_GPU_CHECK(gpuMemcpy(gpuVec, cpuVec.data(),
                                     sizeof(IndexInfo) * cpuVec.size(),
                                     gpuMemcpyHostToDevice));
      }
    }

  private:
    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};

    using GpuIndexVector = IndexInfo *;
    std::vector<GpuIndexVector> gpuVectors_;
  };
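
  // Constructor that registers a fresh IndexVectors container as block data
  // on the given block forest; the remaining arguments are the IDs of the
  // five density fields and the reaction parameters (reaction orders, rate
  // coefficient, stoichiometric coefficients).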
  ReactionKernelIndexed_5_double_precision_CUDA(
      const std::shared_ptr<StructuredBlockForest> &blocks,
      BlockDataID rho_0ID_, BlockDataID rho_1ID_, BlockDataID rho_2ID_,
      BlockDataID rho_3ID_, BlockDataID rho_4ID_, double order_0,
      double order_1, double order_2, double order_3, double order_4,
      double rate_coefficient, double stoech_0, double stoech_1,
      double stoech_2, double stoech_3, double stoech_4)
      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
        rho_3ID(rho_3ID_), rho_4ID(rho_4ID_), order_0_(order_0),
        order_1_(order_1), order_2_(order_2), order_3_(order_3),
        order_4_(order_4), rate_coefficient_(rate_coefficient),
        stoech_0_(stoech_0), stoech_1_(stoech_1), stoech_2_(stoech_2),
        stoech_3_(stoech_3), stoech_4_(stoech_4) {
    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
      return new IndexVectors();
    };
    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
        createIdxVector,
        "IndexField_ReactionKernelIndexed_5_double_precision_CUDA");
  }
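
  // Overload for the case where an IndexVectors container has already been
  // registered as block data elsewhere: takes its BlockDataID directly
  // instead of creating a new one.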
  ReactionKernelIndexed_5_double_precision_CUDA(
      BlockDataID indexVectorID_, BlockDataID rho_0ID_, BlockDataID rho_1ID_,
      BlockDataID rho_2ID_, BlockDataID rho_3ID_, BlockDataID rho_4ID_,
      double order_0, double order_1, double order_2, double order_3,
      double order_4, double rate_coefficient, double stoech_0,
      double stoech_1, double stoech_2, double stoech_3, double stoech_4)
      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
        rho_2ID(rho_2ID_), rho_3ID(rho_3ID_), rho_4ID(rho_4ID_),
        order_0_(order_0), order_1_(order_1), order_2_(order_2),
        order_3_(order_3), order_4_(order_4),
        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3),
        stoech_4_(stoech_4) {}

  void run(IBlock *block, gpuStream_t stream = nullptr);

  void operator()(IBlock *block, gpuStream_t stream = nullptr) {
    run(block, stream);
  }

  void inner(IBlock *block, gpuStream_t stream = nullptr);

  void outer(IBlock *block, gpuStream_t stream = nullptr);

  Vector3<double> getForce(IBlock * /*block*/) {
    WALBERLA_ABORT(
        "Boundary condition was not generated including force calculation.")
    return Vector3<double>(double_c(0.0));
  }

  std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->run(b, stream); };
  }

  std::function<void(IBlock *)> getInnerSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->inner(b, stream); };
  }

  std::function<void(IBlock *)> getOuterSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->outer(b, stream); };
  }
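
  // Scan the flag field (including ghost layers) and record every cell that
  // carries both boundaryFlagUID and domainFlagUID into the ALL index
  // vector; each recorded cell additionally goes into INNER or OUTER,
  // depending on whether it lies at least one cell inside the block
  // interior. The vectors are then mirrored to the GPU via syncGPU().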
  template <typename FlagField_T>
  void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
                         FlagUID domainFlagUID) {
    for (auto &block : *blocks)
      fillFromFlagField<FlagField_T>(&block, flagFieldID, boundaryFlagUID,
                                     domainFlagUID);
  }

  template <typename FlagField_T>
  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);

    auto *flagField = block->getData<FlagField_T>(flagFieldID);

    if (!(flagField->flagExists(boundaryFlagUID) and
          flagField->flagExists(domainFlagUID)))
      return;

    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
    auto domainFlag = flagField->getFlag(domainFlagUID);

    auto inner = flagField->xyzSize();
    inner.expand(cell_idx_t(-1));

    indexVectorAll.clear();
    indexVectorInner.clear();
    indexVectorOuter.clear();

    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
         ++it) {
      if (!isFlagSet(it, boundaryFlag))
        continue;
      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
                                   it.y() + cell_idx_c(0),
                                   it.z() + cell_idx_c(0)) &&
          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
        auto element = IndexInfo(it.x(), it.y(), it.z());
        indexVectorAll.emplace_back(element);
        if (inner.contains(it.x(), it.y(), it.z()))
          indexVectorInner.emplace_back(element);
        else
          indexVectorOuter.emplace_back(element);
      }
    }

    indexVectors->syncGPU();
  }

private:
  void run_impl(IBlock *block, IndexVectors::Type type,
                gpuStream_t stream = nullptr);

  BlockDataID indexVectorID;

public:
  BlockDataID rho_0ID;
  BlockDataID rho_1ID;
  BlockDataID rho_2ID;
  BlockDataID rho_3ID;
  BlockDataID rho_4ID;
  double order_0_;
  double order_1_;
  double order_2_;
  double order_3_;
  double order_4_;
  double rate_coefficient_;
  double stoech_0_;
  double stoech_1_;
  double stoech_2_;
  double stoech_3_;
  double stoech_4_;
};

#if defined(__clang__)
#pragma clang diagnostic pop
#elif defined(__GNUC__) or defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

} // namespace pystencils
} // namespace walberla