ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
ReactionKernelIndexed_4_single_precision_CUDA.h
/*
 * Copyright (C) 2022-2023 The ESPResSo project
 * Copyright (C) 2020-2023 The waLBerla project
 *
 * This file is part of ESPResSo.
 *
 * ESPResSo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * ESPResSo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

// kernel generated with pystencils v1.3.7+13.gdfd203a, lbmpy
// v1.3.7+10.gd3f6236, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from
// waLBerla commit e12db9965373887d86aab4aaaf4dd7b38fa588e8

/*
 * Boundary class.
 * Adapted from the waLBerla source file
 * https://i10git.cs.fau.de/walberla/walberla/-/blob/e12db9965373887d86aab4aaaf4dd7b38fa588e8/python/pystencils_walberla/templates/Boundary.tmpl.h
 */

#pragma once

#include <core/DataTypes.h>

#include <blockforest/StructuredBlockForest.h>
#include <core/debug/Debug.h>
#include <domain_decomposition/BlockDataID.h>
#include <domain_decomposition/IBlock.h>
#include <field/FlagField.h>
#include <gpu/FieldCopy.h>
#include <gpu/GPUField.h>
#include <gpu/GPUWrapper.h>

#include <cassert>
#include <functional>
#include <memory>
#include <vector>

#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wunused-parameter"
#elif defined(__GNUC__) or defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif

#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
using walberla::half;
#endif

namespace walberla {
namespace pystencils {
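/*
 * Generated indexed sweep that applies a reaction kernel to four
 * single-precision density fields (rho_0 ... rho_3) on the GPU. The kernel is
 * parametrized by per-species reaction orders, a rate coefficient and
 * per-species stoichiometric coefficients ("stoech"), and it visits only the
 * cells collected in the index vectors below.
 */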
class ReactionKernelIndexed_4_single_precision_CUDA {
public:
  struct IndexInfo {
    int32_t x;
    int32_t y;
    int32_t z;
    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
    bool operator==(const IndexInfo &o) const {
      return x == o.x && y == o.y && z == o.z;
    }
  };
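  /*
   * Per-block container for the cell index lists: one host-side vector per
   * sweep type (ALL, INNER, OUTER) plus a device-side mirror that is
   * (re)allocated and filled by syncGPU().
   */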
  class IndexVectors {
  public:
    using CpuIndexVector = std::vector<IndexInfo>;

    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };

    IndexVectors() = default;
    bool operator==(IndexVectors const &other) const {
      return other.cpuVectors_ == cpuVectors_;
    }
    ~IndexVectors() {
      for (auto &gpuVec : gpuVectors_) {
        if (gpuVec) {
          WALBERLA_GPU_CHECK(gpuFree(gpuVec));
        }
      }
    }
    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
    IndexInfo *pointerCpu(Type t) {
      return cpuVectors_[t].empty() ? nullptr : cpuVectors_[t].data();
    }
    IndexInfo *pointerGpu(Type t) { return gpuVectors_[t]; }
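    // Release any previously allocated device buffers, then reallocate and
    // copy every non-empty host vector to the GPU.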
    void syncGPU() {
      for (auto &gpuVec : gpuVectors_)
        WALBERLA_GPU_CHECK(gpuFree(gpuVec));
      gpuVectors_.resize(cpuVectors_.size());

      WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
      for (size_t i = 0; i < cpuVectors_.size(); ++i) {
        auto &gpuVec = gpuVectors_[i];
        auto &cpuVec = cpuVectors_[i];
        if (cpuVec.empty()) {
          continue;
        }
        WALBERLA_GPU_CHECK(
            gpuMalloc(&gpuVec, sizeof(IndexInfo) * cpuVec.size()));
        WALBERLA_GPU_CHECK(gpuMemcpy(gpuVec, cpuVec.data(),
                                     sizeof(IndexInfo) * cpuVec.size(),
                                     gpuMemcpyHostToDevice));
      }
    }

  private:
    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};

    using GpuIndexVector = IndexInfo *;
    std::vector<GpuIndexVector> gpuVectors_;
  };
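  /*
   * This overload registers a fresh IndexVectors container on every block of
   * the given block forest; the second overload below adopts an already
   * registered container via its BlockDataID.
   */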
  ReactionKernelIndexed_4_single_precision_CUDA(
      const std::shared_ptr<StructuredBlockForest> &blocks,
      BlockDataID rho_0ID_, BlockDataID rho_1ID_, BlockDataID rho_2ID_,
      BlockDataID rho_3ID_, float order_0, float order_1, float order_2,
      float order_3, float rate_coefficient, float stoech_0, float stoech_1,
      float stoech_2, float stoech_3)
      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
        rho_3ID(rho_3ID_), order_0_(order_0), order_1_(order_1),
        order_2_(order_2), order_3_(order_3),
        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3) {
    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
      return new IndexVectors();
    };
    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
        createIdxVector,
        "IndexField_ReactionKernelIndexed_4_single_precision_CUDA");
  }
  ReactionKernelIndexed_4_single_precision_CUDA(
      BlockDataID indexVectorID_, BlockDataID rho_0ID_, BlockDataID rho_1ID_,
      BlockDataID rho_2ID_, BlockDataID rho_3ID_, float order_0, float order_1,
      float order_2, float order_3, float rate_coefficient, float stoech_0,
      float stoech_1, float stoech_2, float stoech_3)
      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
        rho_2ID(rho_2ID_), rho_3ID(rho_3ID_), order_0_(order_0),
        order_1_(order_1), order_2_(order_2), order_3_(order_3),
        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3) {}
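  // run() processes every registered cell; inner() and outer() process only
  // the INNER or OUTER subset, the usual waLBerla split that lets the outer
  // update overlap with communication.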
  void run(IBlock *block, gpuStream_t stream = nullptr);

  void operator()(IBlock *block, gpuStream_t stream = nullptr) {
    run(block, stream);
  }

  void inner(IBlock *block, gpuStream_t stream = nullptr);

  void outer(IBlock *block, gpuStream_t stream = nullptr);
  Vector3<real_t> getForce(IBlock * /*block*/) {
    WALBERLA_ABORT(
        "Boundary condition was not generated including force calculation.")
    return Vector3<real_t>(real_c(0.0));
  }
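  // The std::function objects returned below capture `this`; the kernel
  // instance must therefore outlive any time loop that uses them.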
  std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->run(b, stream); };
  }

  std::function<void(IBlock *)> getInnerSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->inner(b, stream); };
  }

  std::function<void(IBlock *)> getOuterSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->outer(b, stream); };
  }
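  /*
   * Populate the index vectors from a flag field: every cell carrying
   * boundaryFlagUID whose associated domain cell carries domainFlagUID is
   * recorded, partitioned into inner and outer cells, and mirrored on the
   * device via syncGPU().
   */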
  template <typename FlagField_T>
  void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
                         FlagUID domainFlagUID) {
    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
                                     domainFlagUID);
  }
  template <typename FlagField_T>
  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);

    auto *flagField = block->getData<FlagField_T>(flagFieldID);

    if (!(flagField->flagExists(boundaryFlagUID) and
          flagField->flagExists(domainFlagUID)))
      return;

    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
    auto domainFlag = flagField->getFlag(domainFlagUID);

    auto inner = flagField->xyzSize();
    inner.expand(cell_idx_t(-1));

    indexVectorAll.clear();
    indexVectorInner.clear();
    indexVectorOuter.clear();

    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
         ++it) {
      if (!isFlagSet(it, boundaryFlag))
        continue;
      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
                                   it.y() + cell_idx_c(0),
                                   it.z() + cell_idx_c(0)) &&
          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
        auto element = IndexInfo(it.x(), it.y(), it.z());

        indexVectorAll.emplace_back(element);
        if (inner.contains(it.x(), it.y(), it.z()))
          indexVectorInner.emplace_back(element);
        else
          indexVectorOuter.emplace_back(element);
      }
    }

    indexVectors->syncGPU();
  }
private:
  void run_impl(IBlock *block, IndexVectors::Type type,
                gpuStream_t stream = nullptr);

  BlockDataID indexVectorID;

public:
  BlockDataID rho_0ID;
  BlockDataID rho_1ID;
  BlockDataID rho_2ID;
  BlockDataID rho_3ID;
  float order_0_;
  float order_1_;
  float order_2_;
  float order_3_;
  float rate_coefficient_;
  float stoech_0_;
  float stoech_1_;
  float stoech_2_;
  float stoech_3_;
};
} // namespace pystencils
} // namespace walberla
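For orientation, a minimal usage sketch follows. It is not part of the
generated header: it assumes a standard waLBerla application in which the
block forest, a SweepTimeloop, the four GPU density fields and a flag field
are already set up, so the function name addReactionSweep, the flag UIDs
"reaction" and "domain", and all numeric reaction parameters are placeholders.

#include <blockforest/StructuredBlockForest.h>
#include <field/FlagField.h>
#include <timeloop/SweepTimeloop.h>

#include <memory>

#include "ReactionKernelIndexed_4_single_precision_CUDA.h"

using namespace walberla;

void addReactionSweep(const std::shared_ptr<StructuredBlockForest> &blocks,
                      SweepTimeloop &loop, BlockDataID rho_0,
                      BlockDataID rho_1, BlockDataID rho_2, BlockDataID rho_3,
                      ConstBlockDataID flagFieldID) {
  using FlagField_T = FlagField<uint8_t>;

  // Illustrative parameters only: first-order kinetics in every species,
  // rate coefficient 0.1, stoichiometry rho_0 + rho_1 -> rho_2 + rho_3.
  auto kernel = std::make_shared<
      pystencils::ReactionKernelIndexed_4_single_precision_CUDA>(
      blocks, rho_0, rho_1, rho_2, rho_3,
      /* orders */ 1.0f, 1.0f, 1.0f, 1.0f,
      /* rate_coefficient */ 0.1f,
      /* stoech */ -1.0f, -1.0f, 1.0f, 1.0f);

  // Collect the flagged cells once and mirror them in device memory.
  kernel->fillFromFlagField<FlagField_T>(blocks, flagFieldID,
                                         FlagUID("reaction"),
                                         FlagUID("domain"));

  // Wrapping the shared_ptr in the lambda keeps the kernel alive for the
  // lifetime of the time loop; the std::function returned by getSweep()
  // would capture a raw `this` pointer instead.
  loop.add() << Sweep([kernel](IBlock *b) { (*kernel)(b); },
                      "reaction kernel");
}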