ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
DynamicUBBSinglePrecisionCUDA.h
Go to the documentation of this file.
1/*
2 * Copyright (C) 2022-2025 The ESPResSo project
3 * Copyright (C) 2020-2025 The waLBerla project
4 *
5 * This file is part of ESPResSo.
6 *
7 * ESPResSo is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * ESPResSo is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21// kernel generated with pystencils v1.3.7+13.gdfd203a, lbmpy
22// v1.3.7+15.g5018a18, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from
23// waLBerla commit 191cf58b16b96d1d2f050dcbd9e88443995b2222
24
25/*
26 * Boundary class.
27 * Adapted from the waLBerla source file
28 * https://i10git.cs.fau.de/walberla/walberla/-/blob/3e54d4f2336e47168ad87e3caaf7b3b082d86ca7/python/pystencils_walberla/templates/Boundary.tmpl.h
29 */
30
31#pragma once
32
33#include <core/DataTypes.h>
34
35#include <blockforest/StructuredBlockForest.h>
36#include <core/debug/Debug.h>
37#include <domain_decomposition/BlockDataID.h>
38#include <domain_decomposition/IBlock.h>
39#include <field/FlagField.h>
40#include <gpu/FieldCopy.h>
41#include <gpu/GPUField.h>
42#include <gpu/GPUWrapper.h>
43
44#include <array>
45#include <cassert>
46#include <functional>
47#include <memory>
48#include <vector>
49
50#if defined(__clang__)
51#pragma clang diagnostic push
52#pragma clang diagnostic ignored "-Wunused-variable"
53#pragma clang diagnostic ignored "-Wunused-parameter"
54#elif defined(__GNUC__) or defined(__GNUG__)
55#pragma GCC diagnostic push
56#pragma GCC diagnostic ignored "-Wunused-variable"
57#pragma GCC diagnostic ignored "-Wunused-parameter"
58#endif
59
60#ifdef __GNUC__
61#define RESTRICT __restrict__
62#elif _MSC_VER
63#define RESTRICT __restrict
64#else
65#define RESTRICT
66#endif
67
68#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
69using walberla::half;
70#endif
71
72namespace walberla {
73namespace lbm {
74
76public:
77 struct IndexInfo {
78 int32_t x;
79 int32_t y;
80 int32_t z;
81 int32_t dir;
82 float vel_0;
83 float vel_1;
84 float vel_2;
85 IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
86 : x(x_), y(y_), z(z_), dir(dir_), vel_0(), vel_1(), vel_2() {}
87 bool operator==(const IndexInfo &o) const {
88 return x == o.x && y == o.y && z == o.z && dir == o.dir &&
89 floatIsEqual(vel_0, o.vel_0) && floatIsEqual(vel_1, o.vel_1) &&
90 floatIsEqual(vel_2, o.vel_2);
91 }
92 };
93
95 public:
96 using CpuIndexVector = std::vector<IndexInfo>;
97
98 enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
99
100 IndexVectors() = default;
101 bool operator==(IndexVectors const &other) const {
102 return other.cpuVectors_ == cpuVectors_;
103 }
104
106 for (auto &gpuVec : gpuVectors_) {
107 if (gpuVec) {
108 WALBERLA_GPU_CHECK(gpuFree(gpuVec));
109 }
110 }
111 }
112 auto &indexVector(Type t) { return cpuVectors_[t]; }
113 auto const &indexVector(Type t) const { return cpuVectors_[t]; }
115 return cpuVectors_[t].empty() ? nullptr : cpuVectors_[t].data();
116 }
117
118 IndexInfo *pointerGpu(Type t) { return gpuVectors_[t]; }
119 void syncGPU() {
120 for (auto &gpuVec : gpuVectors_) {
121 if (gpuVec) {
122 WALBERLA_GPU_CHECK(gpuFree(gpuVec));
123 gpuVec = nullptr;
124 }
125 }
126 gpuVectors_.resize(cpuVectors_.size());
127
128 WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
129 for (size_t i = 0; i < cpuVectors_.size(); ++i) {
130 auto &gpuVec = gpuVectors_[i];
131 auto &cpuVec = cpuVectors_[i];
132 if (cpuVec.empty()) {
133 continue;
134 }
135 WALBERLA_GPU_CHECK(
136 gpuMalloc(&gpuVec, sizeof(IndexInfo) * cpuVec.size()));
137 WALBERLA_GPU_CHECK(gpuMemcpy(gpuVec, cpuVec.data(),
138 sizeof(IndexInfo) * cpuVec.size(),
139 gpuMemcpyHostToDevice));
140 }
141 }
142
143 private:
144 std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
145
146 using GpuIndexVector = IndexInfo *;
147 std::vector<GpuIndexVector> gpuVectors_;
148 };
149
150 struct ForceStruct {
151 double F_0;
152 double F_1;
153 double F_2;
155 : F_0(double_c(0.0)), F_1(double_c(0.0)), F_2(double_c(0.0)) {}
156 bool operator==(const ForceStruct &o) const {
157 return floatIsEqual(F_0, o.F_0) && floatIsEqual(F_1, o.F_1) &&
158 floatIsEqual(F_2, o.F_2);
159 }
160 };
161
163 public:
164 ForceVector() = default;
165 bool operator==(ForceVector const &other) const {
166 return other.cpuVector_ == cpuVector_;
167 }
168
170 if (!gpuVector_.empty()) {
171 WALBERLA_GPU_CHECK(gpuFree(gpuVector_[0]))
172 }
173 }
174 auto &forceVector() { return cpuVector_; }
175 auto const &forceVector() const { return cpuVector_; }
177 return cpuVector_.empty() ? nullptr : cpuVector_.data();
178 }
179 bool empty() const { return cpuVector_.empty(); }
180
181 ForceStruct *pointerGpu() { return gpuVector_[0]; }
182 Vector3<double> getForce() {
183 syncCPU();
184 Vector3<double> result(double_c(0.0));
185 for (auto const &force : cpuVector_) {
186 result[0] += force.F_0;
187 result[1] += force.F_1;
188 result[2] += force.F_2;
189 }
190 return result;
191 }
192
193 void syncGPU() {
194 if (!gpuVector_.empty()) {
195 WALBERLA_GPU_CHECK(gpuFree(gpuVector_[0]))
196 gpuVector_[0] = nullptr;
197 }
198 if (!cpuVector_.empty()) {
199 gpuVector_.resize(cpuVector_.size());
200 WALBERLA_GPU_CHECK(
201 gpuMalloc(&gpuVector_[0], sizeof(ForceStruct) * cpuVector_.size()))
202 WALBERLA_GPU_CHECK(gpuMemcpy(gpuVector_[0], &cpuVector_[0],
203 sizeof(ForceStruct) * cpuVector_.size(),
204 gpuMemcpyHostToDevice))
205 }
206 }
207
208 void syncCPU() {
209 WALBERLA_GPU_CHECK(gpuMemcpy(&cpuVector_[0], gpuVector_[0],
210 sizeof(ForceStruct) * cpuVector_.size(),
211 gpuMemcpyDeviceToHost))
212 }
213
214 private:
215 std::vector<ForceStruct> cpuVector_;
216 std::vector<ForceStruct *> gpuVector_;
217 };
218
220 const std::shared_ptr<StructuredBlockForest> &blocks, BlockDataID pdfsID_,
221 std::function<Vector3<float>(
222 const Cell &, const shared_ptr<StructuredBlockForest> &, IBlock &)>
223 &velocityCallbackDynamicUBBSinglePrecisionCUDA)
224 : elementInitialiser(velocityCallbackDynamicUBBSinglePrecisionCUDA),
225 pdfsID(pdfsID_) {
226 auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
227 return new IndexVectors();
228 };
229 indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
230 createIdxVector, "IndexField_DynamicUBBSinglePrecisionCUDA");
231 auto createForceVector = [](IBlock *const, StructuredBlockStorage *const) {
232 return new ForceVector();
233 };
234 forceVectorID = blocks->addStructuredBlockData<ForceVector>(
235 createForceVector, "forceVector_DynamicUBBSinglePrecisionCUDA");
236 }
237
238 void run(IBlock *block, gpuStream_t stream = nullptr);
239
240 void operator()(IBlock *block, gpuStream_t stream = nullptr) {
241 run(block, stream);
242 }
243
244 void inner(IBlock *block, gpuStream_t stream = nullptr);
245
246 void outer(IBlock *block, gpuStream_t stream = nullptr);
247
248 Vector3<double> getForce(IBlock *block) {
249 auto *forceVector = block->getData<ForceVector>(forceVectorID);
250 if (forceVector->empty())
251 return Vector3<double>(double_c(0.0));
252 return forceVector->getForce();
253 }
254
255 std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {
256 return [this, stream](IBlock *b) { this->run(b, stream); };
257 }
258
259 std::function<void(IBlock *)> getInnerSweep(gpuStream_t stream = nullptr) {
260 return [this, stream](IBlock *b) { this->inner(b, stream); };
261 }
262
263 std::function<void(IBlock *)> getOuterSweep(gpuStream_t stream = nullptr) {
264 return [this, stream](IBlock *b) { this->outer(b, stream); };
265 }
266
267 template <typename FlagField_T>
268 void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
269 ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
270 FlagUID domainFlagUID) {
271 for (auto &block : *blocks)
272 fillFromFlagField<FlagField_T>(blocks, &block, flagFieldID,
273 boundaryFlagUID, domainFlagUID);
274 }
275
276 template <typename FlagField_T>
277 void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
278 IBlock *block, ConstBlockDataID flagFieldID,
279 FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
280 auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
281 auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
282 auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
283 auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
284 auto *forceVector = block->getData<ForceVector>(forceVectorID);
285
286 auto *flagField = block->getData<FlagField_T>(flagFieldID);
287
288 if (!(flagField->flagExists(boundaryFlagUID) and
289 flagField->flagExists(domainFlagUID)))
290 return;
291
292 auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
293 auto domainFlag = flagField->getFlag(domainFlagUID);
294
295 auto inner = flagField->xyzSize();
296 inner.expand(cell_idx_t(-1));
297
298 indexVectorAll.clear();
299 indexVectorInner.clear();
300 indexVectorOuter.clear();
301
302 for (auto it = flagField->beginWithGhostLayerXYZ(
303 cell_idx_c(flagField->nrOfGhostLayers() - 1));
304 it != flagField->end(); ++it) {
305 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
306 continue;
307
308 if (isFlagSet(it.neighbor(0, 0, 0, 0), boundaryFlag)) {
309 auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
310 auto const InitialisationAdditionalData = elementInitialiser(
311 Cell(it.x() + 0, it.y() + 0, it.z() + 0), blocks, *block);
312 element.vel_0 = InitialisationAdditionalData[0];
313 element.vel_1 = InitialisationAdditionalData[1];
314 element.vel_2 = InitialisationAdditionalData[2];
315 indexVectorAll.emplace_back(element);
316 if (inner.contains(it.x(), it.y(), it.z()))
317 indexVectorInner.emplace_back(element);
318 else
319 indexVectorOuter.emplace_back(element);
320 }
321 }
322
323 for (auto it = flagField->beginWithGhostLayerXYZ(
324 cell_idx_c(flagField->nrOfGhostLayers() - 1));
325 it != flagField->end(); ++it) {
326 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
327 continue;
328
329 if (isFlagSet(it.neighbor(0, 1, 0, 0), boundaryFlag)) {
330 auto element = IndexInfo(it.x(), it.y(), it.z(), 1);
331 auto const InitialisationAdditionalData = elementInitialiser(
332 Cell(it.x() + 0, it.y() + 1, it.z() + 0), blocks, *block);
333 element.vel_0 = InitialisationAdditionalData[0];
334 element.vel_1 = InitialisationAdditionalData[1];
335 element.vel_2 = InitialisationAdditionalData[2];
336 indexVectorAll.emplace_back(element);
337 if (inner.contains(it.x(), it.y(), it.z()))
338 indexVectorInner.emplace_back(element);
339 else
340 indexVectorOuter.emplace_back(element);
341 }
342 }
343
344 for (auto it = flagField->beginWithGhostLayerXYZ(
345 cell_idx_c(flagField->nrOfGhostLayers() - 1));
346 it != flagField->end(); ++it) {
347 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
348 continue;
349
350 if (isFlagSet(it.neighbor(0, -1, 0, 0), boundaryFlag)) {
351 auto element = IndexInfo(it.x(), it.y(), it.z(), 2);
352 auto const InitialisationAdditionalData = elementInitialiser(
353 Cell(it.x() + 0, it.y() - 1, it.z() + 0), blocks, *block);
354 element.vel_0 = InitialisationAdditionalData[0];
355 element.vel_1 = InitialisationAdditionalData[1];
356 element.vel_2 = InitialisationAdditionalData[2];
357 indexVectorAll.emplace_back(element);
358 if (inner.contains(it.x(), it.y(), it.z()))
359 indexVectorInner.emplace_back(element);
360 else
361 indexVectorOuter.emplace_back(element);
362 }
363 }
364
365 for (auto it = flagField->beginWithGhostLayerXYZ(
366 cell_idx_c(flagField->nrOfGhostLayers() - 1));
367 it != flagField->end(); ++it) {
368 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
369 continue;
370
371 if (isFlagSet(it.neighbor(-1, 0, 0, 0), boundaryFlag)) {
372 auto element = IndexInfo(it.x(), it.y(), it.z(), 3);
373 auto const InitialisationAdditionalData = elementInitialiser(
374 Cell(it.x() - 1, it.y() + 0, it.z() + 0), blocks, *block);
375 element.vel_0 = InitialisationAdditionalData[0];
376 element.vel_1 = InitialisationAdditionalData[1];
377 element.vel_2 = InitialisationAdditionalData[2];
378 indexVectorAll.emplace_back(element);
379 if (inner.contains(it.x(), it.y(), it.z()))
380 indexVectorInner.emplace_back(element);
381 else
382 indexVectorOuter.emplace_back(element);
383 }
384 }
385
386 for (auto it = flagField->beginWithGhostLayerXYZ(
387 cell_idx_c(flagField->nrOfGhostLayers() - 1));
388 it != flagField->end(); ++it) {
389 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
390 continue;
391
392 if (isFlagSet(it.neighbor(1, 0, 0, 0), boundaryFlag)) {
393 auto element = IndexInfo(it.x(), it.y(), it.z(), 4);
394 auto const InitialisationAdditionalData = elementInitialiser(
395 Cell(it.x() + 1, it.y() + 0, it.z() + 0), blocks, *block);
396 element.vel_0 = InitialisationAdditionalData[0];
397 element.vel_1 = InitialisationAdditionalData[1];
398 element.vel_2 = InitialisationAdditionalData[2];
399 indexVectorAll.emplace_back(element);
400 if (inner.contains(it.x(), it.y(), it.z()))
401 indexVectorInner.emplace_back(element);
402 else
403 indexVectorOuter.emplace_back(element);
404 }
405 }
406
407 for (auto it = flagField->beginWithGhostLayerXYZ(
408 cell_idx_c(flagField->nrOfGhostLayers() - 1));
409 it != flagField->end(); ++it) {
410 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
411 continue;
412
413 if (isFlagSet(it.neighbor(0, 0, 1, 0), boundaryFlag)) {
414 auto element = IndexInfo(it.x(), it.y(), it.z(), 5);
415 auto const InitialisationAdditionalData = elementInitialiser(
416 Cell(it.x() + 0, it.y() + 0, it.z() + 1), blocks, *block);
417 element.vel_0 = InitialisationAdditionalData[0];
418 element.vel_1 = InitialisationAdditionalData[1];
419 element.vel_2 = InitialisationAdditionalData[2];
420 indexVectorAll.emplace_back(element);
421 if (inner.contains(it.x(), it.y(), it.z()))
422 indexVectorInner.emplace_back(element);
423 else
424 indexVectorOuter.emplace_back(element);
425 }
426 }
427
428 for (auto it = flagField->beginWithGhostLayerXYZ(
429 cell_idx_c(flagField->nrOfGhostLayers() - 1));
430 it != flagField->end(); ++it) {
431 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
432 continue;
433
434 if (isFlagSet(it.neighbor(0, 0, -1, 0), boundaryFlag)) {
435 auto element = IndexInfo(it.x(), it.y(), it.z(), 6);
436 auto const InitialisationAdditionalData = elementInitialiser(
437 Cell(it.x() + 0, it.y() + 0, it.z() - 1), blocks, *block);
438 element.vel_0 = InitialisationAdditionalData[0];
439 element.vel_1 = InitialisationAdditionalData[1];
440 element.vel_2 = InitialisationAdditionalData[2];
441 indexVectorAll.emplace_back(element);
442 if (inner.contains(it.x(), it.y(), it.z()))
443 indexVectorInner.emplace_back(element);
444 else
445 indexVectorOuter.emplace_back(element);
446 }
447 }
448
449 for (auto it = flagField->beginWithGhostLayerXYZ(
450 cell_idx_c(flagField->nrOfGhostLayers() - 1));
451 it != flagField->end(); ++it) {
452 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
453 continue;
454
455 if (isFlagSet(it.neighbor(-1, 1, 0, 0), boundaryFlag)) {
456 auto element = IndexInfo(it.x(), it.y(), it.z(), 7);
457 auto const InitialisationAdditionalData = elementInitialiser(
458 Cell(it.x() - 1, it.y() + 1, it.z() + 0), blocks, *block);
459 element.vel_0 = InitialisationAdditionalData[0];
460 element.vel_1 = InitialisationAdditionalData[1];
461 element.vel_2 = InitialisationAdditionalData[2];
462 indexVectorAll.emplace_back(element);
463 if (inner.contains(it.x(), it.y(), it.z()))
464 indexVectorInner.emplace_back(element);
465 else
466 indexVectorOuter.emplace_back(element);
467 }
468 }
469
470 for (auto it = flagField->beginWithGhostLayerXYZ(
471 cell_idx_c(flagField->nrOfGhostLayers() - 1));
472 it != flagField->end(); ++it) {
473 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
474 continue;
475
476 if (isFlagSet(it.neighbor(1, 1, 0, 0), boundaryFlag)) {
477 auto element = IndexInfo(it.x(), it.y(), it.z(), 8);
478 auto const InitialisationAdditionalData = elementInitialiser(
479 Cell(it.x() + 1, it.y() + 1, it.z() + 0), blocks, *block);
480 element.vel_0 = InitialisationAdditionalData[0];
481 element.vel_1 = InitialisationAdditionalData[1];
482 element.vel_2 = InitialisationAdditionalData[2];
483 indexVectorAll.emplace_back(element);
484 if (inner.contains(it.x(), it.y(), it.z()))
485 indexVectorInner.emplace_back(element);
486 else
487 indexVectorOuter.emplace_back(element);
488 }
489 }
490
491 for (auto it = flagField->beginWithGhostLayerXYZ(
492 cell_idx_c(flagField->nrOfGhostLayers() - 1));
493 it != flagField->end(); ++it) {
494 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
495 continue;
496
497 if (isFlagSet(it.neighbor(-1, -1, 0, 0), boundaryFlag)) {
498 auto element = IndexInfo(it.x(), it.y(), it.z(), 9);
499 auto const InitialisationAdditionalData = elementInitialiser(
500 Cell(it.x() - 1, it.y() - 1, it.z() + 0), blocks, *block);
501 element.vel_0 = InitialisationAdditionalData[0];
502 element.vel_1 = InitialisationAdditionalData[1];
503 element.vel_2 = InitialisationAdditionalData[2];
504 indexVectorAll.emplace_back(element);
505 if (inner.contains(it.x(), it.y(), it.z()))
506 indexVectorInner.emplace_back(element);
507 else
508 indexVectorOuter.emplace_back(element);
509 }
510 }
511
512 for (auto it = flagField->beginWithGhostLayerXYZ(
513 cell_idx_c(flagField->nrOfGhostLayers() - 1));
514 it != flagField->end(); ++it) {
515 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
516 continue;
517
518 if (isFlagSet(it.neighbor(1, -1, 0, 0), boundaryFlag)) {
519 auto element = IndexInfo(it.x(), it.y(), it.z(), 10);
520 auto const InitialisationAdditionalData = elementInitialiser(
521 Cell(it.x() + 1, it.y() - 1, it.z() + 0), blocks, *block);
522 element.vel_0 = InitialisationAdditionalData[0];
523 element.vel_1 = InitialisationAdditionalData[1];
524 element.vel_2 = InitialisationAdditionalData[2];
525 indexVectorAll.emplace_back(element);
526 if (inner.contains(it.x(), it.y(), it.z()))
527 indexVectorInner.emplace_back(element);
528 else
529 indexVectorOuter.emplace_back(element);
530 }
531 }
532
533 for (auto it = flagField->beginWithGhostLayerXYZ(
534 cell_idx_c(flagField->nrOfGhostLayers() - 1));
535 it != flagField->end(); ++it) {
536 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
537 continue;
538
539 if (isFlagSet(it.neighbor(0, 1, 1, 0), boundaryFlag)) {
540 auto element = IndexInfo(it.x(), it.y(), it.z(), 11);
541 auto const InitialisationAdditionalData = elementInitialiser(
542 Cell(it.x() + 0, it.y() + 1, it.z() + 1), blocks, *block);
543 element.vel_0 = InitialisationAdditionalData[0];
544 element.vel_1 = InitialisationAdditionalData[1];
545 element.vel_2 = InitialisationAdditionalData[2];
546 indexVectorAll.emplace_back(element);
547 if (inner.contains(it.x(), it.y(), it.z()))
548 indexVectorInner.emplace_back(element);
549 else
550 indexVectorOuter.emplace_back(element);
551 }
552 }
553
554 for (auto it = flagField->beginWithGhostLayerXYZ(
555 cell_idx_c(flagField->nrOfGhostLayers() - 1));
556 it != flagField->end(); ++it) {
557 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
558 continue;
559
560 if (isFlagSet(it.neighbor(0, -1, 1, 0), boundaryFlag)) {
561 auto element = IndexInfo(it.x(), it.y(), it.z(), 12);
562 auto const InitialisationAdditionalData = elementInitialiser(
563 Cell(it.x() + 0, it.y() - 1, it.z() + 1), blocks, *block);
564 element.vel_0 = InitialisationAdditionalData[0];
565 element.vel_1 = InitialisationAdditionalData[1];
566 element.vel_2 = InitialisationAdditionalData[2];
567 indexVectorAll.emplace_back(element);
568 if (inner.contains(it.x(), it.y(), it.z()))
569 indexVectorInner.emplace_back(element);
570 else
571 indexVectorOuter.emplace_back(element);
572 }
573 }
574
575 for (auto it = flagField->beginWithGhostLayerXYZ(
576 cell_idx_c(flagField->nrOfGhostLayers() - 1));
577 it != flagField->end(); ++it) {
578 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
579 continue;
580
581 if (isFlagSet(it.neighbor(-1, 0, 1, 0), boundaryFlag)) {
582 auto element = IndexInfo(it.x(), it.y(), it.z(), 13);
583 auto const InitialisationAdditionalData = elementInitialiser(
584 Cell(it.x() - 1, it.y() + 0, it.z() + 1), blocks, *block);
585 element.vel_0 = InitialisationAdditionalData[0];
586 element.vel_1 = InitialisationAdditionalData[1];
587 element.vel_2 = InitialisationAdditionalData[2];
588 indexVectorAll.emplace_back(element);
589 if (inner.contains(it.x(), it.y(), it.z()))
590 indexVectorInner.emplace_back(element);
591 else
592 indexVectorOuter.emplace_back(element);
593 }
594 }
595
596 for (auto it = flagField->beginWithGhostLayerXYZ(
597 cell_idx_c(flagField->nrOfGhostLayers() - 1));
598 it != flagField->end(); ++it) {
599 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
600 continue;
601
602 if (isFlagSet(it.neighbor(1, 0, 1, 0), boundaryFlag)) {
603 auto element = IndexInfo(it.x(), it.y(), it.z(), 14);
604 auto const InitialisationAdditionalData = elementInitialiser(
605 Cell(it.x() + 1, it.y() + 0, it.z() + 1), blocks, *block);
606 element.vel_0 = InitialisationAdditionalData[0];
607 element.vel_1 = InitialisationAdditionalData[1];
608 element.vel_2 = InitialisationAdditionalData[2];
609 indexVectorAll.emplace_back(element);
610 if (inner.contains(it.x(), it.y(), it.z()))
611 indexVectorInner.emplace_back(element);
612 else
613 indexVectorOuter.emplace_back(element);
614 }
615 }
616
617 for (auto it = flagField->beginWithGhostLayerXYZ(
618 cell_idx_c(flagField->nrOfGhostLayers() - 1));
619 it != flagField->end(); ++it) {
620 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
621 continue;
622
623 if (isFlagSet(it.neighbor(0, 1, -1, 0), boundaryFlag)) {
624 auto element = IndexInfo(it.x(), it.y(), it.z(), 15);
625 auto const InitialisationAdditionalData = elementInitialiser(
626 Cell(it.x() + 0, it.y() + 1, it.z() - 1), blocks, *block);
627 element.vel_0 = InitialisationAdditionalData[0];
628 element.vel_1 = InitialisationAdditionalData[1];
629 element.vel_2 = InitialisationAdditionalData[2];
630 indexVectorAll.emplace_back(element);
631 if (inner.contains(it.x(), it.y(), it.z()))
632 indexVectorInner.emplace_back(element);
633 else
634 indexVectorOuter.emplace_back(element);
635 }
636 }
637
638 for (auto it = flagField->beginWithGhostLayerXYZ(
639 cell_idx_c(flagField->nrOfGhostLayers() - 1));
640 it != flagField->end(); ++it) {
641 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
642 continue;
643
644 if (isFlagSet(it.neighbor(0, -1, -1, 0), boundaryFlag)) {
645 auto element = IndexInfo(it.x(), it.y(), it.z(), 16);
646 auto const InitialisationAdditionalData = elementInitialiser(
647 Cell(it.x() + 0, it.y() - 1, it.z() - 1), blocks, *block);
648 element.vel_0 = InitialisationAdditionalData[0];
649 element.vel_1 = InitialisationAdditionalData[1];
650 element.vel_2 = InitialisationAdditionalData[2];
651 indexVectorAll.emplace_back(element);
652 if (inner.contains(it.x(), it.y(), it.z()))
653 indexVectorInner.emplace_back(element);
654 else
655 indexVectorOuter.emplace_back(element);
656 }
657 }
658
659 for (auto it = flagField->beginWithGhostLayerXYZ(
660 cell_idx_c(flagField->nrOfGhostLayers() - 1));
661 it != flagField->end(); ++it) {
662 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
663 continue;
664
665 if (isFlagSet(it.neighbor(-1, 0, -1, 0), boundaryFlag)) {
666 auto element = IndexInfo(it.x(), it.y(), it.z(), 17);
667 auto const InitialisationAdditionalData = elementInitialiser(
668 Cell(it.x() - 1, it.y() + 0, it.z() - 1), blocks, *block);
669 element.vel_0 = InitialisationAdditionalData[0];
670 element.vel_1 = InitialisationAdditionalData[1];
671 element.vel_2 = InitialisationAdditionalData[2];
672 indexVectorAll.emplace_back(element);
673 if (inner.contains(it.x(), it.y(), it.z()))
674 indexVectorInner.emplace_back(element);
675 else
676 indexVectorOuter.emplace_back(element);
677 }
678 }
679
680 for (auto it = flagField->beginWithGhostLayerXYZ(
681 cell_idx_c(flagField->nrOfGhostLayers() - 1));
682 it != flagField->end(); ++it) {
683 if (!isFlagSet(it, domainFlag) || isFlagSet(it, boundaryFlag))
684 continue;
685
686 if (isFlagSet(it.neighbor(1, 0, -1, 0), boundaryFlag)) {
687 auto element = IndexInfo(it.x(), it.y(), it.z(), 18);
688 auto const InitialisationAdditionalData = elementInitialiser(
689 Cell(it.x() + 1, it.y() + 0, it.z() - 1), blocks, *block);
690 element.vel_0 = InitialisationAdditionalData[0];
691 element.vel_1 = InitialisationAdditionalData[1];
692 element.vel_2 = InitialisationAdditionalData[2];
693 indexVectorAll.emplace_back(element);
694 if (inner.contains(it.x(), it.y(), it.z()))
695 indexVectorInner.emplace_back(element);
696 else
697 indexVectorOuter.emplace_back(element);
698 }
699 }
700
701 indexVectors->syncGPU();
702 forceVector->forceVector().resize(indexVectorAll.size());
703 forceVector->syncGPU();
704 }
705
706private:
707 void run_impl(IBlock *block, IndexVectors::Type type,
708 gpuStream_t stream = nullptr);
709
710 BlockDataID indexVectorID;
711 BlockDataID forceVectorID;
712 std::function<Vector3<float>(
713 const Cell &, const shared_ptr<StructuredBlockForest> &, IBlock &)>
714 elementInitialiser;
715
716public:
717 static constexpr std::array<std::array<int, 19u>, 3u> neighborOffset = {{
718 {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1},
719 {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0},
720 {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1},
721 }};
722
723 auto const &getForceVector(IBlock const *block) const {
724 auto const *forceVector = block->getData<ForceVector>(forceVectorID);
725 return forceVector->forceVector();
726 }
727
728 auto const &getIndexVector(IBlock const *block) const {
729 auto const *indexVectors = block->getData<IndexVectors>(indexVectorID);
730 return indexVectors->indexVector(IndexVectors::ALL);
731 }
732
733 BlockDataID getIndexVectorID() const { return indexVectorID; }
734 BlockDataID getForceVectorID() const { return forceVectorID; }
735
736public:
737 BlockDataID pdfsID;
738};
739
740#if defined(__clang__)
741#pragma clang diagnostic pop
742#elif defined(__GNUC__) or defined(__GNUG__)
743#pragma GCC diagnostic pop
744#endif
745
746} // namespace lbm
747} // namespace walberla
Definition Cell.hpp:96
auto const & getIndexVector(IBlock const *block) const
void operator()(IBlock *block, gpuStream_t stream=nullptr)
void outer(IBlock *block, gpuStream_t stream=nullptr)
std::function< void(IBlock *)> getOuterSweep(gpuStream_t stream=nullptr)
static constexpr std::array< std::array< int, 19u >, 3u > neighborOffset
std::function< void(IBlock *)> getSweep(gpuStream_t stream=nullptr)
DynamicUBBSinglePrecisionCUDA(const std::shared_ptr< StructuredBlockForest > &blocks, BlockDataID pdfsID_, std::function< Vector3< float >(const Cell &, const shared_ptr< StructuredBlockForest > &, IBlock &)> &velocityCallbackDynamicUBBSinglePrecisionCUDA)
void fillFromFlagField(const std::shared_ptr< StructuredBlockForest > &blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
void inner(IBlock *block, gpuStream_t stream=nullptr)
void run(IBlock *block, gpuStream_t stream=nullptr)
std::function< void(IBlock *)> getInnerSweep(gpuStream_t stream=nullptr)
void fillFromFlagField(const shared_ptr< StructuredBlockForest > &blocks, IBlock *block, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
auto const & getForceVector(IBlock const *block) const
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:176
\file PackInfoPdfDoublePrecision.cpp \author pystencils
IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)