ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
DynamicUBBSinglePrecisionCUDA.h
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \\file DynamicUBBSinglePrecisionCUDA.h
17//! \\author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.3.7, lbmpy v1.3.7, sympy v1.12.1,
21// lbmpy_walberla/pystencils_walberla from waLBerla commit
22// f36fa0a68bae59f0b516f6587ea8fa7c24a41141
23
24#pragma once
25#include "core/DataTypes.h"
26#include "core/logging/Logging.h"
27
28#include "blockforest/StructuredBlockForest.h"
29#include "core/debug/Debug.h"
30#include "domain_decomposition/BlockDataID.h"
31#include "domain_decomposition/IBlock.h"
32#include "field/FlagField.h"
33#include "gpu/FieldCopy.h"
34#include "gpu/GPUField.h"
35#include "gpu/GPUWrapper.h"
36
37#include <set>
38#include <vector>
39
40#ifdef __GNUC__
41#define RESTRICT __restrict__
42#elif _MSC_VER
43#define RESTRICT __restrict
44#else
45#define RESTRICT
46#endif
47
48#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
49using walberla::half;
50#endif
51
52namespace walberla {
53namespace lbm {
54
56public:
57 struct IndexInfo {
58 int32_t x;
59 int32_t y;
60 int32_t z;
61 int32_t dir;
62 float vel_0;
63 float vel_1;
64 float vel_2;
65 IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
66 : x(x_), y(y_), z(z_), dir(dir_), vel_0(), vel_1(), vel_2() {}
67 bool operator==(const IndexInfo &o) const {
68 return x == o.x && y == o.y && z == o.z && dir == o.dir &&
69 floatIsEqual(vel_0, o.vel_0) && floatIsEqual(vel_1, o.vel_1) &&
70 floatIsEqual(vel_2, o.vel_2);
71 }
72 };
73
75 public:
76 using CpuIndexVector = std::vector<IndexInfo>;
77
78 enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
79
80 IndexVectors() = default;
81 bool operator==(IndexVectors const &other) const {
82 return other.cpuVectors_ == cpuVectors_;
83 }
84
86 for (auto &gpuVec : gpuVectors_)
87 WALBERLA_GPU_CHECK(gpuFree(gpuVec));
88 }
89 CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
90 IndexInfo *pointerCpu(Type t) { return cpuVectors_[t].data(); }
91
92 IndexInfo *pointerGpu(Type t) { return gpuVectors_[t]; }
93 void syncGPU() {
94 for (auto &gpuVec : gpuVectors_)
95 WALBERLA_GPU_CHECK(gpuFree(gpuVec));
96 gpuVectors_.resize(cpuVectors_.size());
97
98 WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
99 for (size_t i = 0; i < cpuVectors_.size(); ++i) {
100 auto &gpuVec = gpuVectors_[i];
101 auto &cpuVec = cpuVectors_[i];
102 WALBERLA_GPU_CHECK(
103 gpuMalloc(&gpuVec, sizeof(IndexInfo) * cpuVec.size()));
104 WALBERLA_GPU_CHECK(gpuMemcpy(gpuVec, &cpuVec[0],
105 sizeof(IndexInfo) * cpuVec.size(),
106 gpuMemcpyHostToDevice));
107 }
108 }
109
110 private:
111 std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
112
113 using GpuIndexVector = IndexInfo *;
114 std::vector<GpuIndexVector> gpuVectors_;
115 };
116
118 const shared_ptr<StructuredBlockForest> &blocks, BlockDataID pdfsID_,
119 std::function<Vector3<float32>(const Cell &,
120 const shared_ptr<StructuredBlockForest> &,
121 IBlock &)> &velocityCallback)
122 : elementInitialiser(velocityCallback), pdfsID(pdfsID_) {
123 auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
124 return new IndexVectors();
125 };
126 indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
127 createIdxVector, "IndexField_DynamicUBBSinglePrecisionCUDA");
128 }
129
130 void run(IBlock *block, gpuStream_t stream = nullptr);
131
132 void operator()(IBlock *block, gpuStream_t stream = nullptr) {
133 run(block, stream);
134 }
135
136 void inner(IBlock *block, gpuStream_t stream = nullptr);
137
138 void outer(IBlock *block, gpuStream_t stream = nullptr);
139
140 Vector3<double> getForce(IBlock * /*block*/) {
141
142 WALBERLA_ABORT(
143 "Boundary condition was not generated including force calculation.")
144 return Vector3<double>(double_c(0.0));
145 }
146
147 std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {
148 return [this, stream](IBlock *b) { this->run(b, stream); };
149 }
150
151 std::function<void(IBlock *)> getInnerSweep(gpuStream_t stream = nullptr) {
152 return [this, stream](IBlock *b) { this->inner(b, stream); };
153 }
154
155 std::function<void(IBlock *)> getOuterSweep(gpuStream_t stream = nullptr) {
156 return [this, stream](IBlock *b) { this->outer(b, stream); };
157 }
158
159 template <typename FlagField_T>
160 void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
161 ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
162 FlagUID domainFlagUID) {
163 for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
164 fillFromFlagField<FlagField_T>(blocks, &*blockIt, flagFieldID,
165 boundaryFlagUID, domainFlagUID);
166 }
167
168 template <typename FlagField_T>
169 void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
170 IBlock *block, ConstBlockDataID flagFieldID,
171 FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
172 auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
173 auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
174 auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
175 auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
176
177 auto *flagField = block->getData<FlagField_T>(flagFieldID);
178
179 if (!(flagField->flagExists(boundaryFlagUID) &&
180 flagField->flagExists(domainFlagUID)))
181 return;
182
183 auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
184 auto domainFlag = flagField->getFlag(domainFlagUID);
185
186 auto inner = flagField->xyzSize();
187 inner.expand(cell_idx_t(-1));
188
189 indexVectorAll.clear();
190 indexVectorInner.clear();
191 indexVectorOuter.clear();
192
193 for (auto it = flagField->beginWithGhostLayerXYZ(
194 cell_idx_c(flagField->nrOfGhostLayers() - 1));
195 it != flagField->end(); ++it) {
196 if (!isFlagSet(it, domainFlag))
197 continue;
198
199 if (isFlagSet(it.neighbor(0, 0, 0, 0), boundaryFlag)) {
200 auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
201 auto const InitialisationAdditionalData = elementInitialiser(
202 Cell(it.x() + 0, it.y() + 0, it.z() + 0), blocks, *block);
203 element.vel_0 = InitialisationAdditionalData[0];
204 element.vel_1 = InitialisationAdditionalData[1];
205 element.vel_2 = InitialisationAdditionalData[2];
206 indexVectorAll.push_back(element);
207 if (inner.contains(it.x(), it.y(), it.z()))
208 indexVectorInner.push_back(element);
209 else
210 indexVectorOuter.push_back(element);
211 }
212 }
213
214 for (auto it = flagField->beginWithGhostLayerXYZ(
215 cell_idx_c(flagField->nrOfGhostLayers() - 1));
216 it != flagField->end(); ++it) {
217 if (!isFlagSet(it, domainFlag))
218 continue;
219
220 if (isFlagSet(it.neighbor(0, 1, 0, 0), boundaryFlag)) {
221 auto element = IndexInfo(it.x(), it.y(), it.z(), 1);
222 auto const InitialisationAdditionalData = elementInitialiser(
223 Cell(it.x() + 0, it.y() + 1, it.z() + 0), blocks, *block);
224 element.vel_0 = InitialisationAdditionalData[0];
225 element.vel_1 = InitialisationAdditionalData[1];
226 element.vel_2 = InitialisationAdditionalData[2];
227 indexVectorAll.push_back(element);
228 if (inner.contains(it.x(), it.y(), it.z()))
229 indexVectorInner.push_back(element);
230 else
231 indexVectorOuter.push_back(element);
232 }
233 }
234
235 for (auto it = flagField->beginWithGhostLayerXYZ(
236 cell_idx_c(flagField->nrOfGhostLayers() - 1));
237 it != flagField->end(); ++it) {
238 if (!isFlagSet(it, domainFlag))
239 continue;
240
241 if (isFlagSet(it.neighbor(0, -1, 0, 0), boundaryFlag)) {
242 auto element = IndexInfo(it.x(), it.y(), it.z(), 2);
243 auto const InitialisationAdditionalData = elementInitialiser(
244 Cell(it.x() + 0, it.y() - 1, it.z() + 0), blocks, *block);
245 element.vel_0 = InitialisationAdditionalData[0];
246 element.vel_1 = InitialisationAdditionalData[1];
247 element.vel_2 = InitialisationAdditionalData[2];
248 indexVectorAll.push_back(element);
249 if (inner.contains(it.x(), it.y(), it.z()))
250 indexVectorInner.push_back(element);
251 else
252 indexVectorOuter.push_back(element);
253 }
254 }
255
256 for (auto it = flagField->beginWithGhostLayerXYZ(
257 cell_idx_c(flagField->nrOfGhostLayers() - 1));
258 it != flagField->end(); ++it) {
259 if (!isFlagSet(it, domainFlag))
260 continue;
261
262 if (isFlagSet(it.neighbor(-1, 0, 0, 0), boundaryFlag)) {
263 auto element = IndexInfo(it.x(), it.y(), it.z(), 3);
264 auto const InitialisationAdditionalData = elementInitialiser(
265 Cell(it.x() - 1, it.y() + 0, it.z() + 0), blocks, *block);
266 element.vel_0 = InitialisationAdditionalData[0];
267 element.vel_1 = InitialisationAdditionalData[1];
268 element.vel_2 = InitialisationAdditionalData[2];
269 indexVectorAll.push_back(element);
270 if (inner.contains(it.x(), it.y(), it.z()))
271 indexVectorInner.push_back(element);
272 else
273 indexVectorOuter.push_back(element);
274 }
275 }
276
277 for (auto it = flagField->beginWithGhostLayerXYZ(
278 cell_idx_c(flagField->nrOfGhostLayers() - 1));
279 it != flagField->end(); ++it) {
280 if (!isFlagSet(it, domainFlag))
281 continue;
282
283 if (isFlagSet(it.neighbor(1, 0, 0, 0), boundaryFlag)) {
284 auto element = IndexInfo(it.x(), it.y(), it.z(), 4);
285 auto const InitialisationAdditionalData = elementInitialiser(
286 Cell(it.x() + 1, it.y() + 0, it.z() + 0), blocks, *block);
287 element.vel_0 = InitialisationAdditionalData[0];
288 element.vel_1 = InitialisationAdditionalData[1];
289 element.vel_2 = InitialisationAdditionalData[2];
290 indexVectorAll.push_back(element);
291 if (inner.contains(it.x(), it.y(), it.z()))
292 indexVectorInner.push_back(element);
293 else
294 indexVectorOuter.push_back(element);
295 }
296 }
297
298 for (auto it = flagField->beginWithGhostLayerXYZ(
299 cell_idx_c(flagField->nrOfGhostLayers() - 1));
300 it != flagField->end(); ++it) {
301 if (!isFlagSet(it, domainFlag))
302 continue;
303
304 if (isFlagSet(it.neighbor(0, 0, 1, 0), boundaryFlag)) {
305 auto element = IndexInfo(it.x(), it.y(), it.z(), 5);
306 auto const InitialisationAdditionalData = elementInitialiser(
307 Cell(it.x() + 0, it.y() + 0, it.z() + 1), blocks, *block);
308 element.vel_0 = InitialisationAdditionalData[0];
309 element.vel_1 = InitialisationAdditionalData[1];
310 element.vel_2 = InitialisationAdditionalData[2];
311 indexVectorAll.push_back(element);
312 if (inner.contains(it.x(), it.y(), it.z()))
313 indexVectorInner.push_back(element);
314 else
315 indexVectorOuter.push_back(element);
316 }
317 }
318
319 for (auto it = flagField->beginWithGhostLayerXYZ(
320 cell_idx_c(flagField->nrOfGhostLayers() - 1));
321 it != flagField->end(); ++it) {
322 if (!isFlagSet(it, domainFlag))
323 continue;
324
325 if (isFlagSet(it.neighbor(0, 0, -1, 0), boundaryFlag)) {
326 auto element = IndexInfo(it.x(), it.y(), it.z(), 6);
327 auto const InitialisationAdditionalData = elementInitialiser(
328 Cell(it.x() + 0, it.y() + 0, it.z() - 1), blocks, *block);
329 element.vel_0 = InitialisationAdditionalData[0];
330 element.vel_1 = InitialisationAdditionalData[1];
331 element.vel_2 = InitialisationAdditionalData[2];
332 indexVectorAll.push_back(element);
333 if (inner.contains(it.x(), it.y(), it.z()))
334 indexVectorInner.push_back(element);
335 else
336 indexVectorOuter.push_back(element);
337 }
338 }
339
340 for (auto it = flagField->beginWithGhostLayerXYZ(
341 cell_idx_c(flagField->nrOfGhostLayers() - 1));
342 it != flagField->end(); ++it) {
343 if (!isFlagSet(it, domainFlag))
344 continue;
345
346 if (isFlagSet(it.neighbor(-1, 1, 0, 0), boundaryFlag)) {
347 auto element = IndexInfo(it.x(), it.y(), it.z(), 7);
348 auto const InitialisationAdditionalData = elementInitialiser(
349 Cell(it.x() - 1, it.y() + 1, it.z() + 0), blocks, *block);
350 element.vel_0 = InitialisationAdditionalData[0];
351 element.vel_1 = InitialisationAdditionalData[1];
352 element.vel_2 = InitialisationAdditionalData[2];
353 indexVectorAll.push_back(element);
354 if (inner.contains(it.x(), it.y(), it.z()))
355 indexVectorInner.push_back(element);
356 else
357 indexVectorOuter.push_back(element);
358 }
359 }
360
361 for (auto it = flagField->beginWithGhostLayerXYZ(
362 cell_idx_c(flagField->nrOfGhostLayers() - 1));
363 it != flagField->end(); ++it) {
364 if (!isFlagSet(it, domainFlag))
365 continue;
366
367 if (isFlagSet(it.neighbor(1, 1, 0, 0), boundaryFlag)) {
368 auto element = IndexInfo(it.x(), it.y(), it.z(), 8);
369 auto const InitialisationAdditionalData = elementInitialiser(
370 Cell(it.x() + 1, it.y() + 1, it.z() + 0), blocks, *block);
371 element.vel_0 = InitialisationAdditionalData[0];
372 element.vel_1 = InitialisationAdditionalData[1];
373 element.vel_2 = InitialisationAdditionalData[2];
374 indexVectorAll.push_back(element);
375 if (inner.contains(it.x(), it.y(), it.z()))
376 indexVectorInner.push_back(element);
377 else
378 indexVectorOuter.push_back(element);
379 }
380 }
381
382 for (auto it = flagField->beginWithGhostLayerXYZ(
383 cell_idx_c(flagField->nrOfGhostLayers() - 1));
384 it != flagField->end(); ++it) {
385 if (!isFlagSet(it, domainFlag))
386 continue;
387
388 if (isFlagSet(it.neighbor(-1, -1, 0, 0), boundaryFlag)) {
389 auto element = IndexInfo(it.x(), it.y(), it.z(), 9);
390 auto const InitialisationAdditionalData = elementInitialiser(
391 Cell(it.x() - 1, it.y() - 1, it.z() + 0), blocks, *block);
392 element.vel_0 = InitialisationAdditionalData[0];
393 element.vel_1 = InitialisationAdditionalData[1];
394 element.vel_2 = InitialisationAdditionalData[2];
395 indexVectorAll.push_back(element);
396 if (inner.contains(it.x(), it.y(), it.z()))
397 indexVectorInner.push_back(element);
398 else
399 indexVectorOuter.push_back(element);
400 }
401 }
402
403 for (auto it = flagField->beginWithGhostLayerXYZ(
404 cell_idx_c(flagField->nrOfGhostLayers() - 1));
405 it != flagField->end(); ++it) {
406 if (!isFlagSet(it, domainFlag))
407 continue;
408
409 if (isFlagSet(it.neighbor(1, -1, 0, 0), boundaryFlag)) {
410 auto element = IndexInfo(it.x(), it.y(), it.z(), 10);
411 auto const InitialisationAdditionalData = elementInitialiser(
412 Cell(it.x() + 1, it.y() - 1, it.z() + 0), blocks, *block);
413 element.vel_0 = InitialisationAdditionalData[0];
414 element.vel_1 = InitialisationAdditionalData[1];
415 element.vel_2 = InitialisationAdditionalData[2];
416 indexVectorAll.push_back(element);
417 if (inner.contains(it.x(), it.y(), it.z()))
418 indexVectorInner.push_back(element);
419 else
420 indexVectorOuter.push_back(element);
421 }
422 }
423
424 for (auto it = flagField->beginWithGhostLayerXYZ(
425 cell_idx_c(flagField->nrOfGhostLayers() - 1));
426 it != flagField->end(); ++it) {
427 if (!isFlagSet(it, domainFlag))
428 continue;
429
430 if (isFlagSet(it.neighbor(0, 1, 1, 0), boundaryFlag)) {
431 auto element = IndexInfo(it.x(), it.y(), it.z(), 11);
432 auto const InitialisationAdditionalData = elementInitialiser(
433 Cell(it.x() + 0, it.y() + 1, it.z() + 1), blocks, *block);
434 element.vel_0 = InitialisationAdditionalData[0];
435 element.vel_1 = InitialisationAdditionalData[1];
436 element.vel_2 = InitialisationAdditionalData[2];
437 indexVectorAll.push_back(element);
438 if (inner.contains(it.x(), it.y(), it.z()))
439 indexVectorInner.push_back(element);
440 else
441 indexVectorOuter.push_back(element);
442 }
443 }
444
445 for (auto it = flagField->beginWithGhostLayerXYZ(
446 cell_idx_c(flagField->nrOfGhostLayers() - 1));
447 it != flagField->end(); ++it) {
448 if (!isFlagSet(it, domainFlag))
449 continue;
450
451 if (isFlagSet(it.neighbor(0, -1, 1, 0), boundaryFlag)) {
452 auto element = IndexInfo(it.x(), it.y(), it.z(), 12);
453 auto const InitialisationAdditionalData = elementInitialiser(
454 Cell(it.x() + 0, it.y() - 1, it.z() + 1), blocks, *block);
455 element.vel_0 = InitialisationAdditionalData[0];
456 element.vel_1 = InitialisationAdditionalData[1];
457 element.vel_2 = InitialisationAdditionalData[2];
458 indexVectorAll.push_back(element);
459 if (inner.contains(it.x(), it.y(), it.z()))
460 indexVectorInner.push_back(element);
461 else
462 indexVectorOuter.push_back(element);
463 }
464 }
465
466 for (auto it = flagField->beginWithGhostLayerXYZ(
467 cell_idx_c(flagField->nrOfGhostLayers() - 1));
468 it != flagField->end(); ++it) {
469 if (!isFlagSet(it, domainFlag))
470 continue;
471
472 if (isFlagSet(it.neighbor(-1, 0, 1, 0), boundaryFlag)) {
473 auto element = IndexInfo(it.x(), it.y(), it.z(), 13);
474 auto const InitialisationAdditionalData = elementInitialiser(
475 Cell(it.x() - 1, it.y() + 0, it.z() + 1), blocks, *block);
476 element.vel_0 = InitialisationAdditionalData[0];
477 element.vel_1 = InitialisationAdditionalData[1];
478 element.vel_2 = InitialisationAdditionalData[2];
479 indexVectorAll.push_back(element);
480 if (inner.contains(it.x(), it.y(), it.z()))
481 indexVectorInner.push_back(element);
482 else
483 indexVectorOuter.push_back(element);
484 }
485 }
486
487 for (auto it = flagField->beginWithGhostLayerXYZ(
488 cell_idx_c(flagField->nrOfGhostLayers() - 1));
489 it != flagField->end(); ++it) {
490 if (!isFlagSet(it, domainFlag))
491 continue;
492
493 if (isFlagSet(it.neighbor(1, 0, 1, 0), boundaryFlag)) {
494 auto element = IndexInfo(it.x(), it.y(), it.z(), 14);
495 auto const InitialisationAdditionalData = elementInitialiser(
496 Cell(it.x() + 1, it.y() + 0, it.z() + 1), blocks, *block);
497 element.vel_0 = InitialisationAdditionalData[0];
498 element.vel_1 = InitialisationAdditionalData[1];
499 element.vel_2 = InitialisationAdditionalData[2];
500 indexVectorAll.push_back(element);
501 if (inner.contains(it.x(), it.y(), it.z()))
502 indexVectorInner.push_back(element);
503 else
504 indexVectorOuter.push_back(element);
505 }
506 }
507
508 for (auto it = flagField->beginWithGhostLayerXYZ(
509 cell_idx_c(flagField->nrOfGhostLayers() - 1));
510 it != flagField->end(); ++it) {
511 if (!isFlagSet(it, domainFlag))
512 continue;
513
514 if (isFlagSet(it.neighbor(0, 1, -1, 0), boundaryFlag)) {
515 auto element = IndexInfo(it.x(), it.y(), it.z(), 15);
516 auto const InitialisationAdditionalData = elementInitialiser(
517 Cell(it.x() + 0, it.y() + 1, it.z() - 1), blocks, *block);
518 element.vel_0 = InitialisationAdditionalData[0];
519 element.vel_1 = InitialisationAdditionalData[1];
520 element.vel_2 = InitialisationAdditionalData[2];
521 indexVectorAll.push_back(element);
522 if (inner.contains(it.x(), it.y(), it.z()))
523 indexVectorInner.push_back(element);
524 else
525 indexVectorOuter.push_back(element);
526 }
527 }
528
529 for (auto it = flagField->beginWithGhostLayerXYZ(
530 cell_idx_c(flagField->nrOfGhostLayers() - 1));
531 it != flagField->end(); ++it) {
532 if (!isFlagSet(it, domainFlag))
533 continue;
534
535 if (isFlagSet(it.neighbor(0, -1, -1, 0), boundaryFlag)) {
536 auto element = IndexInfo(it.x(), it.y(), it.z(), 16);
537 auto const InitialisationAdditionalData = elementInitialiser(
538 Cell(it.x() + 0, it.y() - 1, it.z() - 1), blocks, *block);
539 element.vel_0 = InitialisationAdditionalData[0];
540 element.vel_1 = InitialisationAdditionalData[1];
541 element.vel_2 = InitialisationAdditionalData[2];
542 indexVectorAll.push_back(element);
543 if (inner.contains(it.x(), it.y(), it.z()))
544 indexVectorInner.push_back(element);
545 else
546 indexVectorOuter.push_back(element);
547 }
548 }
549
550 for (auto it = flagField->beginWithGhostLayerXYZ(
551 cell_idx_c(flagField->nrOfGhostLayers() - 1));
552 it != flagField->end(); ++it) {
553 if (!isFlagSet(it, domainFlag))
554 continue;
555
556 if (isFlagSet(it.neighbor(-1, 0, -1, 0), boundaryFlag)) {
557 auto element = IndexInfo(it.x(), it.y(), it.z(), 17);
558 auto const InitialisationAdditionalData = elementInitialiser(
559 Cell(it.x() - 1, it.y() + 0, it.z() - 1), blocks, *block);
560 element.vel_0 = InitialisationAdditionalData[0];
561 element.vel_1 = InitialisationAdditionalData[1];
562 element.vel_2 = InitialisationAdditionalData[2];
563 indexVectorAll.push_back(element);
564 if (inner.contains(it.x(), it.y(), it.z()))
565 indexVectorInner.push_back(element);
566 else
567 indexVectorOuter.push_back(element);
568 }
569 }
570
571 for (auto it = flagField->beginWithGhostLayerXYZ(
572 cell_idx_c(flagField->nrOfGhostLayers() - 1));
573 it != flagField->end(); ++it) {
574 if (!isFlagSet(it, domainFlag))
575 continue;
576
577 if (isFlagSet(it.neighbor(1, 0, -1, 0), boundaryFlag)) {
578 auto element = IndexInfo(it.x(), it.y(), it.z(), 18);
579 auto const InitialisationAdditionalData = elementInitialiser(
580 Cell(it.x() + 1, it.y() + 0, it.z() - 1), blocks, *block);
581 element.vel_0 = InitialisationAdditionalData[0];
582 element.vel_1 = InitialisationAdditionalData[1];
583 element.vel_2 = InitialisationAdditionalData[2];
584 indexVectorAll.push_back(element);
585 if (inner.contains(it.x(), it.y(), it.z()))
586 indexVectorInner.push_back(element);
587 else
588 indexVectorOuter.push_back(element);
589 }
590 }
591
592 indexVectors->syncGPU();
593 }
594
595private:
596 void run_impl(IBlock *block, IndexVectors::Type type,
597 gpuStream_t stream = nullptr);
598
599 BlockDataID indexVectorID;
600
601 std::function<Vector3<float32>(
602 const Cell &, const shared_ptr<StructuredBlockForest> &, IBlock &)>
603 elementInitialiser;
604
605public:
606 BlockDataID pdfsID;
607};
608
609} // namespace lbm
610} // namespace walberla
Definition Cell.hpp:97
void operator()(IBlock *block, gpuStream_t stream=nullptr)
void fillFromFlagField(const shared_ptr< StructuredBlockForest > &blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
void outer(IBlock *block, gpuStream_t stream=nullptr)
std::function< void(IBlock *)> getOuterSweep(gpuStream_t stream=nullptr)
std::function< void(IBlock *)> getSweep(gpuStream_t stream=nullptr)
DynamicUBBSinglePrecisionCUDA(const shared_ptr< StructuredBlockForest > &blocks, BlockDataID pdfsID_, std::function< Vector3< float32 >(const Cell &, const shared_ptr< StructuredBlockForest > &, IBlock &)> &velocityCallback)
void inner(IBlock *block, gpuStream_t stream=nullptr)
void run(IBlock *block, gpuStream_t stream=nullptr)
std::function< void(IBlock *)> getInnerSweep(gpuStream_t stream=nullptr)
void fillFromFlagField(const shared_ptr< StructuredBlockForest > &blocks, IBlock *block, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:172
\file PackInfoPdfDoublePrecision.cpp \author pystencils
IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)