ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
FixedFlux_single_precision_CUDA.h
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \\file FixedFlux_single_precision_CUDA.h
17//! \\author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.4+1.ge851f4e, lbmpy v1.4+1.ge9efe34,
21// sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit
22// 007e77e077ad9d22b5eed6f3d3118240993e553c
23
24#pragma once
25#include "core/DataTypes.h"
26#include "core/logging/Logging.h"
27
28#include "blockforest/StructuredBlockForest.h"
29#include "core/debug/Debug.h"
30#include "domain_decomposition/BlockDataID.h"
31#include "domain_decomposition/IBlock.h"
32#include "field/FlagField.h"
33#include "gpu/FieldCopy.h"
34#include "gpu/GPUField.h"
35#include "gpu/GPUWrapper.h"
36
37#include <functional>
38#include <memory>
39#include <vector>
40
41#ifdef __GNUC__
42#define RESTRICT __restrict__
43#else
44#define RESTRICT
45#endif
46
47#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
48using walberla::half;
49#endif
50
51namespace walberla {
52namespace pystencils {
53
55public:
56 struct IndexInfo {
57 int32_t x;
58 int32_t y;
59 int32_t z;
60 int32_t dir;
61 float flux_0;
62 float flux_1;
63 float flux_2;
64 IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
65 : x(x_), y(y_), z(z_), dir(dir_), flux_0(), flux_1(), flux_2() {}
66 bool operator==(const IndexInfo &o) const {
67 return x == o.x && y == o.y && z == o.z && dir == o.dir &&
68 floatIsEqual(flux_0, o.flux_0) && floatIsEqual(flux_1, o.flux_1) &&
69 floatIsEqual(flux_2, o.flux_2);
70 }
71 };
72
74 public:
75 using CpuIndexVector = std::vector<IndexInfo>;
76
77 enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
78
79 IndexVectors() = default;
80 bool operator==(IndexVectors const &other) const {
81 return other.cpuVectors_ == cpuVectors_;
82 }
83
85 for (auto &gpuVec : gpuVectors_) {
86 if (gpuVec) {
87 WALBERLA_GPU_CHECK(gpuFree(gpuVec));
88 }
89 }
90 }
91 auto &indexVector(Type t) { return cpuVectors_[t]; }
92 auto const &indexVector(Type t) const { return cpuVectors_[t]; }
94 return cpuVectors_[t].empty() ? nullptr : cpuVectors_[t].data();
95 }
96
97 IndexInfo *pointerGpu(Type t) { return gpuVectors_[t]; }
98 void syncGPU() {
99 for (auto &gpuVec : gpuVectors_) {
100 if (gpuVec) {
101 WALBERLA_GPU_CHECK(gpuFree(gpuVec));
102 gpuVec = nullptr;
103 }
104 }
105 gpuVectors_.resize(cpuVectors_.size());
106
107 WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
108 for (size_t i = 0; i < cpuVectors_.size(); ++i) {
109 auto &gpuVec = gpuVectors_[i];
110 auto &cpuVec = cpuVectors_[i];
111 if (cpuVec.empty()) {
112 continue;
113 }
114 WALBERLA_GPU_CHECK(
115 gpuMalloc(&gpuVec, sizeof(IndexInfo) * cpuVec.size()));
116 WALBERLA_GPU_CHECK(gpuMemcpy(gpuVec, cpuVec.data(),
117 sizeof(IndexInfo) * cpuVec.size(),
118 gpuMemcpyHostToDevice));
119 }
120 }
121
122 private:
123 std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
124
125 using GpuIndexVector = IndexInfo *;
126 std::vector<GpuIndexVector> gpuVectors_;
127 };
128
130 const std::shared_ptr<StructuredBlockForest> &blocks, BlockDataID fluxID_,
131 std::function<Vector3<float>(const Cell &,
132 const shared_ptr<StructuredBlockForest> &,
133 IBlock &)> &fluxCallback)
134 : elementInitaliser(fluxCallback), fluxID(fluxID_) {
135 auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
136 return new IndexVectors();
137 };
138 indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
139 createIdxVector, "IndexField_FixedFlux_single_precision_CUDA");
140 }
141
142 void run(IBlock *block, gpuStream_t stream = nullptr);
143
144 void operator()(IBlock *block, gpuStream_t stream = nullptr) {
145 run(block, stream);
146 }
147
148 void inner(IBlock *block, gpuStream_t stream = nullptr);
149
150 void outer(IBlock *block, gpuStream_t stream = nullptr);
151
152 Vector3<float> getForce(IBlock * /*block*/) {
153
154 WALBERLA_ABORT(
155 "Boundary condition was not generated including force calculation.")
156 return Vector3<float>(float_c(0.0));
157 }
158
159 std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {
160 return [this, stream](IBlock *b) { this->run(b, stream); };
161 }
162
163 std::function<void(IBlock *)> getInnerSweep(gpuStream_t stream = nullptr) {
164 return [this, stream](IBlock *b) { this->inner(b, stream); };
165 }
166
167 std::function<void(IBlock *)> getOuterSweep(gpuStream_t stream = nullptr) {
168 return [this, stream](IBlock *b) { this->outer(b, stream); };
169 }
170
171 template <typename FlagField_T>
172 void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
173 ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
174 FlagUID domainFlagUID) {
175 for (auto &block : *blocks)
176 fillFromFlagField<FlagField_T>(blocks, &block, flagFieldID,
177 boundaryFlagUID, domainFlagUID);
178 }
179
180 template <typename FlagField_T>
181 void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
182 IBlock *block, ConstBlockDataID flagFieldID,
183 FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
184 auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
185 auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
186 auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
187 auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
188
189 auto *flagField = block->getData<FlagField_T>(flagFieldID);
190
191 if (!(flagField->flagExists(boundaryFlagUID) &&
192 flagField->flagExists(domainFlagUID)))
193 return;
194
195 auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
196 auto domainFlag = flagField->getFlag(domainFlagUID);
197
198 auto inner = flagField->xyzSize();
199 inner.expand(cell_idx_t(-1));
200
201 indexVectorAll.clear();
202 indexVectorInner.clear();
203 indexVectorOuter.clear();
204
205 for (auto it = flagField->beginWithGhostLayerXYZ(
206 cell_idx_c(flagField->nrOfGhostLayers() - 1));
207 it != flagField->end(); ++it) {
208 if (!isFlagSet(it, domainFlag))
209 continue;
210
211 if (isFlagSet(it.neighbor(0, 0, 0, 0), boundaryFlag)) {
212 auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
213 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
214 Cell(it.x() + 0, it.y() + 0, it.z() + 0), blocks, *block);
215 element.flux_0 = InitialisatonAdditionalData[0];
216 element.flux_1 = InitialisatonAdditionalData[1];
217 element.flux_2 = InitialisatonAdditionalData[2];
218 indexVectorAll.emplace_back(element);
219 if (inner.contains(it.x(), it.y(), it.z()))
220 indexVectorInner.emplace_back(element);
221 else
222 indexVectorOuter.emplace_back(element);
223 }
224 }
225
226 for (auto it = flagField->beginWithGhostLayerXYZ(
227 cell_idx_c(flagField->nrOfGhostLayers() - 1));
228 it != flagField->end(); ++it) {
229 if (!isFlagSet(it, domainFlag))
230 continue;
231
232 if (isFlagSet(it.neighbor(0, 1, 0, 0), boundaryFlag)) {
233 auto element = IndexInfo(it.x(), it.y(), it.z(), 1);
234 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
235 Cell(it.x() + 0, it.y() + 1, it.z() + 0), blocks, *block);
236 element.flux_0 = InitialisatonAdditionalData[0];
237 element.flux_1 = InitialisatonAdditionalData[1];
238 element.flux_2 = InitialisatonAdditionalData[2];
239 indexVectorAll.emplace_back(element);
240 if (inner.contains(it.x(), it.y(), it.z()))
241 indexVectorInner.emplace_back(element);
242 else
243 indexVectorOuter.emplace_back(element);
244 }
245 }
246
247 for (auto it = flagField->beginWithGhostLayerXYZ(
248 cell_idx_c(flagField->nrOfGhostLayers() - 1));
249 it != flagField->end(); ++it) {
250 if (!isFlagSet(it, domainFlag))
251 continue;
252
253 if (isFlagSet(it.neighbor(0, -1, 0, 0), boundaryFlag)) {
254 auto element = IndexInfo(it.x(), it.y(), it.z(), 2);
255 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
256 Cell(it.x() + 0, it.y() + -1, it.z() + 0), blocks, *block);
257 element.flux_0 = InitialisatonAdditionalData[0];
258 element.flux_1 = InitialisatonAdditionalData[1];
259 element.flux_2 = InitialisatonAdditionalData[2];
260 indexVectorAll.emplace_back(element);
261 if (inner.contains(it.x(), it.y(), it.z()))
262 indexVectorInner.emplace_back(element);
263 else
264 indexVectorOuter.emplace_back(element);
265 }
266 }
267
268 for (auto it = flagField->beginWithGhostLayerXYZ(
269 cell_idx_c(flagField->nrOfGhostLayers() - 1));
270 it != flagField->end(); ++it) {
271 if (!isFlagSet(it, domainFlag))
272 continue;
273
274 if (isFlagSet(it.neighbor(-1, 0, 0, 0), boundaryFlag)) {
275 auto element = IndexInfo(it.x(), it.y(), it.z(), 3);
276 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
277 Cell(it.x() + -1, it.y() + 0, it.z() + 0), blocks, *block);
278 element.flux_0 = InitialisatonAdditionalData[0];
279 element.flux_1 = InitialisatonAdditionalData[1];
280 element.flux_2 = InitialisatonAdditionalData[2];
281 indexVectorAll.emplace_back(element);
282 if (inner.contains(it.x(), it.y(), it.z()))
283 indexVectorInner.emplace_back(element);
284 else
285 indexVectorOuter.emplace_back(element);
286 }
287 }
288
289 for (auto it = flagField->beginWithGhostLayerXYZ(
290 cell_idx_c(flagField->nrOfGhostLayers() - 1));
291 it != flagField->end(); ++it) {
292 if (!isFlagSet(it, domainFlag))
293 continue;
294
295 if (isFlagSet(it.neighbor(1, 0, 0, 0), boundaryFlag)) {
296 auto element = IndexInfo(it.x(), it.y(), it.z(), 4);
297 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
298 Cell(it.x() + 1, it.y() + 0, it.z() + 0), blocks, *block);
299 element.flux_0 = InitialisatonAdditionalData[0];
300 element.flux_1 = InitialisatonAdditionalData[1];
301 element.flux_2 = InitialisatonAdditionalData[2];
302 indexVectorAll.emplace_back(element);
303 if (inner.contains(it.x(), it.y(), it.z()))
304 indexVectorInner.emplace_back(element);
305 else
306 indexVectorOuter.emplace_back(element);
307 }
308 }
309
310 for (auto it = flagField->beginWithGhostLayerXYZ(
311 cell_idx_c(flagField->nrOfGhostLayers() - 1));
312 it != flagField->end(); ++it) {
313 if (!isFlagSet(it, domainFlag))
314 continue;
315
316 if (isFlagSet(it.neighbor(0, 0, 1, 0), boundaryFlag)) {
317 auto element = IndexInfo(it.x(), it.y(), it.z(), 5);
318 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
319 Cell(it.x() + 0, it.y() + 0, it.z() + 1), blocks, *block);
320 element.flux_0 = InitialisatonAdditionalData[0];
321 element.flux_1 = InitialisatonAdditionalData[1];
322 element.flux_2 = InitialisatonAdditionalData[2];
323 indexVectorAll.emplace_back(element);
324 if (inner.contains(it.x(), it.y(), it.z()))
325 indexVectorInner.emplace_back(element);
326 else
327 indexVectorOuter.emplace_back(element);
328 }
329 }
330
331 for (auto it = flagField->beginWithGhostLayerXYZ(
332 cell_idx_c(flagField->nrOfGhostLayers() - 1));
333 it != flagField->end(); ++it) {
334 if (!isFlagSet(it, domainFlag))
335 continue;
336
337 if (isFlagSet(it.neighbor(0, 0, -1, 0), boundaryFlag)) {
338 auto element = IndexInfo(it.x(), it.y(), it.z(), 6);
339 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
340 Cell(it.x() + 0, it.y() + 0, it.z() + -1), blocks, *block);
341 element.flux_0 = InitialisatonAdditionalData[0];
342 element.flux_1 = InitialisatonAdditionalData[1];
343 element.flux_2 = InitialisatonAdditionalData[2];
344 indexVectorAll.emplace_back(element);
345 if (inner.contains(it.x(), it.y(), it.z()))
346 indexVectorInner.emplace_back(element);
347 else
348 indexVectorOuter.emplace_back(element);
349 }
350 }
351
352 for (auto it = flagField->beginWithGhostLayerXYZ(
353 cell_idx_c(flagField->nrOfGhostLayers() - 1));
354 it != flagField->end(); ++it) {
355 if (!isFlagSet(it, domainFlag))
356 continue;
357
358 if (isFlagSet(it.neighbor(-1, 1, 0, 0), boundaryFlag)) {
359 auto element = IndexInfo(it.x(), it.y(), it.z(), 7);
360 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
361 Cell(it.x() + -1, it.y() + 1, it.z() + 0), blocks, *block);
362 element.flux_0 = InitialisatonAdditionalData[0];
363 element.flux_1 = InitialisatonAdditionalData[1];
364 element.flux_2 = InitialisatonAdditionalData[2];
365 indexVectorAll.emplace_back(element);
366 if (inner.contains(it.x(), it.y(), it.z()))
367 indexVectorInner.emplace_back(element);
368 else
369 indexVectorOuter.emplace_back(element);
370 }
371 }
372
373 for (auto it = flagField->beginWithGhostLayerXYZ(
374 cell_idx_c(flagField->nrOfGhostLayers() - 1));
375 it != flagField->end(); ++it) {
376 if (!isFlagSet(it, domainFlag))
377 continue;
378
379 if (isFlagSet(it.neighbor(1, 1, 0, 0), boundaryFlag)) {
380 auto element = IndexInfo(it.x(), it.y(), it.z(), 8);
381 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
382 Cell(it.x() + 1, it.y() + 1, it.z() + 0), blocks, *block);
383 element.flux_0 = InitialisatonAdditionalData[0];
384 element.flux_1 = InitialisatonAdditionalData[1];
385 element.flux_2 = InitialisatonAdditionalData[2];
386 indexVectorAll.emplace_back(element);
387 if (inner.contains(it.x(), it.y(), it.z()))
388 indexVectorInner.emplace_back(element);
389 else
390 indexVectorOuter.emplace_back(element);
391 }
392 }
393
394 for (auto it = flagField->beginWithGhostLayerXYZ(
395 cell_idx_c(flagField->nrOfGhostLayers() - 1));
396 it != flagField->end(); ++it) {
397 if (!isFlagSet(it, domainFlag))
398 continue;
399
400 if (isFlagSet(it.neighbor(-1, -1, 0, 0), boundaryFlag)) {
401 auto element = IndexInfo(it.x(), it.y(), it.z(), 9);
402 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
403 Cell(it.x() + -1, it.y() + -1, it.z() + 0), blocks, *block);
404 element.flux_0 = InitialisatonAdditionalData[0];
405 element.flux_1 = InitialisatonAdditionalData[1];
406 element.flux_2 = InitialisatonAdditionalData[2];
407 indexVectorAll.emplace_back(element);
408 if (inner.contains(it.x(), it.y(), it.z()))
409 indexVectorInner.emplace_back(element);
410 else
411 indexVectorOuter.emplace_back(element);
412 }
413 }
414
415 for (auto it = flagField->beginWithGhostLayerXYZ(
416 cell_idx_c(flagField->nrOfGhostLayers() - 1));
417 it != flagField->end(); ++it) {
418 if (!isFlagSet(it, domainFlag))
419 continue;
420
421 if (isFlagSet(it.neighbor(1, -1, 0, 0), boundaryFlag)) {
422 auto element = IndexInfo(it.x(), it.y(), it.z(), 10);
423 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
424 Cell(it.x() + 1, it.y() + -1, it.z() + 0), blocks, *block);
425 element.flux_0 = InitialisatonAdditionalData[0];
426 element.flux_1 = InitialisatonAdditionalData[1];
427 element.flux_2 = InitialisatonAdditionalData[2];
428 indexVectorAll.emplace_back(element);
429 if (inner.contains(it.x(), it.y(), it.z()))
430 indexVectorInner.emplace_back(element);
431 else
432 indexVectorOuter.emplace_back(element);
433 }
434 }
435
436 for (auto it = flagField->beginWithGhostLayerXYZ(
437 cell_idx_c(flagField->nrOfGhostLayers() - 1));
438 it != flagField->end(); ++it) {
439 if (!isFlagSet(it, domainFlag))
440 continue;
441
442 if (isFlagSet(it.neighbor(0, 1, 1, 0), boundaryFlag)) {
443 auto element = IndexInfo(it.x(), it.y(), it.z(), 11);
444 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
445 Cell(it.x() + 0, it.y() + 1, it.z() + 1), blocks, *block);
446 element.flux_0 = InitialisatonAdditionalData[0];
447 element.flux_1 = InitialisatonAdditionalData[1];
448 element.flux_2 = InitialisatonAdditionalData[2];
449 indexVectorAll.emplace_back(element);
450 if (inner.contains(it.x(), it.y(), it.z()))
451 indexVectorInner.emplace_back(element);
452 else
453 indexVectorOuter.emplace_back(element);
454 }
455 }
456
457 for (auto it = flagField->beginWithGhostLayerXYZ(
458 cell_idx_c(flagField->nrOfGhostLayers() - 1));
459 it != flagField->end(); ++it) {
460 if (!isFlagSet(it, domainFlag))
461 continue;
462
463 if (isFlagSet(it.neighbor(0, -1, 1, 0), boundaryFlag)) {
464 auto element = IndexInfo(it.x(), it.y(), it.z(), 12);
465 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
466 Cell(it.x() + 0, it.y() + -1, it.z() + 1), blocks, *block);
467 element.flux_0 = InitialisatonAdditionalData[0];
468 element.flux_1 = InitialisatonAdditionalData[1];
469 element.flux_2 = InitialisatonAdditionalData[2];
470 indexVectorAll.emplace_back(element);
471 if (inner.contains(it.x(), it.y(), it.z()))
472 indexVectorInner.emplace_back(element);
473 else
474 indexVectorOuter.emplace_back(element);
475 }
476 }
477
478 for (auto it = flagField->beginWithGhostLayerXYZ(
479 cell_idx_c(flagField->nrOfGhostLayers() - 1));
480 it != flagField->end(); ++it) {
481 if (!isFlagSet(it, domainFlag))
482 continue;
483
484 if (isFlagSet(it.neighbor(-1, 0, 1, 0), boundaryFlag)) {
485 auto element = IndexInfo(it.x(), it.y(), it.z(), 13);
486 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
487 Cell(it.x() + -1, it.y() + 0, it.z() + 1), blocks, *block);
488 element.flux_0 = InitialisatonAdditionalData[0];
489 element.flux_1 = InitialisatonAdditionalData[1];
490 element.flux_2 = InitialisatonAdditionalData[2];
491 indexVectorAll.emplace_back(element);
492 if (inner.contains(it.x(), it.y(), it.z()))
493 indexVectorInner.emplace_back(element);
494 else
495 indexVectorOuter.emplace_back(element);
496 }
497 }
498
499 for (auto it = flagField->beginWithGhostLayerXYZ(
500 cell_idx_c(flagField->nrOfGhostLayers() - 1));
501 it != flagField->end(); ++it) {
502 if (!isFlagSet(it, domainFlag))
503 continue;
504
505 if (isFlagSet(it.neighbor(1, 0, 1, 0), boundaryFlag)) {
506 auto element = IndexInfo(it.x(), it.y(), it.z(), 14);
507 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
508 Cell(it.x() + 1, it.y() + 0, it.z() + 1), blocks, *block);
509 element.flux_0 = InitialisatonAdditionalData[0];
510 element.flux_1 = InitialisatonAdditionalData[1];
511 element.flux_2 = InitialisatonAdditionalData[2];
512 indexVectorAll.emplace_back(element);
513 if (inner.contains(it.x(), it.y(), it.z()))
514 indexVectorInner.emplace_back(element);
515 else
516 indexVectorOuter.emplace_back(element);
517 }
518 }
519
520 for (auto it = flagField->beginWithGhostLayerXYZ(
521 cell_idx_c(flagField->nrOfGhostLayers() - 1));
522 it != flagField->end(); ++it) {
523 if (!isFlagSet(it, domainFlag))
524 continue;
525
526 if (isFlagSet(it.neighbor(0, 1, -1, 0), boundaryFlag)) {
527 auto element = IndexInfo(it.x(), it.y(), it.z(), 15);
528 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
529 Cell(it.x() + 0, it.y() + 1, it.z() + -1), blocks, *block);
530 element.flux_0 = InitialisatonAdditionalData[0];
531 element.flux_1 = InitialisatonAdditionalData[1];
532 element.flux_2 = InitialisatonAdditionalData[2];
533 indexVectorAll.emplace_back(element);
534 if (inner.contains(it.x(), it.y(), it.z()))
535 indexVectorInner.emplace_back(element);
536 else
537 indexVectorOuter.emplace_back(element);
538 }
539 }
540
541 for (auto it = flagField->beginWithGhostLayerXYZ(
542 cell_idx_c(flagField->nrOfGhostLayers() - 1));
543 it != flagField->end(); ++it) {
544 if (!isFlagSet(it, domainFlag))
545 continue;
546
547 if (isFlagSet(it.neighbor(0, -1, -1, 0), boundaryFlag)) {
548 auto element = IndexInfo(it.x(), it.y(), it.z(), 16);
549 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
550 Cell(it.x() + 0, it.y() + -1, it.z() + -1), blocks, *block);
551 element.flux_0 = InitialisatonAdditionalData[0];
552 element.flux_1 = InitialisatonAdditionalData[1];
553 element.flux_2 = InitialisatonAdditionalData[2];
554 indexVectorAll.emplace_back(element);
555 if (inner.contains(it.x(), it.y(), it.z()))
556 indexVectorInner.emplace_back(element);
557 else
558 indexVectorOuter.emplace_back(element);
559 }
560 }
561
562 for (auto it = flagField->beginWithGhostLayerXYZ(
563 cell_idx_c(flagField->nrOfGhostLayers() - 1));
564 it != flagField->end(); ++it) {
565 if (!isFlagSet(it, domainFlag))
566 continue;
567
568 if (isFlagSet(it.neighbor(-1, 0, -1, 0), boundaryFlag)) {
569 auto element = IndexInfo(it.x(), it.y(), it.z(), 17);
570 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
571 Cell(it.x() + -1, it.y() + 0, it.z() + -1), blocks, *block);
572 element.flux_0 = InitialisatonAdditionalData[0];
573 element.flux_1 = InitialisatonAdditionalData[1];
574 element.flux_2 = InitialisatonAdditionalData[2];
575 indexVectorAll.emplace_back(element);
576 if (inner.contains(it.x(), it.y(), it.z()))
577 indexVectorInner.emplace_back(element);
578 else
579 indexVectorOuter.emplace_back(element);
580 }
581 }
582
583 for (auto it = flagField->beginWithGhostLayerXYZ(
584 cell_idx_c(flagField->nrOfGhostLayers() - 1));
585 it != flagField->end(); ++it) {
586 if (!isFlagSet(it, domainFlag))
587 continue;
588
589 if (isFlagSet(it.neighbor(1, 0, -1, 0), boundaryFlag)) {
590 auto element = IndexInfo(it.x(), it.y(), it.z(), 18);
591 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
592 Cell(it.x() + 1, it.y() + 0, it.z() + -1), blocks, *block);
593 element.flux_0 = InitialisatonAdditionalData[0];
594 element.flux_1 = InitialisatonAdditionalData[1];
595 element.flux_2 = InitialisatonAdditionalData[2];
596 indexVectorAll.emplace_back(element);
597 if (inner.contains(it.x(), it.y(), it.z()))
598 indexVectorInner.emplace_back(element);
599 else
600 indexVectorOuter.emplace_back(element);
601 }
602 }
603
604 for (auto it = flagField->beginWithGhostLayerXYZ(
605 cell_idx_c(flagField->nrOfGhostLayers() - 1));
606 it != flagField->end(); ++it) {
607 if (!isFlagSet(it, domainFlag))
608 continue;
609
610 if (isFlagSet(it.neighbor(1, 1, 1, 0), boundaryFlag)) {
611 auto element = IndexInfo(it.x(), it.y(), it.z(), 19);
612 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
613 Cell(it.x() + 1, it.y() + 1, it.z() + 1), blocks, *block);
614 element.flux_0 = InitialisatonAdditionalData[0];
615 element.flux_1 = InitialisatonAdditionalData[1];
616 element.flux_2 = InitialisatonAdditionalData[2];
617 indexVectorAll.emplace_back(element);
618 if (inner.contains(it.x(), it.y(), it.z()))
619 indexVectorInner.emplace_back(element);
620 else
621 indexVectorOuter.emplace_back(element);
622 }
623 }
624
625 for (auto it = flagField->beginWithGhostLayerXYZ(
626 cell_idx_c(flagField->nrOfGhostLayers() - 1));
627 it != flagField->end(); ++it) {
628 if (!isFlagSet(it, domainFlag))
629 continue;
630
631 if (isFlagSet(it.neighbor(-1, 1, 1, 0), boundaryFlag)) {
632 auto element = IndexInfo(it.x(), it.y(), it.z(), 20);
633 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
634 Cell(it.x() + -1, it.y() + 1, it.z() + 1), blocks, *block);
635 element.flux_0 = InitialisatonAdditionalData[0];
636 element.flux_1 = InitialisatonAdditionalData[1];
637 element.flux_2 = InitialisatonAdditionalData[2];
638 indexVectorAll.emplace_back(element);
639 if (inner.contains(it.x(), it.y(), it.z()))
640 indexVectorInner.emplace_back(element);
641 else
642 indexVectorOuter.emplace_back(element);
643 }
644 }
645
646 for (auto it = flagField->beginWithGhostLayerXYZ(
647 cell_idx_c(flagField->nrOfGhostLayers() - 1));
648 it != flagField->end(); ++it) {
649 if (!isFlagSet(it, domainFlag))
650 continue;
651
652 if (isFlagSet(it.neighbor(1, -1, 1, 0), boundaryFlag)) {
653 auto element = IndexInfo(it.x(), it.y(), it.z(), 21);
654 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
655 Cell(it.x() + 1, it.y() + -1, it.z() + 1), blocks, *block);
656 element.flux_0 = InitialisatonAdditionalData[0];
657 element.flux_1 = InitialisatonAdditionalData[1];
658 element.flux_2 = InitialisatonAdditionalData[2];
659 indexVectorAll.emplace_back(element);
660 if (inner.contains(it.x(), it.y(), it.z()))
661 indexVectorInner.emplace_back(element);
662 else
663 indexVectorOuter.emplace_back(element);
664 }
665 }
666
667 for (auto it = flagField->beginWithGhostLayerXYZ(
668 cell_idx_c(flagField->nrOfGhostLayers() - 1));
669 it != flagField->end(); ++it) {
670 if (!isFlagSet(it, domainFlag))
671 continue;
672
673 if (isFlagSet(it.neighbor(-1, -1, 1, 0), boundaryFlag)) {
674 auto element = IndexInfo(it.x(), it.y(), it.z(), 22);
675 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
676 Cell(it.x() + -1, it.y() + -1, it.z() + 1), blocks, *block);
677 element.flux_0 = InitialisatonAdditionalData[0];
678 element.flux_1 = InitialisatonAdditionalData[1];
679 element.flux_2 = InitialisatonAdditionalData[2];
680 indexVectorAll.emplace_back(element);
681 if (inner.contains(it.x(), it.y(), it.z()))
682 indexVectorInner.emplace_back(element);
683 else
684 indexVectorOuter.emplace_back(element);
685 }
686 }
687
688 for (auto it = flagField->beginWithGhostLayerXYZ(
689 cell_idx_c(flagField->nrOfGhostLayers() - 1));
690 it != flagField->end(); ++it) {
691 if (!isFlagSet(it, domainFlag))
692 continue;
693
694 if (isFlagSet(it.neighbor(1, 1, -1, 0), boundaryFlag)) {
695 auto element = IndexInfo(it.x(), it.y(), it.z(), 23);
696 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
697 Cell(it.x() + 1, it.y() + 1, it.z() + -1), blocks, *block);
698 element.flux_0 = InitialisatonAdditionalData[0];
699 element.flux_1 = InitialisatonAdditionalData[1];
700 element.flux_2 = InitialisatonAdditionalData[2];
701 indexVectorAll.emplace_back(element);
702 if (inner.contains(it.x(), it.y(), it.z()))
703 indexVectorInner.emplace_back(element);
704 else
705 indexVectorOuter.emplace_back(element);
706 }
707 }
708
709 for (auto it = flagField->beginWithGhostLayerXYZ(
710 cell_idx_c(flagField->nrOfGhostLayers() - 1));
711 it != flagField->end(); ++it) {
712 if (!isFlagSet(it, domainFlag))
713 continue;
714
715 if (isFlagSet(it.neighbor(-1, 1, -1, 0), boundaryFlag)) {
716 auto element = IndexInfo(it.x(), it.y(), it.z(), 24);
717 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
718 Cell(it.x() + -1, it.y() + 1, it.z() + -1), blocks, *block);
719 element.flux_0 = InitialisatonAdditionalData[0];
720 element.flux_1 = InitialisatonAdditionalData[1];
721 element.flux_2 = InitialisatonAdditionalData[2];
722 indexVectorAll.emplace_back(element);
723 if (inner.contains(it.x(), it.y(), it.z()))
724 indexVectorInner.emplace_back(element);
725 else
726 indexVectorOuter.emplace_back(element);
727 }
728 }
729
730 for (auto it = flagField->beginWithGhostLayerXYZ(
731 cell_idx_c(flagField->nrOfGhostLayers() - 1));
732 it != flagField->end(); ++it) {
733 if (!isFlagSet(it, domainFlag))
734 continue;
735
736 if (isFlagSet(it.neighbor(1, -1, -1, 0), boundaryFlag)) {
737 auto element = IndexInfo(it.x(), it.y(), it.z(), 25);
738 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
739 Cell(it.x() + 1, it.y() + -1, it.z() + -1), blocks, *block);
740 element.flux_0 = InitialisatonAdditionalData[0];
741 element.flux_1 = InitialisatonAdditionalData[1];
742 element.flux_2 = InitialisatonAdditionalData[2];
743 indexVectorAll.emplace_back(element);
744 if (inner.contains(it.x(), it.y(), it.z()))
745 indexVectorInner.emplace_back(element);
746 else
747 indexVectorOuter.emplace_back(element);
748 }
749 }
750
751 for (auto it = flagField->beginWithGhostLayerXYZ(
752 cell_idx_c(flagField->nrOfGhostLayers() - 1));
753 it != flagField->end(); ++it) {
754 if (!isFlagSet(it, domainFlag))
755 continue;
756
757 if (isFlagSet(it.neighbor(-1, -1, -1, 0), boundaryFlag)) {
758 auto element = IndexInfo(it.x(), it.y(), it.z(), 26);
759 Vector3<float> InitialisatonAdditionalData = elementInitaliser(
760 Cell(it.x() + -1, it.y() + -1, it.z() + -1), blocks, *block);
761 element.flux_0 = InitialisatonAdditionalData[0];
762 element.flux_1 = InitialisatonAdditionalData[1];
763 element.flux_2 = InitialisatonAdditionalData[2];
764 indexVectorAll.emplace_back(element);
765 if (inner.contains(it.x(), it.y(), it.z()))
766 indexVectorInner.emplace_back(element);
767 else
768 indexVectorOuter.emplace_back(element);
769 }
770 }
771
772 indexVectors->syncGPU();
773 }
774
775private:
776 void run_impl(IBlock *block, IndexVectors::Type type,
777 gpuStream_t stream = nullptr);
778
779 BlockDataID indexVectorID;
780
781 std::function<Vector3<float>(
782 const Cell &, const shared_ptr<StructuredBlockForest> &, IBlock &)>
783 elementInitaliser;
784
785public:
786 BlockDataID fluxID;
787};
788
789} // namespace pystencils
790} // namespace walberla
Definition Cell.hpp:96
FixedFlux_single_precision_CUDA(const std::shared_ptr< StructuredBlockForest > &blocks, BlockDataID fluxID_, std::function< Vector3< float >(const Cell &, const shared_ptr< StructuredBlockForest > &, IBlock &)> &fluxCallback)
std::function< void(IBlock *)> getOuterSweep(gpuStream_t stream=nullptr)
void fillFromFlagField(const std::shared_ptr< StructuredBlockForest > &blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
std::function< void(IBlock *)> getInnerSweep(gpuStream_t stream=nullptr)
void operator()(IBlock *block, gpuStream_t stream=nullptr)
void fillFromFlagField(const shared_ptr< StructuredBlockForest > &blocks, IBlock *block, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
std::function< void(IBlock *)> getSweep(gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:176
\file PackInfoPdfDoublePrecision.cpp \author pystencils