ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
DiffusiveFluxKernelThermalized_single_precision_CUDA.cu
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \\file DiffusiveFluxKernelThermalized_single_precision_CUDA.cpp
17//! \\author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.4+1.ge851f4e, lbmpy v1.4+1.ge9efe34, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit 007e77e077ad9d22b5eed6f3d3118240993e553c
21
22#include <cmath>
23
25#include "core/DataTypes.h"
26#include "core/Macros.h"
27
28#include "philox_rand.h"
29
30#define FUNC_PREFIX __global__
31
32#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
33#pragma GCC diagnostic push
34#pragma GCC diagnostic ignored "-Wfloat-equal"
35#pragma GCC diagnostic ignored "-Wshadow"
36#pragma GCC diagnostic ignored "-Wconversion"
37#pragma GCC diagnostic ignored "-Wunused-variable"
38#endif
39
40#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
41#pragma warning push
42#pragma warning(disable : 1599)
43#endif
44
45using namespace std;
46
47namespace walberla {
48namespace pystencils {
49
50namespace internal_diffusivefluxkernelthermalized_single_precision_cuda_diffusivefluxkernelthermalized_single_precision_cuda {
52 if (blockDim.y * blockIdx.y + threadIdx.y < _size_j_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_j_2 && blockDim.x * blockIdx.x + threadIdx.x + 1 < _size_j_0) {
53 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x + 1;
54 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
55 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
56 if (ctr_1 > 0 && ctr_2 > 0 && ctr_1 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
57
58 float random_3_0;
59 float random_3_1;
60 float random_3_2;
61 float random_3_3;
63
64 float random_2_0;
65 float random_2_1;
66 float random_2_2;
67 float random_2_3;
69
70 float random_1_0;
71 float random_1_1;
72 float random_1_2;
73 float random_1_3;
75
76 float random_0_0;
77 float random_0_1;
78 float random_0_2;
79 float random_0_3;
81
83 }
84 if (ctr_1 > 0 && ctr_2 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
85
86 float random_3_0;
87 float random_3_1;
88 float random_3_2;
89 float random_3_3;
91
92 float random_2_0;
93 float random_2_1;
94 float random_2_2;
95 float random_2_3;
97
98 float random_1_0;
99 float random_1_1;
100 float random_1_2;
101 float random_1_3;
103
104 float random_0_0;
105 float random_0_1;
106 float random_0_2;
107 float random_0_3;
109
111 }
112 if (ctr_1 > 0 && ctr_2 > 0 && ctr_0 < _size_j_0 - 1 && ctr_1 < _size_j_1 - 1) {
113
114 float random_3_0;
115 float random_3_1;
116 float random_3_2;
117 float random_3_3;
119
120 float random_2_0;
121 float random_2_1;
122 float random_2_2;
123 float random_2_3;
125
126 float random_1_0;
127 float random_1_1;
128 float random_1_2;
129 float random_1_3;
131
132 float random_0_0;
133 float random_0_1;
134 float random_0_2;
135 float random_0_3;
137
139 }
140 if (ctr_1 > 0 && ctr_2 > 0 && ctr_2 < _size_j_2 - 1) {
141
142 float random_3_0;
143 float random_3_1;
144 float random_3_2;
145 float random_3_3;
147
148 float random_2_0;
149 float random_2_1;
150 float random_2_2;
151 float random_2_3;
153
154 float random_1_0;
155 float random_1_1;
156 float random_1_2;
157 float random_1_3;
159
160 float random_0_0;
161 float random_0_1;
162 float random_0_2;
163 float random_0_3;
165
167 }
168 if (ctr_2 > 0 && ctr_1 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
169
170 float random_3_0;
171 float random_3_1;
172 float random_3_2;
173 float random_3_3;
175
176 float random_2_0;
177 float random_2_1;
178 float random_2_2;
179 float random_2_3;
181
182 float random_1_0;
183 float random_1_1;
184 float random_1_2;
185 float random_1_3;
187
188 float random_0_0;
189 float random_0_1;
190 float random_0_2;
191 float random_0_3;
193
195 }
196 if (ctr_1 > 0 && ctr_2 > 0 && ctr_1 < _size_j_1 - 1) {
197
198 float random_3_0;
199 float random_3_1;
200 float random_3_2;
201 float random_3_3;
203
204 float random_2_0;
205 float random_2_1;
206 float random_2_2;
207 float random_2_3;
209
210 float random_1_0;
211 float random_1_1;
212 float random_1_2;
213 float random_1_3;
215
216 float random_0_0;
217 float random_0_1;
218 float random_0_2;
219 float random_0_3;
221
223 }
224 if (ctr_1 > 0 && ctr_1 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
225
226 float random_3_0;
227 float random_3_1;
228 float random_3_2;
229 float random_3_3;
231
232 float random_2_0;
233 float random_2_1;
234 float random_2_2;
235 float random_2_3;
237
238 float random_1_0;
239 float random_1_1;
240 float random_1_2;
241 float random_1_3;
243
244 float random_0_0;
245 float random_0_1;
246 float random_0_2;
247 float random_0_3;
249
251 }
252 if (ctr_1 > 0 && ctr_2 > 0 && ctr_0 < _size_j_0 - 1) {
253
254 float random_3_0;
255 float random_3_1;
256 float random_3_2;
257 float random_3_3;
259
260 float random_2_0;
261 float random_2_1;
262 float random_2_2;
263 float random_2_3;
265
266 float random_1_0;
267 float random_1_1;
268 float random_1_2;
269 float random_1_3;
271
272 float random_0_0;
273 float random_0_1;
274 float random_0_2;
275 float random_0_3;
277
279 }
280 if (ctr_1 > 0 && ctr_0 < _size_j_0 - 1 && ctr_2 < _size_j_2 - 1) {
281
282 float random_3_0;
283 float random_3_1;
284 float random_3_2;
285 float random_3_3;
287
288 float random_2_0;
289 float random_2_1;
290 float random_2_2;
291 float random_2_3;
293
294 float random_1_0;
295 float random_1_1;
296 float random_1_2;
297 float random_1_3;
299
300 float random_0_0;
301 float random_0_1;
302 float random_0_2;
303 float random_0_3;
305
307 }
308 if (ctr_1 > 0 && ctr_2 > 0) {
309
310 float random_3_0;
311 float random_3_1;
312 float random_3_2;
313 float random_3_3;
315
316 float random_2_0;
317 float random_2_1;
318 float random_2_2;
319 float random_2_3;
321
322 float random_1_0;
323 float random_1_1;
324 float random_1_2;
325 float random_1_3;
327
328 float random_0_0;
329 float random_0_1;
330 float random_0_2;
331 float random_0_3;
333
335 }
336 if (ctr_1 > 0 && ctr_2 < _size_j_2 - 1) {
337
338 float random_3_0;
339 float random_3_1;
340 float random_3_2;
341 float random_3_3;
343
344 float random_2_0;
345 float random_2_1;
346 float random_2_2;
347 float random_2_3;
349
350 float random_1_0;
351 float random_1_1;
352 float random_1_2;
353 float random_1_3;
355
356 float random_0_0;
357 float random_0_1;
358 float random_0_2;
359 float random_0_3;
361
363 }
364 if (ctr_2 > 0 && ctr_1 < _size_j_1 - 1) {
365
366 float random_3_0;
367 float random_3_1;
368 float random_3_2;
369 float random_3_3;
371
372 float random_2_0;
373 float random_2_1;
374 float random_2_2;
375 float random_2_3;
377
378 float random_1_0;
379 float random_1_1;
380 float random_1_2;
381 float random_1_3;
383
384 float random_0_0;
385 float random_0_1;
386 float random_0_2;
387 float random_0_3;
389
391 }
392 if (ctr_1 < _size_j_1 - 1 && ctr_2 < _size_j_2 - 1) {
393
394 float random_3_0;
395 float random_3_1;
396 float random_3_2;
397 float random_3_3;
399
400 float random_2_0;
401 float random_2_1;
402 float random_2_2;
403 float random_2_3;
405
406 float random_1_0;
407 float random_1_1;
408 float random_1_2;
409 float random_1_3;
411
412 float random_0_0;
413 float random_0_1;
414 float random_0_2;
415 float random_0_3;
417
419 }
420 }
421}
422} // namespace internal_diffusivefluxkernelthermalized_single_precision_cuda_diffusivefluxkernelthermalized_single_precision_cuda
423
425 if (!this->configured_)
426 WALBERLA_ABORT("This Sweep contains a configure function that needs to be called manually")
427
428 auto rho = block->getData<gpu::GPUField<float>>(rhoID);
429 auto j = block->getData<gpu::GPUField<float>>(jID);
430
431 auto &field_size_1 = this->field_size_1_;
432 auto &time_step = this->time_step_;
433 auto &block_offset_2 = this->block_offset_2_;
434 auto &D = this->D_;
435 auto &seed = this->seed_;
436 auto &field_size_0 = this->field_size_0_;
437 auto &block_offset_1 = this->block_offset_1_;
438 auto &field_size_2 = this->field_size_2_;
439 auto &block_offset_0 = this->block_offset_0_;
440 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()))
441 float *RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
442 WALBERLA_ASSERT_EQUAL(j->layout(), field::fzyx)
443 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(rho->nrOfGhostLayers()))
444 float *RESTRICT const _data_rho = rho->dataAt(-1, -1, -1, 0);
445 WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(int64_c(j->xSize()) + 2))
446 const int64_t _size_j_0 = int64_t(int64_c(j->xSize()) + 2);
447 WALBERLA_ASSERT_EQUAL(j->layout(), field::fzyx)
448 WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(int64_c(j->ySize()) + 2))
449 const int64_t _size_j_1 = int64_t(int64_c(j->ySize()) + 2);
450 WALBERLA_ASSERT_EQUAL(j->layout(), field::fzyx)
451 WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(int64_c(j->zSize()) + 2))
452 const int64_t _size_j_2 = int64_t(int64_c(j->zSize()) + 2);
453 WALBERLA_ASSERT_EQUAL(j->layout(), field::fzyx)
454 const int64_t _stride_j_0 = int64_t(j->xStride());
455 const int64_t _stride_j_1 = int64_t(j->yStride());
456 const int64_t _stride_j_2 = int64_t(j->zStride());
457 const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
458 const int64_t _stride_rho_0 = int64_t(rho->xStride());
459 const int64_t _stride_rho_1 = int64_t(rho->yStride());
460 const int64_t _stride_rho_2 = int64_t(rho->zStride());
461 dim3 _block(uint32_c(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)), uint32_c(((1024 < ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))) ? 1024 : ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))), uint32_c(((64 < ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))))) ? 64 : ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))))));
462 dim3 _grid(uint32_c(((_size_j_0 - 1) % (((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)) == 0 ? (int64_t)(_size_j_0 - 1) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)) : ((int64_t)(_size_j_0 - 1) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))) + 1)), uint32_c(((_size_j_1) % (((1024 < ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))) ? 1024 : ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))) == 0 ? (int64_t)(_size_j_1) / (int64_t)(((1024 < ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))) ? 1024 : ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))) : ((int64_t)(_size_j_1) / (int64_t)(((1024 < ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))) ? 1024 : ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) + 1)), uint32_c(((_size_j_2) % (((64 < ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))))) ? 64 : ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))))) == 0 ? (int64_t)(_size_j_2) / (int64_t)(((64 < ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))))) ? 64 : ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))))) : ((int64_t)(_size_j_2) / (int64_t)(((64 < ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))))) ? 64 : ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))))))) + 1)));
463 internal_diffusivefluxkernelthermalized_single_precision_cuda_diffusivefluxkernelthermalized_single_precision_cuda::diffusivefluxkernelthermalized_single_precision_cuda_diffusivefluxkernelthermalized_single_precision_cuda<<<_grid, _block, 0, stream>>>(D, _data_j, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2, block_offset_0, block_offset_1, block_offset_2, field_size_0, field_size_1, field_size_2, seed, time_step);
464}
465
467 if (!this->configured_)
468 WALBERLA_ABORT("This Sweep contains a configure function that needs to be called manually")
469
470 CellInterval ci = globalCellInterval;
471 CellInterval blockBB = blocks->getBlockCellBB(*block);
472 blockBB.expand(ghostLayers);
473 ci.intersect(blockBB);
474 blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
475 if (ci.empty())
476 return;
477
478 auto rho = block->getData<gpu::GPUField<float>>(rhoID);
479 auto j = block->getData<gpu::GPUField<float>>(jID);
480
481 auto &field_size_1 = this->field_size_1_;
482 auto &time_step = this->time_step_;
483 auto &block_offset_2 = this->block_offset_2_;
484 auto &D = this->D_;
485 auto &seed = this->seed_;
486 auto &field_size_0 = this->field_size_0_;
487 auto &block_offset_1 = this->block_offset_1_;
488 auto &field_size_2 = this->field_size_2_;
489 auto &block_offset_0 = this->block_offset_0_;
490 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()))
491 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()))
492 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()))
493 float *RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
494 WALBERLA_ASSERT_EQUAL(j->layout(), field::fzyx)
495 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(rho->nrOfGhostLayers()))
496 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(rho->nrOfGhostLayers()))
497 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(rho->nrOfGhostLayers()))
498 float *RESTRICT const _data_rho = rho->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
499 WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 2))
500 const int64_t _size_j_0 = int64_t(int64_c(ci.xSize()) + 2);
501 WALBERLA_ASSERT_EQUAL(j->layout(), field::fzyx)
502 WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 2))
503 const int64_t _size_j_1 = int64_t(int64_c(ci.ySize()) + 2);
504 WALBERLA_ASSERT_EQUAL(j->layout(), field::fzyx)
505 WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 2))
506 const int64_t _size_j_2 = int64_t(int64_c(ci.zSize()) + 2);
507 WALBERLA_ASSERT_EQUAL(j->layout(), field::fzyx)
508 const int64_t _stride_j_0 = int64_t(j->xStride());
509 const int64_t _stride_j_1 = int64_t(j->yStride());
510 const int64_t _stride_j_2 = int64_t(j->zStride());
511 const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
512 const int64_t _stride_rho_0 = int64_t(rho->xStride());
513 const int64_t _stride_rho_1 = int64_t(rho->yStride());
514 const int64_t _stride_rho_2 = int64_t(rho->zStride());
515 dim3 _block(uint32_c(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)), uint32_c(((1024 < ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))) ? 1024 : ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))), uint32_c(((64 < ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))))) ? 64 : ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))))));
516 dim3 _grid(uint32_c(((_size_j_0 - 1) % (((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)) == 0 ? (int64_t)(_size_j_0 - 1) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)) : ((int64_t)(_size_j_0 - 1) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))) + 1)), uint32_c(((_size_j_1) % (((1024 < ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))) ? 1024 : ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))) == 0 ? (int64_t)(_size_j_1) / (int64_t)(((1024 < ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))) ? 1024 : ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))) : ((int64_t)(_size_j_1) / (int64_t)(((1024 < ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))) ? 1024 : ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) + 1)), uint32_c(((_size_j_2) % (((64 < ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))))) ? 64 : ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))))) == 0 ? (int64_t)(_size_j_2) / (int64_t)(((64 < ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))))) ? 64 : ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))))) : ((int64_t)(_size_j_2) / (int64_t)(((64 < ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))))) ? 64 : ((_size_j_2 < ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1))))))) ? _size_j_2 : ((int64_t)(256) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1) * ((_size_j_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))) ? _size_j_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_j_0 - 1) ? 128 : _size_j_0 - 1)))))))))) + 1)));
517 internal_diffusivefluxkernelthermalized_single_precision_cuda_diffusivefluxkernelthermalized_single_precision_cuda::diffusivefluxkernelthermalized_single_precision_cuda_diffusivefluxkernelthermalized_single_precision_cuda<<<_grid, _block, 0, stream>>>(D, _data_j, _data_rho, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, _stride_rho_0, _stride_rho_1, _stride_rho_2, block_offset_0, block_offset_1, block_offset_2, field_size_0, field_size_1, field_size_2, seed, time_step);
518}
519
520} // namespace pystencils
521} // namespace walberla
522
523#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
524#pragma GCC diagnostic pop
525#endif
526
527#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
528#pragma warning pop
529#endif
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
void runOnCellInterval(const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:177
QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3, uint32 key0, uint32 key1, float &rnd1, float &rnd2, float &rnd3, float &rnd4)
Philox counter-based RNG from .
STL namespace.
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1
static FUNC_PREFIX __launch_bounds__(256) void diffusivefluxkernelthermalized_single_precision_cuda_diffusivefluxkernelthermalized_single_precision_cuda(float D
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t uint32_t field_size_0
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_j_1
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t uint32_t uint32_t uint32_t field_size_2
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_j_2
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const _stride_j_0
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t uint32_t uint32_t uint32_t uint32_t seed
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t uint32_t uint32_t field_size_1
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t block_offset_1
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t block_offset_2
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t block_offset_0
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_0
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2
static FUNC_PREFIX float *RESTRICT const float *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_j_3
\file PackInfoPdfDoublePrecision.cpp \author pystencils