ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
ReactionKernelBulk_4_double_precision_CUDA.cu
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \\file ReactionKernelBulk_4_double_precision_CUDA.cpp
17//! \\author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.3.7+13.gdfd203a, lbmpy v1.3.7+10.gd3f6236, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit c69cb11d6a95d32b2280544d3d9abde1fe5fdbb5
21
22#include <cmath>
23
25#include "core/DataTypes.h"
26#include "core/Macros.h"
27
28#define FUNC_PREFIX __global__
29
30#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
31#pragma GCC diagnostic push
32#pragma GCC diagnostic ignored "-Wfloat-equal"
33#pragma GCC diagnostic ignored "-Wshadow"
34#pragma GCC diagnostic ignored "-Wconversion"
35#pragma GCC diagnostic ignored "-Wunused-variable"
36#endif
37
38#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
39#pragma warning push
40#pragma warning(disable : 1599)
41#endif
42
43using namespace std;
44
45namespace walberla {
46namespace pystencils {
47
48namespace internal_reactionkernelbulk_4_double_precision_cuda_reactionkernelbulk_4_double_precision_cuda {
49static FUNC_PREFIX __launch_bounds__(256) void reactionkernelbulk_4_double_precision_cuda_reactionkernelbulk_4_double_precision_cuda(double *RESTRICT _data_rho_0, double *RESTRICT _data_rho_1, double *RESTRICT _data_rho_2, double *RESTRICT _data_rho_3, int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2, int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2, int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2, int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2, int64_t const _stride_rho_3_0, int64_t const _stride_rho_3_1, int64_t const _stride_rho_3_2, double order_0, double order_1, double order_2, double order_3, double rate_coefficient, double stoech_0, double stoech_1, double stoech_2, double stoech_3) {
50 if (blockDim.x * blockIdx.x + threadIdx.x < _size_rho_0_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_rho_0_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_rho_0_2) {
51 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
52 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
53 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
54 const double local_rho_0 = _data_rho_0[_stride_rho_0_0 * ctr_0 + _stride_rho_0_1 * ctr_1 + _stride_rho_0_2 * ctr_2];
55 const double local_rho_1 = _data_rho_1[_stride_rho_1_0 * ctr_0 + _stride_rho_1_1 * ctr_1 + _stride_rho_1_2 * ctr_2];
56 const double local_rho_2 = _data_rho_2[_stride_rho_2_0 * ctr_0 + _stride_rho_2_1 * ctr_1 + _stride_rho_2_2 * ctr_2];
57 const double local_rho_3 = _data_rho_3[_stride_rho_3_0 * ctr_0 + _stride_rho_3_1 * ctr_1 + _stride_rho_3_2 * ctr_2];
58 const double rate_factor = pow(local_rho_0, order_0) * pow(local_rho_1, order_1) * pow(local_rho_2, order_2) * pow(local_rho_3, order_3) * rate_coefficient;
59 _data_rho_0[_stride_rho_0_0 * ctr_0 + _stride_rho_0_1 * ctr_1 + _stride_rho_0_2 * ctr_2] = local_rho_0 + rate_factor * stoech_0;
60 _data_rho_1[_stride_rho_1_0 * ctr_0 + _stride_rho_1_1 * ctr_1 + _stride_rho_1_2 * ctr_2] = local_rho_1 + rate_factor * stoech_1;
61 _data_rho_2[_stride_rho_2_0 * ctr_0 + _stride_rho_2_1 * ctr_1 + _stride_rho_2_2 * ctr_2] = local_rho_2 + rate_factor * stoech_2;
62 _data_rho_3[_stride_rho_3_0 * ctr_0 + _stride_rho_3_1 * ctr_1 + _stride_rho_3_2 * ctr_2] = local_rho_3 + rate_factor * stoech_3;
63 }
64}
65} // namespace internal_reactionkernelbulk_4_double_precision_cuda_reactionkernelbulk_4_double_precision_cuda
66
68
69 auto rho_3 = block->getData<gpu::GPUField<double>>(rho_3ID);
70 auto rho_0 = block->getData<gpu::GPUField<double>>(rho_0ID);
71 auto rho_2 = block->getData<gpu::GPUField<double>>(rho_2ID);
72 auto rho_1 = block->getData<gpu::GPUField<double>>(rho_1ID);
73
74 auto &stoech_0 = this->stoech_0_;
75 auto &order_3 = this->order_3_;
76 auto &stoech_1 = this->stoech_1_;
77 auto &stoech_3 = this->stoech_3_;
78 auto &stoech_2 = this->stoech_2_;
79 auto &order_1 = this->order_1_;
80 auto &order_2 = this->order_2_;
81 auto &rate_coefficient = this->rate_coefficient_;
82 auto &order_0 = this->order_0_;
83 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()))
84 double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
85 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()))
86 double *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
87 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()))
88 double *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
89 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()))
90 double *RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
91 WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(int64_c(rho_0->xSize()) + 0))
92 const int64_t _size_rho_0_0 = int64_t(int64_c(rho_0->xSize()) + 0);
93 WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(int64_c(rho_0->ySize()) + 0))
94 const int64_t _size_rho_0_1 = int64_t(int64_c(rho_0->ySize()) + 0);
95 WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(int64_c(rho_0->zSize()) + 0))
96 const int64_t _size_rho_0_2 = int64_t(int64_c(rho_0->zSize()) + 0);
97 const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
98 const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
99 const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
100 const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
101 const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
102 const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
103 const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
104 const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
105 const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
106 const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
107 const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
108 const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
109 dim3 _block(uint32_c(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)), uint32_c(((1024 < ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))) ? 1024 : ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))), uint32_c(((64 < ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))))) ? 64 : ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))))));
110 dim3 _grid(uint32_c(((_size_rho_0_0) % (((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)) == 0 ? (int64_t)(_size_rho_0_0) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)) : ((int64_t)(_size_rho_0_0) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))) + 1)), uint32_c(((_size_rho_0_1) % (((1024 < ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))) ? 1024 : ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))) == 0 ? (int64_t)(_size_rho_0_1) / (int64_t)(((1024 < ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))) ? 1024 : ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))) : ((int64_t)(_size_rho_0_1) / (int64_t)(((1024 < ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))) ? 1024 : ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) + 1)), uint32_c(((_size_rho_0_2) % (((64 < ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))))) ? 64 : ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))))) == 0 ? (int64_t)(_size_rho_0_2) / (int64_t)(((64 < ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))))) ? 64 : ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))))) : ((int64_t)(_size_rho_0_2) / (int64_t)(((64 < ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))))) ? 64 : ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))))))) + 1)));
111 internal_reactionkernelbulk_4_double_precision_cuda_reactionkernelbulk_4_double_precision_cuda::reactionkernelbulk_4_double_precision_cuda_reactionkernelbulk_4_double_precision_cuda<<<_grid, _block, 0, stream>>>(_data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, order_0, order_1, order_2, order_3, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3);
112}
113
114void ReactionKernelBulk_4_double_precision_CUDA::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream) {
115
116 CellInterval ci = globalCellInterval;
117 CellInterval blockBB = blocks->getBlockCellBB(*block);
118 blockBB.expand(ghostLayers);
119 ci.intersect(blockBB);
120 blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
121 if (ci.empty())
122 return;
123
124 auto rho_3 = block->getData<gpu::GPUField<double>>(rho_3ID);
125 auto rho_0 = block->getData<gpu::GPUField<double>>(rho_0ID);
126 auto rho_2 = block->getData<gpu::GPUField<double>>(rho_2ID);
127 auto rho_1 = block->getData<gpu::GPUField<double>>(rho_1ID);
128
129 auto &stoech_0 = this->stoech_0_;
130 auto &order_3 = this->order_3_;
131 auto &stoech_1 = this->stoech_1_;
132 auto &stoech_3 = this->stoech_3_;
133 auto &stoech_2 = this->stoech_2_;
134 auto &order_1 = this->order_1_;
135 auto &order_2 = this->order_2_;
136 auto &rate_coefficient = this->rate_coefficient_;
137 auto &order_0 = this->order_0_;
138 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()))
139 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()))
140 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()))
141 double *RESTRICT _data_rho_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
142 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_1->nrOfGhostLayers()))
143 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_1->nrOfGhostLayers()))
144 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_1->nrOfGhostLayers()))
145 double *RESTRICT _data_rho_1 = rho_1->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
146 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_2->nrOfGhostLayers()))
147 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_2->nrOfGhostLayers()))
148 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_2->nrOfGhostLayers()))
149 double *RESTRICT _data_rho_2 = rho_2->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
150 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_3->nrOfGhostLayers()))
151 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_3->nrOfGhostLayers()))
152 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_3->nrOfGhostLayers()))
153 double *RESTRICT _data_rho_3 = rho_3->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
154 WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
155 const int64_t _size_rho_0_0 = int64_t(int64_c(ci.xSize()) + 0);
156 WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
157 const int64_t _size_rho_0_1 = int64_t(int64_c(ci.ySize()) + 0);
158 WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
159 const int64_t _size_rho_0_2 = int64_t(int64_c(ci.zSize()) + 0);
160 const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
161 const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
162 const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
163 const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
164 const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
165 const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
166 const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
167 const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
168 const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
169 const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
170 const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
171 const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
172 dim3 _block(uint32_c(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)), uint32_c(((1024 < ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))) ? 1024 : ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))), uint32_c(((64 < ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))))) ? 64 : ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))))));
173 dim3 _grid(uint32_c(((_size_rho_0_0) % (((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)) == 0 ? (int64_t)(_size_rho_0_0) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)) : ((int64_t)(_size_rho_0_0) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))) + 1)), uint32_c(((_size_rho_0_1) % (((1024 < ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))) ? 1024 : ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))) == 0 ? (int64_t)(_size_rho_0_1) / (int64_t)(((1024 < ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))) ? 1024 : ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))) : ((int64_t)(_size_rho_0_1) / (int64_t)(((1024 < ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))) ? 1024 : ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) + 1)), uint32_c(((_size_rho_0_2) % (((64 < ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))))) ? 64 : ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))))) == 0 ? (int64_t)(_size_rho_0_2) / (int64_t)(((64 < ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))))) ? 64 : ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))))) : ((int64_t)(_size_rho_0_2) / (int64_t)(((64 < ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))))) ? 64 : ((_size_rho_0_2 < ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0))))))) ? _size_rho_0_2 : ((int64_t)(256) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0) * ((_size_rho_0_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))) ? _size_rho_0_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_rho_0_0) ? 128 : _size_rho_0_0)))))))))) + 1)));
174 internal_reactionkernelbulk_4_double_precision_cuda_reactionkernelbulk_4_double_precision_cuda::reactionkernelbulk_4_double_precision_cuda_reactionkernelbulk_4_double_precision_cuda<<<_grid, _block, 0, stream>>>(_data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _size_rho_0_0, _size_rho_0_1, _size_rho_0_2, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, order_0, order_1, order_2, order_3, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3);
175}
176
177} // namespace pystencils
178} // namespace walberla
179
180#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
181#pragma GCC diagnostic pop
182#endif
183
184#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
185#pragma warning pop
186#endif
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
void runOnCellInterval(const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:176
STL namespace.
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const _stride_rho_0_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_3_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const double double order_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const _size_rho_0_2
static FUNC_PREFIX __launch_bounds__(256) void reactionkernelbulk_4_double_precision_cuda_reactionkernelbulk_4_double_precision_cuda(double *RESTRICT _data_rho_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const double double double double order_3
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const double double double order_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const double double double double double double double double stoech_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const double order_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_3_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_3_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const double double double double double double stoech_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const double double double double double double double stoech_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const _size_rho_0_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_0_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_0_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const double double double double double rate_coefficient
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1_1
\file PackInfoPdfDoublePrecision.cpp \author pystencils