ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
ReactionKernelIndexed_5_double_precision_CUDA.cu
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \file ReactionKernelIndexed_5_double_precision_CUDA.cu
17//! \author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.3.7+13.gdfd203a, lbmpy v1.3.7+10.gd3f6236, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit c69cb11d6a95d32b2280544d3d9abde1fe5fdbb5
21
23#include "core/DataTypes.h"
24#include "core/Macros.h"
25#include "gpu/ErrorChecking.h"
26
27#define FUNC_PREFIX __global__
28
29using namespace std;
30
31namespace walberla {
32namespace pystencils {
33
// Per-compiler setup for the generated kernel that follows:
//  * RESTRICT expands to the compiler's spelling of the restrict qualifier
//    (or to nothing when the compiler is not recognised);
//  * the "unused variable" diagnostic is pushed and silenced; the matching
//    pop is issued after the kernel definition.
#if defined(__NVCC__)
#define RESTRICT __restrict__
#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#pragma nv_diagnostic push
#pragma nv_diag_suppress 177 // unused variable
#else
#pragma push
#pragma diag_suppress 177 // unused variable
#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#elif defined(__clang__)
#if defined(__CUDA__)
// clang compiling CUDA code: the settings are the same in device mode
// (__CUDA_ARCH__ defined) and in host mode (__CUDA_ARCH__ not defined)
#define RESTRICT __restrict__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
#endif // defined(__CUDA__)
#elif defined(__GNUC__) || defined(__GNUG__)
#define RESTRICT __restrict__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#elif defined(_MSC_VER)
#define RESTRICT __restrict
#else
#define RESTRICT
#endif
66
67// NOLINTBEGIN(readability-non-const-parameter*)
68namespace internal_reactionkernelindexed_5_double_precision_cuda_boundary_ReactionKernelIndexed_5_double_precision_CUDA {
/**
 * Applies one reaction update to five scalar fields at every cell listed in
 * an index vector.
 *
 * Launch layout: one-dimensional grid of one-dimensional blocks with at most
 * 256 threads per block (see __launch_bounds__). The thread with global index
 * i processes index-vector record i; threads with i >= indexVectorSize return
 * immediately. The kernel uses no shared memory and no barriers.
 *
 * For the cell (x, y, z) stored in its record, a thread evaluates
 *
 *   rate_factor = pow(rho_0, order_0) * ... * pow(rho_4, order_4) * rate_coefficient
 *
 * from the current field values and then writes rho_k + rate_factor * stoech_k
 * back to field k, for k = 0..4.
 *
 * \param _data_indexVector  raw bytes of the index vector; each record is
 *                           12 bytes wide and holds three int32 values
 *                           x, y, z at byte offsets 0, 4 and 8
 * \param _data_rho_0.._data_rho_4
 *                           base pointers of the five fields; all are
 *                           declared RESTRICT
 * \param _stride_rho_K_0/_1/_2
 *                           strides of field K along x / y / z, counted in
 *                           elements (they are added to a double pointer)
 * \param indexVectorSize    number of records in the index vector
 * \param order_0..order_4   exponents applied to the field values
 * \param rate_coefficient   constant factor of the rate expression
 * \param stoech_0..stoech_4 per-field multipliers of rate_factor
 */
static FUNC_PREFIX __launch_bounds__(256) void reactionkernelindexed_5_double_precision_cuda_boundary_ReactionKernelIndexed_5_double_precision_CUDA(
    uint8_t *RESTRICT const _data_indexVector,
    double *RESTRICT _data_rho_0, double *RESTRICT _data_rho_1, double *RESTRICT _data_rho_2, double *RESTRICT _data_rho_3, double *RESTRICT _data_rho_4,
    int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2,
    int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2,
    int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2,
    int64_t const _stride_rho_3_0, int64_t const _stride_rho_3_1, int64_t const _stride_rho_3_2,
    int64_t const _stride_rho_4_0, int64_t const _stride_rho_4_1, int64_t const _stride_rho_4_2,
    int32_t indexVectorSize,
    double order_0, double order_1, double order_2, double order_3, double order_4,
    double rate_coefficient,
    double stoech_0, double stoech_1, double stoech_2, double stoech_3, double stoech_4) {
  // blockDim/blockIdx/threadIdx are 32-bit unsigned. Widen to 64 bits *before*
  // multiplying by the record size, otherwise the byte offset 12 * index wraps
  // around for index vectors with more than 2^32 / 12 records.
  const int64_t record_index = int64_t(blockDim.x) * int64_t(blockIdx.x) + int64_t(threadIdx.x);
  if (record_index >= int64_t(indexVectorSize)) {
    return; // surplus thread in the last block
  }

  // Decode the cell coordinates of this thread's 12-byte record.
  const uint8_t *const record = _data_indexVector + 12 * record_index;
  const int32_t x = *reinterpret_cast<const int32_t *>(record);
  const int32_t y = *reinterpret_cast<const int32_t *>(record + 4);
  const int32_t z = *reinterpret_cast<const int32_t *>(record + 8);

  // Address of cell (x, y, z) in each field.
  double *RESTRICT const cell_rho_0 = _data_rho_0 + _stride_rho_0_1 * y + _stride_rho_0_2 * z + _stride_rho_0_0 * x;
  double *RESTRICT const cell_rho_1 = _data_rho_1 + _stride_rho_1_1 * y + _stride_rho_1_2 * z + _stride_rho_1_0 * x;
  double *RESTRICT const cell_rho_2 = _data_rho_2 + _stride_rho_2_1 * y + _stride_rho_2_2 * z + _stride_rho_2_0 * x;
  double *RESTRICT const cell_rho_3 = _data_rho_3 + _stride_rho_3_1 * y + _stride_rho_3_2 * z + _stride_rho_3_0 * x;
  double *RESTRICT const cell_rho_4 = _data_rho_4 + _stride_rho_4_1 * y + _stride_rho_4_2 * z + _stride_rho_4_0 * x;

  // Read all inputs first: every field is updated from the *old* values.
  const double local_rho_0 = *cell_rho_0;
  const double local_rho_1 = *cell_rho_1;
  const double local_rho_2 = *cell_rho_2;
  const double local_rho_3 = *cell_rho_3;
  const double local_rho_4 = *cell_rho_4;

  // The factor order is kept as generated so the floating-point result does not change.
  const double rate_factor = pow(local_rho_0, order_0) * pow(local_rho_1, order_1) * pow(local_rho_2, order_2) * pow(local_rho_3, order_3) * pow(local_rho_4, order_4) * rate_coefficient;

  *cell_rho_0 = local_rho_0 + rate_factor * stoech_0;
  *cell_rho_1 = local_rho_1 + rate_factor * stoech_1;
  *cell_rho_2 = local_rho_2 + rate_factor * stoech_2;
  *cell_rho_3 = local_rho_3 + rate_factor * stoech_3;
  *cell_rho_4 = local_rho_4 + rate_factor * stoech_4;
}
101} // namespace internal_reactionkernelindexed_5_double_precision_cuda_boundary_ReactionKernelIndexed_5_double_precision_CUDA
102
103// NOLINTEND(readability-non-const-parameter*)
104
// Restore the diagnostic state that was pushed before the kernel definition.
#if defined(__NVCC__)
#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#pragma nv_diagnostic pop
#else
#pragma pop
#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#elif defined(__clang__)
#if defined(__CUDA__)
// clang compiling CUDA code, device mode and host mode alike
#pragma clang diagnostic pop
#endif // defined(__CUDA__)
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
124
/**
 * Launches the indexed reaction kernel for one block on the given stream.
 *
 * The function looks up the block's index vector of the requested type,
 * returns early when it is empty, fetches the five GPU fields rho_0..rho_4
 * together with their strides and enqueues the kernel with one thread per
 * index-vector entry. It issues no synchronization call itself; only the
 * launch status is checked.
 *
 * \param block   block whose index vector and fields are processed
 * \param type    selects which index vector of the block is used
 * \param stream  stream the kernel is enqueued on
 */
void ReactionKernelIndexed_5_double_precision_CUDA::run_impl(IBlock *block, IndexVectors::Type type, gpuStream_t stream) {
  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
  if (indexVectorSize == 0)
    return; // nothing to do, and a launch with zero threads would be invalid

  // The kernel decodes the index vector byte-wise, so hand it over as raw bytes.
  auto pointer = indexVectors->pointerGpu(type);
  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);

  auto rho_0 = block->getData<gpu::GPUField<double>>(rho_0ID);
  auto rho_1 = block->getData<gpu::GPUField<double>>(rho_1ID);
  auto rho_2 = block->getData<gpu::GPUField<double>>(rho_2ID);
  auto rho_3 = block->getData<gpu::GPUField<double>>(rho_3ID);
  auto rho_4 = block->getData<gpu::GPUField<double>>(rho_4ID);

  // Local aliases of the sweep parameters stored in the object.
  auto &order_0 = order_0_;
  auto &order_1 = order_1_;
  auto &order_2 = order_2_;
  auto &order_3 = order_3_;
  auto &order_4 = order_4_;
  auto &rate_coefficient = rate_coefficient_;
  auto &stoech_0 = stoech_0_;
  auto &stoech_1 = stoech_1_;
  auto &stoech_2 = stoech_2_;
  auto &stoech_3 = stoech_3_;
  auto &stoech_4 = stoech_4_;

  // Base pointers refer to cell (0, 0, 0, 0) of each field; the assertion
  // checks that coordinate 0 is not below -nrOfGhostLayers().
  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()))
  double *RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()))
  double *RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()))
  double *RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()))
  double *RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_4->nrOfGhostLayers()))
  double *RESTRICT _data_rho_4 = rho_4->dataAt(0, 0, 0, 0);

  const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
  const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
  const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
  const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
  const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
  const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
  const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
  const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
  const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
  const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
  const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
  const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
  const int64_t _stride_rho_4_0 = int64_t(rho_4->xStride());
  const int64_t _stride_rho_4_1 = int64_t(rho_4->yStride());
  const int64_t _stride_rho_4_2 = int64_t(rho_4->zStride());

  // One thread per index-vector entry: at most 256 threads per block (the
  // kernel's __launch_bounds__), and enough blocks to cover all entries.
  const int32_t threadsPerBlock = (256 < indexVectorSize) ? 256 : indexVectorSize;
  const int64_t numBlocks = (int64_t(indexVectorSize) + int64_t(threadsPerBlock) - 1) / int64_t(threadsPerBlock);
  dim3 _block(uint32_c(threadsPerBlock), uint32_c(1), uint32_c(1));
  dim3 _grid(uint32_c(numBlocks), uint32_c(1), uint32_c(1));

  internal_reactionkernelindexed_5_double_precision_cuda_boundary_ReactionKernelIndexed_5_double_precision_CUDA::reactionkernelindexed_5_double_precision_cuda_boundary_ReactionKernelIndexed_5_double_precision_CUDA<<<_grid, _block, 0, stream>>>(
      _data_indexVector, _data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _data_rho_4,
      _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2,
      _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2,
      _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2,
      _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2,
      _stride_rho_4_0, _stride_rho_4_1, _stride_rho_4_2,
      indexVectorSize, order_0, order_1, order_2, order_3, order_4, rate_coefficient,
      stoech_0, stoech_1, stoech_2, stoech_3, stoech_4);

  // A kernel launch returns no status of its own; query the runtime so that
  // an invalid launch configuration (or an earlier pending error) is reported
  // here instead of surfacing at some unrelated later call.
  // NOTE(review): WALBERLA_GPU_CHECK / gpuPeekAtLastError are expected to come
  // from the already included "gpu/ErrorChecking.h" - confirm against the
  // waLBerla version in use.
  WALBERLA_GPU_CHECK(gpuPeekAtLastError());
}
181
185
189
193
194} // namespace pystencils
195} // namespace walberla
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:176
STL namespace.
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const _stride_rho_0_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double double double double double double double stoech_3
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_4_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_3_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double order_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t indexVectorSize
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double double double double double stoech_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const _stride_rho_0_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double order_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double double double double stoech_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_3_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double order_3
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const _stride_rho_1_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double double order_4
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double double double double double double stoech_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const _stride_rho_0_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double double double rate_coefficient
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double order_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_3_2
static FUNC_PREFIX __launch_bounds__(256) void reactionkernelindexed_5_double_precision_cuda_boundary_ReactionKernelIndexed_5_double_precision_CUDA(uint8_t *RESTRICT const _data_indexVector
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_4_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_4_1
\file PackInfoPdfDoublePrecision.cpp \author pystencils