ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
FixedFlux_double_precision_CUDA.cu
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
//! \file FixedFlux_double_precision_CUDA.cu
17//! \\author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.4+1.ge851f4e, lbmpy v1.4+1.ge9efe34, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit 007e77e077ad9d22b5eed6f3d3118240993e553c
21
23#include "core/DataTypes.h"
24#include "core/Macros.h"
25#include "gpu/ErrorChecking.h"
26
// Generated kernels are emitted behind FUNC_PREFIX so the same body can be
// compiled as a CUDA __global__ kernel.
#define FUNC_PREFIX __global__

using namespace std;
30
31namespace walberla {
32namespace pystencils {
33
// Per-compiler definition of RESTRICT and suppression of warnings that the
// generated code is known to trigger (unused variables, aliasing, conversions).
#if defined(__NVCC__)
#define RESTRICT __restrict__
#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#pragma nv_diagnostic push
#pragma nv_diag_suppress 177 // unused variable
#else
#pragma push
#pragma diag_suppress 177 // unused variable
#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#elif defined(__clang__)
#if defined(__CUDA__)
#if defined(__CUDA_ARCH__)
// clang compiling CUDA code in device mode
#define RESTRICT __restrict__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wstrict-aliasing"
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wconversion"
#pragma clang diagnostic ignored "-Wsign-compare"
#else
// clang compiling CUDA code in host mode
#define RESTRICT __restrict__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wstrict-aliasing"
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wconversion"
#pragma clang diagnostic ignored "-Wsign-compare"
#endif // defined(__CUDA_ARCH__)
#endif // defined(__CUDA__)
#elif defined(__GNUC__) or defined(__GNUG__)
#define RESTRICT __restrict__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wconversion"
#elif defined(_MSC_VER)
#define RESTRICT __restrict
#else
#define RESTRICT
#endif
74
// NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_fixedflux_double_precision_cuda_boundary_FixedFlux_double_precision_CUDA {

// Fixed-flux boundary kernel for directions 1..26 (presumably a D3Q27-style
// stencil without the center direction — TODO confirm against the lbmpy setup).
//
// One thread processes one 40-byte record of the index vector:
//   offset  0: int32  x       offset  4: int32  y       offset  8: int32  z
//   offset 12: int32  dir     offset 16: double flux_x
//   offset 24: double flux_y  offset 32: double flux_z
// The 40-byte record stride keeps the three doubles 8-byte aligned.
//
// Launch layout: 1D grid of 1D blocks, blockDim.x <= 256 (see
// __launch_bounds__). Threads past indexVectorSize return immediately.
// Exactly one flux-field slot (component index along _stride_flux_3, at the
// boundary cell or a face/edge/corner neighbor) is written per record; the
// written value is a signed combination of the record's flux vector scaled by
// the 1/9 weight.
static FUNC_PREFIX __launch_bounds__(256) void fixedflux_double_precision_cuda_boundary_FixedFlux_double_precision_CUDA(double *RESTRICT const _data_flux, uint8_t *RESTRICT const _data_indexVector, int64_t const _stride_flux_0, int64_t const _stride_flux_1, int64_t const _stride_flux_2, int64_t const _stride_flux_3, int32_t indexVectorSize) {
  // Global 1D thread id; 64-bit to avoid overflow for very large grids.
  const int64_t ctr = int64_t(blockDim.x) * blockIdx.x + threadIdx.x;
  if (ctr >= int64_t(indexVectorSize))
    return;

  // Decode this thread's index-vector record.
  uint8_t *RESTRICT const rec = _data_indexVector + 40 * ctr;
  const int32_t x = *reinterpret_cast<int32_t *>(rec);
  const int32_t y = *reinterpret_cast<int32_t *>(rec + 4);
  const int32_t z = *reinterpret_cast<int32_t *>(rec + 8);
  const int32_t dir = *reinterpret_cast<int32_t *>(rec + 12);
  const double fx = *reinterpret_cast<double *>(rec + 16);
  const double fy = *reinterpret_cast<double *>(rec + 24);
  const double fz = *reinterpret_cast<double *>(rec + 32);

  const double w = 0.1111111111111111; // 1/9 — weight used by all directions

  // Short stride aliases and precomputed offsets shared by all cases.
  const int64_t s0 = _stride_flux_0;
  const int64_t s1 = _stride_flux_1;
  const int64_t s2 = _stride_flux_2;
  const int64_t s3 = _stride_flux_3;
  const int64_t yz = s1 * y + s2 * z; // spatial base of the boundary cell
  const int64_t xi = s0 * x;          // flux cell at x
  const int64_t xp = s0 * x + s0;     // flux cell at x + 1

  // Flattened form of the generated if/else chain: one write per direction.
  switch (dir) {
  case 26: _data_flux[yz + 9 * s3 + xi] = -w * fx - w * fy - w * fz; break;
  case 25: _data_flux[yz - s1 - s2 + 12 * s3 + xp] = -w * fx + w * fy + w * fz; break;
  case 24: _data_flux[yz + 11 * s3 + xi] = -w * fx + w * fy - w * fz; break;
  case 23: _data_flux[yz + s1 - s2 + 10 * s3 + xp] = -w * fx - w * fy + w * fz; break;
  case 22: _data_flux[yz + 10 * s3 + xi] = -w * fx - w * fy + w * fz; break;
  case 21: _data_flux[yz - s1 + s2 + 11 * s3 + xp] = -w * fx + w * fy - w * fz; break;
  case 20: _data_flux[yz + 12 * s3 + xi] = -w * fx + w * fy + w * fz; break;
  case 19: _data_flux[yz + s1 + s2 + 9 * s3 + xp] = -w * fx - w * fy - w * fz; break;
  case 18: _data_flux[yz - s2 + 6 * s3 + xp] = -w * fx + w * fz; break;
  case 17: _data_flux[yz + 5 * s3 + xi] = -w * fx - w * fz; break;
  case 16: _data_flux[yz + 7 * s3 + xi] = -w * fy - w * fz; break;
  case 15: _data_flux[yz + s1 - s2 + 8 * s3 + xi] = -w * fy + w * fz; break;
  case 14: _data_flux[yz + s2 + 5 * s3 + xp] = -w * fx - w * fz; break;
  case 13: _data_flux[yz + 6 * s3 + xi] = -w * fx + w * fz; break;
  case 12: _data_flux[yz + 8 * s3 + xi] = -w * fy + w * fz; break;
  case 11: _data_flux[yz + s1 + s2 + 7 * s3 + xi] = -w * fy - w * fz; break;
  case 10: _data_flux[yz - s1 + 4 * s3 + xp] = -w * fx + w * fy; break;
  case 9: _data_flux[yz + 3 * s3 + xi] = -w * fx - w * fy; break;
  case 8: _data_flux[yz + s1 + 3 * s3 + xp] = -w * fx - w * fy; break;
  case 7: _data_flux[yz + 4 * s3 + xi] = -w * fx + w * fy; break;
  case 6: _data_flux[yz + 2 * s3 + xi] = -w * fz; break;
  case 5: _data_flux[yz + s2 + 2 * s3 + xi] = -w * fz; break;
  case 4: _data_flux[yz + xp] = -w * fx; break;
  case 3: _data_flux[yz + xi] = -w * fx; break;
  case 2: _data_flux[yz + s3 + xi] = -w * fy; break;
  case 1: _data_flux[yz + s1 + s3 + xi] = -w * fy; break;
  default: break; // unknown direction: no-op, matching the generated chain
  }
}
} // namespace internal_fixedflux_double_precision_cuda_boundary_FixedFlux_double_precision_CUDA

// NOLINTEND(readability-non-const-parameter*)
276
// Restore the diagnostic state pushed before the generated kernel above.
#if defined(__NVCC__)
#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#pragma nv_diagnostic pop
#else
#pragma pop
#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#elif defined(__clang__)
#if defined(__CUDA__)
#if defined(__CUDA_ARCH__)
// clang compiling CUDA code in device mode
#pragma clang diagnostic pop
#else
// clang compiling CUDA code in host mode
#pragma clang diagnostic pop
#endif // defined(__CUDA_ARCH__)
#endif // defined(__CUDA__)
#elif defined(__GNUC__) or defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
296
// Launches the fixed-flux boundary kernel on the requested part (ALL / INNER /
// OUTER) of this block's index vector, on the given GPU stream.
// No-op when the index vector is empty.
void FixedFlux_double_precision_CUDA::run_impl(IBlock *block, IndexVectors::Type type, gpuStream_t stream) {
  auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
  int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
  if (indexVectorSize == 0)
    return; // nothing to do on this block

  auto pointer = indexVectors->pointerGpu(type);

  // The kernel decodes the raw 40-byte records itself, so pass bytes.
  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(pointer);

  auto flux = block->getData<gpu::GPUField<double>>(fluxID);

  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(flux->nrOfGhostLayers()))
  double *RESTRICT const _data_flux = flux->dataAt(0, 0, 0, 0);
  const int64_t _stride_flux_0 = int64_t(flux->xStride());
  const int64_t _stride_flux_1 = int64_t(flux->yStride());
  const int64_t _stride_flux_2 = int64_t(flux->zStride());
  const int64_t _stride_flux_3 = int64_t(1 * int64_t(flux->fStride()));
  // One thread per index-vector entry: at most 256 threads per block and
  // enough blocks to cover indexVectorSize (ceiling division).
  const int32_t threadsPerBlock = (indexVectorSize < 256) ? indexVectorSize : 256;
  const int64_t numBlocks = (int64_t(indexVectorSize) + threadsPerBlock - 1) / threadsPerBlock;
  dim3 _block(uint32_c(threadsPerBlock), uint32_c(1), uint32_c(1));
  dim3 _grid(uint32_c(numBlocks), uint32_c(1), uint32_c(1));
  internal_fixedflux_double_precision_cuda_boundary_FixedFlux_double_precision_CUDA::fixedflux_double_precision_cuda_boundary_FixedFlux_double_precision_CUDA<<<_grid, _block, 0, stream>>>(_data_flux, _data_indexVector, _stride_flux_0, _stride_flux_1, _stride_flux_2, _stride_flux_3, indexVectorSize);
}
319
// Applies the boundary condition on the full index vector of the block.
void FixedFlux_double_precision_CUDA::run(IBlock *block, gpuStream_t stream) {
  run_impl(block, IndexVectors::ALL, stream);
}
323
325 run_impl(block, IndexVectors::INNER, stream);
326}
327
329 run_impl(block, IndexVectors::OUTER, stream);
330}
331
332} // namespace pystencils
333} // namespace walberla
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:177
STL namespace.
static FUNC_PREFIX uint8_t *RESTRICT const int64_t const int64_t const int64_t const int64_t const _stride_flux_3
static FUNC_PREFIX uint8_t *RESTRICT const int64_t const int64_t const int64_t const _stride_flux_2
static FUNC_PREFIX __launch_bounds__(256) void fixedflux_double_precision_cuda_boundary_FixedFlux_double_precision_CUDA(double *RESTRICT const _data_flux
\file PackInfoPdfDoublePrecision.cpp \author pystencils