ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
CollideSweepDoublePrecisionThermalizedCUDA.cu
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \file CollideSweepDoublePrecisionThermalizedCUDA.cu
17//! \author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 0c8b4b926c6979288fd8a6846d02ec0870e1fe41
21
22#include <cmath>
23
25#include "core/DataTypes.h"
26#include "core/Macros.h"
27
28#include "philox_rand.h"
29
30#define FUNC_PREFIX __global__
31
32#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
33#pragma GCC diagnostic push
34#pragma GCC diagnostic ignored "-Wfloat-equal"
35#pragma GCC diagnostic ignored "-Wshadow"
36#pragma GCC diagnostic ignored "-Wconversion"
37#pragma GCC diagnostic ignored "-Wunused-variable"
38#endif
39
40#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
41#pragma warning push
42#pragma warning(disable : 1599)
43#endif
44
45using namespace std;
46
47namespace walberla {
48namespace pystencils {
49
// Internal namespace holding the single generated device kernel of this sweep.
50namespace internal_collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda {
// Kernel that updates, in place, the 19 pdfs components of one cell per thread.
//
// FUNC_PREFIX expands to __global__ (see the #define near the top of this file);
// the kernel is annotated with __launch_bounds__(256).
// Each thread derives its cell coordinates (ctr_0, ctr_1, ctr_2) from
// blockDim * blockIdx + threadIdx in x, y, z. Threads whose coordinates are not
// below _size_force_0 / _size_force_1 / _size_force_2 do nothing.
//
// Parameters:
//   _data_force            base pointer of the force field; components 0..2 of the
//                          cell are only read.
//   _data_pdfs             base pointer of the pdfs field; components 0..18 of the
//                          cell are read first and overwritten at the end.
//   _size_force_0..2       extents used by the bounds guard.
//   _stride_force_0..3,
//   _stride_pdfs_0..3      element strides: indices 0..2 multiply ctr_0..ctr_2,
//                          index 3 multiplies the component number.
//   block_offset_0..2      added to ctr_0..ctr_2 before they are passed to
//                          philox_double2.
//   kT                     enters the noise amplitudes through xi_105 = kT * rho;
//                          random numbers are only drawn when kT > 0.
//   omega_bulk, omega_even,
//   omega_odd, omega_shear multiplicative factors of the term groups below
//                          (presumably the relaxation rates of the respective
//                          moment groups -- confirm against the generating
//                          lbmpy model).
//   seed, time_step        forwarded unchanged to philox_double2.
//
// NOTE(review): there is no guard against rho == 0 before xi_0 = 1 / rho.
51static FUNC_PREFIX __launch_bounds__(256) void collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda(double *RESTRICT const _data_force, double *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, uint32_t block_offset_0, uint32_t block_offset_1, uint32_t block_offset_2, double kT, double omega_bulk, double omega_even, double omega_odd, double omega_shear, uint32_t seed, uint32_t time_step) {
   // Bounds guard: the launch grid may be larger than the iteration extents.
52 if (blockDim.x * blockIdx.x + threadIdx.x < _size_force_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_force_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_force_2) {
   // Cell coordinates of this thread.
53 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
54 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
55 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
   // Load phase: for every component a row pointer is formed from the y/z
   // strides plus (component * stride_3), then indexed with stride_0 * ctr_0.
   // All 19 pdfs components (0..18) and the 3 force components (0..2) of the
   // cell are copied into the temporaries xi_244..xi_265, in the generator's
   // own order.
56 double *RESTRICT _data_pdfs_10_20_36 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3;
57 const double xi_244 = _data_pdfs_10_20_36[_stride_pdfs_0 * ctr_0];
58 double *RESTRICT _data_pdfs_10_20_318 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3;
59 const double xi_245 = _data_pdfs_10_20_318[_stride_pdfs_0 * ctr_0];
60 double *RESTRICT _data_pdfs_10_20_32 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
61 const double xi_246 = _data_pdfs_10_20_32[_stride_pdfs_0 * ctr_0];
62 double *RESTRICT _data_pdfs_10_20_313 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3;
63 const double xi_247 = _data_pdfs_10_20_313[_stride_pdfs_0 * ctr_0];
64 double *RESTRICT _data_pdfs_10_20_310 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
65 const double xi_248 = _data_pdfs_10_20_310[_stride_pdfs_0 * ctr_0];
66 double *RESTRICT _data_pdfs_10_20_30 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2;
67 const double xi_249 = _data_pdfs_10_20_30[_stride_pdfs_0 * ctr_0];
68 double *RESTRICT _data_pdfs_10_20_38 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
69 const double xi_250 = _data_pdfs_10_20_38[_stride_pdfs_0 * ctr_0];
70 double *RESTRICT _data_pdfs_10_20_39 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
71 const double xi_251 = _data_pdfs_10_20_39[_stride_pdfs_0 * ctr_0];
72 double *RESTRICT _data_pdfs_10_20_315 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3;
73 const double xi_252 = _data_pdfs_10_20_315[_stride_pdfs_0 * ctr_0];
74 double *RESTRICT _data_pdfs_10_20_317 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3;
75 const double xi_253 = _data_pdfs_10_20_317[_stride_pdfs_0 * ctr_0];
76 double *RESTRICT _data_force_10_20_32 = _data_force + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
77 const double xi_254 = _data_force_10_20_32[_stride_force_0 * ctr_0];
78 double *RESTRICT _data_pdfs_10_20_312 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3;
79 const double xi_255 = _data_pdfs_10_20_312[_stride_pdfs_0 * ctr_0];
80 double *RESTRICT _data_force_10_20_31 = _data_force + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3;
81 const double xi_256 = _data_force_10_20_31[_stride_force_0 * ctr_0];
82 double *RESTRICT _data_pdfs_10_20_34 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
83 const double xi_257 = _data_pdfs_10_20_34[_stride_pdfs_0 * ctr_0];
84 double *RESTRICT _data_pdfs_10_20_311 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3;
85 const double xi_258 = _data_pdfs_10_20_311[_stride_pdfs_0 * ctr_0];
86 double *RESTRICT _data_pdfs_10_20_35 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3;
87 const double xi_259 = _data_pdfs_10_20_35[_stride_pdfs_0 * ctr_0];
88 double *RESTRICT _data_pdfs_10_20_31 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
89 const double xi_260 = _data_pdfs_10_20_31[_stride_pdfs_0 * ctr_0];
90 double *RESTRICT _data_pdfs_10_20_37 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
91 const double xi_261 = _data_pdfs_10_20_37[_stride_pdfs_0 * ctr_0];
92 double *RESTRICT _data_pdfs_10_20_316 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3;
93 const double xi_262 = _data_pdfs_10_20_316[_stride_pdfs_0 * ctr_0];
94 double *RESTRICT _data_pdfs_10_20_33 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
95 const double xi_263 = _data_pdfs_10_20_33[_stride_pdfs_0 * ctr_0];
96 double *RESTRICT _data_pdfs_10_20_314 = _data_pdfs + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3;
97 const double xi_264 = _data_pdfs_10_20_314[_stride_pdfs_0 * ctr_0];
98 double *RESTRICT _data_force_10_20_30 = _data_force + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2;
99 const double xi_265 = _data_force_10_20_30[_stride_force_0 * ctr_0];
100
   // Random numbers: eight philox_double2 calls, one per index 7..0, each
   // filling a pair random_N_0 / random_N_1. The calls receive time_step, the
   // offset cell coordinates, the index N and seed. When kT > 0. is false the
   // calls are skipped and the pairs keep their zero initialisation.
   // NOTE(review): presumably philox_double2 yields uniform values in [0, 1),
   // which the "- 0.5" shifts further down would centre around zero --
   // confirm in philox_rand.h.
101 double random_7_0{};
102 double random_7_1{};
103 if (kT > 0.) {
104 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 7, seed, random_7_0, random_7_1);
105 }
106
107 double random_6_0{};
108 double random_6_1{};
109 if (kT > 0.) {
110 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 6, seed, random_6_0, random_6_1);
111 }
112
113 double random_5_0{};
114 double random_5_1{};
115 if (kT > 0.) {
116 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 5, seed, random_5_0, random_5_1);
117 }
118
119 double random_4_0{};
120 double random_4_1{};
121 if (kT > 0.) {
122 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 4, seed, random_4_0, random_4_1);
123 }
124
125 double random_3_0{};
126 double random_3_1{};
127 if (kT > 0.) {
128 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 3, seed, random_3_0, random_3_1);
129 }
130
131 double random_2_0{};
132 double random_2_1{};
133 if (kT > 0.) {
134 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 2, seed, random_2_0, random_2_1);
135 }
136
137 double random_1_0{};
138 double random_1_1{};
139 if (kT > 0.) {
140 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 1, seed, random_1_0, random_1_1);
141 }
142
143 double random_0_0{};
144 double random_0_1{};
145 if (kT > 0.) {
146 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 0, seed, random_0_0, random_0_1);
147 }
   // Common sub-expressions that depend only on the loaded values, the
   // parameters and the random numbers (partial sums, negations, scaled
   // force components and omega factors).
148 const double xi_2 = xi_245 + xi_264;
149 const double xi_3 = xi_2 + xi_257;
150 const double xi_4 = xi_252 + xi_258 + xi_260;
151 const double xi_5 = xi_255 + xi_259;
152 const double xi_6 = xi_244 + xi_253;
153 const double xi_8 = xi_251 * -1.0;
154 const double xi_9 = xi_261 * -1.0;
155 const double xi_10 = xi_253 * -1.0;
156 const double xi_11 = xi_247 * -1.0;
157 const double xi_12 = xi_263 * -1.0;
158 const double xi_13 = xi_10 + xi_11 + xi_12;
159 const double xi_14 = xi_246 * -1.0;
160 const double xi_15 = xi_248 * -1.0;
161 const double xi_16 = xi_14 + xi_15;
162 const double xi_17 = xi_262 * -1.0;
163 const double xi_18 = xi_255 * -1.0;
164 const double xi_19 = xi_17 + xi_18;
165 const double xi_20 = xi_245 * -1.0;
166 const double xi_21 = xi_10 + xi_20;
167 const double xi_22 = xi_252 * -1.0;
168 const double xi_23 = xi_244 * -1.0;
169 const double xi_24 = xi_17 + xi_22 + xi_23 + xi_258;
170 const double xi_28 = omega_bulk * 0.5;
171 const double xi_29 = xi_256 * 0.16666666666666666;
172 const double xi_30 = xi_256 * 0.083333333333333329;
173 const double xi_42 = xi_265 * 0.16666666666666666;
174 const double xi_43 = xi_265 * 0.083333333333333329;
175 const double xi_49 = xi_254 * 0.16666666666666666;
176 const double xi_50 = xi_254 * 0.083333333333333329;
177 const double xi_55 = omega_shear * 0.041666666666666664;
178 const double xi_60 = omega_bulk * 0.041666666666666664;
179 const double xi_67 = xi_256 * 0.25;
180 const double xi_71 = omega_shear * 0.125;
181 const double xi_72 = xi_256 * xi_71;
   // 2.4494897427831779 is sqrt(6) to double precision.
182 const double xi_109 = 2.4494897427831779;
183 const double xi_114 = xi_249 * -1.0;
184 const double xi_118 = xi_258 * -1.0;
185 const double xi_119 = xi_118 + xi_18;
186 const double xi_120 = xi_250 * -1.0 + xi_8;
187 const double xi_122 = xi_264 * -1.0;
188 const double xi_123 = xi_11 + xi_122 + xi_15 + xi_21;
189 const double xi_125 = xi_252 * 2.0 + xi_255 * 2.0 + xi_258 * 2.0 + xi_262 * 2.0;
190 const double xi_126 = xi_125 + xi_257 * 5.0 + xi_263 * 5.0;
191 const double xi_128 = xi_247 * 2.0;
192 const double xi_129 = xi_264 * 2.0;
193 const double xi_130 = xi_245 * 2.0 + xi_253 * 2.0;
194 const double xi_132 = xi_118 + xi_255;
195 const double xi_133 = xi_132 + xi_14 + xi_22 + xi_260 + xi_262;
196 const double xi_134 = omega_odd * 0.25;
197 const double xi_135 = xi_133 * xi_134;
198 const double xi_136 = random_5_1 - 0.5;
199 const double xi_141 = xi_261 * 2.0;
200 const double xi_142 = xi_248 * 2.0;
201 const double xi_143 = xi_250 * -2.0 + xi_251 * 2.0;
202 const double xi_144 = xi_14 + xi_141 * -1.0 + xi_142 + xi_143 + xi_19 + xi_4;
203 const double xi_145 = omega_odd * 0.083333333333333329;
204 const double xi_146 = xi_144 * xi_145;
205 const double xi_147 = random_3_0 - 0.5;
206 const double xi_152 = random_0_1 - 0.5;
207 const double xi_166 = xi_122 + xi_247;
208 const double xi_167 = xi_12 + xi_166 + xi_20 + xi_253 + xi_257;
209 const double xi_168 = xi_134 * xi_167;
210 const double xi_169 = random_4_1 - 0.5;
211 const double xi_171 = xi_13 + xi_141 + xi_142 * -1.0 + xi_143 + xi_3;
212 const double xi_172 = xi_145 * xi_171;
213 const double xi_173 = random_4_0 - 0.5;
214 const double xi_178 = xi_119 + xi_23 + xi_252 + xi_259 + xi_262;
215 const double xi_179 = xi_134 * xi_178;
216 const double xi_180 = random_5_0 - 0.5;
217 const double xi_182 = xi_128 * -1.0 + xi_129 * -1.0 + xi_130 + xi_24 + xi_5;
218 const double xi_183 = xi_145 * xi_182;
219 const double xi_184 = random_3_1 - 0.5;
220 const double xi_198 = omega_shear * 0.25;
221 const double xi_211 = omega_odd * 0.041666666666666664;
222 const double xi_212 = xi_182 * xi_211;
223 const double xi_213 = omega_odd * 0.125;
224 const double xi_214 = xi_178 * xi_213;
225 const double xi_220 = xi_144 * xi_211;
226 const double xi_221 = xi_133 * xi_213;
227 const double xi_235 = xi_167 * xi_213;
228 const double xi_236 = xi_171 * xi_211;
   // rr_0 is the constant 0.0, so xi_31, xi_44, xi_51, xi_53 and the products
   // xi_54, xi_59, xi_81 built from xi_53 are zero for finite inputs.
229 const double rr_0 = 0.0;
230 const double xi_31 = rr_0 * xi_30;
231 const double xi_44 = rr_0 * xi_43;
232 const double xi_51 = rr_0 * xi_50;
233 const double xi_53 = rr_0 * 0.041666666666666664;
234 const double xi_54 = xi_265 * xi_53;
235 const double xi_59 = xi_256 * xi_53;
236 const double xi_81 = xi_254 * xi_53;
   // vel0Term..vel2Term are partial sums of loaded pdfs values; together with
   // the remaining addends, rho is the sum of all 19 loaded pdfs values.
237 const double vel0Term = xi_248 + xi_250 + xi_3;
238 const double vel1Term = xi_261 + xi_4;
239 const double vel2Term = xi_247 + xi_5;
240 const double rho = vel0Term + vel1Term + vel2Term + xi_246 + xi_249 + xi_251 + xi_262 + xi_263 + xi_6;
   // Noise amplitudes. xi_106, xi_137, xi_153 and the pow(...) inside xi_110
   // all have the form sqrt(kT * rho * (1 - (1 - omega)^2)), written as
   // pow(..., 0.5), for omega_even, omega_odd, omega_shear and omega_bulk.
   // The long decimal factors are square roots to double precision:
   // 3.7416573867739413 = sqrt(14), 5.4772255750516612 = sqrt(30),
   // 8.3666002653407556 = sqrt(70), 1.4142135623730951 = sqrt(2),
   // 1.7320508075688772 = sqrt(3).
241 const double xi_105 = kT * rho;
242 const double xi_106 = pow(xi_105 * (-1.0 * ((omega_even * -1.0 + 1.0) * (omega_even * -1.0 + 1.0)) + 1.0), 0.5);
243 const double xi_107 = xi_106 * (random_6_0 - 0.5) * 3.7416573867739413;
244 const double xi_108 = xi_106 * (random_7_0 - 0.5) * 5.4772255750516612;
245 const double xi_110 = xi_109 * (random_2_1 - 0.5) * pow(xi_105 * (-1.0 * ((omega_bulk * -1.0 + 1.0) * (omega_bulk * -1.0 + 1.0)) + 1.0), 0.5);
246 const double xi_111 = xi_106 * (random_6_1 - 0.5) * 8.3666002653407556;
247 const double xi_137 = pow(xi_105 * (-1.0 * ((omega_odd * -1.0 + 1.0) * (omega_odd * -1.0 + 1.0)) + 1.0), 0.5);
248 const double xi_138 = xi_137 * 1.4142135623730951;
249 const double xi_139 = xi_138 * 0.5;
250 const double xi_140 = xi_136 * xi_139;
251 const double xi_148 = xi_109 * xi_137;
252 const double xi_149 = xi_148 * 0.16666666666666666;
253 const double xi_150 = xi_147 * xi_149;
254 const double xi_151 = xi_146 * -1.0 + xi_150 * -1.0;
255 const double xi_153 = pow(xi_105 * (-1.0 * ((omega_shear * -1.0 + 1.0) * (omega_shear * -1.0 + 1.0)) + 1.0), 0.5);
256 const double xi_154 = xi_153 * 0.5;
257 const double xi_155 = xi_152 * xi_154;
258 const double xi_161 = xi_153 * (random_0_0 - 0.5) * 1.7320508075688772;
259 const double xi_165 = xi_146 + xi_150;
260 const double xi_170 = xi_139 * xi_169;
261 const double xi_174 = xi_149 * xi_173;
262 const double xi_175 = xi_172 + xi_174;
263 const double xi_177 = xi_172 * -1.0 + xi_174 * -1.0;
264 const double xi_181 = xi_139 * xi_180;
265 const double xi_185 = xi_149 * xi_184;
266 const double xi_186 = xi_183 * -1.0 + xi_185 * -1.0;
267 const double xi_188 = xi_183 + xi_185;
268 const double xi_189 = xi_152 * xi_153 * 0.25;
269 const double xi_192 = xi_107 * 0.083333333333333329;
270 const double xi_196 = xi_154 * (random_1_0 - 0.5);
271 const double xi_203 = xi_154 * (random_2_0 - 0.5);
272 const double xi_207 = xi_111 * -0.014285714285714285;
273 const double xi_208 = xi_108 * 0.050000000000000003;
274 const double xi_215 = xi_148 * 0.083333333333333329;
275 const double xi_216 = xi_184 * xi_215;
276 const double xi_217 = xi_138 * 0.25;
277 const double xi_218 = xi_180 * xi_217;
278 const double xi_219 = xi_212 * -1.0 + xi_214 + xi_216 * -1.0 + xi_218;
279 const double xi_222 = xi_147 * xi_215;
280 const double xi_223 = xi_136 * xi_217;
281 const double xi_224 = xi_220 * -1.0 + xi_221 + xi_222 * -1.0 + xi_223;
282 const double xi_225 = xi_220 + xi_221 * -1.0 + xi_222 + xi_223 * -1.0;
283 const double xi_227 = xi_189 * -1.0;
284 const double xi_230 = xi_111 * 0.035714285714285712;
285 const double xi_232 = xi_154 * (random_1_1 - 0.5);
286 const double xi_237 = xi_169 * xi_217;
287 const double xi_238 = xi_173 * xi_215;
288 const double xi_239 = xi_235 * -1.0 + xi_236 + xi_237 * -1.0 + xi_238;
289 const double xi_241 = xi_235 + xi_236 * -1.0 + xi_237 + xi_238 * -1.0;
290 const double xi_242 = xi_212 + xi_214 * -1.0 + xi_216 + xi_218 * -1.0;
   // xi_0 = 1 / rho and xi_7 = 0.5 / rho (no guard against rho == 0).
   // u_0, u_1, u_2 are each xi_0 times a signed sum of pdfs values plus the
   // matching force component (xi_265, xi_256, xi_254 = force components
   // 0, 1, 2) times xi_7. The terms that follow each u_i are products of
   // u_i, the force components, rho and the omega factors.
291 const double xi_0 = ((1.0) / (rho));
292 const double xi_7 = xi_0 * 0.5;
293 const double u_0 = xi_0 * (vel0Term + xi_13 + xi_8 + xi_9) + xi_265 * xi_7;
294 const double xi_25 = u_0 * xi_265;
295 const double xi_37 = xi_25 * 0.16666666666666666;
296 const double xi_38 = xi_25 * 0.083333333333333329;
297 const double xi_39 = omega_shear * xi_38;
298 const double xi_40 = xi_37 * -1.0 + xi_39;
299 const double xi_56 = xi_25 * xi_55 * -1.0 + xi_37;
300 const double xi_57 = xi_43 * -1.0 + xi_54 + xi_56;
301 const double xi_61 = xi_25 * xi_60 * -1.0;
302 const double xi_68 = u_0 * xi_67;
303 const double xi_73 = u_0 * xi_72;
304 const double xi_77 = xi_43 + xi_54 * -1.0 + xi_56;
305 const double xi_84 = xi_38 * -1.0;
306 const double xi_95 = u_0 * xi_254;
307 const double xi_96 = xi_95 * 0.25;
308 const double xi_99 = xi_71 * xi_95;
309 const double xi_113 = rho * (u_0 * u_0);
310 const double u_1 = xi_0 * (vel1Term + xi_16 + xi_19 + xi_250 + xi_8) + xi_256 * xi_7;
311 const double xi_26 = u_1 * xi_256;
312 const double xi_32 = xi_26 * 0.16666666666666666;
313 const double xi_45 = xi_26 * 0.083333333333333329;
314 const double xi_46 = omega_shear * xi_45;
315 const double xi_47 = xi_32 * -1.0 + xi_46;
316 const double xi_62 = xi_26 * xi_60 * -1.0;
317 const double xi_69 = u_1 * 0.25;
318 const double xi_70 = xi_265 * xi_69;
319 const double xi_74 = u_1 * xi_71;
320 const double xi_75 = xi_265 * xi_74;
321 const double xi_76 = xi_68 * -1.0 + xi_70 * -1.0 + xi_73 + xi_75;
322 const double xi_78 = xi_68 + xi_70 + xi_73 * -1.0 + xi_75 * -1.0;
323 const double xi_86 = xi_254 * xi_69;
324 const double xi_88 = xi_254 * xi_74;
325 const double xi_93 = xi_45 * -1.0;
326 const double xi_112 = rho * (u_1 * u_1);
327 const double xi_121 = xi_112 + xi_120 + xi_9;
328 const double xi_197 = rho * u_1;
329 const double xi_199 = xi_198 * (u_0 * xi_197 + xi_120 + xi_248 + xi_261);
330 const double xi_200 = xi_196 * -1.0 + xi_199 * -1.0;
331 const double xi_201 = xi_196 + xi_199;
332 const double u_2 = xi_0 * (vel2Term + xi_21 + xi_24 + xi_264) + xi_254 * xi_7;
333 const double xi_27 = u_2 * xi_254;
334 const double xi_33 = xi_27 * 0.16666666666666666;
335 const double xi_34 = xi_27 * 0.083333333333333329;
336 const double xi_35 = omega_shear * xi_34;
337 const double xi_36 = xi_33 * -1.0 + xi_35;
338 const double xi_41 = omega_shear * xi_32 * -1.0 + xi_26 * 0.33333333333333331 + xi_36 + xi_40;
339 const double xi_48 = omega_shear * xi_37 * -1.0 + xi_25 * 0.33333333333333331 + xi_36 + xi_47;
340 const double xi_52 = omega_shear * xi_33 * -1.0 + xi_27 * 0.33333333333333331 + xi_40 + xi_47;
341 const double xi_58 = xi_34 * -1.0;
342 const double xi_63 = xi_27 * xi_60 * -1.0;
343 const double xi_64 = xi_26 * xi_55 * -1.0 + xi_32 + xi_61 + xi_62 + xi_63;
344 const double xi_65 = xi_30 + xi_59 * -1.0 + xi_64;
345 const double xi_66 = xi_35 + xi_58 + xi_65;
346 const double xi_79 = xi_30 * -1.0 + xi_59 + xi_64;
347 const double xi_80 = xi_35 + xi_58 + xi_79;
348 const double xi_82 = xi_27 * xi_55 * -1.0 + xi_33;
349 const double xi_83 = xi_50 + xi_81 * -1.0 + xi_82;
350 const double xi_85 = xi_39 + xi_65 + xi_84;
351 const double xi_87 = u_2 * xi_67;
352 const double xi_89 = u_2 * xi_72;
353 const double xi_90 = xi_86 + xi_87 + xi_88 * -1.0 + xi_89 * -1.0;
354 const double xi_91 = xi_39 + xi_79 + xi_84;
355 const double xi_92 = xi_86 * -1.0 + xi_87 * -1.0 + xi_88 + xi_89;
356 const double xi_94 = xi_46 + xi_61 + xi_62 + xi_63 + xi_83 + xi_93;
357 const double xi_97 = u_2 * xi_265;
358 const double xi_98 = xi_97 * 0.25;
359 const double xi_100 = xi_71 * xi_97;
360 const double xi_101 = xi_100 + xi_96 * -1.0 + xi_98 * -1.0 + xi_99;
361 const double xi_102 = xi_100 * -1.0 + xi_96 + xi_98 + xi_99 * -1.0;
362 const double xi_103 = xi_50 * -1.0 + xi_81 + xi_82;
363 const double xi_104 = xi_103 + xi_46 + xi_61 + xi_62 + xi_63 + xi_93;
364 const double xi_115 = rho * (u_2 * u_2);
365 const double xi_116 = xi_114 + xi_115 * 0.66666666666666663 + xi_244 * 3.0 + xi_259 * 3.0;
   // Term groups scaled by omega_even (xi_117, xi_127, xi_131), omega_bulk
   // (xi_124) and omega_shear (xi_157, xi_160, and xi_199/xi_205/xi_233 via
   // xi_198), later combined with the noise terms computed above.
366 const double xi_117 = omega_even * (xi_112 * 0.66666666666666663 + xi_113 * 1.6666666666666667 + xi_116 + xi_246 * 3.0 + xi_252 * -3.0 + xi_255 * -3.0 + xi_258 * -3.0 + xi_260 * 3.0 + xi_262 * -3.0);
367 const double xi_124 = omega_bulk * (xi_113 + xi_115 + xi_119 + xi_121 + xi_123 + xi_17 + xi_22 + xi_249);
368 const double xi_127 = omega_even * (xi_112 * 2.3333333333333335 + xi_116 + xi_126 + xi_245 * -5.0 + xi_246 * -2.0 + xi_247 * -5.0 + xi_253 * -5.0 + xi_260 * -2.0 + xi_264 * -5.0);
369 const double xi_131 = omega_even * (xi_114 + xi_115 * 3.0 + xi_126 + xi_128 + xi_129 + xi_130 + xi_244 * -4.0 + xi_246 * 5.0 + xi_248 * -7.0 + xi_250 * -7.0 + xi_251 * -7.0 + xi_259 * -4.0 + xi_260 * 5.0 + xi_261 * -7.0);
370 const double xi_156 = xi_115 * -1.0 + xi_259;
371 const double xi_157 = omega_shear * (xi_121 + xi_156 + xi_16 + xi_2 + xi_247 + xi_260 * -1.0 + xi_6);
372 const double xi_158 = xi_157 * 0.125;
373 const double xi_159 = xi_107 * -0.11904761904761904 + xi_131 * -0.01984126984126984;
374 const double xi_160 = omega_shear * (xi_112 * -1.0 + xi_113 * 2.0 + xi_120 + xi_123 + xi_125 + xi_156 + xi_244 + xi_246 + xi_257 * -2.0 + xi_260 + xi_263 * -2.0 + xi_9);
375 const double xi_162 = xi_160 * -0.041666666666666664 + xi_161 * -0.16666666666666666;
376 const double xi_163 = xi_108 * -0.10000000000000001 + xi_117 * -0.050000000000000003 + xi_162;
377 const double xi_164 = xi_111 * 0.028571428571428571 + xi_127 * 0.014285714285714285 + xi_155 + xi_158 + xi_159 + xi_163;
378 const double xi_176 = xi_111 * -0.071428571428571425 + xi_127 * -0.035714285714285712 + xi_159 + xi_160 * 0.083333333333333329 + xi_161 * 0.33333333333333331;
379 const double xi_187 = xi_107 * 0.095238095238095233 + xi_111 * -0.042857142857142858 + xi_127 * -0.021428571428571429 + xi_131 * 0.015873015873015872 + xi_155 * -1.0 + xi_158 * -1.0 + xi_163;
380 const double xi_190 = xi_157 * 0.0625;
381 const double xi_191 = xi_131 * 0.013888888888888888;
382 const double xi_193 = xi_110 * 0.083333333333333329 + xi_124 * 0.041666666666666664;
383 const double xi_194 = xi_160 * 0.020833333333333332 + xi_161 * 0.083333333333333329 + xi_193;
384 const double xi_195 = xi_165 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
385 const double xi_202 = xi_151 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
386 const double xi_204 = xi_127 * -0.0071428571428571426;
387 const double xi_205 = xi_198 * (u_2 * xi_197 + xi_132 + xi_17 + xi_252);
388 const double xi_206 = xi_117 * 0.025000000000000001;
389 const double xi_209 = xi_107 * -0.023809523809523808 + xi_131 * -0.003968253968253968;
390 const double xi_210 = xi_162 + xi_193 + xi_203 + xi_204 + xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
391 const double xi_226 = xi_162 + xi_193 + xi_203 * -1.0 + xi_204 + xi_205 * -1.0 + xi_206 + xi_207 + xi_208 + xi_209;
392 const double xi_228 = xi_190 * -1.0;
393 const double xi_229 = xi_127 * 0.017857142857142856;
394 const double xi_231 = xi_188 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
395 const double xi_233 = xi_198 * (rho * u_0 * u_2 + xi_10 + xi_166 + xi_245);
396 const double xi_234 = xi_232 * -1.0 + xi_233 * -1.0;
397 const double xi_240 = xi_232 + xi_233;
398 const double xi_243 = xi_186 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
   // Force contribution for each of the 19 components, assembled from the
   // force-dependent sub-expressions above.
399 const double forceTerm_0 = xi_25 * xi_28 + xi_25 * -1.0 + xi_26 * xi_28 + xi_26 * -1.0 + xi_27 * xi_28 + xi_27 * -1.0;
400 const double forceTerm_1 = xi_29 + xi_31 * -1.0 + xi_41;
401 const double forceTerm_2 = xi_29 * -1.0 + xi_31 + xi_41;
402 const double forceTerm_3 = xi_42 * -1.0 + xi_44 + xi_48;
403 const double forceTerm_4 = xi_42 + xi_44 * -1.0 + xi_48;
404 const double forceTerm_5 = xi_49 + xi_51 * -1.0 + xi_52;
405 const double forceTerm_6 = xi_49 * -1.0 + xi_51 + xi_52;
406 const double forceTerm_7 = xi_57 + xi_66 + xi_76;
407 const double forceTerm_8 = xi_66 + xi_77 + xi_78;
408 const double forceTerm_9 = xi_57 + xi_78 + xi_80;
409 const double forceTerm_10 = xi_76 + xi_77 + xi_80;
410 const double forceTerm_11 = xi_83 + xi_85 + xi_90;
411 const double forceTerm_12 = xi_83 + xi_91 + xi_92;
412 const double forceTerm_13 = xi_101 + xi_57 + xi_94;
413 const double forceTerm_14 = xi_102 + xi_77 + xi_94;
414 const double forceTerm_15 = xi_103 + xi_85 + xi_92;
415 const double forceTerm_16 = xi_103 + xi_90 + xi_91;
416 const double forceTerm_17 = xi_102 + xi_104 + xi_57;
417 const double forceTerm_18 = xi_101 + xi_104 + xi_77;
   // Store phase: each of the 19 components is overwritten in place with its
   // previously loaded value (last addend of each line) plus forceTerm_i plus
   // the omega-scaled and noise terms. All loads happened above, so every
   // store only depends on values read before the first write.
418 _data_pdfs_10_20_30[_stride_pdfs_0 * ctr_0] = forceTerm_0 + xi_107 * 0.14285714285714285 + xi_108 * 0.20000000000000001 + xi_110 * -1.0 + xi_111 * 0.085714285714285715 + xi_117 * 0.10000000000000001 + xi_124 * -0.5 + xi_127 * 0.042857142857142858 + xi_131 * 0.023809523809523808 + xi_249;
419 _data_pdfs_10_20_31[_stride_pdfs_0 * ctr_0] = forceTerm_1 + xi_135 * -1.0 + xi_140 * -1.0 + xi_151 + xi_164 + xi_260;
420 _data_pdfs_10_20_32[_stride_pdfs_0 * ctr_0] = forceTerm_2 + xi_135 + xi_140 + xi_164 + xi_165 + xi_246;
421 _data_pdfs_10_20_33[_stride_pdfs_0 * ctr_0] = forceTerm_3 + xi_168 + xi_170 + xi_175 + xi_176 + xi_263;
422 _data_pdfs_10_20_34[_stride_pdfs_0 * ctr_0] = forceTerm_4 + xi_168 * -1.0 + xi_170 * -1.0 + xi_176 + xi_177 + xi_257;
423 _data_pdfs_10_20_35[_stride_pdfs_0 * ctr_0] = forceTerm_5 + xi_179 * -1.0 + xi_181 * -1.0 + xi_186 + xi_187 + xi_259;
424 _data_pdfs_10_20_36[_stride_pdfs_0 * ctr_0] = forceTerm_6 + xi_179 + xi_181 + xi_187 + xi_188 + xi_244;
425 _data_pdfs_10_20_37[_stride_pdfs_0 * ctr_0] = forceTerm_7 + xi_177 + xi_195 + xi_200 + xi_261;
426 _data_pdfs_10_20_38[_stride_pdfs_0 * ctr_0] = forceTerm_8 + xi_175 + xi_195 + xi_201 + xi_250;
427 _data_pdfs_10_20_39[_stride_pdfs_0 * ctr_0] = forceTerm_9 + xi_177 + xi_201 + xi_202 + xi_251;
428 _data_pdfs_10_20_310[_stride_pdfs_0 * ctr_0] = forceTerm_10 + xi_175 + xi_200 + xi_202 + xi_248;
429 _data_pdfs_10_20_311[_stride_pdfs_0 * ctr_0] = forceTerm_11 + xi_210 + xi_219 + xi_224 + xi_258;
430 _data_pdfs_10_20_312[_stride_pdfs_0 * ctr_0] = forceTerm_12 + xi_219 + xi_225 + xi_226 + xi_255;
431 _data_pdfs_10_20_313[_stride_pdfs_0 * ctr_0] = forceTerm_13 + xi_231 + xi_234 + xi_239 + xi_247;
432 _data_pdfs_10_20_314[_stride_pdfs_0 * ctr_0] = forceTerm_14 + xi_231 + xi_240 + xi_241 + xi_264;
433 _data_pdfs_10_20_315[_stride_pdfs_0 * ctr_0] = forceTerm_15 + xi_224 + xi_226 + xi_242 + xi_252;
434 _data_pdfs_10_20_316[_stride_pdfs_0 * ctr_0] = forceTerm_16 + xi_210 + xi_225 + xi_242 + xi_262;
435 _data_pdfs_10_20_317[_stride_pdfs_0 * ctr_0] = forceTerm_17 + xi_239 + xi_240 + xi_243 + xi_253;
436 _data_pdfs_10_20_318[_stride_pdfs_0 * ctr_0] = forceTerm_18 + xi_234 + xi_241 + xi_243 + xi_245;
437 }
438}
439} // namespace internal_collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda
440
442 auto force = block->getData<gpu::GPUField<double>>(forceID);
443 auto pdfs = block->getData<gpu::GPUField<double>>(pdfsID);
444
445 auto block_offset_0 = this->block_offset_0_;
446 auto &seed = this->seed_;
447 auto block_offset_1 = this->block_offset_1_;
448 auto &time_step = this->time_step_;
449 auto block_offset_2 = this->block_offset_2_;
450 auto &omega_bulk = this->omega_bulk_;
451 auto &kT = this->kT_;
452 auto &omega_odd = this->omega_odd_;
453 auto &omega_shear = this->omega_shear_;
454 auto &omega_even = this->omega_even_;
455 block_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);
456 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()))
457 double *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
458 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
459 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
460 double *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
461 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
462 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(force->xSize()) + 0))
463 const int64_t _size_force_0 = int64_t(int64_c(force->xSize()) + 0);
464 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
465 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(force->ySize()) + 0))
466 const int64_t _size_force_1 = int64_t(int64_c(force->ySize()) + 0);
467 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
468 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(force->zSize()) + 0))
469 const int64_t _size_force_2 = int64_t(int64_c(force->zSize()) + 0);
470 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
471 const int64_t _stride_force_0 = int64_t(force->xStride());
472 const int64_t _stride_force_1 = int64_t(force->yStride());
473 const int64_t _stride_force_2 = int64_t(force->zStride());
474 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
475 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
476 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
477 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
478 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
479 dim3 _block(uint32_t(((16 < _size_force_0) ? 16 : _size_force_0)), uint32_t(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))), uint32_t(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))));
480 dim3 _grid(uint32_t(((_size_force_0) % (((16 < _size_force_0) ? 16 : _size_force_0)) == 0 ? (int64_t)(_size_force_0) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)) : ((int64_t)(_size_force_0) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))) + 1)), uint32_t(((_size_force_1) % (((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))) == 0 ? (int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))) : ((int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) + 1)), uint32_t(((_size_force_2) % (((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? 
_size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))) == 0 ? (int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? 
_size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))) : ((int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))))) + 1)));
481 internal_collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda::collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
482}
483
484void CollideSweepDoublePrecisionThermalizedCUDA::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream) {
485 CellInterval ci = globalCellInterval;
486 CellInterval blockBB = blocks->getBlockCellBB(*block);
487 blockBB.expand(ghostLayers);
488 ci.intersect(blockBB);
489 blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
490 if (ci.empty())
491 return;
492
493 auto force = block->getData<gpu::GPUField<double>>(forceID);
494 auto pdfs = block->getData<gpu::GPUField<double>>(pdfsID);
495
496 auto block_offset_0 = this->block_offset_0_;
497 auto &seed = this->seed_;
498 auto block_offset_1 = this->block_offset_1_;
499 auto &time_step = this->time_step_;
500 auto block_offset_2 = this->block_offset_2_;
501 auto &omega_bulk = this->omega_bulk_;
502 auto &kT = this->kT_;
503 auto &omega_odd = this->omega_odd_;
504 auto &omega_shear = this->omega_shear_;
505 auto &omega_even = this->omega_even_;
506 block_offset_generator(block, block_offset_0, block_offset_1, block_offset_2);
507 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()))
508 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()))
509 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()))
510 double *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
511 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
512 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
513 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
514 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
515 double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
516 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
517 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
518 const int64_t _size_force_0 = int64_t(int64_c(ci.xSize()) + 0);
519 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
520 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
521 const int64_t _size_force_1 = int64_t(int64_c(ci.ySize()) + 0);
522 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
523 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
524 const int64_t _size_force_2 = int64_t(int64_c(ci.zSize()) + 0);
525 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
526 const int64_t _stride_force_0 = int64_t(force->xStride());
527 const int64_t _stride_force_1 = int64_t(force->yStride());
528 const int64_t _stride_force_2 = int64_t(force->zStride());
529 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
530 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
531 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
532 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
533 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
534 dim3 _block(uint32_t(((16 < _size_force_0) ? 16 : _size_force_0)), uint32_t(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))), uint32_t(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))));
535 dim3 _grid(uint32_t(((_size_force_0) % (((16 < _size_force_0) ? 16 : _size_force_0)) == 0 ? (int64_t)(_size_force_0) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)) : ((int64_t)(_size_force_0) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))) + 1)), uint32_t(((_size_force_1) % (((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))) == 0 ? (int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))) : ((int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) + 1)), uint32_t(((_size_force_2) % (((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? 
_size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))) == 0 ? (int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? 
_size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))) : ((int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))))) + 1)));
536 internal_collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda::collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
537}
538
539} // namespace pystencils
540} // namespace walberla
541
542#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
543#pragma GCC diagnostic pop
544#endif
545
546#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
547#pragma warning pop
548#endif
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \ingroup lbm \author lbmpy
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
__global__ float * force
std::function< void(IBlock *, uint32_t &, uint32_t &, uint32_t &)> block_offset_generator
void runOnCellInterval(const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:174
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double double double double double omega_shear
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t block_offset_2
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t block_offset_1
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t block_offset_0
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double double double double double uint32_t seed
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_1
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double kT
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_0
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_3
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_2
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_3
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double double double double omega_odd
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_2
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_1
static FUNC_PREFIX __launch_bounds__(256) void collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda(double *RESTRICT const _data_force
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double double omega_bulk
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double double double omega_even
QUALIFIERS void philox_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3, uint32 key0, uint32 key1, double &rnd1, double &rnd2)