Loading [MathJax]/extensions/TeX/AMSmath.js
ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages Concepts
CollideSweepDoublePrecisionThermalizedCUDA.cu
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \file CollideSweepDoublePrecisionThermalizedCUDA.cu
17//! \author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
21
22#include <cmath>
23
25#include "core/DataTypes.h"
26#include "core/Macros.h"
27
28#include "philox_rand.h"
29
30#define FUNC_PREFIX __global__
31
32#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
33#pragma GCC diagnostic push
34#pragma GCC diagnostic ignored "-Wfloat-equal"
35#pragma GCC diagnostic ignored "-Wshadow"
36#pragma GCC diagnostic ignored "-Wconversion"
37#pragma GCC diagnostic ignored "-Wunused-variable"
38#endif
39
40#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
41#pragma warning push
42#pragma warning(disable : 1599)
43#endif
44
45using namespace std;
46
47namespace walberla {
48namespace pystencils {
49
50namespace internal_collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda {
// Kernel: one thread handles one cell (ctr_0, ctr_1, ctr_2). For every in-range cell it
// reads the 19 values at f-offsets 0..18 of _data_pdfs and the 3 values at f-offsets 0..2
// of _data_force, optionally draws random numbers through philox_double2, and then
// overwrites the same 19 pdf entries in place.
// NOTE(review): presumably the lbmpy-generated thermalized D3Q19 collision step (see the
// generator note in the file header) -- confirm against the generator script.
// NOTE(review): the integer fused to the start of each line looks like a listing line
// number left over from the documentation export -- strip before compiling.
//
// Parameters:
//   _data_force              base pointer of the force field; only read in this kernel
//   _data_pdfs               base pointer of the pdf field; read first, then written in place
//   _size_force_{0,1,2}      iteration extent per axis; threads outside do no work
//   _stride_force_{0..3}     element strides of the force field; index 3 selects the component
//   _stride_pdfs_{0..3}      element strides of the pdf field; index 3 selects the pdf entry
//   block_offset_{0,1,2}     added to the cell counters before they are passed to philox_double2
//   kT                       random numbers are drawn only if kT > 0; also enters the noise
//                            amplitude through kT * rho
//   omega_bulk/even/odd/shear  scalar coefficients of the update; presumably relaxation
//                            rates of the respective moment groups -- confirm
//   seed, time_step          forwarded unchanged to philox_double2
//
// __launch_bounds__(256): the kernel must not be launched with more than 256 threads per block.
51static FUNC_PREFIX __launch_bounds__(256) void collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda(double *RESTRICT const _data_force, double *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, uint32_t block_offset_0, uint32_t block_offset_1, uint32_t block_offset_2, double kT, double omega_bulk, double omega_even, double omega_odd, double omega_shear, uint32_t seed, uint32_t time_step) {
// Bounds guard: the launch grid may be larger than the field, so surplus threads skip everything.
52 if (blockDim.x * blockIdx.x + threadIdx.x < _size_force_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_force_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_force_2) {
// Cell coordinates of this thread, derived from block and thread indices.
53 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
54 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
55 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
// Load the cell's 19 pdf values and 3 force components into locals (xi_244 .. xi_265).
// All loads happen before any store to _data_pdfs further down.
56 const double xi_244 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2];
57 const double xi_245 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3];
58 const double xi_246 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3];
59 const double xi_247 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3];
60 const double xi_248 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3];
61 const double xi_249 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3];
62 const double xi_250 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3];
63 const double xi_251 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3];
64 const double xi_252 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3];
65 const double xi_253 = _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
66 const double xi_254 = _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
67 const double xi_255 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3];
68 const double xi_256 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3];
69 const double xi_257 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3];
70 const double xi_258 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3];
71 const double xi_259 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3];
72 const double xi_260 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3];
73 const double xi_261 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3];
74 const double xi_262 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3];
75 const double xi_263 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3];
76 const double xi_264 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3];
77 const double xi_265 = _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3];
78
// Random numbers: eight philox_double2 calls (fifth argument 7 down to 0) fill sixteen
// doubles random_<k>_{0,1}. Each call receives time_step, the offset cell coordinates and seed.
// When `kT > 0.` is false no call is made and every random_* keeps its value-initialized 0.0.
// NOTE(review): the output range of philox_double2 is not visible here; the later
// `random - 0.5` shifts suggest [0, 1) -- confirm in philox_rand.h.
79 double random_7_0{};
80 double random_7_1{};
81 if (kT > 0.) {
82 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 7, seed, random_7_0, random_7_1);
83 }
84
85 double random_6_0{};
86 double random_6_1{};
87 if (kT > 0.) {
88 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 6, seed, random_6_0, random_6_1);
89 }
90
91 double random_5_0{};
92 double random_5_1{};
93 if (kT > 0.) {
94 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 5, seed, random_5_0, random_5_1);
95 }
96
97 double random_4_0{};
98 double random_4_1{};
99 if (kT > 0.) {
100 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 4, seed, random_4_0, random_4_1);
101 }
102
103 double random_3_0{};
104 double random_3_1{};
105 if (kT > 0.) {
106 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 3, seed, random_3_0, random_3_1);
107 }
108
109 double random_2_0{};
110 double random_2_1{};
111 if (kT > 0.) {
112 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 2, seed, random_2_0, random_2_1);
113 }
114
115 double random_1_0{};
116 double random_1_1{};
117 if (kT > 0.) {
118 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 1, seed, random_1_0, random_1_1);
119 }
120
121 double random_0_0{};
122 double random_0_1{};
123 if (kT > 0.) {
124 philox_double2(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 0, seed, random_0_0, random_0_1);
125 }
// From here on: generator-emitted common subexpressions (xi_N) that depend only on the
// loaded pdf/force values, the omega_* coefficients and the random numbers.
126 const double xi_2 = xi_245 + xi_249;
127 const double xi_3 = xi_2 + xi_260;
128 const double xi_4 = xi_251 + xi_258 + xi_261;
129 const double xi_5 = xi_247 + xi_257;
130 const double xi_6 = xi_263 + xi_264;
131 const double xi_8 = -xi_248;
132 const double xi_9 = -xi_255;
133 const double xi_10 = -xi_263;
134 const double xi_11 = -xi_246;
135 const double xi_12 = -xi_262;
136 const double xi_13 = xi_10 + xi_11 + xi_12;
137 const double xi_14 = -xi_252;
138 const double xi_15 = -xi_259;
139 const double xi_16 = xi_14 + xi_15;
140 const double xi_17 = -xi_256;
141 const double xi_18 = -xi_257;
142 const double xi_19 = xi_17 + xi_18;
143 const double xi_20 = -xi_245;
144 const double xi_21 = xi_10 + xi_20;
145 const double xi_22 = -xi_258;
146 const double xi_23 = -xi_264;
147 const double xi_24 = xi_17 + xi_22 + xi_23 + xi_251;
148 const double xi_28 = omega_bulk * 0.5;
149 const double xi_29 = xi_265 * 0.16666666666666666;
150 const double xi_30 = xi_265 * 0.083333333333333329;
151 const double xi_42 = xi_254 * 0.16666666666666666;
152 const double xi_43 = xi_254 * 0.083333333333333329;
153 const double xi_49 = xi_253 * 0.16666666666666666;
154 const double xi_50 = xi_253 * 0.083333333333333329;
155 const double xi_55 = omega_shear * 0.041666666666666664;
156 const double xi_60 = omega_bulk * 0.041666666666666664;
157 const double xi_67 = xi_265 * 0.25;
158 const double xi_71 = omega_shear * 0.125;
159 const double xi_72 = xi_265 * xi_71;
// 2.4494897427831779 is numerically sqrt(6).
160 const double xi_109 = 2.4494897427831779;
161 const double xi_114 = -xi_244;
162 const double xi_118 = -xi_251;
163 const double xi_119 = xi_118 + xi_18;
164 const double xi_120 = -xi_250 + xi_8;
165 const double xi_122 = -xi_249;
166 const double xi_123 = xi_11 + xi_122 + xi_15 + xi_21;
167 const double xi_125 = xi_251 * 2.0 + xi_256 * 2.0 + xi_257 * 2.0 + xi_258 * 2.0;
168 const double xi_126 = xi_125 + xi_260 * 5.0 + xi_262 * 5.0;
169 const double xi_128 = xi_246 * 2.0;
170 const double xi_129 = xi_249 * 2.0;
171 const double xi_130 = xi_245 * 2.0 + xi_263 * 2.0;
172 const double xi_132 = xi_118 + xi_257;
173 const double xi_133 = xi_132 + xi_14 + xi_22 + xi_256 + xi_261;
174 const double xi_134 = omega_odd * 0.25;
175 const double xi_135 = xi_133 * xi_134;
// The random numbers are shifted by -0.5 before use (here and below).
176 const double xi_136 = random_5_1 - 0.5;
177 const double xi_141 = xi_255 * 2.0;
178 const double xi_142 = xi_259 * 2.0;
179 const double xi_143 = xi_248 * 2.0 + xi_250 * -2.0;
180 const double xi_144 = xi_14 - xi_141 + xi_142 + xi_143 + xi_19 + xi_4;
181 const double xi_145 = omega_odd * 0.083333333333333329;
182 const double xi_146 = xi_144 * xi_145;
183 const double xi_147 = random_3_0 - 0.5;
184 const double xi_152 = random_0_1 - 0.5;
185 const double xi_166 = xi_122 + xi_246;
186 const double xi_167 = xi_12 + xi_166 + xi_20 + xi_260 + xi_263;
187 const double xi_168 = xi_134 * xi_167;
188 const double xi_169 = random_4_1 - 0.5;
189 const double xi_171 = xi_13 + xi_141 - xi_142 + xi_143 + xi_3;
190 const double xi_172 = xi_145 * xi_171;
191 const double xi_173 = random_4_0 - 0.5;
192 const double xi_178 = xi_119 + xi_23 + xi_247 + xi_256 + xi_258;
193 const double xi_179 = xi_134 * xi_178;
194 const double xi_180 = random_5_0 - 0.5;
195 const double xi_182 = -xi_128 - xi_129 + xi_130 + xi_24 + xi_5;
196 const double xi_183 = xi_145 * xi_182;
197 const double xi_184 = random_3_1 - 0.5;
198 const double xi_198 = omega_shear * 0.25;
199 const double xi_211 = omega_odd * 0.041666666666666664;
200 const double xi_212 = xi_182 * xi_211;
201 const double xi_213 = omega_odd * 0.125;
202 const double xi_214 = xi_178 * xi_213;
203 const double xi_220 = xi_144 * xi_211;
204 const double xi_221 = xi_133 * xi_213;
205 const double xi_235 = xi_167 * xi_213;
206 const double xi_236 = xi_171 * xi_211;
// rr_0 is fixed to 0.0, so every product with it below (xi_31, xi_44, xi_51, xi_53 and
// the terms built from xi_53: xi_54, xi_59, xi_81) is zero for finite force values.
207 const double rr_0 = 0.0;
208 const double xi_31 = rr_0 * xi_30;
209 const double xi_44 = rr_0 * xi_43;
210 const double xi_51 = rr_0 * xi_50;
211 const double xi_53 = rr_0 * 0.041666666666666664;
212 const double xi_54 = xi_254 * xi_53;
213 const double xi_59 = xi_265 * xi_53;
214 const double xi_81 = xi_253 * xi_53;
// vel{0,1,2}Term are partial sums of pdf values that are reused for rho and u_0..u_2.
215 const double vel0Term = xi_250 + xi_259 + xi_3;
216 const double vel1Term = xi_255 + xi_4;
217 const double vel2Term = xi_246 + xi_5;
// rho: sum of all 19 pdf values of this cell.
218 const double rho = vel0Term + vel1Term + vel2Term + xi_244 + xi_248 + xi_252 + xi_256 + xi_262 + xi_6;
// Noise amplitudes: pow(kT * rho * (1 - (1 - omega)^2), 0.5), i.e. a square root, evaluated
// once per omega_even / omega_bulk / omega_odd / omega_shear. With kT == 0 the argument is 0.
219 const double xi_105 = kT * rho;
220 const double xi_106 = pow(xi_105 * (1.0 - (-omega_even + 1.0) * (-omega_even + 1.0)), 0.5);
221 const double xi_107 = xi_106 * (random_6_0 - 0.5) * 3.7416573867739413;
222 const double xi_108 = xi_106 * (random_7_0 - 0.5) * 5.4772255750516612;
223 const double xi_110 = xi_109 * (random_2_1 - 0.5) * pow(xi_105 * (1.0 - (-omega_bulk + 1.0) * (-omega_bulk + 1.0)), 0.5);
224 const double xi_111 = xi_106 * (random_6_1 - 0.5) * 8.3666002653407556;
225 const double xi_137 = pow(xi_105 * (1.0 - (-omega_odd + 1.0) * (-omega_odd + 1.0)), 0.5);
226 const double xi_138 = xi_137 * 1.4142135623730951;
227 const double xi_139 = xi_138 * 0.5;
228 const double xi_140 = xi_136 * xi_139;
229 const double xi_148 = xi_109 * xi_137;
230 const double xi_149 = xi_148 * 0.16666666666666666;
231 const double xi_150 = xi_147 * xi_149;
232 const double xi_151 = -xi_146 - xi_150;
233 const double xi_153 = pow(xi_105 * (1.0 - (-omega_shear + 1.0) * (-omega_shear + 1.0)), 0.5);
234 const double xi_154 = xi_153 * 0.5;
235 const double xi_155 = xi_152 * xi_154;
236 const double xi_161 = xi_153 * (random_0_0 - 0.5) * 1.7320508075688772;
237 const double xi_165 = xi_146 + xi_150;
238 const double xi_170 = xi_139 * xi_169;
239 const double xi_174 = xi_149 * xi_173;
240 const double xi_175 = xi_172 + xi_174;
241 const double xi_177 = -xi_172 - xi_174;
242 const double xi_181 = xi_139 * xi_180;
243 const double xi_185 = xi_149 * xi_184;
244 const double xi_186 = -xi_183 - xi_185;
245 const double xi_188 = xi_183 + xi_185;
246 const double xi_189 = xi_152 * xi_153 * 0.25;
247 const double xi_192 = xi_107 * 0.083333333333333329;
248 const double xi_196 = xi_154 * (random_1_0 - 0.5);
249 const double xi_203 = xi_154 * (random_2_0 - 0.5);
250 const double xi_207 = xi_111 * -0.014285714285714285;
251 const double xi_208 = xi_108 * 0.050000000000000003;
252 const double xi_215 = xi_148 * 0.083333333333333329;
253 const double xi_216 = xi_184 * xi_215;
254 const double xi_217 = xi_138 * 0.25;
255 const double xi_218 = xi_180 * xi_217;
256 const double xi_219 = -xi_212 + xi_214 - xi_216 + xi_218;
257 const double xi_222 = xi_147 * xi_215;
258 const double xi_223 = xi_136 * xi_217;
259 const double xi_224 = -xi_220 + xi_221 - xi_222 + xi_223;
260 const double xi_225 = xi_220 - xi_221 + xi_222 - xi_223;
261 const double xi_227 = -xi_189;
262 const double xi_230 = xi_111 * 0.035714285714285712;
263 const double xi_232 = xi_154 * (random_1_1 - 0.5);
264 const double xi_237 = xi_169 * xi_217;
265 const double xi_238 = xi_173 * xi_215;
266 const double xi_239 = -xi_235 + xi_236 - xi_237 + xi_238;
267 const double xi_241 = xi_235 - xi_236 + xi_237 - xi_238;
268 const double xi_242 = xi_212 - xi_214 + xi_216 - xi_218;
// NOTE(review): the reciprocal of rho is taken without a guard against rho == 0.
269 const double xi_0 = ((1.0) / (rho));
270 const double xi_7 = xi_0 * 0.5;
// u_0: signed pdf combination divided by rho, plus force component 0 times 0.5 / rho.
271 const double u_0 = xi_0 * (vel0Term + xi_13 + xi_8 + xi_9) + xi_254 * xi_7;
272 const double xi_25 = u_0 * xi_254;
273 const double xi_37 = xi_25 * 0.16666666666666666;
274 const double xi_38 = xi_25 * 0.083333333333333329;
275 const double xi_39 = omega_shear * xi_38;
276 const double xi_40 = -xi_37 + xi_39;
277 const double xi_56 = -xi_25 * xi_55 + xi_37;
278 const double xi_57 = -xi_43 + xi_54 + xi_56;
279 const double xi_61 = -xi_25 * xi_60;
280 const double xi_68 = u_0 * xi_67;
281 const double xi_73 = u_0 * xi_72;
282 const double xi_77 = xi_43 - xi_54 + xi_56;
283 const double xi_84 = -xi_38;
284 const double xi_95 = u_0 * xi_253;
285 const double xi_96 = xi_95 * 0.25;
286 const double xi_99 = xi_71 * xi_95;
287 const double xi_113 = rho * (u_0 * u_0);
// u_1: same construction as u_0, using force component 1 (xi_265).
288 const double u_1 = xi_0 * (vel1Term + xi_16 + xi_19 + xi_250 + xi_8) + xi_265 * xi_7;
289 const double xi_26 = u_1 * xi_265;
290 const double xi_32 = xi_26 * 0.16666666666666666;
291 const double xi_45 = xi_26 * 0.083333333333333329;
292 const double xi_46 = omega_shear * xi_45;
293 const double xi_47 = -xi_32 + xi_46;
294 const double xi_62 = -xi_26 * xi_60;
295 const double xi_69 = u_1 * 0.25;
296 const double xi_70 = xi_254 * xi_69;
297 const double xi_74 = u_1 * xi_71;
298 const double xi_75 = xi_254 * xi_74;
299 const double xi_76 = -xi_68 - xi_70 + xi_73 + xi_75;
300 const double xi_78 = xi_68 + xi_70 - xi_73 - xi_75;
301 const double xi_86 = xi_253 * xi_69;
302 const double xi_88 = xi_253 * xi_74;
303 const double xi_93 = -xi_45;
304 const double xi_112 = rho * (u_1 * u_1);
305 const double xi_121 = xi_112 + xi_120 + xi_9;
306 const double xi_197 = rho * u_1;
307 const double xi_199 = xi_198 * (u_0 * xi_197 + xi_120 + xi_255 + xi_259);
308 const double xi_200 = -xi_196 - xi_199;
309 const double xi_201 = xi_196 + xi_199;
// u_2: same construction as u_0, using force component 2 (xi_253).
310 const double u_2 = xi_0 * (vel2Term + xi_21 + xi_24 + xi_249) + xi_253 * xi_7;
311 const double xi_27 = u_2 * xi_253;
312 const double xi_33 = xi_27 * 0.16666666666666666;
313 const double xi_34 = xi_27 * 0.083333333333333329;
314 const double xi_35 = omega_shear * xi_34;
315 const double xi_36 = -xi_33 + xi_35;
316 const double xi_41 = -omega_shear * xi_32 + xi_26 * 0.33333333333333331 + xi_36 + xi_40;
317 const double xi_48 = -omega_shear * xi_37 + xi_25 * 0.33333333333333331 + xi_36 + xi_47;
318 const double xi_52 = -omega_shear * xi_33 + xi_27 * 0.33333333333333331 + xi_40 + xi_47;
319 const double xi_58 = -xi_34;
320 const double xi_63 = -xi_27 * xi_60;
321 const double xi_64 = -xi_26 * xi_55 + xi_32 + xi_61 + xi_62 + xi_63;
322 const double xi_65 = xi_30 - xi_59 + xi_64;
323 const double xi_66 = xi_35 + xi_58 + xi_65;
324 const double xi_79 = -xi_30 + xi_59 + xi_64;
325 const double xi_80 = xi_35 + xi_58 + xi_79;
326 const double xi_82 = -xi_27 * xi_55 + xi_33;
327 const double xi_83 = xi_50 - xi_81 + xi_82;
328 const double xi_85 = xi_39 + xi_65 + xi_84;
329 const double xi_87 = u_2 * xi_67;
330 const double xi_89 = u_2 * xi_72;
331 const double xi_90 = xi_86 + xi_87 - xi_88 - xi_89;
332 const double xi_91 = xi_39 + xi_79 + xi_84;
333 const double xi_92 = -xi_86 - xi_87 + xi_88 + xi_89;
334 const double xi_94 = xi_46 + xi_61 + xi_62 + xi_63 + xi_83 + xi_93;
335 const double xi_97 = u_2 * xi_254;
336 const double xi_98 = xi_97 * 0.25;
337 const double xi_100 = xi_71 * xi_97;
338 const double xi_101 = xi_100 - xi_96 - xi_98 + xi_99;
339 const double xi_102 = -xi_100 + xi_96 + xi_98 - xi_99;
340 const double xi_103 = -xi_50 + xi_81 + xi_82;
341 const double xi_104 = xi_103 + xi_46 + xi_61 + xi_62 + xi_63 + xi_93;
342 const double xi_115 = rho * (u_2 * u_2);
343 const double xi_116 = xi_114 + xi_115 * 0.66666666666666663 + xi_247 * 3.0 + xi_264 * 3.0;
// Terms scaled by omega_even / omega_bulk / omega_shear: each multiplies a combination of
// pdf values and rho * u_i * u_j products by the corresponding coefficient.
344 const double xi_117 = omega_even * (xi_112 * 0.66666666666666663 + xi_113 * 1.6666666666666667 + xi_116 + xi_251 * -3.0 + xi_252 * 3.0 + xi_256 * -3.0 + xi_257 * -3.0 + xi_258 * -3.0 + xi_261 * 3.0);
345 const double xi_124 = omega_bulk * (xi_113 + xi_115 + xi_119 + xi_121 + xi_123 + xi_17 + xi_22 + xi_244);
346 const double xi_127 = omega_even * (xi_112 * 2.3333333333333335 + xi_116 + xi_126 + xi_245 * -5.0 + xi_246 * -5.0 + xi_249 * -5.0 + xi_252 * -2.0 + xi_261 * -2.0 + xi_263 * -5.0);
347 const double xi_131 = omega_even * (xi_114 + xi_115 * 3.0 + xi_126 + xi_128 + xi_129 + xi_130 + xi_247 * -4.0 + xi_248 * -7.0 + xi_250 * -7.0 + xi_252 * 5.0 + xi_255 * -7.0 + xi_259 * -7.0 + xi_261 * 5.0 + xi_264 * -4.0);
348 const double xi_156 = -xi_115 + xi_247;
349 const double xi_157 = omega_shear * (xi_121 + xi_156 + xi_16 + xi_2 + xi_246 - xi_261 + xi_6);
350 const double xi_158 = xi_157 * 0.125;
351 const double xi_159 = xi_107 * -0.11904761904761904 + xi_131 * -0.01984126984126984;
352 const double xi_160 = omega_shear * (-xi_112 + xi_113 * 2.0 + xi_120 + xi_123 + xi_125 + xi_156 + xi_252 + xi_260 * -2.0 + xi_261 + xi_262 * -2.0 + xi_264 + xi_9);
353 const double xi_162 = xi_160 * -0.041666666666666664 + xi_161 * -0.16666666666666666;
354 const double xi_163 = xi_108 * -0.10000000000000001 + xi_117 * -0.050000000000000003 + xi_162;
355 const double xi_164 = xi_111 * 0.028571428571428571 + xi_127 * 0.014285714285714285 + xi_155 + xi_158 + xi_159 + xi_163;
356 const double xi_176 = xi_111 * -0.071428571428571425 + xi_127 * -0.035714285714285712 + xi_159 + xi_160 * 0.083333333333333329 + xi_161 * 0.33333333333333331;
357 const double xi_187 = xi_107 * 0.095238095238095233 + xi_111 * -0.042857142857142858 + xi_127 * -0.021428571428571429 + xi_131 * 0.015873015873015872 - xi_155 - xi_158 + xi_163;
358 const double xi_190 = xi_157 * 0.0625;
359 const double xi_191 = xi_131 * 0.013888888888888888;
360 const double xi_193 = xi_110 * 0.083333333333333329 + xi_124 * 0.041666666666666664;
361 const double xi_194 = xi_160 * 0.020833333333333332 + xi_161 * 0.083333333333333329 + xi_193;
362 const double xi_195 = xi_165 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
363 const double xi_202 = xi_151 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
364 const double xi_204 = xi_127 * -0.0071428571428571426;
365 const double xi_205 = xi_198 * (u_2 * xi_197 + xi_132 + xi_17 + xi_258);
366 const double xi_206 = xi_117 * 0.025000000000000001;
367 const double xi_209 = xi_107 * -0.023809523809523808 + xi_131 * -0.003968253968253968;
368 const double xi_210 = xi_162 + xi_193 + xi_203 + xi_204 + xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
369 const double xi_226 = xi_162 + xi_193 - xi_203 + xi_204 - xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
370 const double xi_228 = -xi_190;
371 const double xi_229 = xi_127 * 0.017857142857142856;
372 const double xi_231 = xi_188 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
373 const double xi_233 = xi_198 * (rho * u_0 * u_2 + xi_10 + xi_166 + xi_245);
374 const double xi_234 = -xi_232 - xi_233;
375 const double xi_240 = xi_232 + xi_233;
376 const double xi_243 = xi_186 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
// forceTerm_0..18: one additive contribution per pdf slot, assembled from the
// force-dependent intermediates above (products of force components with u_0..u_2 and omega_*).
377 const double forceTerm_0 = xi_25 * xi_28 - xi_25 + xi_26 * xi_28 - xi_26 + xi_27 * xi_28 - xi_27;
378 const double forceTerm_1 = xi_29 - xi_31 + xi_41;
379 const double forceTerm_2 = -xi_29 + xi_31 + xi_41;
380 const double forceTerm_3 = -xi_42 + xi_44 + xi_48;
381 const double forceTerm_4 = xi_42 - xi_44 + xi_48;
382 const double forceTerm_5 = xi_49 - xi_51 + xi_52;
383 const double forceTerm_6 = -xi_49 + xi_51 + xi_52;
384 const double forceTerm_7 = xi_57 + xi_66 + xi_76;
385 const double forceTerm_8 = xi_66 + xi_77 + xi_78;
386 const double forceTerm_9 = xi_57 + xi_78 + xi_80;
387 const double forceTerm_10 = xi_76 + xi_77 + xi_80;
388 const double forceTerm_11 = xi_83 + xi_85 + xi_90;
389 const double forceTerm_12 = xi_83 + xi_91 + xi_92;
390 const double forceTerm_13 = xi_101 + xi_57 + xi_94;
391 const double forceTerm_14 = xi_102 + xi_77 + xi_94;
392 const double forceTerm_15 = xi_103 + xi_85 + xi_92;
393 const double forceTerm_16 = xi_103 + xi_90 + xi_91;
394 const double forceTerm_17 = xi_102 + xi_104 + xi_57;
395 const double forceTerm_18 = xi_101 + xi_104 + xi_77;
// Write-back: slot q of the cell receives forceTerm_q plus further update terms plus the
// value previously loaded from that same slot (the trailing xi_2xx operand on each line).
396 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2] = forceTerm_0 + xi_107 * 0.14285714285714285 + xi_108 * 0.20000000000000001 - xi_110 + xi_111 * 0.085714285714285715 + xi_117 * 0.10000000000000001 + xi_124 * -0.5 + xi_127 * 0.042857142857142858 + xi_131 * 0.023809523809523808 + xi_244;
397 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3] = forceTerm_1 - xi_135 - xi_140 + xi_151 + xi_164 + xi_261;
398 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] = forceTerm_2 + xi_135 + xi_140 + xi_164 + xi_165 + xi_252;
399 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3] = forceTerm_3 + xi_168 + xi_170 + xi_175 + xi_176 + xi_262;
400 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3] = forceTerm_4 - xi_168 - xi_170 + xi_176 + xi_177 + xi_260;
401 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3] = forceTerm_5 - xi_179 - xi_181 + xi_186 + xi_187 + xi_247;
402 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3] = forceTerm_6 + xi_179 + xi_181 + xi_187 + xi_188 + xi_264;
403 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = forceTerm_7 + xi_177 + xi_195 + xi_200 + xi_255;
404 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = forceTerm_8 + xi_175 + xi_195 + xi_201 + xi_250;
405 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = forceTerm_9 + xi_177 + xi_201 + xi_202 + xi_248;
406 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = forceTerm_10 + xi_175 + xi_200 + xi_202 + xi_259;
407 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = forceTerm_11 + xi_210 + xi_219 + xi_224 + xi_251;
408 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = forceTerm_12 + xi_219 + xi_225 + xi_226 + xi_257;
409 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = forceTerm_13 + xi_231 + xi_234 + xi_239 + xi_246;
410 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = forceTerm_14 + xi_231 + xi_240 + xi_241 + xi_249;
411 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = forceTerm_15 + xi_224 + xi_226 + xi_242 + xi_258;
412 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = forceTerm_16 + xi_210 + xi_225 + xi_242 + xi_256;
413 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = forceTerm_17 + xi_239 + xi_240 + xi_243 + xi_263;
414 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = forceTerm_18 + xi_234 + xi_241 + xi_243 + xi_245;
415 }
416}
417} // namespace internal_collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda
418
420 if (!this->configured_)
421 WALBERLA_ABORT("This Sweep contains a configure function that needs to be called manually")
422
423 auto force = block->getData<gpu::GPUField<double>>(forceID);
424 auto pdfs = block->getData<gpu::GPUField<double>>(pdfsID);
425
426 auto &block_offset_2 = this->block_offset_2_;
427 auto &omega_even = this->omega_even_;
428 auto &time_step = this->time_step_;
429 auto &block_offset_0 = this->block_offset_0_;
430 auto &omega_bulk = this->omega_bulk_;
431 auto &omega_shear = this->omega_shear_;
432 auto &omega_odd = this->omega_odd_;
433 auto &kT = this->kT_;
434 auto &seed = this->seed_;
435 auto &block_offset_1 = this->block_offset_1_;
436 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()))
437 double *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
438 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
439 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
440 double *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
441 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
442 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(force->xSize()) + 0))
443 const int64_t _size_force_0 = int64_t(int64_c(force->xSize()) + 0);
444 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
445 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(force->ySize()) + 0))
446 const int64_t _size_force_1 = int64_t(int64_c(force->ySize()) + 0);
447 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
448 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(force->zSize()) + 0))
449 const int64_t _size_force_2 = int64_t(int64_c(force->zSize()) + 0);
450 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
451 const int64_t _stride_force_0 = int64_t(force->xStride());
452 const int64_t _stride_force_1 = int64_t(force->yStride());
453 const int64_t _stride_force_2 = int64_t(force->zStride());
454 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
455 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
456 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
457 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
458 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
459 dim3 _block(uint32_c(((128 < _size_force_0) ? 128 : _size_force_0)), uint32_c(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))), uint32_c(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))));
460 dim3 _grid(uint32_c(((_size_force_0) % (((128 < _size_force_0) ? 128 : _size_force_0)) == 0 ? (int64_t)(_size_force_0) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)) : ((int64_t)(_size_force_0) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))) + 1)), uint32_c(((_size_force_1) % (((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))) == 0 ? (int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))) : ((int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) + 1)), uint32_c(((_size_force_2) % (((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? 
_size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))) == 0 ? (int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 
128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))) : ((int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))))) + 1)));
461 internal_collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda::collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
462}
463
/**
 * Launches the thermalized collision kernel on the part of a cell interval that lies on one block.
 *
 * \param blocks              block storage used to look up the block's cell bounding box and to
 *                            transform the interval from global to block-local coordinates
 * \param globalCellInterval  cell interval to process, given in global coordinates
 * \param ghostLayers         number of cells by which the block's bounding box is expanded before it
 *                            is intersected with the interval
 * \param block               block whose force and pdf GPU fields are handed to the kernel
 * \param stream              GPU stream on which the kernel is launched
 *
 * Aborts when the sweep has not been configured. Returns without launching anything when the
 * intersection of the interval with the expanded block bounding box is empty.
 */
void CollideSweepDoublePrecisionThermalizedCUDA::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream) {
  if (!this->configured_)
    WALBERLA_ABORT("This Sweep contains a configure function that needs to be called manually")

  // Clip the requested interval to this block (including ghost layers) and make it block-local.
  CellInterval blockBB = blocks->getBlockCellBB(*block);
  blockBB.expand(ghostLayers);
  CellInterval ci = globalCellInterval;
  ci.intersect(blockBB);
  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
  if (ci.empty())
    return;

  auto force = block->getData<gpu::GPUField<double>>(forceID);
  auto pdfs = block->getData<gpu::GPUField<double>>(pdfsID);

  // Base pointer of the force field at the lower corner of the interval.
  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()))
  double *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)

  // Base pointer of the pdf field at the lower corner of the interval.
  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
  double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)

  // Iteration extents: the kernel covers exactly the cells of the clipped interval.
  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
  const int64_t _size_force_0 = int64_c(ci.xSize());
  const int64_t _size_force_1 = int64_c(ci.ySize());
  const int64_t _size_force_2 = int64_c(ci.zSize());

  // Field strides, forwarded to the kernel unchanged.
  const int64_t _stride_force_0 = int64_t(force->xStride());
  const int64_t _stride_force_1 = int64_t(force->yStride());
  const int64_t _stride_force_2 = int64_t(force->zStride());
  const int64_t _stride_force_3 = int64_t(force->fStride());
  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
  const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());

  // Smaller of two values; returns the second one when they are equal.
  const auto lesser = [](const int64_t a, const int64_t b) { return (a < b) ? a : b; };
  // Number of thread blocks needed to cover `cells` with `threads` threads per block (rounds up).
  const auto blocksNeeded = [](const int64_t cells, const int64_t threads) {
    const int64_t whole = cells / threads;
    return (cells % threads == 0) ? whole : whole + 1;
  };

  // Thread-block extents are chosen x first, then y, then z. The y and z extents are derived from
  // the extents already picked, and each one is additionally capped by a fixed per-axis limit.
  const int64_t threadsX = lesser(int64_t(128), _size_force_0);
  const int64_t uncappedY = lesser(_size_force_1, 2 * (int64_t(128) / threadsX));
  const int64_t threadsY = lesser(int64_t(1024), uncappedY);
  const int64_t uncappedZ = lesser(_size_force_2, int64_t(256) / (threadsX * uncappedY));
  const int64_t threadsZ = lesser(int64_t(64), uncappedZ);

  dim3 threadsPerBlock(uint32_c(threadsX), uint32_c(threadsY), uint32_c(threadsZ));
  dim3 blocksPerGrid(uint32_c(blocksNeeded(_size_force_0, threadsX)),
                     uint32_c(blocksNeeded(_size_force_1, threadsY)),
                     uint32_c(blocksNeeded(_size_force_2, threadsZ)));

  // Sweep parameters are read from the members and passed to the kernel by value.
  internal_collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda::collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(
      _data_force, _data_pdfs,
      _size_force_0, _size_force_1, _size_force_2,
      _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3,
      _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3,
      this->block_offset_0_, this->block_offset_1_, this->block_offset_2_,
      this->kT_, this->omega_bulk_, this->omega_even_, this->omega_odd_, this->omega_shear_,
      this->seed_, this->time_step_);
}
520
521} // namespace pystencils
522} // namespace walberla
523
524#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
525#pragma GCC diagnostic pop
526#endif
527
528#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
529#pragma warning pop
530#endif
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
void runOnCellInterval(const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:172
QUALIFIERS void philox_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3, uint32 key0, uint32 key1, double &rnd1, double &rnd2)
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double double double double double omega_shear
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t block_offset_2
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t block_offset_1
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t block_offset_0
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double double double double double uint32_t seed
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_1
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double kT
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_0
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_3
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_2
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_3
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double double double double omega_odd
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_2
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_1
static FUNC_PREFIX __launch_bounds__(256) void collidesweepdoubleprecisionthermalizedcuda_collidesweepdoubleprecisionthermalizedcuda(double *RESTRICT const _data_force
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double double omega_bulk
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t double double double omega_even
\file PackInfoPdfDoublePrecision.cpp \author pystencils