Loading [MathJax]/extensions/TeX/AMSmath.js
ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages Concepts
CollideSweepSinglePrecisionThermalizedCUDA.cu
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \file CollideSweepSinglePrecisionThermalizedCUDA.cu
17//! \author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
21
22#include <cmath>
23
25#include "core/DataTypes.h"
26#include "core/Macros.h"
27
28#include "philox_rand.h"
29
30#define FUNC_PREFIX __global__
31
32#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
33#pragma GCC diagnostic push
34#pragma GCC diagnostic ignored "-Wfloat-equal"
35#pragma GCC diagnostic ignored "-Wshadow"
36#pragma GCC diagnostic ignored "-Wconversion"
37#pragma GCC diagnostic ignored "-Wunused-variable"
38#endif
39
40#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
41#pragma warning push
42#pragma warning(disable : 1599)
43#endif
44
45using namespace std;
46
47namespace walberla {
48namespace pystencils {
49
// Internal namespace holding the generated device kernel for this sweep.
50namespace internal_collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda {
// Single-precision collide kernel with an optional random (thermal) contribution.
//
// Launch layout: one thread per cell of a 3D index space; the thread's
// (x, y, z) global index is used directly as the cell counter and threads
// outside [_size_force_0, _size_force_1, _size_force_2) do nothing.
// __launch_bounds__(256) caps the block at 256 threads. No shared memory and
// no intra-block synchronization are used.
//
// Parameters:
//   _data_force      base pointer of the force field; only read here
//                    (3 values per cell at offsets 0..2 * _stride_force_3).
//   _data_pdfs       base pointer of the pdf field; 19 values per cell
//                    (offsets 0..18 * _stride_pdfs_3) are read and then
//                    overwritten in place.
//   _size_force_*    extents of the iteration space, used for the bounds guard.
//   _stride_force_*, _stride_pdfs_*
//                    element strides used to build the linear indices.
//   block_offset_*   added to the cell counters before they are passed to
//                    philox_float4 (presumably the block's global cell offset
//                    so random streams differ between blocks; confirm at caller).
//   kT               random numbers are only drawn when kT > 0; kT * rho scales
//                    every noise amplitude below.
//   omega_bulk, omega_even, omega_odd, omega_shear
//                    scalar coefficients of the collision (presumably the
//                    relaxation rates of the respective moment groups; confirm
//                    against the lbmpy method definition).
//   seed, time_step  forwarded unchanged to philox_float4.
//
// NOTE(review): philox_float4 comes from philox_rand.h and is not visible here;
// its exact argument semantics and output range are assumed, not verified.
51static FUNC_PREFIX __launch_bounds__(256) void collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda(float *RESTRICT const _data_force, float *RESTRICT _data_pdfs, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, uint32_t block_offset_0, uint32_t block_offset_1, uint32_t block_offset_2, float kT, float omega_bulk, float omega_even, float omega_odd, float omega_shear, uint32_t seed, uint32_t time_step) {
// Bounds guard: the grid may overshoot the field, so excess threads skip all work.
// The index products are formed in 32-bit unsigned arithmetic before being
// compared with / widened to int64_t.
52 if (blockDim.x * blockIdx.x + threadIdx.x < _size_force_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_force_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_force_2) {
53 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
54 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
55 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
// Load all inputs of this cell up front: the 19 pdf values (slots 0..18 along
// _stride_pdfs_3) and the 3 force components (slots 0..2 along _stride_force_3).
// xi_261 / xi_254 / xi_247 are force components 0 / 1 / 2.
56 const float xi_244 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2];
57 const float xi_245 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3];
58 const float xi_246 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3];
59 const float xi_247 = _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
60 const float xi_248 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3];
61 const float xi_249 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3];
62 const float xi_250 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3];
63 const float xi_251 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3];
64 const float xi_252 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3];
65 const float xi_253 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3];
66 const float xi_254 = _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3];
67 const float xi_255 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3];
68 const float xi_256 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3];
69 const float xi_257 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3];
70 const float xi_258 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3];
71 const float xi_259 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3];
72 const float xi_260 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3];
73 const float xi_261 = _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
74 const float xi_262 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3];
75 const float xi_263 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3];
76 const float xi_264 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3];
77 const float xi_265 = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3];
78
// Random numbers: four philox_float4 calls that differ only in the fifth
// argument (3, 2, 1, 0), each filling four floats. They are keyed on time_step,
// the offset cell counters and seed. When kT is not positive the calls are
// skipped and all sixteen values keep their zero initialization.
// NOTE(review): `0.` is a double literal, so kT is promoted for the comparison;
// the branch condition is the same for every thread since kT is a kernel argument.
79 float random_3_0{};
80 float random_3_1{};
81 float random_3_2{};
82 float random_3_3{};
83 if (kT > 0.) {
84 philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 3, seed, random_3_0, random_3_1, random_3_2, random_3_3);
85 }
86
87 float random_2_0{};
88 float random_2_1{};
89 float random_2_2{};
90 float random_2_3{};
91 if (kT > 0.) {
92 philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 2, seed, random_2_0, random_2_1, random_2_2, random_2_3);
93 }
94
95 float random_1_0{};
96 float random_1_1{};
97 float random_1_2{};
98 float random_1_3{};
99 if (kT > 0.) {
100 philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 1, seed, random_1_0, random_1_1, random_1_2, random_1_3);
101 }
102
103 float random_0_0{};
104 float random_0_1{};
105 float random_0_2{};
106 float random_0_3{};
107 if (kT > 0.) {
108 philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 0, seed, random_0_0, random_0_1, random_0_2, random_0_3);
109 }
// Generated common subexpressions that depend only on the loaded values, the
// omega_* coefficients and the random numbers: partial sums and negations of
// pdfs, force components scaled by constants, and (random - 0.5f) terms.
// (Presumably philox_float4 yields uniform values in [0, 1), making these
// zero-centred; confirm in philox_rand.h.)
110 const float xi_2 = xi_249 + xi_252;
111 const float xi_3 = xi_2 + xi_257;
112 const float xi_4 = xi_245 + xi_246 + xi_259;
113 const float xi_5 = xi_248 + xi_256;
114 const float xi_6 = xi_260 + xi_263;
115 const float xi_8 = -xi_251;
116 const float xi_9 = -xi_265;
117 const float xi_10 = -xi_260;
118 const float xi_11 = -xi_258;
119 const float xi_12 = -xi_264;
120 const float xi_13 = xi_10 + xi_11 + xi_12;
121 const float xi_14 = -xi_250;
122 const float xi_15 = -xi_262;
123 const float xi_16 = xi_14 + xi_15;
124 const float xi_17 = -xi_255;
125 const float xi_18 = -xi_256;
126 const float xi_19 = xi_17 + xi_18;
127 const float xi_20 = -xi_249;
128 const float xi_21 = xi_10 + xi_20;
129 const float xi_22 = -xi_245;
130 const float xi_23 = -xi_263;
131 const float xi_24 = xi_17 + xi_22 + xi_23 + xi_246;
132 const float xi_28 = omega_bulk * 0.5f;
133 const float xi_29 = xi_254 * 0.16666666666666666f;
134 const float xi_30 = xi_254 * 0.083333333333333329f;
135 const float xi_42 = xi_261 * 0.16666666666666666f;
136 const float xi_43 = xi_261 * 0.083333333333333329f;
137 const float xi_49 = xi_247 * 0.16666666666666666f;
138 const float xi_50 = xi_247 * 0.083333333333333329f;
139 const float xi_55 = omega_shear * 0.041666666666666664f;
140 const float xi_60 = omega_bulk * 0.041666666666666664f;
141 const float xi_67 = xi_254 * 0.25f;
142 const float xi_71 = omega_shear * 0.125f;
143 const float xi_72 = xi_254 * xi_71;
144 const float xi_109 = 2.4494897427831779f;
145 const float xi_114 = -xi_244;
146 const float xi_118 = -xi_246;
147 const float xi_119 = xi_118 + xi_18;
148 const float xi_120 = -xi_253 + xi_8;
149 const float xi_122 = -xi_252;
150 const float xi_123 = xi_11 + xi_122 + xi_15 + xi_21;
151 const float xi_125 = xi_245 * 2.0f + xi_246 * 2.0f + xi_255 * 2.0f + xi_256 * 2.0f;
152 const float xi_126 = xi_125 + xi_257 * 5.0f + xi_264 * 5.0f;
153 const float xi_128 = xi_258 * 2.0f;
154 const float xi_129 = xi_252 * 2.0f;
155 const float xi_130 = xi_249 * 2.0f + xi_260 * 2.0f;
156 const float xi_132 = xi_118 + xi_256;
157 const float xi_133 = xi_132 + xi_14 + xi_22 + xi_255 + xi_259;
158 const float xi_134 = omega_odd * 0.25f;
159 const float xi_135 = xi_133 * xi_134;
160 const float xi_136 = random_2_3 - 0.5f;
161 const float xi_141 = xi_265 * 2.0f;
162 const float xi_142 = xi_262 * 2.0f;
163 const float xi_143 = xi_251 * 2.0f + xi_253 * -2.0f;
164 const float xi_144 = xi_14 - xi_141 + xi_142 + xi_143 + xi_19 + xi_4;
165 const float xi_145 = omega_odd * 0.083333333333333329f;
166 const float xi_146 = xi_144 * xi_145;
167 const float xi_147 = random_1_2 - 0.5f;
168 const float xi_152 = random_0_1 - 0.5f;
169 const float xi_166 = xi_122 + xi_258;
170 const float xi_167 = xi_12 + xi_166 + xi_20 + xi_257 + xi_260;
171 const float xi_168 = xi_134 * xi_167;
172 const float xi_169 = random_2_1 - 0.5f;
173 const float xi_171 = xi_13 + xi_141 - xi_142 + xi_143 + xi_3;
174 const float xi_172 = xi_145 * xi_171;
175 const float xi_173 = random_2_0 - 0.5f;
176 const float xi_178 = xi_119 + xi_23 + xi_245 + xi_248 + xi_255;
177 const float xi_179 = xi_134 * xi_178;
178 const float xi_180 = random_2_2 - 0.5f;
179 const float xi_182 = -xi_128 - xi_129 + xi_130 + xi_24 + xi_5;
180 const float xi_183 = xi_145 * xi_182;
181 const float xi_184 = random_1_3 - 0.5f;
182 const float xi_198 = omega_shear * 0.25f;
183 const float xi_211 = omega_odd * 0.041666666666666664f;
184 const float xi_212 = xi_182 * xi_211;
185 const float xi_213 = omega_odd * 0.125f;
186 const float xi_214 = xi_178 * xi_213;
187 const float xi_220 = xi_144 * xi_211;
188 const float xi_221 = xi_133 * xi_213;
189 const float xi_235 = xi_167 * xi_213;
190 const float xi_236 = xi_171 * xi_211;
// rr_0 is the constant 0.0f in this generated variant, so the products below
// (xi_31, xi_44, xi_51, xi_53, xi_54, xi_59, xi_81) evaluate to zero for finite inputs.
191 const float rr_0 = 0.0f;
192 const float xi_31 = rr_0 * xi_30;
193 const float xi_44 = rr_0 * xi_43;
194 const float xi_51 = rr_0 * xi_50;
195 const float xi_53 = rr_0 * 0.041666666666666664f;
196 const float xi_54 = xi_261 * xi_53;
197 const float xi_59 = xi_254 * xi_53;
198 const float xi_81 = xi_247 * xi_53;
// rho is the sum of all 19 loaded pdf values (split across the three velXTerm
// partial sums and the remaining addends).
199 const float vel0Term = xi_253 + xi_262 + xi_3;
200 const float vel1Term = xi_265 + xi_4;
201 const float vel2Term = xi_258 + xi_5;
202 const float rho = vel0Term + vel1Term + vel2Term + xi_244 + xi_250 + xi_251 + xi_255 + xi_264 + xi_6;
// Noise amplitudes: each is powf(kT * rho * (1 - (1 - omega)^2), 0.5f), i.e. a
// square root written as a power, for omega_even (xi_106), omega_bulk (inside
// xi_110), omega_odd (xi_137) and omega_shear (xi_153). They are multiplied by
// (random - 0.5f) and fixed numeric factors.
// NOTE(review): a negative radicand (e.g. omega outside [0, 2] or negative rho)
// would presumably produce NaN here; nothing in this kernel guards against it.
203 const float xi_105 = kT * rho;
204 const float xi_106 = powf(xi_105 * (1.0f - (-omega_even + 1.0f) * (-omega_even + 1.0f)), 0.5f);
205 const float xi_107 = xi_106 * (random_3_0 - 0.5f) * 3.7416573867739413f;
206 const float xi_108 = xi_106 * (random_3_2 - 0.5f) * 5.4772255750516612f;
207 const float xi_110 = xi_109 * (random_1_1 - 0.5f) * powf(xi_105 * (1.0f - (-omega_bulk + 1.0f) * (-omega_bulk + 1.0f)), 0.5f);
208 const float xi_111 = xi_106 * (random_3_1 - 0.5f) * 8.3666002653407556f;
209 const float xi_137 = powf(xi_105 * (1.0f - (-omega_odd + 1.0f) * (-omega_odd + 1.0f)), 0.5f);
210 const float xi_138 = xi_137 * 1.4142135623730951f;
211 const float xi_139 = xi_138 * 0.5f;
212 const float xi_140 = xi_136 * xi_139;
213 const float xi_148 = xi_109 * xi_137;
214 const float xi_149 = xi_148 * 0.16666666666666666f;
215 const float xi_150 = xi_147 * xi_149;
216 const float xi_151 = -xi_146 - xi_150;
217 const float xi_153 = powf(xi_105 * (1.0f - (-omega_shear + 1.0f) * (-omega_shear + 1.0f)), 0.5f);
218 const float xi_154 = xi_153 * 0.5f;
219 const float xi_155 = xi_152 * xi_154;
220 const float xi_161 = xi_153 * (random_0_0 - 0.5f) * 1.7320508075688772f;
221 const float xi_165 = xi_146 + xi_150;
222 const float xi_170 = xi_139 * xi_169;
223 const float xi_174 = xi_149 * xi_173;
224 const float xi_175 = xi_172 + xi_174;
225 const float xi_177 = -xi_172 - xi_174;
226 const float xi_181 = xi_139 * xi_180;
227 const float xi_185 = xi_149 * xi_184;
228 const float xi_186 = -xi_183 - xi_185;
229 const float xi_188 = xi_183 + xi_185;
230 const float xi_189 = xi_152 * xi_153 * 0.25f;
231 const float xi_192 = xi_107 * 0.083333333333333329f;
232 const float xi_196 = xi_154 * (random_0_2 - 0.5f);
233 const float xi_203 = xi_154 * (random_1_0 - 0.5f);
234 const float xi_207 = xi_111 * -0.014285714285714285f;
235 const float xi_208 = xi_108 * 0.050000000000000003f;
236 const float xi_215 = xi_148 * 0.083333333333333329f;
237 const float xi_216 = xi_184 * xi_215;
238 const float xi_217 = xi_138 * 0.25f;
239 const float xi_218 = xi_180 * xi_217;
240 const float xi_219 = -xi_212 + xi_214 - xi_216 + xi_218;
241 const float xi_222 = xi_147 * xi_215;
242 const float xi_223 = xi_136 * xi_217;
243 const float xi_224 = -xi_220 + xi_221 - xi_222 + xi_223;
244 const float xi_225 = xi_220 - xi_221 + xi_222 - xi_223;
245 const float xi_227 = -xi_189;
246 const float xi_230 = xi_111 * 0.035714285714285712f;
247 const float xi_232 = xi_154 * (random_0_3 - 0.5f);
248 const float xi_237 = xi_169 * xi_217;
249 const float xi_238 = xi_173 * xi_215;
250 const float xi_239 = -xi_235 + xi_236 - xi_237 + xi_238;
251 const float xi_241 = xi_235 - xi_236 + xi_237 - xi_238;
252 const float xi_242 = xi_212 - xi_214 + xi_216 - xi_218;
// Velocity components: u_i = (signed pdf sum) / rho + force_i * 0.5 / rho,
// using xi_0 = 1/rho and xi_7 = 0.5/rho.
// NOTE(review): there is no guard for rho == 0; such a cell would yield
// inf/NaN that is then written back.
253 const float xi_0 = ((1.0f) / (rho));
254 const float xi_7 = xi_0 * 0.5f;
255 const float u_0 = xi_0 * (vel0Term + xi_13 + xi_8 + xi_9) + xi_261 * xi_7;
256 const float xi_25 = u_0 * xi_261;
257 const float xi_37 = xi_25 * 0.16666666666666666f;
258 const float xi_38 = xi_25 * 0.083333333333333329f;
259 const float xi_39 = omega_shear * xi_38;
260 const float xi_40 = -xi_37 + xi_39;
261 const float xi_56 = -xi_25 * xi_55 + xi_37;
262 const float xi_57 = -xi_43 + xi_54 + xi_56;
263 const float xi_61 = -xi_25 * xi_60;
264 const float xi_68 = u_0 * xi_67;
265 const float xi_73 = u_0 * xi_72;
266 const float xi_77 = xi_43 - xi_54 + xi_56;
267 const float xi_84 = -xi_38;
268 const float xi_95 = u_0 * xi_247;
269 const float xi_96 = xi_95 * 0.25f;
270 const float xi_99 = xi_71 * xi_95;
271 const float xi_113 = rho * (u_0 * u_0);
272 const float u_1 = xi_0 * (vel1Term + xi_16 + xi_19 + xi_253 + xi_8) + xi_254 * xi_7;
273 const float xi_26 = u_1 * xi_254;
274 const float xi_32 = xi_26 * 0.16666666666666666f;
275 const float xi_45 = xi_26 * 0.083333333333333329f;
276 const float xi_46 = omega_shear * xi_45;
277 const float xi_47 = -xi_32 + xi_46;
278 const float xi_62 = -xi_26 * xi_60;
279 const float xi_69 = u_1 * 0.25f;
280 const float xi_70 = xi_261 * xi_69;
281 const float xi_74 = u_1 * xi_71;
282 const float xi_75 = xi_261 * xi_74;
283 const float xi_76 = -xi_68 - xi_70 + xi_73 + xi_75;
284 const float xi_78 = xi_68 + xi_70 - xi_73 - xi_75;
285 const float xi_86 = xi_247 * xi_69;
286 const float xi_88 = xi_247 * xi_74;
287 const float xi_93 = -xi_45;
288 const float xi_112 = rho * (u_1 * u_1);
289 const float xi_121 = xi_112 + xi_120 + xi_9;
290 const float xi_197 = rho * u_1;
291 const float xi_199 = xi_198 * (u_0 * xi_197 + xi_120 + xi_262 + xi_265);
292 const float xi_200 = -xi_196 - xi_199;
293 const float xi_201 = xi_196 + xi_199;
294 const float u_2 = xi_0 * (vel2Term + xi_21 + xi_24 + xi_252) + xi_247 * xi_7;
295 const float xi_27 = u_2 * xi_247;
296 const float xi_33 = xi_27 * 0.16666666666666666f;
297 const float xi_34 = xi_27 * 0.083333333333333329f;
298 const float xi_35 = omega_shear * xi_34;
299 const float xi_36 = -xi_33 + xi_35;
300 const float xi_41 = -omega_shear * xi_32 + xi_26 * 0.33333333333333331f + xi_36 + xi_40;
301 const float xi_48 = -omega_shear * xi_37 + xi_25 * 0.33333333333333331f + xi_36 + xi_47;
302 const float xi_52 = -omega_shear * xi_33 + xi_27 * 0.33333333333333331f + xi_40 + xi_47;
303 const float xi_58 = -xi_34;
304 const float xi_63 = -xi_27 * xi_60;
305 const float xi_64 = -xi_26 * xi_55 + xi_32 + xi_61 + xi_62 + xi_63;
306 const float xi_65 = xi_30 - xi_59 + xi_64;
307 const float xi_66 = xi_35 + xi_58 + xi_65;
308 const float xi_79 = -xi_30 + xi_59 + xi_64;
309 const float xi_80 = xi_35 + xi_58 + xi_79;
310 const float xi_82 = -xi_27 * xi_55 + xi_33;
311 const float xi_83 = xi_50 - xi_81 + xi_82;
312 const float xi_85 = xi_39 + xi_65 + xi_84;
313 const float xi_87 = u_2 * xi_67;
314 const float xi_89 = u_2 * xi_72;
315 const float xi_90 = xi_86 + xi_87 - xi_88 - xi_89;
316 const float xi_91 = xi_39 + xi_79 + xi_84;
317 const float xi_92 = -xi_86 - xi_87 + xi_88 + xi_89;
318 const float xi_94 = xi_46 + xi_61 + xi_62 + xi_63 + xi_83 + xi_93;
319 const float xi_97 = u_2 * xi_261;
320 const float xi_98 = xi_97 * 0.25f;
321 const float xi_100 = xi_71 * xi_97;
322 const float xi_101 = xi_100 - xi_96 - xi_98 + xi_99;
323 const float xi_102 = -xi_100 + xi_96 + xi_98 - xi_99;
324 const float xi_103 = -xi_50 + xi_81 + xi_82;
325 const float xi_104 = xi_103 + xi_46 + xi_61 + xi_62 + xi_63 + xi_93;
// Terms scaled by omega_even / omega_bulk / omega_shear, built from the pdfs and
// rho * u_i * u_j products, then combined with the noise terms into the
// per-slot contributions used in the final stores.
326 const float xi_115 = rho * (u_2 * u_2);
327 const float xi_116 = xi_114 + xi_115 * 0.66666666666666663f + xi_248 * 3.0f + xi_263 * 3.0f;
328 const float xi_117 = omega_even * (xi_112 * 0.66666666666666663f + xi_113 * 1.6666666666666667f + xi_116 + xi_245 * -3.0f + xi_246 * -3.0f + xi_250 * 3.0f + xi_255 * -3.0f + xi_256 * -3.0f + xi_259 * 3.0f);
329 const float xi_124 = omega_bulk * (xi_113 + xi_115 + xi_119 + xi_121 + xi_123 + xi_17 + xi_22 + xi_244);
330 const float xi_127 = omega_even * (xi_112 * 2.3333333333333335f + xi_116 + xi_126 + xi_249 * -5.0f + xi_250 * -2.0f + xi_252 * -5.0f + xi_258 * -5.0f + xi_259 * -2.0f + xi_260 * -5.0f);
331 const float xi_131 = omega_even * (xi_114 + xi_115 * 3.0f + xi_126 + xi_128 + xi_129 + xi_130 + xi_248 * -4.0f + xi_250 * 5.0f + xi_251 * -7.0f + xi_253 * -7.0f + xi_259 * 5.0f + xi_262 * -7.0f + xi_263 * -4.0f + xi_265 * -7.0f);
332 const float xi_156 = -xi_115 + xi_248;
333 const float xi_157 = omega_shear * (xi_121 + xi_156 + xi_16 + xi_2 + xi_258 - xi_259 + xi_6);
334 const float xi_158 = xi_157 * 0.125f;
335 const float xi_159 = xi_107 * -0.11904761904761904f + xi_131 * -0.01984126984126984f;
336 const float xi_160 = omega_shear * (-xi_112 + xi_113 * 2.0f + xi_120 + xi_123 + xi_125 + xi_156 + xi_250 + xi_257 * -2.0f + xi_259 + xi_263 + xi_264 * -2.0f + xi_9);
337 const float xi_162 = xi_160 * -0.041666666666666664f + xi_161 * -0.16666666666666666f;
338 const float xi_163 = xi_108 * -0.10000000000000001f + xi_117 * -0.050000000000000003f + xi_162;
339 const float xi_164 = xi_111 * 0.028571428571428571f + xi_127 * 0.014285714285714285f + xi_155 + xi_158 + xi_159 + xi_163;
340 const float xi_176 = xi_111 * -0.071428571428571425f + xi_127 * -0.035714285714285712f + xi_159 + xi_160 * 0.083333333333333329f + xi_161 * 0.33333333333333331f;
341 const float xi_187 = xi_107 * 0.095238095238095233f + xi_111 * -0.042857142857142858f + xi_127 * -0.021428571428571429f + xi_131 * 0.015873015873015872f - xi_155 - xi_158 + xi_163;
342 const float xi_190 = xi_157 * 0.0625f;
343 const float xi_191 = xi_131 * 0.013888888888888888f;
344 const float xi_193 = xi_110 * 0.083333333333333329f + xi_124 * 0.041666666666666664f;
345 const float xi_194 = xi_160 * 0.020833333333333332f + xi_161 * 0.083333333333333329f + xi_193;
346 const float xi_195 = xi_165 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
347 const float xi_202 = xi_151 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
348 const float xi_204 = xi_127 * -0.0071428571428571426f;
349 const float xi_205 = xi_198 * (u_2 * xi_197 + xi_132 + xi_17 + xi_245);
350 const float xi_206 = xi_117 * 0.025000000000000001f;
351 const float xi_209 = xi_107 * -0.023809523809523808f + xi_131 * -0.003968253968253968f;
352 const float xi_210 = xi_162 + xi_193 + xi_203 + xi_204 + xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
353 const float xi_226 = xi_162 + xi_193 - xi_203 + xi_204 - xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
354 const float xi_228 = -xi_190;
355 const float xi_229 = xi_127 * 0.017857142857142856f;
356 const float xi_231 = xi_188 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
357 const float xi_233 = xi_198 * (rho * u_0 * u_2 + xi_10 + xi_166 + xi_249);
358 const float xi_234 = -xi_232 - xi_233;
359 const float xi_240 = xi_232 + xi_233;
360 const float xi_243 = xi_186 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
// Per-slot force contributions, assembled from the u_i * force_j products above.
361 const float forceTerm_0 = xi_25 * xi_28 - xi_25 + xi_26 * xi_28 - xi_26 + xi_27 * xi_28 - xi_27;
362 const float forceTerm_1 = xi_29 - xi_31 + xi_41;
363 const float forceTerm_2 = -xi_29 + xi_31 + xi_41;
364 const float forceTerm_3 = -xi_42 + xi_44 + xi_48;
365 const float forceTerm_4 = xi_42 - xi_44 + xi_48;
366 const float forceTerm_5 = xi_49 - xi_51 + xi_52;
367 const float forceTerm_6 = -xi_49 + xi_51 + xi_52;
368 const float forceTerm_7 = xi_57 + xi_66 + xi_76;
369 const float forceTerm_8 = xi_66 + xi_77 + xi_78;
370 const float forceTerm_9 = xi_57 + xi_78 + xi_80;
371 const float forceTerm_10 = xi_76 + xi_77 + xi_80;
372 const float forceTerm_11 = xi_83 + xi_85 + xi_90;
373 const float forceTerm_12 = xi_83 + xi_91 + xi_92;
374 const float forceTerm_13 = xi_101 + xi_57 + xi_94;
375 const float forceTerm_14 = xi_102 + xi_77 + xi_94;
376 const float forceTerm_15 = xi_103 + xi_85 + xi_92;
377 const float forceTerm_16 = xi_103 + xi_90 + xi_91;
378 const float forceTerm_17 = xi_102 + xi_104 + xi_57;
379 const float forceTerm_18 = xi_101 + xi_104 + xi_77;
// In-place write-back: slot i of this cell receives forceTerm_i plus the
// collision/noise contributions plus the value previously loaded from that same
// slot (the trailing xi_2xx addend). All loads happened above, and each thread
// only touches its own cell's slots.
380 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2] = forceTerm_0 + xi_107 * 0.14285714285714285f + xi_108 * 0.20000000000000001f - xi_110 + xi_111 * 0.085714285714285715f + xi_117 * 0.10000000000000001f + xi_124 * -0.5f + xi_127 * 0.042857142857142858f + xi_131 * 0.023809523809523808f + xi_244;
381 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3] = forceTerm_1 - xi_135 - xi_140 + xi_151 + xi_164 + xi_259;
382 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] = forceTerm_2 + xi_135 + xi_140 + xi_164 + xi_165 + xi_250;
383 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3] = forceTerm_3 + xi_168 + xi_170 + xi_175 + xi_176 + xi_264;
384 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3] = forceTerm_4 - xi_168 - xi_170 + xi_176 + xi_177 + xi_257;
385 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3] = forceTerm_5 - xi_179 - xi_181 + xi_186 + xi_187 + xi_248;
386 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3] = forceTerm_6 + xi_179 + xi_181 + xi_187 + xi_188 + xi_263;
387 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = forceTerm_7 + xi_177 + xi_195 + xi_200 + xi_265;
388 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = forceTerm_8 + xi_175 + xi_195 + xi_201 + xi_253;
389 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = forceTerm_9 + xi_177 + xi_201 + xi_202 + xi_251;
390 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = forceTerm_10 + xi_175 + xi_200 + xi_202 + xi_262;
391 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = forceTerm_11 + xi_210 + xi_219 + xi_224 + xi_246;
392 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = forceTerm_12 + xi_219 + xi_225 + xi_226 + xi_256;
393 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = forceTerm_13 + xi_231 + xi_234 + xi_239 + xi_258;
394 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = forceTerm_14 + xi_231 + xi_240 + xi_241 + xi_252;
395 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = forceTerm_15 + xi_224 + xi_226 + xi_242 + xi_245;
396 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = forceTerm_16 + xi_210 + xi_225 + xi_242 + xi_255;
397 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = forceTerm_17 + xi_239 + xi_240 + xi_243 + xi_260;
398 _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = forceTerm_18 + xi_234 + xi_241 + xi_243 + xi_249;
399 }
400}
401} // namespace internal_collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda
402
404 if (!this->configured_)
405 WALBERLA_ABORT("This Sweep contains a configure function that needs to be called manually")
406
407 auto pdfs = block->getData<gpu::GPUField<float>>(pdfsID);
408 auto force = block->getData<gpu::GPUField<float>>(forceID);
409
410 auto &kT = this->kT_;
411 auto &block_offset_1 = this->block_offset_1_;
412 auto &omega_bulk = this->omega_bulk_;
413 auto &omega_odd = this->omega_odd_;
414 auto &omega_even = this->omega_even_;
415 auto &block_offset_0 = this->block_offset_0_;
416 auto &block_offset_2 = this->block_offset_2_;
417 auto &omega_shear = this->omega_shear_;
418 auto &seed = this->seed_;
419 auto &time_step = this->time_step_;
420 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()))
421 float *RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
422 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
423 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
424 float *RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
425 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
426 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(force->xSize()) + 0))
427 const int64_t _size_force_0 = int64_t(int64_c(force->xSize()) + 0);
428 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
429 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(force->ySize()) + 0))
430 const int64_t _size_force_1 = int64_t(int64_c(force->ySize()) + 0);
431 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
432 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(force->zSize()) + 0))
433 const int64_t _size_force_2 = int64_t(int64_c(force->zSize()) + 0);
434 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
435 const int64_t _stride_force_0 = int64_t(force->xStride());
436 const int64_t _stride_force_1 = int64_t(force->yStride());
437 const int64_t _stride_force_2 = int64_t(force->zStride());
438 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
439 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
440 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
441 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
442 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
443 dim3 _block(uint32_c(((128 < _size_force_0) ? 128 : _size_force_0)), uint32_c(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))), uint32_c(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))));
444 dim3 _grid(uint32_c(((_size_force_0) % (((128 < _size_force_0) ? 128 : _size_force_0)) == 0 ? (int64_t)(_size_force_0) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)) : ((int64_t)(_size_force_0) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))) + 1)), uint32_c(((_size_force_1) % (((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))) == 0 ? (int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))) : ((int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) + 1)), uint32_c(((_size_force_2) % (((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? 
_size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))) == 0 ? (int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 
128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))) : ((int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))))) + 1)));
445 internal_collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda::collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
446}
447
// Runs the thermalized single-precision collision kernel on the part of
// `globalCellInterval` that overlaps `block`.
//
// blocks             - block storage used to look up the cell bounding box of
//                      `block` and to convert global to block-local cells.
// globalCellInterval - requested cell range, in global cell coordinates.
// ghostLayers        - amount by which the block's cell bounding box is
//                      expanded before it is intersected with the request.
// block              - block whose `pdfsID` / `forceID` GPU fields are used.
// stream             - GPU stream the kernel is launched on.
//
// Aborts if the sweep has not been configured. Returns without launching
// anything when the clipped interval is empty.
void CollideSweepSinglePrecisionThermalizedCUDA::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream) {
  if (!this->configured_) {
    WALBERLA_ABORT("This Sweep contains a configure function that needs to be called manually")
  }

  // Clip the request to this block (ghost layers included) and switch to
  // block-local cell coordinates.
  CellInterval blockBB = blocks->getBlockCellBB(*block);
  blockBB.expand(ghostLayers);
  CellInterval ci = globalCellInterval;
  ci.intersect(blockBB);
  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
  if (ci.empty()) {
    return;
  }

  auto pdfs = block->getData<gpu::GPUField<float>>(pdfsID);
  auto force = block->getData<gpu::GPUField<float>>(forceID);

  // Force field: the interval must start inside the allocated ghost region and
  // the field must use the fzyx layout.
  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()))
  float *RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
  WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)

  // Same checks for the PDF field.
  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
  float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
  WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)

  // Iteration extents are those of the clipped interval; they must fit into
  // the force field including its ghost layers.
  const int64_t cellsX = int64_c(ci.xSize());
  const int64_t cellsY = int64_c(ci.ySize());
  const int64_t cellsZ = int64_c(ci.zSize());
  WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), cellsX)
  WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), cellsY)
  WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), cellsZ)

  const int64_t forceStrideX = int64_t(force->xStride());
  const int64_t forceStrideY = int64_t(force->yStride());
  const int64_t forceStrideZ = int64_t(force->zStride());
  const int64_t forceStrideF = int64_t(force->fStride());
  const int64_t pdfsStrideX = int64_t(pdfs->xStride());
  const int64_t pdfsStrideY = int64_t(pdfs->yStride());
  const int64_t pdfsStrideZ = int64_t(pdfs->zStride());
  const int64_t pdfsStrideF = int64_t(pdfs->fStride());

  // Thread-block shape. x takes up to 128 threads; y and z are then limited so
  // that the product x * y * z never exceeds 256.
  const int64_t threadsX = (128 < cellsX) ? 128 : cellsX;
  const int64_t yLimit = 2 * (int64_t(128) / threadsX);
  const int64_t yFit = (cellsY < yLimit) ? cellsY : yLimit;
  const int64_t threadsY = (1024 < yFit) ? 1024 : yFit;
  // The z limit is derived from the y extent before its 1024 clamp.
  const int64_t zLimit = int64_t(256) / (threadsX * yFit);
  const int64_t zFit = (cellsZ < zLimit) ? cellsZ : zLimit;
  const int64_t threadsZ = (64 < zFit) ? 64 : zFit;

  // Number of thread blocks per axis: cells / threads, rounded up.
  const auto blocksAlong = [](int64_t cells, int64_t threads) -> int64_t {
    const int64_t whole = cells / threads;
    return (cells % threads == 0) ? whole : whole + 1;
  };

  dim3 _block(uint32_c(threadsX), uint32_c(threadsY), uint32_c(threadsZ));
  dim3 _grid(uint32_c(blocksAlong(cellsX, threadsX)), uint32_c(blocksAlong(cellsY, threadsY)), uint32_c(blocksAlong(cellsZ, threadsZ)));

  // Arguments are positional: data pointers, sizes, force strides, pdf strides,
  // block offsets, kT, relaxation rates (bulk, even, odd, shear), seed, time step.
  internal_collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda::collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda<<<_grid, _block, 0, stream>>>(
      _data_force, _data_pdfs,
      cellsX, cellsY, cellsZ,
      forceStrideX, forceStrideY, forceStrideZ, forceStrideF,
      pdfsStrideX, pdfsStrideY, pdfsStrideZ, pdfsStrideF,
      this->block_offset_0_, this->block_offset_1_, this->block_offset_2_,
      this->kT_, this->omega_bulk_, this->omega_even_, this->omega_odd_, this->omega_shear_,
      this->seed_, this->time_step_);
}
504
505} // namespace pystencils
506} // namespace walberla
507
508#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
509#pragma GCC diagnostic pop
510#endif
511
512#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
513#pragma warning pop
514#endif
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
void runOnCellInterval(const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:172
QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3, uint32 key0, uint32 key1, float &rnd1, float &rnd2, float &rnd3, float &rnd4)
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float kT
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_0
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_3
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t block_offset_0
static FUNC_PREFIX __launch_bounds__(256) void collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda(float *RESTRICT const _data_force
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float float float float float uint32_t seed
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_1
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_1
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float float omega_bulk
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t block_offset_1
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float float float omega_even
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_2
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float float float float omega_odd
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float float float float float omega_shear
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_2
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_3
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t block_offset_2
\file PackInfoPdfDoublePrecision.cpp \author pystencils