// Internal namespace that holds the device kernel of this sweep.
50namespace internal_collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda {
// Device kernel, declared with __launch_bounds__(256).
//
// Parameters, as far as the visible lines show their use:
//   _data_force        float pointer (the pointer itself is const, the pointee
//                      is not); no access through it is visible in this excerpt.
//   _data_pdfs         float pointer; results are stored through it using the
//                      _stride_pdfs_0.._2 strides.
//   _size_force_0.._2  64-bit extents; no use is visible in this excerpt.
//   _stride_force_*    64-bit strides for the force data; no use is visible here.
//   _stride_pdfs_*     64-bit strides for the pdfs data.
//   block_offset_0.._2, seed, time_step
//                      32-bit unsigned values; no use is visible in this excerpt
//                      (presumably inputs to the random number generation —
//                      TODO confirm).
//   kT                 multiplied with the density `rho` to scale the noise terms.
//   omega_bulk, omega_even, omega_odd, omega_shear
//                      factors applied to the relaxed combinations further down.
//
// NOTE(review): the line numbers embedded in this excerpt jump from 51 to 53 and
// from 55 to 110, so a bounds guard on ctr_0..ctr_2 and the loads that define
// xi_244..xi_265 may sit on lines that are not shown — confirm in the full file.
51static FUNC_PREFIX __launch_bounds__(256) void collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda(
float *
RESTRICT const _data_force,
float *
RESTRICT _data_pdfs, int64_t const
_size_force_0, int64_t const
_size_force_1, int64_t const
_size_force_2, int64_t const
_stride_force_0, int64_t const
_stride_force_1, int64_t const
_stride_force_2, int64_t const
_stride_force_3, int64_t const
_stride_pdfs_0, int64_t const
_stride_pdfs_1, int64_t const
_stride_pdfs_2, int64_t const
_stride_pdfs_3, uint32_t
block_offset_0, uint32_t
block_offset_1, uint32_t
block_offset_2,
float kT,
float omega_bulk,
float omega_even,
float omega_odd,
float omega_shear, uint32_t
seed, uint32_t time_step) {
// Per-thread indices along the x, y and z launch dimensions. They are later
// multiplied with the pdfs strides to form the store address.
// The product blockDim.* * blockIdx.* is formed from the built-in unsigned
// 32-bit members before the result is widened to int64_t.
53 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
54 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
55 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
// Precomputed sums, negations and constant multiples of the inputs
// xi_244..xi_265, reused by the expressions further down.
// NOTE(review): xi_244..xi_265, xi_71, xi_134, xi_213 and all random_*_* values
// are defined on lines that are not part of this excerpt (the embedded line
// numbers skip 56-109, 132, 142, 158, 182 and 185) — confirm their meaning there.
110 const float xi_2 = xi_249 + xi_252;
111 const float xi_3 = xi_2 + xi_257;
112 const float xi_4 = xi_245 + xi_246 + xi_259;
113 const float xi_5 = xi_248 + xi_256;
114 const float xi_6 = xi_260 + xi_263;
// Sign-flipped copies and their partial sums.
115 const float xi_8 = -xi_251;
116 const float xi_9 = -xi_265;
117 const float xi_10 = -xi_260;
118 const float xi_11 = -xi_258;
119 const float xi_12 = -xi_264;
120 const float xi_13 = xi_10 + xi_11 + xi_12;
121 const float xi_14 = -xi_250;
122 const float xi_15 = -xi_262;
123 const float xi_16 = xi_14 + xi_15;
124 const float xi_17 = -xi_255;
125 const float xi_18 = -xi_256;
126 const float xi_19 = xi_17 + xi_18;
127 const float xi_20 = -xi_249;
128 const float xi_21 = xi_10 + xi_20;
129 const float xi_22 = -xi_245;
130 const float xi_23 = -xi_263;
131 const float xi_24 = xi_17 + xi_22 + xi_23 + xi_246;
// xi_254, xi_261 and xi_247 scaled by 1/6 and by 1/12.
133 const float xi_29 = xi_254 * 0.16666666666666666f;
134 const float xi_30 = xi_254 * 0.083333333333333329f;
135 const float xi_42 = xi_261 * 0.16666666666666666f;
136 const float xi_43 = xi_261 * 0.083333333333333329f;
137 const float xi_49 = xi_247 * 0.16666666666666666f;
138 const float xi_50 = xi_247 * 0.083333333333333329f;
// omega_shear and omega_bulk scaled by 1/24.
139 const float xi_55 =
omega_shear * 0.041666666666666664f;
140 const float xi_60 =
omega_bulk * 0.041666666666666664f;
141 const float xi_67 = xi_254 * 0.25f;
143 const float xi_72 = xi_254 * xi_71;
// 2.4494897427831779f equals sqrt(6) to the printed precision.
144 const float xi_109 = 2.4494897427831779f;
145 const float xi_114 = -xi_244;
146 const float xi_118 = -xi_246;
147 const float xi_119 = xi_118 + xi_18;
148 const float xi_120 = -xi_253 + xi_8;
149 const float xi_122 = -xi_252;
150 const float xi_123 = xi_11 + xi_122 + xi_15 + xi_21;
// Weighted sums (factors 2 and 5) of the inputs.
151 const float xi_125 = xi_245 * 2.0f + xi_246 * 2.0f + xi_255 * 2.0f + xi_256 * 2.0f;
152 const float xi_126 = xi_125 + xi_257 * 5.0f + xi_264 * 5.0f;
153 const float xi_128 = xi_258 * 2.0f;
154 const float xi_129 = xi_252 * 2.0f;
155 const float xi_130 = xi_249 * 2.0f + xi_260 * 2.0f;
156 const float xi_132 = xi_118 + xi_256;
157 const float xi_133 = xi_132 + xi_14 + xi_22 + xi_255 + xi_259;
159 const float xi_135 = xi_133 * xi_134;
// Each `random_i_j - 0.5f` shifts a random value down by one half
// (presumably centring a value drawn from [0, 1) — TODO confirm against the
// generator on the lines not shown).
160 const float xi_136 = random_2_3 - 0.5f;
161 const float xi_141 = xi_265 * 2.0f;
162 const float xi_142 = xi_262 * 2.0f;
163 const float xi_143 = xi_251 * 2.0f + xi_253 * -2.0f;
164 const float xi_144 = xi_14 - xi_141 + xi_142 + xi_143 + xi_19 + xi_4;
// omega_odd scaled by 1/12.
165 const float xi_145 =
omega_odd * 0.083333333333333329f;
166 const float xi_146 = xi_144 * xi_145;
167 const float xi_147 = random_1_2 - 0.5f;
168 const float xi_152 = random_0_1 - 0.5f;
169 const float xi_166 = xi_122 + xi_258;
170 const float xi_167 = xi_12 + xi_166 + xi_20 + xi_257 + xi_260;
171 const float xi_168 = xi_134 * xi_167;
172 const float xi_169 = random_2_1 - 0.5f;
173 const float xi_171 = xi_13 + xi_141 - xi_142 + xi_143 + xi_3;
174 const float xi_172 = xi_145 * xi_171;
175 const float xi_173 = random_2_0 - 0.5f;
176 const float xi_178 = xi_119 + xi_23 + xi_245 + xi_248 + xi_255;
177 const float xi_179 = xi_134 * xi_178;
178 const float xi_180 = random_2_2 - 0.5f;
179 const float xi_182 = -xi_128 - xi_129 + xi_130 + xi_24 + xi_5;
180 const float xi_183 = xi_145 * xi_182;
181 const float xi_184 = random_1_3 - 0.5f;
// omega_odd scaled by 1/24, applied to the same three sums as xi_145 above.
183 const float xi_211 =
omega_odd * 0.041666666666666664f;
184 const float xi_212 = xi_182 * xi_211;
186 const float xi_214 = xi_178 * xi_213;
187 const float xi_220 = xi_144 * xi_211;
188 const float xi_221 = xi_133 * xi_213;
189 const float xi_235 = xi_167 * xi_213;
190 const float xi_236 = xi_171 * xi_211;
// rr_0 is fixed to 0.0f; every value below that has rr_0 as a factor
// (xi_31, xi_44, xi_51, xi_53 and the products built from xi_53) is
// therefore a multiplication by zero.
191 const float rr_0 = 0.0f;
192 const float xi_31 = rr_0 * xi_30;
193 const float xi_44 = rr_0 * xi_43;
194 const float xi_51 = rr_0 * xi_50;
195 const float xi_53 = rr_0 * 0.041666666666666664f;
196 const float xi_54 = xi_261 * xi_53;
197 const float xi_59 = xi_254 * xi_53;
198 const float xi_81 = xi_247 * xi_53;
// Partial sums per direction and their total `rho`, which also adds the
// remaining inputs (the velocities below divide by rho).
199 const float vel0Term = xi_253 + xi_262 + xi_3;
200 const float vel1Term = xi_265 + xi_4;
201 const float vel2Term = xi_258 + xi_5;
202 const float rho = vel0Term + vel1Term + vel2Term + xi_244 + xi_250 + xi_251 + xi_255 + xi_264 + xi_6;
// kT * rho is the common factor inside the square roots below.
203 const float xi_105 =
kT * rho;
// Noise terms: xi_106 (defined on a line not shown; embedded line 204 is
// missing) times a shifted random value times sqrt(14), sqrt(30) or sqrt(70)
// (3.7416..., 5.4772..., 8.3666...).
205 const float xi_107 = xi_106 * (random_3_0 - 0.5f) * 3.7416573867739413f;
206 const float xi_108 = xi_106 * (random_3_2 - 0.5f) * 5.4772255750516612f;
// powf(x, 0.5f) is used as the square root of
//   kT * rho * (1 - (1 - omega)^2).
// NOTE(review): the argument is negative (and the result NaN) when kT * rho > 0
// and omega lies outside [0, 2] — confirm the callers constrain omega.
207 const float xi_110 = xi_109 * (random_1_1 - 0.5f) * powf(xi_105 * (1.0f - (-
omega_bulk + 1.0f) * (-
omega_bulk + 1.0f)), 0.5f);
208 const float xi_111 = xi_106 * (random_3_1 - 0.5f) * 8.3666002653407556f;
209 const float xi_137 = powf(xi_105 * (1.0f - (-
omega_odd + 1.0f) * (-
omega_odd + 1.0f)), 0.5f);
// xi_137 scaled by sqrt(2) (1.4142...) and by sqrt(6) (xi_109), then by
// fixed fractions and the shifted random values.
210 const float xi_138 = xi_137 * 1.4142135623730951f;
211 const float xi_139 = xi_138 * 0.5f;
212 const float xi_140 = xi_136 * xi_139;
213 const float xi_148 = xi_109 * xi_137;
214 const float xi_149 = xi_148 * 0.16666666666666666f;
215 const float xi_150 = xi_147 * xi_149;
216 const float xi_151 = -xi_146 - xi_150;
// xi_153 is defined on a line not shown (embedded line 217 is missing).
218 const float xi_154 = xi_153 * 0.5f;
219 const float xi_155 = xi_152 * xi_154;
// 1.7320508075688772f equals sqrt(3) to the printed precision.
220 const float xi_161 = xi_153 * (random_0_0 - 0.5f) * 1.7320508075688772f;
221 const float xi_165 = xi_146 + xi_150;
222 const float xi_170 = xi_139 * xi_169;
223 const float xi_174 = xi_149 * xi_173;
224 const float xi_175 = xi_172 + xi_174;
225 const float xi_177 = -xi_172 - xi_174;
226 const float xi_181 = xi_139 * xi_180;
227 const float xi_185 = xi_149 * xi_184;
228 const float xi_186 = -xi_183 - xi_185;
229 const float xi_188 = xi_183 + xi_185;
230 const float xi_189 = xi_152 * xi_153 * 0.25f;
231 const float xi_192 = xi_107 * 0.083333333333333329f;
232 const float xi_196 = xi_154 * (random_0_2 - 0.5f);
233 const float xi_203 = xi_154 * (random_1_0 - 0.5f);
234 const float xi_207 = xi_111 * -0.014285714285714285f;
235 const float xi_208 = xi_108 * 0.050000000000000003f;
236 const float xi_215 = xi_148 * 0.083333333333333329f;
237 const float xi_216 = xi_184 * xi_215;
238 const float xi_217 = xi_138 * 0.25f;
239 const float xi_218 = xi_180 * xi_217;
// The combinations below come in sign-mirrored pairs:
// xi_242 == -xi_219, xi_225 == -xi_224, xi_241 == -xi_239 (term by term).
240 const float xi_219 = -xi_212 + xi_214 - xi_216 + xi_218;
241 const float xi_222 = xi_147 * xi_215;
242 const float xi_223 = xi_136 * xi_217;
243 const float xi_224 = -xi_220 + xi_221 - xi_222 + xi_223;
244 const float xi_225 = xi_220 - xi_221 + xi_222 - xi_223;
245 const float xi_227 = -xi_189;
246 const float xi_230 = xi_111 * 0.035714285714285712f;
247 const float xi_232 = xi_154 * (random_0_3 - 0.5f);
248 const float xi_237 = xi_169 * xi_217;
249 const float xi_238 = xi_173 * xi_215;
250 const float xi_239 = -xi_235 + xi_236 - xi_237 + xi_238;
251 const float xi_241 = xi_235 - xi_236 + xi_237 - xi_238;
252 const float xi_242 = xi_212 - xi_214 + xi_216 - xi_218;
// Reciprocal of rho and half of it.
// NOTE(review): no guard against rho == 0 is visible in this excerpt.
// NOTE(review): xi_35, xi_39, xi_46 and xi_198 are used below but defined on
// lines that are not part of this excerpt (embedded lines 259, 276 and 298 are
// among the missing ones) — confirm their definitions in the full file.
253 const float xi_0 = ((1.0f) / (rho));
254 const float xi_7 = xi_0 * 0.5f;
// u_0: (signed sum of inputs) / rho plus xi_261 * 0.5 / rho.
255 const float u_0 = xi_0 * (vel0Term + xi_13 + xi_8 + xi_9) + xi_261 * xi_7;
// Products of u_0 with xi_261, xi_254 and xi_247 and scaled copies of them.
256 const float xi_25 = u_0 * xi_261;
257 const float xi_37 = xi_25 * 0.16666666666666666f;
258 const float xi_38 = xi_25 * 0.083333333333333329f;
260 const float xi_40 = -xi_37 + xi_39;
261 const float xi_56 = -xi_25 * xi_55 + xi_37;
262 const float xi_57 = -xi_43 + xi_54 + xi_56;
263 const float xi_61 = -xi_25 * xi_60;
264 const float xi_68 = u_0 * xi_67;
265 const float xi_73 = u_0 * xi_72;
266 const float xi_77 = xi_43 - xi_54 + xi_56;
267 const float xi_84 = -xi_38;
268 const float xi_95 = u_0 * xi_247;
269 const float xi_96 = xi_95 * 0.25f;
270 const float xi_99 = xi_71 * xi_95;
271 const float xi_113 = rho * (u_0 * u_0);
// u_1: same construction as u_0, using xi_254 for the half-weighted term.
272 const float u_1 = xi_0 * (vel1Term + xi_16 + xi_19 + xi_253 + xi_8) + xi_254 * xi_7;
273 const float xi_26 = u_1 * xi_254;
274 const float xi_32 = xi_26 * 0.16666666666666666f;
275 const float xi_45 = xi_26 * 0.083333333333333329f;
277 const float xi_47 = -xi_32 + xi_46;
278 const float xi_62 = -xi_26 * xi_60;
279 const float xi_69 = u_1 * 0.25f;
280 const float xi_70 = xi_261 * xi_69;
281 const float xi_74 = u_1 * xi_71;
282 const float xi_75 = xi_261 * xi_74;
// xi_78 is the term-by-term negation of xi_76.
283 const float xi_76 = -xi_68 - xi_70 + xi_73 + xi_75;
284 const float xi_78 = xi_68 + xi_70 - xi_73 - xi_75;
285 const float xi_86 = xi_247 * xi_69;
286 const float xi_88 = xi_247 * xi_74;
287 const float xi_93 = -xi_45;
288 const float xi_112 = rho * (u_1 * u_1);
289 const float xi_121 = xi_112 + xi_120 + xi_9;
290 const float xi_197 = rho * u_1;
291 const float xi_199 = xi_198 * (u_0 * xi_197 + xi_120 + xi_262 + xi_265);
292 const float xi_200 = -xi_196 - xi_199;
293 const float xi_201 = xi_196 + xi_199;
// u_2: same construction as u_0, using xi_247 for the half-weighted term.
294 const float u_2 = xi_0 * (vel2Term + xi_21 + xi_24 + xi_252) + xi_247 * xi_7;
295 const float xi_27 = u_2 * xi_247;
296 const float xi_33 = xi_27 * 0.16666666666666666f;
297 const float xi_34 = xi_27 * 0.083333333333333329f;
299 const float xi_36 = -xi_33 + xi_35;
// Building blocks of forceTerm_1..6 (combined with xi_29/xi_42/xi_49 later).
300 const float xi_41 = -
omega_shear * xi_32 + xi_26 * 0.33333333333333331f + xi_36 + xi_40;
301 const float xi_48 = -
omega_shear * xi_37 + xi_25 * 0.33333333333333331f + xi_36 + xi_47;
302 const float xi_52 = -
omega_shear * xi_33 + xi_27 * 0.33333333333333331f + xi_40 + xi_47;
303 const float xi_58 = -xi_34;
304 const float xi_63 = -xi_27 * xi_60;
// Building blocks of forceTerm_7..18.
305 const float xi_64 = -xi_26 * xi_55 + xi_32 + xi_61 + xi_62 + xi_63;
306 const float xi_65 = xi_30 - xi_59 + xi_64;
307 const float xi_66 = xi_35 + xi_58 + xi_65;
308 const float xi_79 = -xi_30 + xi_59 + xi_64;
309 const float xi_80 = xi_35 + xi_58 + xi_79;
310 const float xi_82 = -xi_27 * xi_55 + xi_33;
311 const float xi_83 = xi_50 - xi_81 + xi_82;
312 const float xi_85 = xi_39 + xi_65 + xi_84;
313 const float xi_87 = u_2 * xi_67;
314 const float xi_89 = u_2 * xi_72;
315 const float xi_90 = xi_86 + xi_87 - xi_88 - xi_89;
316 const float xi_91 = xi_39 + xi_79 + xi_84;
317 const float xi_92 = -xi_86 - xi_87 + xi_88 + xi_89;
318 const float xi_94 = xi_46 + xi_61 + xi_62 + xi_63 + xi_83 + xi_93;
319 const float xi_97 = u_2 * xi_261;
320 const float xi_98 = xi_97 * 0.25f;
321 const float xi_100 = xi_71 * xi_97;
322 const float xi_101 = xi_100 - xi_96 - xi_98 + xi_99;
323 const float xi_102 = -xi_100 + xi_96 + xi_98 - xi_99;
324 const float xi_103 = -xi_50 + xi_81 + xi_82;
325 const float xi_104 = xi_103 + xi_46 + xi_61 + xi_62 + xi_63 + xi_93;
326 const float xi_115 = rho * (u_2 * u_2);
// Combinations of the inputs and of rho * u_i^2, each multiplied by one of
// the omega_* parameters (omega_even, omega_bulk, omega_shear).
327 const float xi_116 = xi_114 + xi_115 * 0.66666666666666663f + xi_248 * 3.0f + xi_263 * 3.0f;
328 const float xi_117 =
omega_even * (xi_112 * 0.66666666666666663f + xi_113 * 1.6666666666666667f + xi_116 + xi_245 * -3.0f + xi_246 * -3.0f + xi_250 * 3.0f + xi_255 * -3.0f + xi_256 * -3.0f + xi_259 * 3.0f);
329 const float xi_124 =
omega_bulk * (xi_113 + xi_115 + xi_119 + xi_121 + xi_123 + xi_17 + xi_22 + xi_244);
330 const float xi_127 =
omega_even * (xi_112 * 2.3333333333333335f + xi_116 + xi_126 + xi_249 * -5.0f + xi_250 * -2.0f + xi_252 * -5.0f + xi_258 * -5.0f + xi_259 * -2.0f + xi_260 * -5.0f);
331 const float xi_131 =
omega_even * (xi_114 + xi_115 * 3.0f + xi_126 + xi_128 + xi_129 + xi_130 + xi_248 * -4.0f + xi_250 * 5.0f + xi_251 * -7.0f + xi_253 * -7.0f + xi_259 * 5.0f + xi_262 * -7.0f + xi_263 * -4.0f + xi_265 * -7.0f);
332 const float xi_156 = -xi_115 + xi_248;
333 const float xi_157 =
omega_shear * (xi_121 + xi_156 + xi_16 + xi_2 + xi_258 - xi_259 + xi_6);
334 const float xi_158 = xi_157 * 0.125f;
335 const float xi_159 = xi_107 * -0.11904761904761904f + xi_131 * -0.01984126984126984f;
336 const float xi_160 =
omega_shear * (-xi_112 + xi_113 * 2.0f + xi_120 + xi_123 + xi_125 + xi_156 + xi_250 + xi_257 * -2.0f + xi_259 + xi_263 + xi_264 * -2.0f + xi_9);
// Weighted mixes of the relaxed combinations and the noise terms; their uses
// are on lines beyond this excerpt except where noted.
337 const float xi_162 = xi_160 * -0.041666666666666664f + xi_161 * -0.16666666666666666f;
338 const float xi_163 = xi_108 * -0.10000000000000001f + xi_117 * -0.050000000000000003f + xi_162;
339 const float xi_164 = xi_111 * 0.028571428571428571f + xi_127 * 0.014285714285714285f + xi_155 + xi_158 + xi_159 + xi_163;
340 const float xi_176 = xi_111 * -0.071428571428571425f + xi_127 * -0.035714285714285712f + xi_159 + xi_160 * 0.083333333333333329f + xi_161 * 0.33333333333333331f;
341 const float xi_187 = xi_107 * 0.095238095238095233f + xi_111 * -0.042857142857142858f + xi_127 * -0.021428571428571429f + xi_131 * 0.015873015873015872f - xi_155 - xi_158 + xi_163;
342 const float xi_190 = xi_157 * 0.0625f;
343 const float xi_191 = xi_131 * 0.013888888888888888f;
344 const float xi_193 = xi_110 * 0.083333333333333329f + xi_124 * 0.041666666666666664f;
345 const float xi_194 = xi_160 * 0.020833333333333332f + xi_161 * 0.083333333333333329f + xi_193;
346 const float xi_195 = xi_165 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
347 const float xi_202 = xi_151 + xi_189 + xi_190 + xi_191 + xi_192 + xi_194;
348 const float xi_204 = xi_127 * -0.0071428571428571426f;
349 const float xi_205 = xi_198 * (u_2 * xi_197 + xi_132 + xi_17 + xi_245);
350 const float xi_206 = xi_117 * 0.025000000000000001f;
351 const float xi_209 = xi_107 * -0.023809523809523808f + xi_131 * -0.003968253968253968f;
// xi_210 and xi_226 differ only in the sign of xi_203 and xi_205.
352 const float xi_210 = xi_162 + xi_193 + xi_203 + xi_204 + xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
353 const float xi_226 = xi_162 + xi_193 - xi_203 + xi_204 - xi_205 + xi_206 + xi_207 + xi_208 + xi_209;
354 const float xi_228 = -xi_190;
355 const float xi_229 = xi_127 * 0.017857142857142856f;
// xi_231 and xi_243 differ only in their first summand (xi_188 vs. xi_186,
// which are negatives of each other).
356 const float xi_231 = xi_188 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
357 const float xi_233 = xi_198 * (rho * u_0 * u_2 + xi_10 + xi_166 + xi_249);
358 const float xi_234 = -xi_232 - xi_233;
359 const float xi_240 = xi_232 + xi_233;
360 const float xi_243 = xi_186 + xi_194 + xi_209 + xi_227 + xi_228 + xi_229 + xi_230;
// The nineteen forceTerm_* values, assembled from the building blocks above.
// NOTE(review): xi_28 is defined on a line that is not part of this excerpt
// (presumably embedded line 132) — confirm its definition in the full file.
361 const float forceTerm_0 = xi_25 * xi_28 - xi_25 + xi_26 * xi_28 - xi_26 + xi_27 * xi_28 - xi_27;
// forceTerm_1..6 come in pairs that share one xi_41/xi_48/xi_52 term and
// differ in the sign of the (xi_29 - xi_31), (xi_42 - xi_44), (xi_49 - xi_51) part.
362 const float forceTerm_1 = xi_29 - xi_31 + xi_41;
363 const float forceTerm_2 = -xi_29 + xi_31 + xi_41;
364 const float forceTerm_3 = -xi_42 + xi_44 + xi_48;
365 const float forceTerm_4 = xi_42 - xi_44 + xi_48;
366 const float forceTerm_5 = xi_49 - xi_51 + xi_52;
367 const float forceTerm_6 = -xi_49 + xi_51 + xi_52;
// forceTerm_7..18 are three-term sums of the precomputed combinations.
368 const float forceTerm_7 = xi_57 + xi_66 + xi_76;
369 const float forceTerm_8 = xi_66 + xi_77 + xi_78;
370 const float forceTerm_9 = xi_57 + xi_78 + xi_80;
371 const float forceTerm_10 = xi_76 + xi_77 + xi_80;
372 const float forceTerm_11 = xi_83 + xi_85 + xi_90;
373 const float forceTerm_12 = xi_83 + xi_91 + xi_92;
374 const float forceTerm_13 = xi_101 + xi_57 + xi_94;
375 const float forceTerm_14 = xi_102 + xi_77 + xi_94;
376 const float forceTerm_15 = xi_103 + xi_85 + xi_92;
377 const float forceTerm_16 = xi_103 + xi_90 + xi_91;
378 const float forceTerm_17 = xi_102 + xi_104 + xi_57;
379 const float forceTerm_18 = xi_101 + xi_104 + xi_77;
// Store for the element addressed by the three spatial strides only (no
// _stride_pdfs_3 term): forceTerm_0, weighted noise and relaxation terms, and
// xi_244 are summed into the new value.
// NOTE(review): the stores that consume forceTerm_1..18 and the end of the
// kernel are not part of this excerpt (embedded lines 381-404 are missing).
380 _data_pdfs[
_stride_pdfs_0 * ctr_0 +
_stride_pdfs_1 * ctr_1 +
_stride_pdfs_2 * ctr_2] = forceTerm_0 + xi_107 * 0.14285714285714285f + xi_108 * 0.20000000000000001f - xi_110 + xi_111 * 0.085714285714285715f + xi_117 * 0.10000000000000001f + xi_124 * -0.5f + xi_127 * 0.042857142857142858f + xi_131 * 0.023809523809523808f + xi_244;
// Host-side preparation for launching the kernel over a whole field.
// NOTE(review): the enclosing function header and the condition guarding this
// abort are not part of this excerpt (embedded lines 381-404 and 406 are
// missing); presumably the abort only fires while the sweep is unconfigured —
// confirm in the full file.
405 WALBERLA_ABORT(
"This Sweep contains a configure function that needs to be called manually")
// Look up the two single-precision GPU fields of this block by their IDs.
407 auto pdfs =
block->getData<gpu::GPUField<float>>(
pdfsID);
408 auto force =
block->getData<gpu::GPUField<float>>(
forceID);
// References to members of the sweep object. The other kernel arguments
// (omega_*, block_offset_*, time_step) are bound on lines not shown here.
410 auto &kT = this->
kT_;
418 auto &seed = this->
seed_;
// Raw pointers to element (0, 0, 0, 0) of each field; both fields are asserted
// to use the fzyx layout.
420 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(force->nrOfGhostLayers()))
421 float *
RESTRICT const _data_force = force->dataAt(0, 0, 0, 0);
422 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
423 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
424 float *
RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
425 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
// Iteration extents: the x/y/z sizes of the force field, each asserted not to
// exceed the corresponding size including ghost layers.
426 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(force->xSize()) + 0))
427 const int64_t _size_force_0 = int64_t(int64_c(force->xSize()) + 0);
428 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
429 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(force->ySize()) + 0))
430 const int64_t _size_force_1 = int64_t(int64_c(force->ySize()) + 0);
431 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
432 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(force->zSize()) + 0))
433 const int64_t _size_force_2 = int64_t(int64_c(force->zSize()) + 0);
434 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
// Element strides of both fields along x, y, z and f, widened to int64_t.
435 const int64_t _stride_force_0 = int64_t(force->xStride());
436 const int64_t _stride_force_1 = int64_t(force->yStride());
437 const int64_t _stride_force_2 = int64_t(force->zStride());
438 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
439 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
440 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
441 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
442 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
// The two statements that follow build the launch configuration:
//   bx = min(128, _size_force_0)
//   a  = min(_size_force_1, 2 * (128 / bx))
//   by = min(1024, a)
//   bz = min(64, min(_size_force_2, 256 / (bx * a)))
// so bx * by * bz never exceeds 256. Each grid dimension is the size divided
// by the block dimension, rounded up.
// NOTE(review): a zero extent makes these expressions divide by zero on the
// host — confirm the sizes are always positive here.
443 dim3 _block(uint32_c(((128 < _size_force_0) ? 128 : _size_force_0)), uint32_c(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))), uint32_c(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))));
444 dim3 _grid(uint32_c(((_size_force_0) % (((128 < _size_force_0) ? 128 : _size_force_0)) == 0 ? (int64_t)(_size_force_0) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)) : ((int64_t)(_size_force_0) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))) + 1)), uint32_c(((_size_force_1) % (((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))) == 0 ? (int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))) : ((int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) + 1)), uint32_c(((_size_force_2) % (((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? 
_size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))) == 0 ? (int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 
128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))) : ((int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))))) + 1)));
// Launch the kernel on `stream` with the grid/block dimensions computed above
// and zero bytes of dynamic shared memory.
// NOTE(review): no launch-error check is visible after this statement in this
// excerpt (embedded lines 446-449 are missing) — confirm whether one exists.
445 internal_collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda::collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
// Host-side preparation for launching the kernel over a sub-region of a block.
// NOTE(review): the enclosing function header and the condition guarding this
// abort are not part of this excerpt (embedded lines 446-449 and 451 are
// missing); presumably the abort only fires while the sweep is unconfigured —
// confirm in the full file.
450 WALBERLA_ABORT(
"This Sweep contains a configure function that needs to be called manually")
// Clip the requested global cell interval to this block's bounding box grown
// by `ghostLayers`, then convert it to block-local coordinates.
452 CellInterval ci = globalCellInterval;
453 CellInterval blockBB = blocks->getBlockCellBB(*
block);
454 blockBB.expand(ghostLayers);
455 ci.intersect(blockBB);
456 blocks->transformGlobalToBlockLocalCellInterval(ci, *
block);
// NOTE(review): embedded lines 457-459 are not part of this excerpt. If no
// early return for an empty `ci` exists there, a zero extent makes the launch
// configuration below divide by zero on the host — confirm in the full file.
// Look up the two single-precision GPU fields of this block by their IDs.
460 auto pdfs =
block->getData<gpu::GPUField<float>>(
pdfsID);
461 auto force =
block->getData<gpu::GPUField<float>>(
forceID);
// References to members of the sweep object. The other kernel arguments
// (omega_*, block_offset_*, time_step) are bound on lines not shown here.
463 auto &kT = this->
kT_;
471 auto &seed = this->
seed_;
// The interval's lower corner must not reach below the ghost layers of either
// field; the raw pointers address that corner at f = 0.
473 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(force->nrOfGhostLayers()))
474 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(force->nrOfGhostLayers()))
475 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(force->nrOfGhostLayers()))
476 float *
RESTRICT const _data_force = force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
477 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
478 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
479 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
480 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
481 float *
RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
482 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
// Iteration extents: the x/y/z sizes of the clipped interval, each asserted
// not to exceed the force field's size including ghost layers.
483 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
484 const int64_t _size_force_0 = int64_t(int64_c(ci.xSize()) + 0);
485 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
486 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
487 const int64_t _size_force_1 = int64_t(int64_c(ci.ySize()) + 0);
488 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
489 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
490 const int64_t _size_force_2 = int64_t(int64_c(ci.zSize()) + 0);
491 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
// Element strides of both fields along x, y, z and f, widened to int64_t.
492 const int64_t _stride_force_0 = int64_t(force->xStride());
493 const int64_t _stride_force_1 = int64_t(force->yStride());
494 const int64_t _stride_force_2 = int64_t(force->zStride());
495 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
496 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
497 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
498 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
499 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
// The two statements that follow build the launch configuration:
//   bx = min(128, _size_force_0)
//   a  = min(_size_force_1, 2 * (128 / bx))
//   by = min(1024, a)
//   bz = min(64, min(_size_force_2, 256 / (bx * a)))
// so bx * by * bz never exceeds 256. Each grid dimension is the size divided
// by the block dimension, rounded up.
500 dim3 _block(uint32_c(((128 < _size_force_0) ? 128 : _size_force_0)), uint32_c(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))), uint32_c(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))));
501 dim3 _grid(uint32_c(((_size_force_0) % (((128 < _size_force_0) ? 128 : _size_force_0)) == 0 ? (int64_t)(_size_force_0) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)) : ((int64_t)(_size_force_0) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))) + 1)), uint32_c(((_size_force_1) % (((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))) == 0 ? (int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))) : ((int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))) ? 1024 : ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) + 1)), uint32_c(((_size_force_2) % (((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? 
_size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))) == 0 ? (int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 
128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))))) : ((int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0) * ((_size_force_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))) ? _size_force_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0) ? 128 : _size_force_0)))))))))) + 1)));
// Launch the kernel on `stream` for the clipped cell interval, using the
// grid/block dimensions computed above and zero bytes of dynamic shared memory.
// NOTE(review): no launch-error check is visible after this statement in this
// excerpt — confirm whether one exists in the full file.
502 internal_collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda::collidesweepsingleprecisionthermalizedcuda_collidesweepsingleprecisionthermalizedcuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);