50namespace internal_streamcollidesweepthermalizeddoubleprecisioncuda_streamcollidesweepthermalizeddoubleprecisioncuda {
// Device kernel of the thermalized, double-precision stream-collide sweep.
//
// Launch contract visible here: declared with __launch_bounds__(256), i.e. it
// must be launched with at most 256 threads per block. Each thread takes its
// 3D global thread index, shifts it by +1 and uses the result as the cell
// counters (ctr_0, ctr_1, ctr_2). Threads whose shifted index is not strictly
// below _size_force_N - 1 on every axis do no work.
//
// Parameters:
//   _data_force, _data_pdfs  - field base pointers (the pointer itself is const)
//   _data_pdfs_tmp           - second PDF buffer; presumably the write target - TODO confirm
//   _size_force_0..2         - extents used by the bounds guard
//   _stride_*_0..3           - per-axis strides of the three fields
//   block_offset_0..2, seed, time_step
//                            - presumably inputs of the random number generator - TODO confirm
//   kT, omega_bulk, omega_even, omega_odd, omega_shear
//                            - model parameters entering the xi_* terms below
//
// NOTE(review): many operands used below (random_*_*, u_*, delta_rho and a
// number of xi_* values) are defined in statements that are not part of the
// lines reviewed here; comments only describe what the visible lines compute.
51static FUNC_PREFIX __launch_bounds__(256) void streamcollidesweepthermalizeddoubleprecisioncuda_streamcollidesweepthermalizeddoubleprecisioncuda(
double *
RESTRICT const _data_force,
double *
RESTRICT const
_data_pdfs,
double *
RESTRICT _data_pdfs_tmp, int64_t const
_size_force_0, int64_t const
_size_force_1, int64_t const
_size_force_2, int64_t const
_stride_force_0, int64_t const
_stride_force_1, int64_t const
_stride_force_2, int64_t const
_stride_force_3, int64_t const
_stride_pdfs_0, int64_t const
_stride_pdfs_1, int64_t const
_stride_pdfs_2, int64_t const
_stride_pdfs_3, int64_t const
_stride_pdfs_tmp_0, int64_t const
_stride_pdfs_tmp_1, int64_t const
_stride_pdfs_tmp_2, int64_t const
_stride_pdfs_tmp_3, uint32_t
block_offset_0, uint32_t
block_offset_1, uint32_t
block_offset_2,
double kT,
double omega_bulk,
double omega_even,
double omega_odd,
double omega_shear, uint32_t
seed, uint32_t time_step) {
// Bounds guard: the shifted index (global thread index + 1) must be strictly
// smaller than size - 1 on all three axes, so index 0 and index size - 1 are
// never processed.
52 if (blockDim.x * blockIdx.x + threadIdx.x + 1 <
_size_force_0 - 1 && blockDim.y * blockIdx.y + threadIdx.y + 1 <
_size_force_1 - 1 && blockDim.z * blockIdx.z + threadIdx.z + 1 <
_size_force_2 - 1) {
// Cell counters of this thread: global thread index shifted by one.
53 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x + 1;
54 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y + 1;
55 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z + 1;
// Relaxation-rate prefactors; 0.041666666666666664 is 1/24.
125 const double xi_47 =
omega_shear * 0.041666666666666664;
126 const double xi_51 =
omega_bulk * 0.041666666666666664;
// Numeric constants and centred random values (random_* - 0.5).
// The literals equal sqrt(14), sqrt(30), sqrt(6), sqrt(70) and, further down,
// sqrt(3) to double precision.
// NOTE(review): subtracting 0.5 presumably centres uniform [0, 1) samples
// around zero - confirm against the generator that fills random_*.
130 const double xi_97 = 3.7416573867739413;
131 const double xi_98 = random_6_0 - 0.5;
132 const double xi_100 = 5.4772255750516612;
133 const double xi_101 = random_7_0 - 0.5;
134 const double xi_103 = random_2_1 - 0.5;
135 const double xi_104 = 2.4494897427831779;
136 const double xi_107 = 8.3666002653407556;
137 const double xi_108 = random_6_1 - 0.5;
139 const double xi_118 = xi_11 + xi_3;
145 const double xi_133 = random_0_1 - 0.5;
156 const double xi_151 = random_5_1 - 0.5;
// omega_odd prefactors: 0.083333333333333329 is 1/12, 0.041666666666666664 is 1/24.
162 const double xi_160 =
omega_odd * 0.083333333333333329;
163 const double xi_161 = xi_159 * xi_160;
164 const double xi_162 = random_3_0 - 0.5;
165 const double xi_173 = 1.7320508075688772;
166 const double xi_174 = random_0_0 - 0.5;
168 const double xi_186 = xi_138 + xi_185;
170 const double xi_188 = random_4_1 - 0.5;
173 const double xi_191 = xi_160 * xi_190;
174 const double xi_192 = random_4_0 - 0.5;
177 const double xi_200 = random_5_0 - 0.5;
178 const double xi_201 = -xi_124 - xi_125 + xi_126 + xi_140 + xi_198 + xi_6;
179 const double xi_202 = xi_160 * xi_201;
180 const double xi_203 = random_3_1 - 0.5;
182 const double xi_223 =
omega_odd * 0.041666666666666664;
183 const double xi_224 = xi_159 * xi_223;
185 const double xi_227 = xi_149 * xi_226;
186 const double xi_233 = xi_201 * xi_223;
187 const double xi_234 = xi_199 * xi_226;
188 const double xi_254 = xi_187 * xi_226;
189 const double xi_255 = xi_190 * xi_223;
// rr_0 is fixed to 0.0, so the four products that follow are always zero.
190 const double rr_0 = 0.0;
191 const double xi_23 = rr_0 * xi_22;
192 const double xi_35 = rr_0 * xi_34;
193 const double xi_41 = rr_0 * xi_40;
194 const double xi_45 = rr_0 * 0.041666666666666664;
199 const double vel1Term = xi_4 + xi_5;
// rho is delta_rho shifted by 1.0; xi_95 = kT * rho scales the square-root
// amplitude xi_152 below.
202 const double rho = delta_rho + 1.0;
203 const double xi_95 =
kT * rho;
205 const double xi_99 = xi_96 * xi_97 * xi_98;
206 const double xi_102 = xi_100 * xi_101 * xi_96;
208 const double xi_106 = xi_103 * xi_104 * xi_105;
209 const double xi_109 = xi_107 * xi_108 * xi_96;
210 const double xi_131 = xi_99 * 0.11904761904761904;
212 const double xi_135 = xi_134 * 0.5;
213 const double xi_136 = xi_133 * xi_135;
// xi_152 = sqrt(xi_95 * (1 - (1 - omega_odd)^2)), written as pow(..., 0.5).
214 const double xi_152 = pow(xi_95 * (1.0 - ((-
omega_odd + 1.0) * (-
omega_odd + 1.0))), 0.5);
// 1.4142135623730951 is sqrt(2).
215 const double xi_153 = xi_152 * 1.4142135623730951;
216 const double xi_154 = xi_153 * 0.5;
217 const double xi_163 = xi_104 * xi_152;
218 const double xi_164 = xi_163 * 0.16666666666666666;
219 const double xi_165 = xi_162 * xi_164;
220 const double xi_166 = xi_161 + xi_165;
221 const double xi_167 = xi_149 * xi_150 + xi_151 * xi_154 + xi_166;
222 const double xi_169 = xi_102 * 0.10000000000000001;
223 const double xi_175 = xi_134 * xi_173 * xi_174;
224 const double xi_176 = xi_175 * 0.16666666666666666;
225 const double xi_184 = xi_109 * 0.071428571428571425;
226 const double xi_193 = xi_164 * xi_192;
227 const double xi_194 = xi_191 + xi_193;
228 const double xi_195 = xi_150 * xi_187 + xi_154 * xi_188 + xi_194;
229 const double xi_197 = xi_109 * 0.042857142857142858;
230 const double xi_204 = xi_164 * xi_203;
231 const double xi_205 = xi_202 + xi_204;
232 const double xi_206 = xi_150 * xi_199 + xi_154 * xi_200 + xi_205;
233 const double xi_207 = xi_133 * xi_134 * 0.25;
234 const double xi_210 = xi_99 * 0.083333333333333329;
235 const double xi_214 = -xi_191 - xi_193;
236 const double xi_215 = xi_135 * (random_1_0 - 0.5);
237 const double xi_222 = xi_135 * (random_2_0 - 0.5);
238 const double xi_228 = xi_163 * 0.083333333333333329;
239 const double xi_229 = xi_162 * xi_228;
240 const double xi_230 = xi_153 * 0.25;
241 const double xi_231 = xi_151 * xi_230;
242 const double xi_235 = xi_203 * xi_228;
243 const double xi_236 = xi_200 * xi_230;
// xi_237 / xi_244 are exact negatives of each other (same four terms, all
// signs flipped); the same holds for xi_258 / xi_260 further down.
244 const double xi_237 = -xi_233 + xi_234 - xi_235 + xi_236;
245 const double xi_239 = xi_109 * 0.014285714285714285;
246 const double xi_241 = xi_99 * 0.023809523809523808;
247 const double xi_244 = xi_233 - xi_234 + xi_235 - xi_236;
248 const double xi_246 = -xi_207;
249 const double xi_249 = xi_109 * 0.035714285714285712;
250 const double xi_251 = xi_135 * (random_1_1 - 0.5);
251 const double xi_256 = xi_188 * xi_230;
252 const double xi_257 = xi_192 * xi_228;
253 const double xi_258 = -xi_254 + xi_255 - xi_256 + xi_257;
254 const double xi_260 = xi_254 - xi_255 + xi_256 - xi_257;
// xi_0 is the reciprocal of rho; there is no guard against rho == 0 in the
// lines visible here.
255 const double xi_0 = ((1.0) / (rho));
256 const double xi_10 = xi_0 * 0.5;
// Scaled copies of xi_17, xi_18 and xi_19 (factors 1/6 and 1/12) combined with
// the relaxation-rate prefactors xi_47 (omega_shear / 24) and xi_51
// (omega_bulk / 24).
// NOTE(review): xi_17..xi_19 look like per-axis force/velocity products that
// feed the forceTerm_* values - confirm against their definitions.
259 const double xi_28 = xi_17 * 0.16666666666666666;
260 const double xi_29 = -xi_28;
261 const double xi_30 = xi_17 * 0.083333333333333329;
263 const double xi_48 = xi_17 * xi_47 + xi_29;
264 const double xi_49 = xi_34 - xi_46 + xi_48;
265 const double xi_52 = xi_17 * xi_51;
266 const double xi_59 = u_0 * xi_58;
267 const double xi_64 = u_0 * xi_63;
268 const double xi_68 = -xi_34 + xi_46 + xi_48;
271 const double xi_86 = xi_85 * 0.25;
272 const double xi_89 = xi_62 * xi_85;
273 const double xi_111 = u_0 * u_0;
276 const double xi_26 = xi_18 * 0.16666666666666666;
278 const double xi_42 = -xi_26;
279 const double xi_43 = xi_18 * 0.083333333333333329;
280 const double xi_53 = xi_18 * xi_51;
281 const double xi_60 = u_1 * 0.25;
283 const double xi_65 = u_1 * xi_62;
285 const double xi_67 = xi_59 + xi_61 - xi_64 - xi_66;
286 const double xi_69 = -xi_59 - xi_61 + xi_64 + xi_66;
289 const double xi_110 = rho * (u_1 * u_1);
290 const double xi_117 = -xi_110;
291 const double xi_216 = rho * u_1;
293 const double xi_219 = -xi_215 - xi_218;
294 const double xi_220 = xi_215 + xi_218;
297 const double xi_24 = xi_19 * 0.16666666666666666;
298 const double xi_25 = -xi_24;
299 const double xi_27 = xi_19 * 0.083333333333333329;
300 const double xi_32 = -
omega_shear * xi_26 +
omega_shear * xi_27 + xi_18 * 0.33333333333333331 + xi_25 + xi_31;
303 const double xi_44 = -
omega_shear * xi_24 +
omega_shear * xi_43 + xi_19 * 0.33333333333333331 + xi_31 + xi_42;
304 const double xi_54 = xi_19 * xi_51;
305 const double xi_55 = xi_18 * xi_47 + xi_42 + xi_52 + xi_53 + xi_54;
306 const double xi_56 = -xi_22 + xi_50 + xi_55;
307 const double xi_57 = xi_27 + xi_37 + xi_56;
308 const double xi_70 = xi_22 - xi_50 + xi_55;
309 const double xi_71 = xi_27 + xi_37 + xi_70;
310 const double xi_73 = xi_19 * xi_47 + xi_25;
311 const double xi_74 = -xi_40 + xi_72 + xi_73;
312 const double xi_76 = xi_30 + xi_56 + xi_75;
313 const double xi_78 = u_2 * xi_58;
314 const double xi_80 = u_2 * xi_63;
315 const double xi_81 = -xi_77 - xi_78 + xi_79 + xi_80;
316 const double xi_82 = xi_30 + xi_70 + xi_75;
317 const double xi_83 = xi_77 + xi_78 - xi_79 - xi_80;
318 const double xi_84 = xi_36 + xi_43 + xi_52 + xi_53 + xi_54 + xi_74;
320 const double xi_88 = xi_87 * 0.25;
321 const double xi_90 = xi_62 * xi_87;
322 const double xi_91 = xi_86 + xi_88 - xi_89 - xi_90;
323 const double xi_92 = -xi_86 - xi_88 + xi_89 + xi_90;
324 const double xi_93 = xi_40 - xi_72 + xi_73;
325 const double xi_94 = xi_36 + xi_43 + xi_52 + xi_53 + xi_54 + xi_93;
326 const double xi_113 = rho * (u_2 * u_2);
330 const double xi_119 = rho * xi_111 - xi_112 + xi_113 - xi_117 - xi_118 - xi_13 - xi_16 - xi_5;
// Weighted combinations of the relaxed quantities (xi_116, xi_120, xi_123,
// xi_128, xi_142, xi_171) and the random contributions computed above.
336 const double xi_130 = xi_128 * 0.01984126984126984;
337 const double xi_132 = xi_130 + xi_131;
340 const double xi_143 = xi_142 * 0.125;
341 const double xi_144 = -xi_136 - xi_143;
342 const double xi_168 = xi_116 * 0.050000000000000003;
345 const double xi_172 = xi_171 * 0.041666666666666664;
346 const double xi_177 = xi_172 + xi_176;
347 const double xi_178 = xi_168 + xi_169 + xi_177;
348 const double xi_179 = -xi_130 - xi_131;
349 const double xi_180 = xi_136 + xi_143;
350 const double xi_181 = -xi_172 - xi_176;
351 const double xi_182 = -xi_168 - xi_169 + xi_181;
352 const double xi_183 = xi_123 * 0.035714285714285712;
353 const double xi_196 = xi_123 * 0.021428571428571429;
354 const double xi_208 = xi_142 * 0.0625;
355 const double xi_209 = xi_128 * 0.013888888888888888;
356 const double xi_211 = xi_106 * 0.083333333333333329 + xi_120 * 0.041666666666666664;
357 const double xi_212 = xi_171 * 0.020833333333333332 + xi_175 * 0.083333333333333329 + xi_211;
358 const double xi_213 = xi_166 + xi_207 + xi_208 + xi_209 + xi_210 + xi_212;
359 const double xi_221 = -xi_161 - xi_165 + xi_207 + xi_208 + xi_209 + xi_210 + xi_212;
361 const double xi_232 = xi_222 - xi_224 + xi_225 + xi_227 - xi_229 + xi_231;
362 const double xi_238 = xi_123 * 0.0071428571428571426;
363 const double xi_240 = xi_128 * 0.003968253968253968;
364 const double xi_242 = -xi_240 - xi_241;
365 const double xi_243 = xi_102 * 0.050000000000000003 + xi_116 * 0.025000000000000001 + xi_181 + xi_211 - xi_238 - xi_239 + xi_242;
366 const double xi_245 =
omega_bulk * xi_119 * -0.041666666666666664 +
omega_even * xi_115 * -0.025000000000000001 + xi_100 * xi_101 * xi_96 * -0.050000000000000003 + xi_103 * xi_104 * xi_105 * -0.083333333333333329 + xi_177 + xi_238 + xi_239 + xi_240 + xi_241;
367 const double xi_247 = -xi_208;
368 const double xi_248 = xi_123 * 0.017857142857142856;
369 const double xi_250 = xi_205 + xi_212 + xi_242 + xi_246 + xi_247 + xi_248 + xi_249;
371 const double xi_253 = -xi_251 - xi_252;
372 const double xi_259 = xi_251 + xi_252;
373 const double xi_261 = xi_222 + xi_224 + xi_225 - xi_227 + xi_229 - xi_231;
374 const double xi_262 = -xi_202 - xi_204 + xi_212 + xi_242 + xi_246 + xi_247 + xi_248 + xi_249;
// Nineteen force terms, forceTerm_0 .. forceTerm_18, assembled from the xi_*
// values above. Terms 1/2, 3/4 and 5/6 differ only in the sign of their first
// two operands.
// NOTE(review): the count of 19 suggests one term per direction of a
// 19-direction stencil - confirm against where the terms are consumed.
375 const double forceTerm_0 = xi_17 * xi_20 - xi_17 + xi_18 * xi_20 - xi_18 + xi_19 * xi_20 - xi_19;
376 const double forceTerm_1 = xi_21 - xi_23 + xi_32;
377 const double forceTerm_2 = -xi_21 + xi_23 + xi_32;
378 const double forceTerm_3 = -xi_33 + xi_35 - xi_38;
379 const double forceTerm_4 = xi_33 - xi_35 - xi_38;
380 const double forceTerm_5 = xi_39 - xi_41 + xi_44;
381 const double forceTerm_6 = -xi_39 + xi_41 + xi_44;
382 const double forceTerm_7 = -xi_49 - xi_57 - xi_67;
383 const double forceTerm_8 = -xi_57 - xi_68 - xi_69;
384 const double forceTerm_9 = -xi_49 - xi_69 - xi_71;
385 const double forceTerm_10 = -xi_67 - xi_68 - xi_71;
386 const double forceTerm_11 = -xi_74 - xi_76 - xi_81;
387 const double forceTerm_12 = -xi_74 - xi_82 - xi_83;
388 const double forceTerm_13 = -xi_49 - xi_84 - xi_91;
389 const double forceTerm_14 = -xi_68 - xi_84 - xi_92;
390 const double forceTerm_15 = -xi_76 - xi_83 - xi_93;
391 const double forceTerm_16 = -xi_81 - xi_82 - xi_93;
392 const double forceTerm_17 = -xi_49 - xi_92 - xi_94;
393 const double forceTerm_18 = -xi_68 - xi_91 - xi_94;
// Host-side preparation of the sweep over a whole block.
// Refuses to run until the sweep has been configured.
418 if (!this->configured_)
419 WALBERLA_ABORT(
"This Sweep contains a configure function that needs to be called manually")
// Look up the force and PDF GPU fields stored on the block.
421 auto force =
block->getData<gpu::GPUField<double>>(forceID);
422 auto pdfs =
block->getData<gpu::GPUField<double>>(pdfsID);
423 gpu::GPUField<double> *pdfs_tmp;
// Temporary PDF field: cache_pdfs_ is keyed by block; on a cache miss a new
// field is created with cloneUninitialized() and stored, otherwise the cached
// one is reused.
// NOTE(review): the else / closing-brace lines of this lookup are not among
// the lines reviewed here.
425 if (cache_pdfs_.find(
block) == cache_pdfs_.end()) {
426 pdfs_tmp = pdfs->cloneUninitialized();
427 cache_pdfs_[
block] = pdfs_tmp;
429 pdfs_tmp = cache_pdfs_[
block];
// Local references to the sweep's member parameters, named like the kernel
// arguments they are passed as.
433 auto &kT = this->kT_;
434 auto &block_offset_1 = this->block_offset_1_;
435 auto &omega_bulk = this->omega_bulk_;
436 auto &seed = this->seed_;
437 auto &time_step = this->time_step_;
438 auto &block_offset_0 = this->block_offset_0_;
439 auto &omega_shear = this->omega_shear_;
440 auto &omega_odd = this->omega_odd_;
441 auto &block_offset_2 = this->block_offset_2_;
442 auto &omega_even = this->omega_even_;
// Base pointers are taken at cell (-1, -1, -1), f = 0, i.e. one cell before
// the interior on every axis. The assertions require (by macro name) that
// -1 >= -nrOfGhostLayers(), so each field needs at least one ghost layer, and
// that the layout is field::fzyx.
443 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(force->nrOfGhostLayers()))
444 double *
RESTRICT const _data_force = force->dataAt(-1, -1, -1, 0);
445 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
446 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()))
447 double *
RESTRICT const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0);
448 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
449 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()))
450 double *
RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0);
451 WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx)
// Iteration extents: interior size plus 2 per axis, matching the one-cell
// shift of the base pointers above.
452 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(force->xSize()) + 2))
453 const int64_t _size_force_0 = int64_t(int64_c(force->xSize()) + 2);
454 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
455 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(force->ySize()) + 2))
456 const int64_t _size_force_1 = int64_t(int64_c(force->ySize()) + 2);
457 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
458 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(force->zSize()) + 2))
459 const int64_t _size_force_2 = int64_t(int64_c(force->zSize()) + 2);
460 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
// Per-axis strides (x, y, z, f) of the three fields, forwarded to the kernel.
461 const int64_t _stride_force_0 = int64_t(force->xStride());
462 const int64_t _stride_force_1 = int64_t(force->yStride());
463 const int64_t _stride_force_2 = int64_t(force->zStride());
464 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
465 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
466 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
467 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
468 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
469 const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
470 const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
471 const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
472 const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
// Launch configuration for the stream-collide kernel.
// The kernel's bounds guard skips the first and last index of every axis, so
// the number of cells actually processed per axis is the extent minus 2.
const int64_t _interior_0 = _size_force_0 - 2;
const int64_t _interior_1 = _size_force_1 - 2;
const int64_t _interior_2 = _size_force_2 - 2;

// Threads per block:
//   x: up to 128 threads;
//   y: up to 2 * (128 / x-threads), additionally capped at 1024;
//   z: whatever is left of a 256-thread budget after x and y, capped at 64.
// Note that the z budget uses the y value *before* the 1024 cap.
const int64_t _threads_0 = (128 < _interior_0) ? int64_t(128) : _interior_0;
const int64_t _limit_1 = 2 * (int64_t(128) / _threads_0);
const int64_t _fit_1 = (_interior_1 < _limit_1) ? _interior_1 : _limit_1;
const int64_t _threads_1 = (1024 < _fit_1) ? int64_t(1024) : _fit_1;
const int64_t _limit_2 = int64_t(256) / (_threads_0 * _fit_1);
const int64_t _fit_2 = (_interior_2 < _limit_2) ? _interior_2 : _limit_2;
const int64_t _threads_2 = (64 < _fit_2) ? int64_t(64) : _fit_2;
dim3 _block(uint32_c(_threads_0), uint32_c(_threads_1), uint32_c(_threads_2));

// Blocks per grid: interior extent divided by the block extent, rounded up.
const int64_t _blocks_0 = _interior_0 / _threads_0 + ((_interior_0 % _threads_0 == 0) ? 0 : 1);
const int64_t _blocks_1 = _interior_1 / _threads_1 + ((_interior_1 % _threads_1 == 0) ? 0 : 1);
const int64_t _blocks_2 = _interior_2 / _threads_2 + ((_interior_2 % _threads_2 == 0) ? 0 : 1);
dim3 _grid(uint32_c(_blocks_0), uint32_c(_blocks_1), uint32_c(_blocks_2));

// Enqueue the kernel on `stream` without dynamic shared memory.
internal_streamcollidesweepthermalizeddoubleprecisioncuda_streamcollidesweepthermalizeddoubleprecisioncuda::
    streamcollidesweepthermalizeddoubleprecisioncuda_streamcollidesweepthermalizeddoubleprecisioncuda<<<_grid, _block, 0, stream>>>(
        _data_force, _data_pdfs, _data_pdfs_tmp,
        _size_force_0, _size_force_1, _size_force_2,
        _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3,
        _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3,
        _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3,
        block_offset_0, block_offset_1, block_offset_2,
        kT, omega_bulk, omega_even, omega_odd, omega_shear,
        seed, time_step);

// Exchange the data pointers of the PDF field and the temporary field.
pdfs->swapDataPointers(pdfs_tmp);
// Host-side preparation of the sweep restricted to a cell interval.
// Refuses to run until the sweep has been configured.
480 if (!this->configured_)
481 WALBERLA_ABORT(
"This Sweep contains a configure function that needs to be called manually")
// Clip the requested global interval to this block's bounding box, grown by
// `ghostLayers`, then convert it to block-local cell coordinates.
483 CellInterval ci = globalCellInterval;
484 CellInterval blockBB = blocks->getBlockCellBB(*
block);
485 blockBB.expand(ghostLayers);
486 ci.intersect(blockBB);
487 blocks->transformGlobalToBlockLocalCellInterval(ci, *
block);
// Look up the force and PDF GPU fields stored on the block.
491 auto force =
block->getData<gpu::GPUField<double>>(forceID);
492 auto pdfs =
block->getData<gpu::GPUField<double>>(pdfsID);
493 gpu::GPUField<double> *pdfs_tmp;
// Temporary PDF field: cache_pdfs_ is keyed by block; on a cache miss a new
// field is created with cloneUninitialized() and stored, otherwise the cached
// one is reused.
// NOTE(review): the else / closing-brace lines of this lookup are not among
// the lines reviewed here.
495 if (cache_pdfs_.find(
block) == cache_pdfs_.end()) {
496 pdfs_tmp = pdfs->cloneUninitialized();
497 cache_pdfs_[
block] = pdfs_tmp;
499 pdfs_tmp = cache_pdfs_[
block];
// Local references to the sweep's member parameters, named like the kernel
// arguments they are passed as.
503 auto &kT = this->kT_;
504 auto &block_offset_1 = this->block_offset_1_;
505 auto &omega_bulk = this->omega_bulk_;
506 auto &seed = this->seed_;
507 auto &time_step = this->time_step_;
508 auto &block_offset_0 = this->block_offset_0_;
509 auto &omega_shear = this->omega_shear_;
510 auto &omega_odd = this->omega_odd_;
511 auto &block_offset_2 = this->block_offset_2_;
512 auto &omega_even = this->omega_even_;
// Base pointers are taken one cell before the interval minimum on every axis
// (f = 0). The assertions require (by macro name) that min - 1 is still
// >= -nrOfGhostLayers() for each field, and that the layout is field::fzyx.
513 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(force->nrOfGhostLayers()))
514 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(force->nrOfGhostLayers()))
515 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(force->nrOfGhostLayers()))
516 double *
RESTRICT const _data_force = force->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
517 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
518 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
519 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
520 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
521 double *
RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
522 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
523 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
524 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
525 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
526 double *
RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
527 WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx)
// Iteration extents: interval size plus 2 per axis, matching the one-cell
// shift of the base pointers above.
528 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 2))
529 const int64_t _size_force_0 = int64_t(int64_c(ci.xSize()) + 2);
530 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
531 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 2))
532 const int64_t _size_force_1 = int64_t(int64_c(ci.ySize()) + 2);
533 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
534 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 2))
535 const int64_t _size_force_2 = int64_t(int64_c(ci.zSize()) + 2);
536 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
// Per-axis strides (x, y, z, f) of the three fields, forwarded to the kernel.
537 const int64_t _stride_force_0 = int64_t(force->xStride());
538 const int64_t _stride_force_1 = int64_t(force->yStride());
539 const int64_t _stride_force_2 = int64_t(force->zStride());
540 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
541 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
542 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
543 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
544 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
545 const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
546 const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
547 const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
548 const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
// Launch configuration for the stream-collide kernel on the cell interval.
// The kernel's bounds guard skips the first and last index of every axis, so
// the number of cells actually processed per axis is the extent minus 2.
const int64_t _interior_0 = _size_force_0 - 2;
const int64_t _interior_1 = _size_force_1 - 2;
const int64_t _interior_2 = _size_force_2 - 2;

// Threads per block:
//   x: up to 128 threads;
//   y: up to 2 * (128 / x-threads), additionally capped at 1024;
//   z: whatever is left of a 256-thread budget after x and y, capped at 64.
// Note that the z budget uses the y value *before* the 1024 cap.
const int64_t _threads_0 = (128 < _interior_0) ? int64_t(128) : _interior_0;
const int64_t _limit_1 = 2 * (int64_t(128) / _threads_0);
const int64_t _fit_1 = (_interior_1 < _limit_1) ? _interior_1 : _limit_1;
const int64_t _threads_1 = (1024 < _fit_1) ? int64_t(1024) : _fit_1;
const int64_t _limit_2 = int64_t(256) / (_threads_0 * _fit_1);
const int64_t _fit_2 = (_interior_2 < _limit_2) ? _interior_2 : _limit_2;
const int64_t _threads_2 = (64 < _fit_2) ? int64_t(64) : _fit_2;
dim3 _block(uint32_c(_threads_0), uint32_c(_threads_1), uint32_c(_threads_2));

// Blocks per grid: interior extent divided by the block extent, rounded up.
const int64_t _blocks_0 = _interior_0 / _threads_0 + ((_interior_0 % _threads_0 == 0) ? 0 : 1);
const int64_t _blocks_1 = _interior_1 / _threads_1 + ((_interior_1 % _threads_1 == 0) ? 0 : 1);
const int64_t _blocks_2 = _interior_2 / _threads_2 + ((_interior_2 % _threads_2 == 0) ? 0 : 1);
dim3 _grid(uint32_c(_blocks_0), uint32_c(_blocks_1), uint32_c(_blocks_2));

// Enqueue the kernel on `stream` without dynamic shared memory.
internal_streamcollidesweepthermalizeddoubleprecisioncuda_streamcollidesweepthermalizeddoubleprecisioncuda::
    streamcollidesweepthermalizeddoubleprecisioncuda_streamcollidesweepthermalizeddoubleprecisioncuda<<<_grid, _block, 0, stream>>>(
        _data_force, _data_pdfs, _data_pdfs_tmp,
        _size_force_0, _size_force_1, _size_force_2,
        _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3,
        _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3,
        _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3,
        block_offset_0, block_offset_1, block_offset_2,
        kT, omega_bulk, omega_even, omega_odd, omega_shear,
        seed, time_step);

// Exchange the data pointers of the PDF field and the temporary field.
pdfs->swapDataPointers(pdfs_tmp);