48namespace internal_streamcollidesweepleesedwardssingleprecisioncuda_streamcollidesweepleesedwardssingleprecisioncuda {
49static FUNC_PREFIX __launch_bounds__(256) void streamcollidesweepleesedwardssingleprecisioncuda_streamcollidesweepleesedwardssingleprecisioncuda(
float *
RESTRICT const _data_force,
float *
RESTRICT const
_data_pdfs,
float *
RESTRICT _data_pdfs_tmp, int64_t const
_size_force_0, int64_t const
_size_force_1, int64_t const
_size_force_2, int64_t const
_stride_force_0, int64_t const
_stride_force_1, int64_t const
_stride_force_2, int64_t const
_stride_force_3, int64_t const
_stride_pdfs_0, int64_t const
_stride_pdfs_1, int64_t const
_stride_pdfs_2, int64_t const
_stride_pdfs_3, int64_t const
_stride_pdfs_tmp_0, int64_t const
_stride_pdfs_tmp_1, int64_t const
_stride_pdfs_tmp_2, int64_t const
_stride_pdfs_tmp_3, int64_t
lebc_bot_index, int64_t
lebc_top_index,
float omega_bulk,
float omega_even,
float omega_odd,
float omega_shear,
float v_s) {
50 if (blockDim.x * blockIdx.x + threadIdx.x + 1 <
_size_force_0 - 1 && blockDim.y * blockIdx.y + threadIdx.y + 1 <
_size_force_1 - 1 && blockDim.z * blockIdx.z + threadIdx.z + 1 <
_size_force_2 - 1) {
51 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x + 1;
52 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y + 1;
53 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z + 1;
75 const float xi_47 =
omega_shear * 0.041666666666666664f;
76 const float xi_51 =
omega_bulk * 0.041666666666666664f;
81 const float xi_102 = xi_11 + xi_3;
101 const float xi_132 =
omega_odd * 0.083333333333333329f;
102 const float xi_133 = xi_131 * xi_132;
103 const float xi_134 = xi_126 * xi_127 + xi_133;
105 const float xi_145 = xi_115 + xi_144;
109 const float xi_149 = xi_132 * xi_148;
110 const float xi_150 = xi_127 * xi_146 + xi_149;
113 const float xi_154 = -xi_107 - xi_108 + xi_109 + xi_117 + xi_152 + xi_6;
114 const float xi_155 = xi_132 * xi_154;
115 const float xi_156 = xi_127 * xi_153 + xi_155;
117 const float xi_161 = -xi_149;
118 const float xi_173 =
omega_odd * 0.041666666666666664f;
119 const float xi_174 = xi_154 * xi_173;
121 const float xi_176 = xi_153 * xi_175;
122 const float xi_177 = -xi_174 + xi_176;
123 const float xi_178 = xi_131 * xi_173;
124 const float xi_179 = xi_126 * xi_175;
125 const float xi_180 = -xi_178 + xi_179;
126 const float xi_181 = xi_178 - xi_179;
127 const float xi_188 = xi_146 * xi_175;
128 const float xi_189 = xi_148 * xi_173;
129 const float xi_190 = -xi_188 + xi_189;
130 const float xi_191 = xi_188 - xi_189;
131 const float xi_192 = xi_174 - xi_176;
132 const float rr_0 = 0.0f;
133 const float xi_23 = rr_0 * xi_22;
134 const float xi_35 = rr_0 * xi_34;
135 const float xi_41 = rr_0 * xi_40;
136 const float xi_45 = rr_0 * 0.041666666666666664f;
141 const float vel1Term = xi_4 + xi_5;
144 const float rho = delta_rho + 1.0f;
145 const float xi_0 = ((1.0f) / (rho));
146 const float xi_10 = xi_0 * 0.5f;
149 const float xi_28 = xi_17 * 0.16666666666666666f;
150 const float xi_29 = -xi_28;
151 const float xi_30 = xi_17 * 0.083333333333333329f;
153 const float xi_48 = xi_17 * xi_47 + xi_29;
154 const float xi_49 = xi_34 - xi_46 + xi_48;
155 const float xi_52 = xi_17 * xi_51;
156 const float xi_59 = u_0 * xi_58;
157 const float xi_64 = u_0 * xi_63;
158 const float xi_68 = -xi_34 + xi_46 + xi_48;
161 const float xi_86 = xi_85 * 0.25f;
162 const float xi_89 = xi_62 * xi_85;
163 const float xi_96 = u_0 * u_0;
166 const float xi_26 = xi_18 * 0.16666666666666666f;
168 const float xi_42 = -xi_26;
169 const float xi_43 = xi_18 * 0.083333333333333329f;
170 const float xi_53 = xi_18 * xi_51;
171 const float xi_60 = u_1 * 0.25f;
173 const float xi_65 = u_1 * xi_62;
175 const float xi_67 = xi_59 + xi_61 - xi_64 - xi_66;
176 const float xi_69 = -xi_59 - xi_61 + xi_64 + xi_66;
179 const float xi_95 = rho * (u_1 * u_1);
180 const float xi_101 = -xi_95;
181 const float xi_157 = rho * u_1;
183 const float xi_160 = -xi_159;
186 const float xi_24 = xi_19 * 0.16666666666666666f;
187 const float xi_25 = -xi_24;
188 const float xi_27 = xi_19 * 0.083333333333333329f;
189 const float xi_32 = -
omega_shear * xi_26 +
omega_shear * xi_27 + xi_18 * 0.33333333333333331f + xi_25 + xi_31;
192 const float xi_44 = -
omega_shear * xi_24 +
omega_shear * xi_43 + xi_19 * 0.33333333333333331f + xi_31 + xi_42;
193 const float xi_54 = xi_19 * xi_51;
194 const float xi_55 = xi_18 * xi_47 + xi_42 + xi_52 + xi_53 + xi_54;
195 const float xi_56 = -xi_22 + xi_50 + xi_55;
196 const float xi_57 = xi_27 + xi_37 + xi_56;
197 const float xi_70 = xi_22 - xi_50 + xi_55;
198 const float xi_71 = xi_27 + xi_37 + xi_70;
199 const float xi_73 = xi_19 * xi_47 + xi_25;
200 const float xi_74 = -xi_40 + xi_72 + xi_73;
201 const float xi_76 = xi_30 + xi_56 + xi_75;
202 const float xi_78 = u_2 * xi_58;
203 const float xi_80 = u_2 * xi_63;
204 const float xi_81 = -xi_77 - xi_78 + xi_79 + xi_80;
205 const float xi_82 = xi_30 + xi_70 + xi_75;
206 const float xi_83 = xi_77 + xi_78 - xi_79 - xi_80;
207 const float xi_84 = xi_36 + xi_43 + xi_52 + xi_53 + xi_54 + xi_74;
209 const float xi_88 = xi_87 * 0.25f;
210 const float xi_90 = xi_62 * xi_87;
211 const float xi_91 = xi_86 + xi_88 - xi_89 - xi_90;
212 const float xi_92 = -xi_86 - xi_88 + xi_89 + xi_90;
213 const float xi_93 = xi_40 - xi_72 + xi_73;
214 const float xi_94 = xi_36 + xi_43 + xi_52 + xi_53 + xi_54 + xi_93;
215 const float xi_98 = rho * (u_2 * u_2);
218 const float xi_103 =
omega_bulk * (rho * xi_96 - xi_101 - xi_102 - xi_13 - xi_16 - xi_5 - xi_97 + xi_98);
223 const float xi_112 = xi_111 * 0.01984126984126984f;
226 const float xi_120 = xi_119 * 0.125f;
227 const float xi_121 = -xi_120;
228 const float xi_135 = xi_100 * 0.050000000000000003f;
231 const float xi_138 = xi_137 * 0.041666666666666664f;
232 const float xi_139 = xi_135 + xi_138;
233 const float xi_140 = -xi_112;
234 const float xi_141 = -xi_138;
235 const float xi_142 = -xi_135 + xi_141;
236 const float xi_143 = xi_106 * 0.035714285714285712f;
237 const float xi_151 = xi_106 * 0.021428571428571429f;
238 const float xi_162 = xi_119 * 0.0625f;
239 const float xi_163 = xi_111 * 0.013888888888888888f;
240 const float xi_164 = xi_103 * 0.041666666666666664f;
241 const float xi_165 = xi_137 * 0.020833333333333332f + xi_164;
242 const float xi_166 = xi_133 + xi_162 + xi_163 + xi_165;
243 const float xi_167 = -xi_133 + xi_162 + xi_163 + xi_165;
244 const float xi_168 = xi_111 * -0.003968253968253968f;
245 const float xi_169 = xi_106 * -0.0071428571428571426f;
247 const float xi_171 = xi_100 * 0.025000000000000001f;
248 const float xi_172 = xi_141 + xi_164 + xi_168 + xi_169 + xi_170 + xi_171;
249 const float xi_182 = xi_141 + xi_164 + xi_168 + xi_169 - xi_170 + xi_171;
251 const float xi_184 = -xi_183;
252 const float xi_185 = -xi_162;
253 const float xi_186 = xi_106 * 0.017857142857142856f;
254 const float xi_187 = xi_155 + xi_165 + xi_168 + xi_185 + xi_186;
255 const float xi_193 = -xi_155 + xi_165 + xi_168 + xi_185 + xi_186;
256 const float forceTerm_0 = xi_17 * xi_20 - xi_17 + xi_18 * xi_20 - xi_18 + xi_19 * xi_20 - xi_19;
257 const float forceTerm_1 = xi_21 - xi_23 + xi_32;
258 const float forceTerm_2 = -xi_21 + xi_23 + xi_32;
259 const float forceTerm_3 = -xi_33 + xi_35 - xi_38;
260 const float forceTerm_4 = xi_33 - xi_35 - xi_38;
261 const float forceTerm_5 = xi_39 - xi_41 + xi_44;
262 const float forceTerm_6 = -xi_39 + xi_41 + xi_44;
263 const float forceTerm_7 = -xi_49 - xi_57 - xi_67;
264 const float forceTerm_8 = -xi_57 - xi_68 - xi_69;
265 const float forceTerm_9 = -xi_49 - xi_69 - xi_71;
266 const float forceTerm_10 = -xi_67 - xi_68 - xi_71;
267 const float forceTerm_11 = -xi_74 - xi_76 - xi_81;
268 const float forceTerm_12 = -xi_74 - xi_82 - xi_83;
269 const float forceTerm_13 = -xi_49 - xi_84 - xi_91;
270 const float forceTerm_14 = -xi_68 - xi_84 - xi_92;
271 const float forceTerm_15 = -xi_76 - xi_83 - xi_93;
272 const float forceTerm_16 = -xi_81 - xi_82 - xi_93;
273 const float forceTerm_17 = -xi_49 - xi_92 - xi_94;
274 const float forceTerm_18 = -xi_68 - xi_91 - xi_94;
300 auto force =
block->getData<gpu::GPUField<float>>(forceID);
301 auto pdfs =
block->getData<gpu::GPUField<float>>(pdfsID);
302 gpu::GPUField<float> *pdfs_tmp;
304 if (cache_pdfs_.find(
block) == cache_pdfs_.end()) {
305 pdfs_tmp = pdfs->cloneUninitialized();
306 cache_pdfs_[
block] = pdfs_tmp;
308 pdfs_tmp = cache_pdfs_[
block];
312 auto &lebc_top_index = this->lebc_top_index_;
313 auto &lebc_bot_index = this->lebc_bot_index_;
314 auto &omega_odd = this->omega_odd_;
315 auto &omega_even = this->omega_even_;
316 auto &v_s = this->v_s_;
317 auto &omega_shear = this->omega_shear_;
318 auto &omega_bulk = this->omega_bulk_;
319 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(force->nrOfGhostLayers()))
320 float *
RESTRICT const _data_force = force->dataAt(-1, -1, -1, 0);
321 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
322 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()))
323 float *
RESTRICT const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0);
324 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
325 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()))
326 float *
RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0);
327 WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx)
328 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(force->xSize()) + 2))
329 const int64_t _size_force_0 = int64_t(int64_c(force->xSize()) + 2);
330 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
331 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(force->ySize()) + 2))
332 const int64_t _size_force_1 = int64_t(int64_c(force->ySize()) + 2);
333 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
334 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(force->zSize()) + 2))
335 const int64_t _size_force_2 = int64_t(int64_c(force->zSize()) + 2);
336 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
337 const int64_t _stride_force_0 = int64_t(force->xStride());
338 const int64_t _stride_force_1 = int64_t(force->yStride());
339 const int64_t _stride_force_2 = int64_t(force->zStride());
340 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
341 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
342 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
343 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
344 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
345 const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
346 const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
347 const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
348 const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
349 dim3 _block(uint32_c(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)), uint32_c(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))), uint32_c(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))));
350 dim3 _grid(uint32_c(((_size_force_0 - 2) % (((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)) == 0 ? (int64_t)(_size_force_0 - 2) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)) : ((int64_t)(_size_force_0 - 2) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))) + 1)), uint32_c(((_size_force_1 - 2) % (((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))) == 0 ? (int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))) : ((int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) + 1)), uint32_c(((_size_force_2 - 2) % (((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))) == 0 ? (int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))) : ((int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))))) + 1)));
351 internal_streamcollidesweepleesedwardssingleprecisioncuda_streamcollidesweepleesedwardssingleprecisioncuda::streamcollidesweepleesedwardssingleprecisioncuda_streamcollidesweepleesedwardssingleprecisioncuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _data_pdfs_tmp, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, lebc_bot_index, lebc_top_index, omega_bulk, omega_even, omega_odd, omega_shear, v_s);
352 pdfs->swapDataPointers(pdfs_tmp);
357 CellInterval ci = globalCellInterval;
358 CellInterval blockBB = blocks->getBlockCellBB(*
block);
359 blockBB.expand(ghostLayers);
360 ci.intersect(blockBB);
361 blocks->transformGlobalToBlockLocalCellInterval(ci, *
block);
365 auto force =
block->getData<gpu::GPUField<float>>(forceID);
366 auto pdfs =
block->getData<gpu::GPUField<float>>(pdfsID);
367 gpu::GPUField<float> *pdfs_tmp;
369 if (cache_pdfs_.find(
block) == cache_pdfs_.end()) {
370 pdfs_tmp = pdfs->cloneUninitialized();
371 cache_pdfs_[
block] = pdfs_tmp;
373 pdfs_tmp = cache_pdfs_[
block];
377 auto &lebc_top_index = this->lebc_top_index_;
378 auto &lebc_bot_index = this->lebc_bot_index_;
379 auto &omega_odd = this->omega_odd_;
380 auto &omega_even = this->omega_even_;
381 auto &v_s = this->v_s_;
382 auto &omega_shear = this->omega_shear_;
383 auto &omega_bulk = this->omega_bulk_;
384 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(force->nrOfGhostLayers()))
385 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(force->nrOfGhostLayers()))
386 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(force->nrOfGhostLayers()))
387 float *
RESTRICT const _data_force = force->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
388 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
389 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
390 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
391 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
392 float *
RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
393 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
394 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
395 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
396 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
397 float *
RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
398 WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx)
399 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 2))
400 const int64_t _size_force_0 = int64_t(int64_c(ci.xSize()) + 2);
401 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
402 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 2))
403 const int64_t _size_force_1 = int64_t(int64_c(ci.ySize()) + 2);
404 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
405 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 2))
406 const int64_t _size_force_2 = int64_t(int64_c(ci.zSize()) + 2);
407 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
408 const int64_t _stride_force_0 = int64_t(force->xStride());
409 const int64_t _stride_force_1 = int64_t(force->yStride());
410 const int64_t _stride_force_2 = int64_t(force->zStride());
411 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
412 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
413 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
414 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
415 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
416 const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
417 const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
418 const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
419 const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
420 dim3 _block(uint32_c(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)), uint32_c(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))), uint32_c(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))));
421 dim3 _grid(uint32_c(((_size_force_0 - 2) % (((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)) == 0 ? (int64_t)(_size_force_0 - 2) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)) : ((int64_t)(_size_force_0 - 2) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))) + 1)), uint32_c(((_size_force_1 - 2) % (((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))) == 0 ? (int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))) : ((int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) + 1)), uint32_c(((_size_force_2 - 2) % (((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))) == 0 ? (int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))) : ((int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))))) + 1)));
422 internal_streamcollidesweepleesedwardssingleprecisioncuda_streamcollidesweepleesedwardssingleprecisioncuda::streamcollidesweepleesedwardssingleprecisioncuda_streamcollidesweepleesedwardssingleprecisioncuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _data_pdfs_tmp, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, lebc_bot_index, lebc_top_index, omega_bulk, omega_even, omega_odd, omega_shear, v_s);
423 pdfs->swapDataPointers(pdfs_tmp);