// Device-side part of the generated stream sweep (double precision, CUDA).
// NOTE(review): this listing carries embedded source line numbers and skips
// several of them, so parts of the kernel body (and its closing braces) are
// not visible here. Comments below describe only what the visible lines show.
48namespace internal_streamsweepdoubleprecisioncuda_streamsweepdoubleprecisioncuda {
// Kernel entry point, compiled with __launch_bounds__(256).
//
// Arguments:
//   _data_force, _data_pdfs      const pointers to double data (the pointer is
//                                const, not the pointee).
//   _data_pdfs_tmp, _data_velocity  pointers to double data; their uses fall on
//                                lines omitted from this listing.
//   _size_force_0/1/2            per-axis extents used by the bounds guard.
//   _stride_<field>_0..3         per-field strides; only _stride_pdfs_0 and
//                                _stride_force_0 are used on the visible lines,
//                                as multipliers of ctr_0 inside array subscripts.
49static FUNC_PREFIX __launch_bounds__(256) void streamsweepdoubleprecisioncuda_streamsweepdoubleprecisioncuda(
double *
RESTRICT const _data_force,
double *
RESTRICT const
_data_pdfs,
double *
RESTRICT _data_pdfs_tmp,
double *
RESTRICT _data_velocity, int64_t const
_size_force_0, int64_t const
_size_force_1, int64_t const
_size_force_2, int64_t const
_stride_force_0, int64_t const
_stride_force_1, int64_t const
_stride_force_2, int64_t const
_stride_force_3, int64_t const
_stride_pdfs_0, int64_t const
_stride_pdfs_1, int64_t const
_stride_pdfs_2, int64_t const
_stride_pdfs_3, int64_t const
_stride_pdfs_tmp_0, int64_t const
_stride_pdfs_tmp_1, int64_t const
_stride_pdfs_tmp_2, int64_t const
_stride_pdfs_tmp_3, int64_t const
_stride_velocity_0, int64_t const
_stride_velocity_1, int64_t const
_stride_velocity_2, int64_t const _stride_velocity_3) {
// Bounds guard: one thread per cell. The per-axis counter is the global thread
// index plus 1 and must be strictly below _size_force_d - 1, so valid counters
// run from 1 to _size_force_d - 2; surplus threads of a partially filled block
// skip the body.
50 if (blockDim.x * blockIdx.x + threadIdx.x + 1 <
_size_force_0 - 1 && blockDim.y * blockIdx.y + threadIdx.y + 1 <
_size_force_1 - 1 && blockDim.z * blockIdx.z + threadIdx.z + 1 <
_size_force_2 - 1) {
// Same expressions as in the guard, widened to int64_t for index arithmetic.
51 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x + 1;
52 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y + 1;
53 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z + 1;
// Population loads. The `_data_pdfs_1*_2*_3*` names are declared on lines that
// are missing from this listing; presumably each is _data_pdfs shifted to a
// y/z neighbour row and a population index -- TODO confirm against the full
// file. On the visible lines they are indexed along x only, with
// _stride_pdfs_0 * ctr_0.
// streamed_3, _4, _7, _8, _9, _10, _13, _14, _17 and _18 are used further down
// but their loads are likewise not shown here.
55 const double streamed_0 = _data_pdfs_10_20_30[
_stride_pdfs_0 * ctr_0];
57 const double streamed_1 = _data_pdfs_1m1_20_31[
_stride_pdfs_0 * ctr_0];
59 const double streamed_2 = _data_pdfs_11_20_32[
_stride_pdfs_0 * ctr_0];
65 const double streamed_5 = _data_pdfs_10_2m1_35[
_stride_pdfs_0 * ctr_0];
67 const double streamed_6 = _data_pdfs_10_21_36[
_stride_pdfs_0 * ctr_0];
77 const double streamed_11 = _data_pdfs_1m1_2m1_311[
_stride_pdfs_0 * ctr_0];
79 const double streamed_12 = _data_pdfs_11_2m1_312[
_stride_pdfs_0 * ctr_0];
85 const double streamed_15 = _data_pdfs_1m1_21_315[
_stride_pdfs_0 * ctr_0];
87 const double streamed_16 = _data_pdfs_11_21_316[
_stride_pdfs_0 * ctr_0];
// Moments of the loaded values.
//   vel0Term / vel1Term / vel2Term  partial sums that are reused below.
//   momdensity_d                    signed combination of streamed values plus
//                                   the matching vel*Term.
//   rho                             sum of all of streamed_0 .. streamed_18
//                                   (seven direct terms plus the three partial
//                                   sums cover each index exactly once).
92 const double vel0Term = streamed_10 + streamed_14 + streamed_18 + streamed_4 + streamed_8;
93 const double momdensity_0 = streamed_13 * -1.0 + streamed_17 * -1.0 + streamed_3 * -1.0 + streamed_7 * -1.0 + streamed_9 * -1.0 + vel0Term;
94 const double vel1Term = streamed_1 + streamed_11 + streamed_15 + streamed_7;
95 const double momdensity_1 = streamed_10 * -1.0 + streamed_12 * -1.0 + streamed_16 * -1.0 + streamed_2 * -1.0 + streamed_8 + streamed_9 * -1.0 + vel1Term;
96 const double vel2Term = streamed_12 + streamed_13 + streamed_5;
97 const double rho = streamed_0 + streamed_16 + streamed_17 + streamed_2 + streamed_3 + streamed_6 + streamed_9 + vel0Term + vel1Term + vel2Term;
98 const double momdensity_2 = streamed_11 + streamed_14 + streamed_15 * -1.0 + streamed_16 * -1.0 + streamed_17 * -1.0 + streamed_18 * -1.0 + streamed_6 * -1.0 + vel2Term;
// Velocity components: u_d = momdensity_d / rho + 0.5 * force_d / rho, written
// as products with (1.0 / rho). The `_data_force_10_20_3d` names are declared
// on omitted lines (presumably offsets into _data_force -- TODO confirm) and
// are indexed with _stride_force_0 * ctr_0.
// NOTE(review): nothing on the visible lines protects against rho == 0.
100 const double u_0 = momdensity_0 * ((1.0) / (rho)) + 0.5 * ((1.0) / (rho)) * _data_force_10_20_30[
_stride_force_0 * ctr_0];
102 const double u_1 = momdensity_1 * ((1.0) / (rho)) + 0.5 * ((1.0) / (rho)) * _data_force_10_20_31[
_stride_force_0 * ctr_0];
104 const double u_2 = momdensity_2 * ((1.0) / (rho)) + 0.5 * ((1.0) / (rho)) * _data_force_10_20_32[
_stride_force_0 * ctr_0];
// Host-side preparation for launching the kernel over a whole field.
// NOTE(review): the enclosing function header and the lines that obtain
// `force` and `velocity` are not part of this listing.
//
// Fetch the PDF field of this block as a GPUField<double>.
156 auto pdfs =
block->getData<gpu::GPUField<double>>(
pdfsID);
157 gpu::GPUField<double> *pdfs_tmp;
// Look the field up in cache_pdfs_. The listing jumps from embedded line 161
// to 164, so the body of the `if` and any `else` are not visible; the
// clone-and-insert below presumably belongs to the cache-miss path -- TODO
// confirm against the full file.
160 auto it = cache_pdfs_.find(pdfs);
161 if (it != cache_pdfs_.end()) {
164 pdfs_tmp = pdfs->cloneUninitialized();
165 cache_pdfs_.insert(pdfs_tmp);
// Base pointers. Every field is addressed at dataAt(-1, -1, -1, 0); the
// preceding assertion (-1 >= -nrOfGhostLayers) therefore requires at least one
// ghost layer, and the following assertion requires field::fzyx layout.
169 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(
force->nrOfGhostLayers()))
170 double *
RESTRICT const _data_force =
force->dataAt(-1, -1, -1, 0);
171 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
172 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()))
173 double *
RESTRICT const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0);
174 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
175 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()))
176 double *
RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0);
177 WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx)
// velocity: same checks. The declaration of _data_velocity (embedded line 179)
// is missing from this listing although the launch below passes it.
178 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(
velocity->nrOfGhostLayers()))
180 WALBERLA_ASSERT_EQUAL(
velocity->layout(), field::fzyx)
// Extents handed to the kernel: field size per axis plus 2. Each assertion
// checks that the size including ghost layers is at least that large.
181 WALBERLA_ASSERT_GREATER_EQUAL(
force->xSizeWithGhostLayer(), int64_t(int64_c(
force->xSize()) + 2))
182 const int64_t _size_force_0 = int64_t(int64_c(
force->xSize()) + 2);
183 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
184 WALBERLA_ASSERT_GREATER_EQUAL(
force->ySizeWithGhostLayer(), int64_t(int64_c(
force->ySize()) + 2))
185 const int64_t _size_force_1 = int64_t(int64_c(
force->ySize()) + 2);
186 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
187 WALBERLA_ASSERT_GREATER_EQUAL(
force->zSizeWithGhostLayer(), int64_t(int64_c(
force->zSize()) + 2))
188 const int64_t _size_force_2 = int64_t(int64_c(
force->zSize()) + 2);
189 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
// Strides are forwarded from each field as int64_t; the `1 *` factor on
// fStride() does not change the value.
190 const int64_t _stride_force_0 = int64_t(
force->xStride());
191 const int64_t _stride_force_1 = int64_t(
force->yStride());
192 const int64_t _stride_force_2 = int64_t(
force->zStride());
193 const int64_t _stride_force_3 = int64_t(1 * int64_t(
force->fStride()));
194 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
195 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
196 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
197 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
198 const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
199 const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
200 const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
201 const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
202 const int64_t _stride_velocity_0 = int64_t(
velocity->xStride());
203 const int64_t _stride_velocity_1 = int64_t(
velocity->yStride());
204 const int64_t _stride_velocity_2 = int64_t(
velocity->zStride());
205 const int64_t _stride_velocity_3 = int64_t(1 * int64_t(
velocity->fStride()));
// Launch geometry (the two dim3 statements that follow), with
// n_d = _size_force_d - 2 and integer division throughout:
//   _block.x = min(16, n_0)
//   _block.y = min(1024, min(n_1, 16 * (16 / _block.x)))
//   _block.z = min(64, min(n_2, 256 / (_block.x * min(n_1, 16 * (16 / _block.x)))))
//   _grid.d  = n_d / _block.d, plus 1 when n_d is not a multiple of _block.d
// NOTE(review): these expressions divide by _block.x (and by the other block
// extents), so every n_d must be greater than zero when they are evaluated.
206 dim3 _block(uint32_t(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)), uint32_t(((1024 < ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))), uint32_t(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))))));
207 dim3 _grid(uint32_t(((_size_force_0 - 2) % (((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)) == 0 ? (int64_t)(_size_force_0 - 2) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)) : ((int64_t)(_size_force_0 - 2) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))) + 1)), uint32_t(((_size_force_1 - 2) % (((1024 < ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))) == 0 ? (int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))) : ((int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) + 1)), uint32_t(((_size_force_2 - 2) % (((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 
16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))))) == 0 ? (int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))))) ? 
64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))))) : ((int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 
16 : _size_force_0 - 2)))))))))) + 1)));
// Launch the sweep kernel with the _grid/_block computed above, 0 bytes of
// dynamic shared memory, on `stream`.
// NOTE(review): no launch-error check (e.g. cudaGetLastError) is visible after
// this statement in the listing -- confirm whether one exists on omitted lines.
208 internal_streamsweepdoubleprecisioncuda_streamsweepdoubleprecisioncuda::streamsweepdoubleprecisioncuda_streamsweepdoubleprecisioncuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
// Exchange the data pointers of pdfs and pdfs_tmp; presumably this makes the
// values written to the scratch field the current PDFs -- TODO confirm.
209 pdfs->swapDataPointers(pdfs_tmp);
// Host-side preparation for launching the kernel on a sub-region.
// NOTE(review): the enclosing function header and the lines that obtain
// `force` and `velocity` are not part of this listing.
//
// Clip the requested global interval to this block: take the block's cell
// bounding box, grow it by ghostLayers, intersect, then convert the result to
// block-local coordinates.
213 CellInterval ci = globalCellInterval;
214 CellInterval blockBB = blocks->getBlockCellBB(*
block);
215 blockBB.expand(ghostLayers);
216 ci.intersect(blockBB);
217 blocks->transformGlobalToBlockLocalCellInterval(ci, *
block);
// NOTE(review): embedded lines 218-222 are omitted; confirm that an
// empty-interval early return exists there, because the launch geometry
// further down divides by values derived from ci's sizes.
//
// Fetch the PDF field of this block as a GPUField<double>.
223 auto pdfs =
block->getData<gpu::GPUField<double>>(
pdfsID);
224 gpu::GPUField<double> *pdfs_tmp;
// Look the field up in cache_pdfs_. The listing jumps from embedded line 228
// to 231, so the body of the `if` and any `else` are not visible; the
// clone-and-insert below presumably belongs to the cache-miss path -- TODO
// confirm against the full file.
227 auto it = cache_pdfs_.find(pdfs);
228 if (it != cache_pdfs_.end()) {
231 pdfs_tmp = pdfs->cloneUninitialized();
232 cache_pdfs_.insert(pdfs_tmp);
// Base pointers. Every field is addressed at the interval minimum minus 1 on
// each axis, f = 0. The assertions check that this coordinate is not below
// -nrOfGhostLayers on any axis and that the layout is field::fzyx.
236 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(
force->nrOfGhostLayers()))
237 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(
force->nrOfGhostLayers()))
238 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(
force->nrOfGhostLayers()))
239 double *
RESTRICT const _data_force =
force->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
240 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
241 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
242 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
243 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
244 double *
RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
245 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
246 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
247 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
248 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
249 double *
RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
250 WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx)
251 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(
velocity->nrOfGhostLayers()))
252 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(
velocity->nrOfGhostLayers()))
253 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(
velocity->nrOfGhostLayers()))
254 double *
RESTRICT _data_velocity =
velocity->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
255 WALBERLA_ASSERT_EQUAL(
velocity->layout(), field::fzyx)
// Extents handed to the kernel: interval size per axis plus 2. Each assertion
// checks that the field's size including ghost layers is at least that large.
256 WALBERLA_ASSERT_GREATER_EQUAL(
force->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 2))
257 const int64_t _size_force_0 = int64_t(int64_c(ci.xSize()) + 2);
258 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
259 WALBERLA_ASSERT_GREATER_EQUAL(
force->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 2))
260 const int64_t _size_force_1 = int64_t(int64_c(ci.ySize()) + 2);
261 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
262 WALBERLA_ASSERT_GREATER_EQUAL(
force->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 2))
263 const int64_t _size_force_2 = int64_t(int64_c(ci.zSize()) + 2);
264 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
// Strides are forwarded from each field as int64_t; the `1 *` factor on
// fStride() does not change the value.
265 const int64_t _stride_force_0 = int64_t(
force->xStride());
266 const int64_t _stride_force_1 = int64_t(
force->yStride());
267 const int64_t _stride_force_2 = int64_t(
force->zStride());
268 const int64_t _stride_force_3 = int64_t(1 * int64_t(
force->fStride()));
269 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
270 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
271 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
272 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
273 const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
274 const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
275 const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
276 const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
277 const int64_t _stride_velocity_0 = int64_t(
velocity->xStride());
278 const int64_t _stride_velocity_1 = int64_t(
velocity->yStride());
279 const int64_t _stride_velocity_2 = int64_t(
velocity->zStride());
280 const int64_t _stride_velocity_3 = int64_t(1 * int64_t(
velocity->fStride()));
// Launch geometry (the two dim3 statements that follow), with
// n_d = _size_force_d - 2 and integer division throughout:
//   _block.x = min(16, n_0)
//   _block.y = min(1024, min(n_1, 16 * (16 / _block.x)))
//   _block.z = min(64, min(n_2, 256 / (_block.x * min(n_1, 16 * (16 / _block.x)))))
//   _grid.d  = n_d / _block.d, plus 1 when n_d is not a multiple of _block.d
// NOTE(review): these expressions divide by _block.x (and by the other block
// extents), so every n_d must be greater than zero when they are evaluated.
281 dim3 _block(uint32_t(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)), uint32_t(((1024 < ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))), uint32_t(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))))));
282 dim3 _grid(uint32_t(((_size_force_0 - 2) % (((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)) == 0 ? (int64_t)(_size_force_0 - 2) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)) : ((int64_t)(_size_force_0 - 2) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))) + 1)), uint32_t(((_size_force_1 - 2) % (((1024 < ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))) == 0 ? (int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))) : ((int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) + 1)), uint32_t(((_size_force_2 - 2) % (((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 
16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))))) == 0 ? (int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))))) ? 
64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))))) : ((int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2) * ((_size_force_1 - 2 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 16 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0 - 2) ? 
16 : _size_force_0 - 2)))))))))) + 1)));
// Launch the sweep kernel with the _grid/_block computed above, 0 bytes of
// dynamic shared memory, on `stream`.
// NOTE(review): no launch-error check (e.g. cudaGetLastError) is visible after
// this statement in the listing -- confirm whether one exists on omitted lines.
283 internal_streamsweepdoubleprecisioncuda_streamsweepdoubleprecisioncuda::streamsweepdoubleprecisioncuda_streamsweepdoubleprecisioncuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
// Exchange the data pointers of pdfs and pdfs_tmp; presumably this makes the
// values written to the scratch field the current PDFs -- TODO confirm.
284 pdfs->swapDataPointers(pdfs_tmp);