48namespace internal_initialpdfssetterdoubleprecisioncuda_initialpdfssetterdoubleprecisioncuda {
49static FUNC_PREFIX __launch_bounds__(256) void initialpdfssetterdoubleprecisioncuda_initialpdfssetterdoubleprecisioncuda(
double *
RESTRICT const _data_force,
double *
RESTRICT _data_pdfs,
double *
RESTRICT const
_data_velocity, int64_t const
_size_force_0, int64_t const
_size_force_1, int64_t const
_size_force_2, int64_t const
_stride_force_0, int64_t const
_stride_force_1, int64_t const
_stride_force_2, int64_t const
_stride_force_3, int64_t const
_stride_pdfs_0, int64_t const
_stride_pdfs_1, int64_t const
_stride_pdfs_2, int64_t const
_stride_pdfs_3, int64_t const
_stride_velocity_0, int64_t const
_stride_velocity_1, int64_t const
_stride_velocity_2, int64_t const
_stride_velocity_3,
double rho_0) {
51 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
52 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
53 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
54 const double rho = rho_0;
65 _data_pdfs_10_20_30[
_stride_pdfs_0 * ctr_0] = rho * -0.33333333333333331 * (u_0 * u_0) + rho * -0.33333333333333331 * (u_1 * u_1) + rho * -0.33333333333333331 * (u_2 * u_2) + rho * 0.33333333333333331;
67 _data_pdfs_10_20_31[
_stride_pdfs_0 * ctr_0] = rho * u_1 * 0.16666666666666666 + rho * -0.16666666666666666 * (u_0 * u_0) + rho * -0.16666666666666666 * (u_2 * u_2) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_1 * u_1);
69 _data_pdfs_10_20_32[
_stride_pdfs_0 * ctr_0] = rho * u_1 * -0.16666666666666666 + rho * -0.16666666666666666 * (u_0 * u_0) + rho * -0.16666666666666666 * (u_2 * u_2) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_1 * u_1);
71 _data_pdfs_10_20_33[
_stride_pdfs_0 * ctr_0] = rho * u_0 * -0.16666666666666666 + rho * -0.16666666666666666 * (u_1 * u_1) + rho * -0.16666666666666666 * (u_2 * u_2) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_0 * u_0);
73 _data_pdfs_10_20_34[
_stride_pdfs_0 * ctr_0] = rho * u_0 * 0.16666666666666666 + rho * -0.16666666666666666 * (u_1 * u_1) + rho * -0.16666666666666666 * (u_2 * u_2) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_0 * u_0);
75 _data_pdfs_10_20_35[
_stride_pdfs_0 * ctr_0] = rho * u_2 * 0.16666666666666666 + rho * -0.16666666666666666 * (u_0 * u_0) + rho * -0.16666666666666666 * (u_1 * u_1) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_2 * u_2);
77 _data_pdfs_10_20_36[
_stride_pdfs_0 * ctr_0] = rho * u_2 * -0.16666666666666666 + rho * -0.16666666666666666 * (u_0 * u_0) + rho * -0.16666666666666666 * (u_1 * u_1) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u_2 * u_2);
79 _data_pdfs_10_20_37[
_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * -0.25 + rho * u_0 * -0.083333333333333329 + rho * u_1 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_1 * u_1);
81 _data_pdfs_10_20_38[
_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * 0.25 + rho * u_0 * 0.083333333333333329 + rho * u_1 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_1 * u_1);
83 _data_pdfs_10_20_39[
_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * 0.25 + rho * u_0 * -0.083333333333333329 + rho * u_1 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_1 * u_1);
85 _data_pdfs_10_20_310[
_stride_pdfs_0 * ctr_0] = rho * u_0 * u_1 * -0.25 + rho * u_0 * 0.083333333333333329 + rho * u_1 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_1 * u_1);
87 _data_pdfs_10_20_311[
_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * 0.25 + rho * u_1 * 0.083333333333333329 + rho * u_2 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_1 * u_1) + rho * 0.083333333333333329 * (u_2 * u_2);
89 _data_pdfs_10_20_312[
_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * -0.25 + rho * u_1 * -0.083333333333333329 + rho * u_2 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_1 * u_1) + rho * 0.083333333333333329 * (u_2 * u_2);
91 _data_pdfs_10_20_313[
_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * -0.25 + rho * u_0 * -0.083333333333333329 + rho * u_2 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_2 * u_2);
93 _data_pdfs_10_20_314[
_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * 0.25 + rho * u_0 * 0.083333333333333329 + rho * u_2 * 0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_2 * u_2);
95 _data_pdfs_10_20_315[
_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * -0.25 + rho * u_1 * 0.083333333333333329 + rho * u_2 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_1 * u_1) + rho * 0.083333333333333329 * (u_2 * u_2);
97 _data_pdfs_10_20_316[
_stride_pdfs_0 * ctr_0] = rho * u_1 * u_2 * 0.25 + rho * u_1 * -0.083333333333333329 + rho * u_2 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_1 * u_1) + rho * 0.083333333333333329 * (u_2 * u_2);
99 _data_pdfs_10_20_317[
_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * 0.25 + rho * u_0 * -0.083333333333333329 + rho * u_2 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_2 * u_2);
101 _data_pdfs_10_20_318[
_stride_pdfs_0 * ctr_0] = rho * u_0 * u_2 * -0.25 + rho * u_0 * 0.083333333333333329 + rho * u_2 * -0.083333333333333329 + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u_0 * u_0) + rho * 0.083333333333333329 * (u_2 * u_2);
109 auto pdfs =
block->getData<gpu::GPUField<double>>(
pdfsID);
111 auto &rho_0 = this->
rho_0_;
112 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(
force->nrOfGhostLayers()))
113 double *
RESTRICT const _data_force =
force->dataAt(0, 0, 0, 0);
114 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
115 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
116 double *
RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
117 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
118 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(
velocity->nrOfGhostLayers()))
120 WALBERLA_ASSERT_EQUAL(
velocity->layout(), field::fzyx)
121 WALBERLA_ASSERT_GREATER_EQUAL(
force->xSizeWithGhostLayer(), int64_t(int64_c(
force->xSize()) + 0))
122 const int64_t _size_force_0 = int64_t(int64_c(
force->xSize()) + 0);
123 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
124 WALBERLA_ASSERT_GREATER_EQUAL(
force->ySizeWithGhostLayer(), int64_t(int64_c(
force->ySize()) + 0))
125 const int64_t _size_force_1 = int64_t(int64_c(
force->ySize()) + 0);
126 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
127 WALBERLA_ASSERT_GREATER_EQUAL(
force->zSizeWithGhostLayer(), int64_t(int64_c(
force->zSize()) + 0))
128 const int64_t _size_force_2 = int64_t(int64_c(
force->zSize()) + 0);
129 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
130 const int64_t _stride_force_0 = int64_t(
force->xStride());
131 const int64_t _stride_force_1 = int64_t(
force->yStride());
132 const int64_t _stride_force_2 = int64_t(
force->zStride());
133 const int64_t _stride_force_3 = int64_t(1 * int64_t(
force->fStride()));
134 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
135 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
136 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
137 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
138 const int64_t _stride_velocity_0 = int64_t(
velocity->xStride());
139 const int64_t _stride_velocity_1 = int64_t(
velocity->yStride());
140 const int64_t _stride_velocity_2 = int64_t(
velocity->zStride());
141 const int64_t _stride_velocity_3 = int64_t(1 * int64_t(
velocity->fStride()));
142 dim3 _block(uint32_t(((16 < _size_force_0) ? 16 : _size_force_0)), uint32_t(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))), uint32_t(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))));
143 dim3 _grid(uint32_t(((_size_force_0) % (((16 < _size_force_0) ? 16 : _size_force_0)) == 0 ? (int64_t)(_size_force_0) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)) : ((int64_t)(_size_force_0) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))) + 1)), uint32_t(((_size_force_1) % (((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))) == 0 ? (int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))) : ((int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) + 1)), uint32_t(((_size_force_2) % (((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))) == 0 ? (int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))) : ((int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))))) + 1)));
144 internal_initialpdfssetterdoubleprecisioncuda_initialpdfssetterdoubleprecisioncuda::initialpdfssetterdoubleprecisioncuda_initialpdfssetterdoubleprecisioncuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3, rho_0);
148 CellInterval ci = globalCellInterval;
149 CellInterval blockBB = blocks->getBlockCellBB(*
block);
150 blockBB.expand(ghostLayers);
151 ci.intersect(blockBB);
152 blocks->transformGlobalToBlockLocalCellInterval(ci, *
block);
158 auto pdfs =
block->getData<gpu::GPUField<double>>(
pdfsID);
160 auto &rho_0 = this->
rho_0_;
161 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(
force->nrOfGhostLayers()))
162 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(
force->nrOfGhostLayers()))
163 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(
force->nrOfGhostLayers()))
164 double *
RESTRICT const _data_force =
force->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
165 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
166 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
167 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
168 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
169 double *
RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
170 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
171 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(
velocity->nrOfGhostLayers()))
172 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(
velocity->nrOfGhostLayers()))
173 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(
velocity->nrOfGhostLayers()))
174 double *
RESTRICT const _data_velocity =
velocity->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
175 WALBERLA_ASSERT_EQUAL(
velocity->layout(), field::fzyx)
176 WALBERLA_ASSERT_GREATER_EQUAL(
force->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
177 const int64_t _size_force_0 = int64_t(int64_c(ci.xSize()) + 0);
178 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
179 WALBERLA_ASSERT_GREATER_EQUAL(
force->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
180 const int64_t _size_force_1 = int64_t(int64_c(ci.ySize()) + 0);
181 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
182 WALBERLA_ASSERT_GREATER_EQUAL(
force->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
183 const int64_t _size_force_2 = int64_t(int64_c(ci.zSize()) + 0);
184 WALBERLA_ASSERT_EQUAL(
force->layout(), field::fzyx)
185 const int64_t _stride_force_0 = int64_t(
force->xStride());
186 const int64_t _stride_force_1 = int64_t(
force->yStride());
187 const int64_t _stride_force_2 = int64_t(
force->zStride());
188 const int64_t _stride_force_3 = int64_t(1 * int64_t(
force->fStride()));
189 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
190 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
191 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
192 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
193 const int64_t _stride_velocity_0 = int64_t(
velocity->xStride());
194 const int64_t _stride_velocity_1 = int64_t(
velocity->yStride());
195 const int64_t _stride_velocity_2 = int64_t(
velocity->zStride());
196 const int64_t _stride_velocity_3 = int64_t(1 * int64_t(
velocity->fStride()));
197 dim3 _block(uint32_t(((16 < _size_force_0) ? 16 : _size_force_0)), uint32_t(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))), uint32_t(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))));
198 dim3 _grid(uint32_t(((_size_force_0) % (((16 < _size_force_0) ? 16 : _size_force_0)) == 0 ? (int64_t)(_size_force_0) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)) : ((int64_t)(_size_force_0) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))) + 1)), uint32_t(((_size_force_1) % (((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))) == 0 ? (int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))) : ((int64_t)(_size_force_1) / (int64_t)(((1024 < ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))) ? 1024 : ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) + 1)), uint32_t(((_size_force_2) % (((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))) == 0 ? (int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))))) : ((int64_t)(_size_force_2) / (int64_t)(((64 < ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))) ? 64 : ((_size_force_2 < ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0))))))) ? _size_force_2 : ((int64_t)(256) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0) * ((_size_force_1 < 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))) ? _size_force_1 : 16 * ((int64_t)(16) / (int64_t)(((16 < _size_force_0) ? 16 : _size_force_0)))))))))) + 1)));
199 internal_initialpdfssetterdoubleprecisioncuda_initialpdfssetterdoubleprecisioncuda::initialpdfssetterdoubleprecisioncuda_initialpdfssetterdoubleprecisioncuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3, rho_0);