63 auto f =
block->getData<gpu::GPUField<float>>(fID);
64 auto j =
block->getData<gpu::GPUField<float>>(jID);
66 auto &rho_lb = this->rho_lb_;
69 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(f->nrOfGhostLayers()))
70 float *
RESTRICT _data_f = f->dataAt(-1, -1, -1, 0);
71 WALBERLA_ASSERT_EQUAL(f->layout(), field::fzyx)
72 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(j->nrOfGhostLayers()))
73 float *
RESTRICT const _data_j = j->dataAt(-1, -1, -1, 0);
74 WALBERLA_ASSERT_EQUAL(j->layout(), field::fzyx)
75 WALBERLA_ASSERT_GREATER_EQUAL(f->xSizeWithGhostLayer(), int64_t(int64_c(f->xSize()) + 2))
76 const int64_t _size_f_0 = int64_t(int64_c(f->xSize()) + 2);
77 WALBERLA_ASSERT_EQUAL(f->layout(), field::fzyx)
78 WALBERLA_ASSERT_GREATER_EQUAL(f->ySizeWithGhostLayer(), int64_t(int64_c(f->ySize()) + 2))
79 const int64_t _size_f_1 = int64_t(int64_c(f->ySize()) + 2);
80 WALBERLA_ASSERT_EQUAL(f->layout(), field::fzyx)
81 WALBERLA_ASSERT_GREATER_EQUAL(f->zSizeWithGhostLayer(), int64_t(int64_c(f->zSize()) + 2))
82 const int64_t _size_f_2 = int64_t(int64_c(f->zSize()) + 2);
83 WALBERLA_ASSERT_EQUAL(f->layout(), field::fzyx)
84 const int64_t _stride_f_0 = int64_t(f->xStride());
85 const int64_t _stride_f_1 = int64_t(f->yStride());
86 const int64_t _stride_f_2 = int64_t(f->zStride());
87 const int64_t _stride_f_3 = int64_t(1 * int64_t(f->fStride()));
88 const int64_t _stride_j_0 = int64_t(j->xStride());
89 const int64_t _stride_j_1 = int64_t(j->yStride());
90 const int64_t _stride_j_2 = int64_t(j->zStride());
91 const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
92 dim3 _block(uint32_c(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)), uint32_c(((1024 < ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))) ? 1024 : ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))), uint32_c(((64 < ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))))) ? 64 : ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))))));
93 dim3 _grid(uint32_c(((_size_f_0 - 2) % (((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)) == 0 ? (int64_t)(_size_f_0 - 2) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)) : ((int64_t)(_size_f_0 - 2) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))) + 1)), uint32_c(((_size_f_1 - 2) % (((1024 < ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))) ? 1024 : ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))) == 0 ? (int64_t)(_size_f_1 - 2) / (int64_t)(((1024 < ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))) ? 1024 : ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))) : ((int64_t)(_size_f_1 - 2) / (int64_t)(((1024 < ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))) ? 1024 : ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) + 1)), uint32_c(((_size_f_2 - 2) % (((64 < ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))))) ? 64 : ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))))) == 0 ? (int64_t)(_size_f_2 - 2) / (int64_t)(((64 < ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))))) ? 64 : ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))))) : ((int64_t)(_size_f_2 - 2) / (int64_t)(((64 < ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))))) ? 64 : ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))))))) + 1)));
94 internal_frictioncouplingkernel_single_precision_cuda_frictioncouplingkernel_single_precision_cuda::frictioncouplingkernel_single_precision_cuda_frictioncouplingkernel_single_precision_cuda<<<_grid, _block, 0, stream>>>(D, _data_f, _data_j, _size_f_0, _size_f_1, _size_f_2, _stride_f_0, _stride_f_1, _stride_f_2, _stride_f_3, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, kT, rho_lb);
99 CellInterval ci = globalCellInterval;
100 CellInterval blockBB = blocks->getBlockCellBB(*
block);
101 blockBB.expand(ghostLayers);
102 ci.intersect(blockBB);
103 blocks->transformGlobalToBlockLocalCellInterval(ci, *
block);
107 auto f =
block->getData<gpu::GPUField<float>>(fID);
108 auto j =
block->getData<gpu::GPUField<float>>(jID);
110 auto &rho_lb = this->rho_lb_;
112 auto &kT = this->kT_;
113 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(f->nrOfGhostLayers()))
114 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(f->nrOfGhostLayers()))
115 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(f->nrOfGhostLayers()))
116 float *
RESTRICT _data_f = f->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
117 WALBERLA_ASSERT_EQUAL(f->layout(), field::fzyx)
118 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(j->nrOfGhostLayers()))
119 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(j->nrOfGhostLayers()))
120 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(j->nrOfGhostLayers()))
121 float *
RESTRICT const _data_j = j->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
122 WALBERLA_ASSERT_EQUAL(j->layout(), field::fzyx)
123 WALBERLA_ASSERT_GREATER_EQUAL(f->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 2))
124 const int64_t _size_f_0 = int64_t(int64_c(ci.xSize()) + 2);
125 WALBERLA_ASSERT_EQUAL(f->layout(), field::fzyx)
126 WALBERLA_ASSERT_GREATER_EQUAL(f->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 2))
127 const int64_t _size_f_1 = int64_t(int64_c(ci.ySize()) + 2);
128 WALBERLA_ASSERT_EQUAL(f->layout(), field::fzyx)
129 WALBERLA_ASSERT_GREATER_EQUAL(f->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 2))
130 const int64_t _size_f_2 = int64_t(int64_c(ci.zSize()) + 2);
131 WALBERLA_ASSERT_EQUAL(f->layout(), field::fzyx)
132 const int64_t _stride_f_0 = int64_t(f->xStride());
133 const int64_t _stride_f_1 = int64_t(f->yStride());
134 const int64_t _stride_f_2 = int64_t(f->zStride());
135 const int64_t _stride_f_3 = int64_t(1 * int64_t(f->fStride()));
136 const int64_t _stride_j_0 = int64_t(j->xStride());
137 const int64_t _stride_j_1 = int64_t(j->yStride());
138 const int64_t _stride_j_2 = int64_t(j->zStride());
139 const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
140 dim3 _block(uint32_c(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)), uint32_c(((1024 < ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))) ? 1024 : ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))), uint32_c(((64 < ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))))) ? 64 : ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))))));
141 dim3 _grid(uint32_c(((_size_f_0 - 2) % (((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)) == 0 ? (int64_t)(_size_f_0 - 2) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)) : ((int64_t)(_size_f_0 - 2) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))) + 1)), uint32_c(((_size_f_1 - 2) % (((1024 < ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))) ? 1024 : ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))) == 0 ? (int64_t)(_size_f_1 - 2) / (int64_t)(((1024 < ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))) ? 1024 : ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))) : ((int64_t)(_size_f_1 - 2) / (int64_t)(((1024 < ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))) ? 1024 : ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) + 1)), uint32_c(((_size_f_2 - 2) % (((64 < ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))))) ? 64 : ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))))) == 0 ? (int64_t)(_size_f_2 - 2) / (int64_t)(((64 < ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))))) ? 64 : ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))))) : ((int64_t)(_size_f_2 - 2) / (int64_t)(((64 < ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))))) ? 64 : ((_size_f_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2))))))) ? _size_f_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2) * ((_size_f_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))) ? _size_f_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_f_0 - 2) ? 128 : _size_f_0 - 2)))))))))) + 1)));
142 internal_frictioncouplingkernel_single_precision_cuda_frictioncouplingkernel_single_precision_cuda::frictioncouplingkernel_single_precision_cuda_frictioncouplingkernel_single_precision_cuda<<<_grid, _block, 0, stream>>>(D, _data_f, _data_j, _size_f_0, _size_f_1, _size_f_2, _stride_f_0, _stride_f_1, _stride_f_2, _stride_f_3, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3, kT, rho_lb);