// Opens the internal namespace that qualifies the kernel launched further down in this file.
// NOTE(review): the matching closing brace is not visible in this excerpt, and the leading
// number fused to the keyword looks like a line number from a rendered listing -- confirm
// against the real source file.
76namespace internal_dynamicubbsingleprecisioncuda_boundary_DynamicUBBSinglePrecisionCUDA {
// Per-direction lookup tables. Every table below has 19 entries, indexed by a direction id 0..18.
// The offset vectors are: one rest direction (0,0,0), six axis-aligned directions and twelve
// two-axis diagonals -- consistent with a D3Q19 stencil (presumably; confirm against the generator).
//
// f_in_inv_dir_idx[i] is the id of the direction whose (x, y, z) offset is the exact negation of
// direction i's offset in the tables below (e.g. 1 <-> 2, 7 <-> 10, 11 <-> 16); entry 0 maps to itself.
79 const int32_t f_in_inv_dir_idx[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13};
// Integer x/y/z offset of each direction. How these are combined with the cell coordinates is
// not visible in this excerpt.
80 const int32_t
f_in_inv_offsets_x[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1};
81 const int32_t
f_in_inv_offsets_y[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0};
82 const int32_t
f_in_inv_offsets_z[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1};
// Per-direction weights: 1/3 for direction 0, 1/18 for directions 1..6, 1/36 for directions 7..18
// (they sum to 1). Each literal is written as a double constant and explicitly cast to float.
84 const float weights[] = {((float)(0.33333333333333333)), ((
float)(0.055555555555555556)), ((float)(0.055555555555555556)), ((
float)(0.055555555555555556)), ((float)(0.055555555555555556)), ((
float)(0.055555555555555556)), ((float)(0.055555555555555556)), ((
float)(0.027777777777777778)), ((float)(0.027777777777777778)), ((
float)(0.027777777777777778)), ((float)(0.027777777777777778)), ((
float)(0.027777777777777778)), ((float)(0.027777777777777778)), ((
float)(0.027777777777777778)), ((float)(0.027777777777777778)), ((
float)(0.027777777777777778)), ((float)(0.027777777777777778)), ((
float)(0.027777777777777778)), ((float)(0.027777777777777778))};
// Second set of per-direction offsets. The values are element-for-element identical to
// f_in_inv_offsets_x/y/z above; the distinct names presumably reflect different uses in the
// part of the kernel not shown here.
86 const int32_t
neighbour_offset_x[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1};
87 const int32_t
neighbour_offset_y[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0};
88 const int32_t
neighbour_offset_z[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1};
// Decode this thread's record from the byte-addressed index vector.
// Every load below uses the byte offset 28 * blockDim.x * blockIdx.x + 28 * threadIdx.x, i.e.
// 28 * (flat 1D thread index), so records are read with a 28-byte stride and one record per thread.
// The loads reinterpret raw bytes as int32_t, which assumes each record is 4-byte aligned.
// NOTE(review): no bounds check against the index-vector size is visible in this excerpt --
// confirm that the omitted kernel lines guard these loads.
// NOTE(review): the offset is computed from the unsigned 32-bit built-ins blockDim/blockIdx/threadIdx,
// so it would wrap for very large index vectors -- confirm the expected maximum size.
//
// Record bytes 0..3: x, read as int32_t.
91 uint8_t *
RESTRICT _data_indexVector_10 = _data_indexVector;
92 const int32_t x = *((int32_t *)(&_data_indexVector_10[28 * blockDim.x * blockIdx.x + 28 * threadIdx.x]));
// Record bytes 4..7: y, read as int32_t.
93 uint8_t *
RESTRICT _data_indexVector_14 = _data_indexVector + 4;
94 const int32_t y = *((int32_t *)(&_data_indexVector_14[28 * blockDim.x * blockIdx.x + 28 * threadIdx.x]));
// Record bytes 8..11: z, read as int32_t.
95 uint8_t *
RESTRICT _data_indexVector_18 = _data_indexVector + 8;
96 const int32_t z = *((int32_t *)(&_data_indexVector_18[28 * blockDim.x * blockIdx.x + 28 * threadIdx.x]));
// Record bytes 12..15: dir, read as int32_t (presumably an index into the 19-entry direction
// tables; its use is not visible here).
97 uint8_t *
RESTRICT _data_indexVector_112 = _data_indexVector + 12;
98 const int32_t dir = *((int32_t *)(&_data_indexVector_112[28 * blockDim.x * blockIdx.x + 28 * threadIdx.x]));
// Base pointers for the remaining record bytes at offsets 16, 20 and 24. Only the pointer setup
// is visible in this excerpt; the type and meaning of the values stored there are not shown
// (presumably three 4-byte per-cell boundary values -- TODO confirm).
123 uint8_t *
RESTRICT _data_indexVector_116 = _data_indexVector + 16;
124 uint8_t *
RESTRICT _data_indexVector_120 = _data_indexVector + 20;
125 uint8_t *
RESTRICT _data_indexVector_124 = _data_indexVector + 24;
// Launch the boundary kernel with execution configuration (_grid, _block), 0 bytes of dynamic
// shared memory, on the stream `stream`. The kernel receives the index-vector bytes, the pdf
// field pointer with its four strides, and the index-vector size.
// NOTE(review): no launch error check (e.g. cudaGetLastError) is visible in this excerpt --
// confirm whether the surrounding host code performs one.
174 internal_dynamicubbsingleprecisioncuda_boundary_DynamicUBBSinglePrecisionCUDA::dynamicubbsingleprecisioncuda_boundary_DynamicUBBSinglePrecisionCUDA<<<_grid, _block, 0,
stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);