23#include "core/DataTypes.h"
24#include "core/Macros.h"
25#include "gpu/ErrorChecking.h"
27#define FUNC_PREFIX __global__
35#define RESTRICT __restrict__
36#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
37#pragma nv_diagnostic push
38#pragma nv_diag_suppress 177
41#pragma diag_suppress 177
43#elif defined(__clang__)
45#if defined(__CUDA_ARCH__)
47#define RESTRICT __restrict__
48#pragma clang diagnostic push
49#pragma clang diagnostic ignored "-Wstrict-aliasing"
50#pragma clang diagnostic ignored "-Wunused-variable"
51#pragma clang diagnostic ignored "-Wconversion"
52#pragma clang diagnostic ignored "-Wsign-compare"
55#define RESTRICT __restrict__
56#pragma clang diagnostic push
57#pragma clang diagnostic ignored "-Wstrict-aliasing"
58#pragma clang diagnostic ignored "-Wunused-variable"
59#pragma clang diagnostic ignored "-Wconversion"
60#pragma clang diagnostic ignored "-Wsign-compare"
63#elif defined(__GNUC__) or defined(__GNUG__)
64#define RESTRICT __restrict__
65#pragma GCC diagnostic push
66#pragma GCC diagnostic ignored "-Wstrict-aliasing"
67#pragma GCC diagnostic ignored "-Wunused-variable"
68#pragma GCC diagnostic ignored "-Wconversion"
69#elif defined(_MSC_VER)
70#define RESTRICT __restrict
// Internal namespace for the generated boundary kernel.
// NOTE(review): the kernel's signature line and any surrounding guard are not visible in this
// listing (some original lines are elided) -- confirm against the full file before relying on
// the comments below.
76namespace internal_dynamic_ubb_single_precisioncuda_boundary_Dynamic_UBB_single_precisionCUDA {
// 19-entry integer lookup table; judging by its name it presumably maps a direction index to the
// index of the opposite direction -- confirm against its use site.
79 const int32_t f_in_inv_dir_idx[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13};
// Single-precision weight table: one entry of 1/3, six entries of 1/18, twelve entries of 1/36.
81 const float weights[] = {0.33333333333333333f, 0.055555555555555556f, 0.055555555555555556f, 0.055555555555555556f, 0.055555555555555556f, 0.055555555555555556f, 0.055555555555555556f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f, 0.027777777777777778f};
// Per-direction integer offsets (values in {-1, 0, 1}) along x, y and z, 19 entries each.
83 const int32_t
neighbour_offset_x[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1};
84 const int32_t
neighbour_offset_y[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0};
85 const int32_t
neighbour_offset_z[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1};
// The index vector is addressed as raw bytes. Each thread reads at byte offset
// 28 * (blockDim.x * blockIdx.x + threadIdx.x), i.e. one 28-byte record per global thread index.
// Record byte offset 0: int32 x.
88 uint8_t *
RESTRICT _data_indexVector_10 = _data_indexVector;
89 const int32_t x = *((int32_t *)(&_data_indexVector_10[28 * blockDim.x * blockIdx.x + 28 * threadIdx.x]));
// Record byte offset 4: int32 y.
90 uint8_t *
RESTRICT _data_indexVector_14 = _data_indexVector + 4;
91 const int32_t y = *((int32_t *)(&_data_indexVector_14[28 * blockDim.x * blockIdx.x + 28 * threadIdx.x]));
// Record byte offset 8: int32 z.
92 uint8_t *
RESTRICT _data_indexVector_18 = _data_indexVector + 8;
93 const int32_t z = *((int32_t *)(&_data_indexVector_18[28 * blockDim.x * blockIdx.x + 28 * threadIdx.x]));
// Record byte offset 12: int32 dir.
94 uint8_t *
RESTRICT _data_indexVector_112 = _data_indexVector + 12;
95 const int32_t dir = *((int32_t *)(&_data_indexVector_112[28 * blockDim.x * blockIdx.x + 28 * threadIdx.x]));
// Base pointers for record byte offsets 16, 20 and 24. The reads through these pointers are not
// visible in this listing, so the element type stored there cannot be stated from here.
120 uint8_t *
RESTRICT _data_indexVector_116 = _data_indexVector + 16;
121 uint8_t *
RESTRICT _data_indexVector_120 = _data_indexVector + 20;
122 uint8_t *
RESTRICT _data_indexVector_124 = _data_indexVector + 24;
132#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
133#pragma nv_diagnostic pop
137#elif defined(__clang__)
139#if defined(__CUDA_ARCH__)
141#pragma clang diagnostic pop
144#pragma clang diagnostic pop
147#elif defined(__GNUC__) or defined(__GNUG__)
148#pragma GCC diagnostic pop
// Host-side launcher: looks up the index vector and the PDF field stored on `block`, derives a
// 1D launch configuration from the index-vector size, and launches the boundary kernel on `stream`.
//
// Parameters:
//   block  - block whose data (index vectors, PDF field) is fetched via getData
//   type   - selects which index vector of the IndexVectors object is used
//   stream - GPU stream passed as the fourth kernel launch parameter
//
// NOTE(review): this listing elides some original lines (e.g. the statement controlled by the
// `if` below and the function's closing brace) -- confirm against the full file.
151void Dynamic_UBB_single_precisionCUDA::run_impl(IBlock *
block, IndexVectors::Type type, gpuStream_t
stream) {
152 auto *indexVectors =
block->getData<IndexVectors>(indexVectorID);
// Number of entries in the selected index vector, converted to int32_t.
153 int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
154 if (indexVectorSize == 0)
// NOTE(review): the statement guarded by the check above is not visible here; presumably an
// early return when there is nothing to process -- confirm.
// Pointer obtained from pointerGpu(type), reinterpreted as raw bytes for the kernel.
157 auto pointer = indexVectors->pointerGpu(type);
159 uint8_t *_data_indexVector =
reinterpret_cast<uint8_t *
>(pointer);
// Single-precision GPU field identified by pdfsID.
161 auto pdfs =
block->getData<gpu::GPUField<float>>(
pdfsID);
163 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))
// Field data pointer at coordinates (0, 0, 0, 0) and the four field strides widened to int64_t.
164 float *
RESTRICT _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
165 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
166 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
167 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
168 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
// Threads per block: min(256, indexVectorSize), one-dimensional.
169 dim3 _block(uint32_c(((256 < indexVectorSize) ? 256 : indexVectorSize)), uint32_c(1), uint32_c(1));
// Number of blocks: indexVectorSize divided by the block size, rounded up, one-dimensional.
170 dim3 _grid(uint32_c(((indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ((int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize))) + 1)), uint32_c(1), uint32_c(1));
// Kernel launch on `stream`; the third launch parameter (dynamic shared memory size) is 0.
171 internal_dynamic_ubb_single_precisioncuda_boundary_Dynamic_UBB_single_precisionCUDA::dynamic_ubb_single_precisioncuda_boundary_Dynamic_UBB_single_precisionCUDA<<<_grid, _block, 0,
stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
void inner(IBlock *block, gpuStream_t stream=nullptr)
void outer(IBlock *block, gpuStream_t stream=nullptr)
void run(IBlock *block, gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
const int32_t neighbour_offset_y[]
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const _stride_pdfs_2
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const _stride_pdfs_3
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const _stride_pdfs_1
const int32_t neighbour_offset_x[]
const int32_t neighbour_offset_z[]
static FUNC_PREFIX float *RESTRICT int64_t const int64_t const int64_t const int64_t const int32_t indexVectorSize
static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_single_precisioncuda_boundary_Dynamic_UBB_single_precisionCUDA(uint8_t *RESTRICT const _data_indexVector
static FUNC_PREFIX float *RESTRICT _data_pdfs
static FUNC_PREFIX float *RESTRICT int64_t const _stride_pdfs_0
\file PackInfoPdfDoublePrecision.cpp \author pystencils