23#include "core/DataTypes.h"
24#include "core/Macros.h"
25#include "gpu/ErrorChecking.h"
27#define FUNC_PREFIX __global__
35#define RESTRICT __restrict__
36#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
37#pragma nv_diagnostic push
38#pragma nv_diag_suppress 177
41#pragma diag_suppress 177
43#elif defined(__clang__)
45#if defined(__CUDA_ARCH__)
47#define RESTRICT __restrict__
48#pragma clang diagnostic push
49#pragma clang diagnostic ignored "-Wunused-variable"
52#define RESTRICT __restrict__
53#pragma clang diagnostic push
54#pragma clang diagnostic ignored "-Wunused-variable"
57#elif defined(__GNUC__) or defined(__GNUG__)
58#define RESTRICT __restrict__
59#pragma GCC diagnostic push
60#pragma GCC diagnostic ignored "-Wunused-variable"
61#elif defined(_MSC_VER)
62#define RESTRICT __restrict
68namespace internal_reactionkernelindexed_4_double_precision_cuda_boundary_ReactionKernelIndexed_4_double_precision_CUDA {
// Device kernel declared with FUNC_PREFIX and __launch_bounds__(256), i.e. it is
// compiled for at most 256 threads per block. Each thread addresses one 12-byte
// record of the index vector using its flat 1D thread index.
//
// NOTE: this listing of the kernel is incomplete. The statements between the
// `invdir` declaration and the final store, as well as the closing braces, are
// not present here. The comments below describe only the lines that are present.
//
// Parameters, as far as the present lines use them:
//   _data_indexVector   byte pointer to the index records; three int32 values
//                       are read at byte offsets 0, 4 and 8 of each record.
//   _data_rho_0.._3     pointers to double data; none is dereferenced directly
//                       in the present lines (presumably the missing lines
//                       derive per-cell pointers from them -- TODO confirm).
//   _stride_rho_*_*     int64 strides; only _stride_rho_3_0 appears in the
//                       present lines, multiplied by x in the final store.
//   indexVectorSize     not referenced in the present lines.
//                       NOTE(review): no bounds check against it is visible
//                       here; confirm that the full source guards the body.
//   order_*, rate_coefficient, stoech_*
//                       double parameters; only stoech_3 appears in the present
//                       lines, as a factor in the value stored at the end.
69static FUNC_PREFIX __launch_bounds__(256) void reactionkernelindexed_4_double_precision_cuda_boundary_ReactionKernelIndexed_4_double_precision_CUDA(uint8_t *
RESTRICT const _data_indexVector,
double *
RESTRICT _data_rho_0,
double *
RESTRICT _data_rho_1,
double *
RESTRICT _data_rho_2,
double *
RESTRICT _data_rho_3, int64_t const
_stride_rho_0_0, int64_t const
_stride_rho_0_1, int64_t const
_stride_rho_0_2, int64_t const
_stride_rho_1_0, int64_t const
_stride_rho_1_1, int64_t const
_stride_rho_1_2, int64_t const
_stride_rho_2_0, int64_t const
_stride_rho_2_1, int64_t const
_stride_rho_2_2, int64_t const
_stride_rho_3_0, int64_t const
_stride_rho_3_1, int64_t const
_stride_rho_3_2, int32_t
indexVectorSize,
double order_0,
double order_1,
double order_2,
double order_3,
double rate_coefficient,
double stoech_0,
double stoech_1,
double stoech_2,
double stoech_3) {
// Record field at byte offset 0: read as int32 into x. The record of this
// thread starts at byte 12 * (blockDim.x * blockIdx.x + threadIdx.x).
// NOTE(review): the byte offset is formed from blockDim/blockIdx/threadIdx and
// the literal 12 without a widening cast -- presumably 32-bit arithmetic;
// confirm it cannot wrap for the largest expected index vector.
71 uint8_t *
RESTRICT _data_indexVector_10 = _data_indexVector;
72 const int32_t x = *((int32_t *)(&_data_indexVector_10[12 * blockDim.x * blockIdx.x + 12 * threadIdx.x]));
// Record field at byte offset 4 (base pointer advanced by 4): read as int32 into y.
73 uint8_t *
RESTRICT _data_indexVector_14 = _data_indexVector + 4;
74 const int32_t y = *((int32_t *)(&_data_indexVector_14[12 * blockDim.x * blockIdx.x + 12 * threadIdx.x]));
// Record field at byte offset 8 (base pointer advanced by 8): read as int32 into z.
75 uint8_t *
RESTRICT _data_indexVector_18 = _data_indexVector + 8;
76 const int32_t z = *((int32_t *)(&_data_indexVector_18[12 * blockDim.x * blockIdx.x + 12 * threadIdx.x]));
// Single-entry tables that all hold 0. They are not referenced by any line
// present in this listing (presumably generator boilerplate -- TODO confirm).
78 const int32_t cx[] = {0};
79 const int32_t cy[] = {0};
80 const int32_t cz[] = {0};
81 const int32_t invdir[] = {0};
// Final store: writes local_rho_3 + rate_factor * stoech_3 to element
// _stride_rho_3_0 * x of _data_rho_3_10_20. The declarations of
// _data_rho_3_10_20, local_rho_3 and rate_factor are not present in this
// listing, so how they are computed cannot be stated from here.
95 _data_rho_3_10_20[
_stride_rho_3_0 * x] = local_rho_3 + rate_factor * stoech_3;
103#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
104#pragma nv_diagnostic pop
108#elif defined(__clang__)
110#if defined(__CUDA_ARCH__)
112#pragma clang diagnostic pop
115#pragma clang diagnostic pop
118#elif defined(__GNUC__) or defined(__GNUG__)
119#pragma GCC diagnostic pop
// Host-side launcher: gathers the index vector and the four rho fields from
// `block`, derives a 1D launch configuration from the number of index entries,
// and launches the reaction kernel on `stream`.
//
// NOTE: this listing of the function is incomplete. The statement guarded by
// the `if (indexVectorSize == 0)` check and the closing brace are not present
// here. The comments below describe only the lines that are present.
//
// Parameters:
//   block   block whose data items (index vectors, rho_0..rho_3) are fetched.
//   type    selects which index vector is used (passed to indexVector() and
//           pointerGpu()).
//   stream  stream argument of the kernel launch.
//
// order_*, rate_coefficient, stoech_*, indexVectorID and rho_*ID are not
// declared in the present lines (presumably members of the enclosing class --
// TODO confirm).
122void ReactionKernelIndexed_4_double_precision_CUDA::run_impl(IBlock *
block, IndexVectors::Type type, gpuStream_t
stream) {
123 auto *indexVectors =
block->uncheckedFastGetData<IndexVectors>(indexVectorID);
// Entry count of the selected index vector, converted with int32_c.
124 int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
// NOTE(review): the guarded statement is missing from this listing; presumably
// an early return for an empty index vector. Confirm, because the launch
// configuration below takes a modulo by min(256, indexVectorSize), which
// would be a modulo by zero for an empty vector.
125 if (indexVectorSize == 0)
128 auto pointer = indexVectors->pointerGpu(type);
// The kernel addresses the index records byte-wise, hence the cast to uint8_t *.
130 uint8_t *_data_indexVector =
reinterpret_cast<uint8_t *
>(pointer);
// Fetch the four double-precision GPU fields from the block.
132 auto rho_3 =
block->getData<gpu::GPUField<double>>(
rho_3ID);
133 auto rho_0 =
block->getData<gpu::GPUField<double>>(
rho_0ID);
134 auto rho_2 =
block->getData<gpu::GPUField<double>>(
rho_2ID);
135 auto rho_1 =
block->getData<gpu::GPUField<double>>(
rho_1ID);
// For each field: an assertion comparing 0 with the negated ghost-layer count
// (presumably asserting 0 >= -ghostLayers -- confirm against the macro
// definition), then the data pointer at coordinates (0, 0, 0, 0).
146 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()))
147 double *
RESTRICT _data_rho_0 = rho_0->dataAt(0, 0, 0, 0);
148 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()))
149 double *
RESTRICT _data_rho_1 = rho_1->dataAt(0, 0, 0, 0);
150 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()))
151 double *
RESTRICT _data_rho_2 = rho_2->dataAt(0, 0, 0, 0);
152 WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_3->nrOfGhostLayers()))
153 double *
RESTRICT _data_rho_3 = rho_3->dataAt(0, 0, 0, 0);
// x/y/z strides of every field, converted to int64_t for the kernel signature.
154 const int64_t _stride_rho_0_0 = int64_t(rho_0->xStride());
155 const int64_t _stride_rho_0_1 = int64_t(rho_0->yStride());
156 const int64_t _stride_rho_0_2 = int64_t(rho_0->zStride());
157 const int64_t _stride_rho_1_0 = int64_t(rho_1->xStride());
158 const int64_t _stride_rho_1_1 = int64_t(rho_1->yStride());
159 const int64_t _stride_rho_1_2 = int64_t(rho_1->zStride());
160 const int64_t _stride_rho_2_0 = int64_t(rho_2->xStride());
161 const int64_t _stride_rho_2_1 = int64_t(rho_2->yStride());
162 const int64_t _stride_rho_2_2 = int64_t(rho_2->zStride());
163 const int64_t _stride_rho_3_0 = int64_t(rho_3->xStride());
164 const int64_t _stride_rho_3_1 = int64_t(rho_3->yStride());
165 const int64_t _stride_rho_3_2 = int64_t(rho_3->zStride());
// Block size in x is min(256, indexVectorSize); y and z extents are 1.
166 dim3 _block(uint32_c(((256 < indexVectorSize) ? 256 : indexVectorSize)), uint32_c(1), uint32_c(1));
// Grid size in x is indexVectorSize divided by the block size, rounded up:
// the exact quotient when the remainder is 0, otherwise the quotient plus 1.
167 dim3 _grid(uint32_c(((indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ((int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize))) + 1)), uint32_c(1), uint32_c(1));
// Launch with 0 bytes of dynamic shared memory on `stream`.
// NOTE(review): no launch-error check follows the launch in the present lines;
// confirm whether the full source (or the caller) checks for launch failures.
168 internal_reactionkernelindexed_4_double_precision_cuda_boundary_ReactionKernelIndexed_4_double_precision_CUDA::reactionkernelindexed_4_double_precision_cuda_boundary_ReactionKernelIndexed_4_double_precision_CUDA<<<_grid, _block, 0,
stream>>>(_data_indexVector, _data_rho_0, _data_rho_1, _data_rho_2, _data_rho_3, _stride_rho_0_0, _stride_rho_0_1, _stride_rho_0_2, _stride_rho_1_0, _stride_rho_1_1, _stride_rho_1_2, _stride_rho_2_0, _stride_rho_2_1, _stride_rho_2_2, _stride_rho_3_0, _stride_rho_3_1, _stride_rho_3_2, indexVectorSize, order_0, order_1, order_2, order_3, rate_coefficient, stoech_0, stoech_1, stoech_2, stoech_3);
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
void run(IBlock *block, gpuStream_t stream=nullptr)
void inner(IBlock *block, gpuStream_t stream=nullptr)
void outer(IBlock *block, gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double order_3
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_3_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double double double stoech_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double order_2
static FUNC_PREFIX double *RESTRICT _data_rho_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT _data_rho_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double double double double double stoech_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_3_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double double double double stoech_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT _data_rho_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t indexVectorSize
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const _stride_rho_1_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const _stride_rho_0_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_3_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const _stride_rho_0_2
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double order_0
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const _stride_rho_0_1
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double double double double rate_coefficient
static FUNC_PREFIX __launch_bounds__(256) void reactionkernelindexed_4_double_precision_cuda_boundary_ReactionKernelIndexed_4_double_precision_CUDA(uint8_t *RESTRICT const _data_indexVector
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT _data_rho_3
static FUNC_PREFIX double *RESTRICT double *RESTRICT double *RESTRICT double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int32_t double double order_1
\file PackInfoPdfDoublePrecision.cpp \author pystencils