23#include "core/DataTypes.h"
24#include "core/Macros.h"
25#include "gpu/ErrorChecking.h"
27#define FUNC_PREFIX __global__
35#define RESTRICT __restrict__
36#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
37#pragma nv_diagnostic push
38#pragma nv_diag_suppress 177
41#pragma diag_suppress 177
43#elif defined(__clang__)
45#if defined(__CUDA_ARCH__)
47#define RESTRICT __restrict__
48#pragma clang diagnostic push
49#pragma clang diagnostic ignored "-Wstrict-aliasing"
50#pragma clang diagnostic ignored "-Wunused-variable"
51#pragma clang diagnostic ignored "-Wconversion"
52#pragma clang diagnostic ignored "-Wsign-compare"
55#define RESTRICT __restrict__
56#pragma clang diagnostic push
57#pragma clang diagnostic ignored "-Wstrict-aliasing"
58#pragma clang diagnostic ignored "-Wunused-variable"
59#pragma clang diagnostic ignored "-Wconversion"
60#pragma clang diagnostic ignored "-Wsign-compare"
63#elif defined(__GNUC__) or defined(__GNUG__)
64#define RESTRICT __restrict__
65#pragma GCC diagnostic push
66#pragma GCC diagnostic ignored "-Wstrict-aliasing"
67#pragma GCC diagnostic ignored "-Wunused-variable"
68#pragma GCC diagnostic ignored "-Wconversion"
69#elif defined(_MSC_VER)
70#define RESTRICT __restrict
76namespace internal_dirichlet_double_precision_cuda_boundary_Dirichlet_double_precision_CUDA {
96#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
97#pragma nv_diagnostic pop
101#elif defined(__clang__)
103#if defined(__CUDA_ARCH__)
105#pragma clang diagnostic pop
108#pragma clang diagnostic pop
111#elif defined(__GNUC__) or defined(__GNUG__)
112#pragma GCC diagnostic pop
115void Dirichlet_double_precision_CUDA::run_impl(IBlock *
block, IndexVectors::Type type,
gpuStream_t stream) {
118 if (indexVectorSize == 0)
123 uint8_t *_data_indexVector =
reinterpret_cast<uint8_t *
>(pointer);
125 auto field =
block->getData<gpu::GPUField<double>>(
fieldID);
133 dim3 _grid(
uint32_c(((indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (
int64_t)(indexVectorSize) / (
int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ((
int64_t)(indexVectorSize) / (
int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize))) + 1)),
uint32_c(1),
uint32_c(1));
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
void inner(IBlock *block, gpuStream_t stream=nullptr)
void outer(IBlock *block, gpuStream_t stream=nullptr)
void run(IBlock *block, gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
static FUNC_PREFIX __launch_bounds__(256) void dirichlet_double_precision_cuda_boundary_Dirichlet_double_precision_CUDA(double *RESTRICT _data_field
static FUNC_PREFIX uint8_t *RESTRICT const int64_t const int64_t const _stride_field_1
static FUNC_PREFIX uint8_t *RESTRICT const _data_indexVector
static FUNC_PREFIX uint8_t *RESTRICT const int64_t const _stride_field_0
static FUNC_PREFIX uint8_t *RESTRICT const int64_t const int64_t const int64_t const _stride_field_2
\file PackInfoPdfDoublePrecision.cpp \author pystencils