ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
Dirichlet_double_precision_CUDA.cu
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \\file Dirichlet_double_precision_CUDA.cu
17//! \\author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.3.7+13.gdfd203a, lbmpy v1.3.7+10.gd3f6236, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit c69cb11d6a95d32b2280544d3d9abde1fe5fdbb5
21
23#include "core/DataTypes.h"
24#include "core/Macros.h"
25#include "gpu/ErrorChecking.h"
26
27#define FUNC_PREFIX __global__
28
29using namespace std;
30
31namespace walberla {
32namespace pystencils {
33
34#if defined(__NVCC__)
35#define RESTRICT __restrict__
36#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
37#pragma nv_diagnostic push
38#pragma nv_diag_suppress 177 // unused variable
39#else
40#pragma push
41#pragma diag_suppress 177 // unused variable
42#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
43#elif defined(__clang__)
44#if defined(__CUDA__)
45#if defined(__CUDA_ARCH__)
46// clang compiling CUDA code in device mode
47#define RESTRICT __restrict__
48#pragma clang diagnostic push
49#pragma clang diagnostic ignored "-Wstrict-aliasing"
50#pragma clang diagnostic ignored "-Wunused-variable"
51#pragma clang diagnostic ignored "-Wconversion"
52#pragma clang diagnostic ignored "-Wsign-compare"
53#else
54// clang compiling CUDA code in host mode
55#define RESTRICT __restrict__
56#pragma clang diagnostic push
57#pragma clang diagnostic ignored "-Wstrict-aliasing"
58#pragma clang diagnostic ignored "-Wunused-variable"
59#pragma clang diagnostic ignored "-Wconversion"
60#pragma clang diagnostic ignored "-Wsign-compare"
61#endif // defined(__CUDA_ARCH__)
62#endif // defined(__CUDA__)
63#elif defined(__GNUC__) or defined(__GNUG__)
64#define RESTRICT __restrict__
65#pragma GCC diagnostic push
66#pragma GCC diagnostic ignored "-Wstrict-aliasing"
67#pragma GCC diagnostic ignored "-Wunused-variable"
68#pragma GCC diagnostic ignored "-Wconversion"
69#elif defined(_MSC_VER)
70#define RESTRICT __restrict
71#else
72#define RESTRICT
73#endif
74
75// NOLINTBEGIN(readability-non-const-parameter*)
namespace internal_dirichlet_double_precision_cuda_boundary_Dirichlet_double_precision_CUDA {
/**
 * Copies the value carried by each index-vector entry into the field cell
 * that the entry addresses.
 *
 * Launch layout: one-dimensional grid and block, one thread per entry, at
 * most 256 threads per block (see __launch_bounds__). Threads whose global
 * index is not below indexVectorSize do nothing.
 *
 * Each entry occupies 24 bytes of _data_indexVector. The kernel reads
 *   - an int32_t x at byte offset 0,
 *   - an int32_t y at byte offset 4,
 *   - an int32_t z at byte offset 8,
 *   - a double at byte offset 16,
 * and stores the double at
 *   _data_field[_stride_field_0 * x + _stride_field_1 * y + _stride_field_2 * z].
 *
 * \param _data_field       base pointer of the field that is written
 * \param _data_indexVector raw bytes of the index vector
 * \param _stride_field_0   element stride applied to x
 * \param _stride_field_1   element stride applied to y
 * \param _stride_field_2   element stride applied to z
 * \param indexVectorSize   number of entries in the index vector
 */
static FUNC_PREFIX __launch_bounds__(256) void dirichlet_double_precision_cuda_boundary_Dirichlet_double_precision_CUDA(double *RESTRICT _data_field, uint8_t *RESTRICT const _data_indexVector, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int32_t indexVectorSize) {
  // blockDim/blockIdx/threadIdx are 32-bit unsigned. Widen before multiplying
  // so that the byte offset (24 * index) cannot wrap for large index vectors.
  const int64_t entryIndex = int64_t(blockDim.x) * int64_t(blockIdx.x) + int64_t(threadIdx.x);

  if (entryIndex < int64_t(indexVectorSize)) {
    // Start of this thread's 24-byte record.
    const uint8_t *const record = _data_indexVector + int64_t(24) * entryIndex;

    const int32_t x = *((const int32_t *)(record));
    const int32_t y = *((const int32_t *)(record + 4));
    const int32_t z = *((const int32_t *)(record + 8));
    const double value = *((const double *)(record + 16));

    _data_field[_stride_field_0 * x + _stride_field_1 * y + _stride_field_2 * z] = value;
  }
}
} // namespace internal_dirichlet_double_precision_cuda_boundary_Dirichlet_double_precision_CUDA
97
98// NOLINTEND(readability-non-const-parameter*)
99
100#if defined(__NVCC__)
101#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
102#pragma nv_diagnostic pop
103#else
104#pragma pop
105#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
106#elif defined(__clang__)
107#if defined(__CUDA__)
108#if defined(__CUDA_ARCH__)
109// clang compiling CUDA code in device mode
110#pragma clang diagnostic pop
111#else
112// clang compiling CUDA code in host mode
113#pragma clang diagnostic pop
114#endif // defined(__CUDA_ARCH__)
115#endif // defined(__CUDA__)
116#elif defined(__GNUC__) or defined(__GNUG__)
117#pragma GCC diagnostic pop
118#endif
119
/**
 * Launches the Dirichlet boundary kernel on one block for the selected
 * index vector.
 *
 * Returns immediately when the selected index vector is empty. Otherwise the
 * kernel is enqueued on \p stream with one thread per index-vector entry and
 * at most 256 threads per thread block. The launch is asynchronous; only the
 * launch itself is checked for errors here.
 *
 * \param block  block that provides the index vectors and the GPU field
 * \param type   selects which index vector of the block is processed
 * \param stream GPU stream on which the kernel is enqueued
 */
void Dirichlet_double_precision_CUDA::run_impl(IBlock *block, IndexVectors::Type type, gpuStream_t stream) {
  auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
  const int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
  // An empty index vector would yield a zero-sized launch configuration.
  if (indexVectorSize == 0)
    return;

  uint8_t *_data_indexVector = reinterpret_cast<uint8_t *>(indexVectors->pointerGpu(type));

  auto field = block->getData<gpu::GPUField<double>>(fieldID);

  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(field->nrOfGhostLayers()))
  double *RESTRICT _data_field = field->dataAt(0, 0, 0, 0);
  const int64_t _stride_field_0 = int64_t(field->xStride());
  const int64_t _stride_field_1 = int64_t(field->yStride());
  const int64_t _stride_field_2 = int64_t(field->zStride());

  // Use a single partial block for small index vectors, otherwise blocks of
  // 256 threads (the kernel's __launch_bounds__ limit).
  const int32_t threadsPerBlock = (256 < indexVectorSize) ? 256 : indexVectorSize;
  // Ceil-division: enough blocks to give every entry its own thread.
  const int64_t numBlocks = (int64_t(indexVectorSize) + int64_t(threadsPerBlock) - 1) / int64_t(threadsPerBlock);

  dim3 _block(uint32_c(threadsPerBlock), uint32_c(1), uint32_c(1));
  dim3 _grid(uint32_c(numBlocks), uint32_c(1), uint32_c(1));
  internal_dirichlet_double_precision_cuda_boundary_Dirichlet_double_precision_CUDA::dirichlet_double_precision_cuda_boundary_Dirichlet_double_precision_CUDA<<<_grid, _block, 0, stream>>>(_data_field, _data_indexVector, _stride_field_0, _stride_field_1, _stride_field_2, indexVectorSize);
  // A kernel launch returns no status of its own; query it explicitly so a
  // bad launch configuration is reported instead of silently ignored.
  // WALBERLA_GPU_CHECK is expected from gpu/ErrorChecking.h, which this file
  // already includes.
  WALBERLA_GPU_CHECK(cudaGetLastError());
}
141
/**
 * Applies the boundary on \p block using the IndexVectors::ALL index vector.
 *
 * \param block  block to process
 * \param stream GPU stream handed on to run_impl
 */
void Dirichlet_double_precision_CUDA::run(IBlock *block, gpuStream_t stream) {
  this->run_impl(block, IndexVectors::ALL, stream);
}
145
147 run_impl(block, IndexVectors::INNER, stream);
148}
149
151 run_impl(block, IndexVectors::OUTER, stream);
152}
153
154} // namespace pystencils
155} // namespace walberla
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:176
STL namespace.
static FUNC_PREFIX __launch_bounds__(256) void dirichlet_double_precision_cuda_boundary_Dirichlet_double_precision_CUDA(double *RESTRICT _data_field
static FUNC_PREFIX uint8_t *RESTRICT const int64_t const int64_t const int64_t const _stride_field_2
\file PackInfoPdfDoublePrecision.cpp \author pystencils