Loading [MathJax]/extensions/TeX/AMSmath.js
ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages Concepts
PackInfoVecDoublePrecisionCUDA.cu
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \\file PackInfoVecDoublePrecisionCUDA.cpp
17//! \\author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.3.7, lbmpy v1.3.7, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit f36fa0a68bae59f0b516f6587ea8fa7c24a41141
21
22#include "core/DataTypes.h"
23#include "core/cell/CellInterval.h"
24
25#include "domain_decomposition/IBlock.h"
26
27#include "stencil/Directions.h"
28
29#include "core/DataTypes.h"
30#include "core/cell/CellInterval.h"
31#include "domain_decomposition/IBlock.h"
32#include "stencil/Directions.h"
33
35
36#define FUNC_PREFIX __global__
37
38#if defined(__NVCC__)
39#define RESTRICT __restrict__
40#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
41#pragma nv_diagnostic push
42#pragma nv_diag_suppress 177 // unused variable
43#else
44#pragma push
45#pragma diag_suppress 177 // unused variable
46#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
47#elif defined(__clang__)
48#if defined(__CUDA__)
49#if defined(__CUDA_ARCH__)
50// clang compiling CUDA code in device mode
51#define RESTRICT __restrict__
52#pragma clang diagnostic push
53#pragma clang diagnostic ignored "-Wunused-variable"
54#else
55// clang compiling CUDA code in host mode
56#define RESTRICT __restrict__
57#pragma clang diagnostic push
58#pragma clang diagnostic ignored "-Wunused-variable"
59#endif // defined(__CUDA_ARCH__)
60#endif // defined(__CUDA__)
61#elif defined(__GNUC__) or defined(__GNUG__)
62#define RESTRICT __restrict__
63#pragma GCC diagnostic push
64#pragma GCC diagnostic ignored "-Wunused-variable"
65#elif defined(_MSC_VER)
66#define RESTRICT __restrict
67#else
68#define RESTRICT
69#endif
70
71namespace walberla {
72namespace pystencils {
73
74using walberla::cell::CellInterval;
75using walberla::stencil::Direction;
76
77namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE {
78static FUNC_PREFIX __launch_bounds__(256) void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) {
79 if (blockDim.x * blockIdx.x + threadIdx.x < _size_field_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_field_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_field_2) {
80 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
81 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
82 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
83 _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2];
84 _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3];
85 _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3];
86 }
87}
88} // namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE
89
90namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE {
91static FUNC_PREFIX __launch_bounds__(256) void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT const _data_buffer, double *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) {
92 if (blockDim.x * blockIdx.x + threadIdx.x < _size_field_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_field_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_field_2) {
93 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
94 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
95 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
96 _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0];
97 _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1];
98 _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2];
99 }
100}
101} // namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE
102
103void PackInfoVecDoublePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
104 double *buffer = reinterpret_cast<double *>(byte_buffer);
105
106 auto field = block->getData<gpu::GPUField<double>>(fieldID);
107
108 CellInterval ci;
109 field->getSliceBeforeGhostLayer(dir, ci, 1, false);
110
111 switch (dir) {
112 case stencil::SW:
113 case stencil::BW:
114 case stencil::W:
115 case stencil::TW:
116 case stencil::NW:
117 case stencil::BS:
118 case stencil::S:
119 case stencil::TS:
120 case stencil::B:
121 case stencil::C:
122 case stencil::T:
123 case stencil::BN:
124 case stencil::N:
125 case stencil::TN:
126 case stencil::SE:
127 case stencil::BE:
128 case stencil::E:
129 case stencil::TE:
130 case stencil::NE: {
131 double *RESTRICT _data_buffer = buffer;
132 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers()))
133 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers()))
134 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers()))
135 double *RESTRICT const _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
136 WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
137 const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0);
138 WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
139 const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0);
140 WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
141 const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0);
142 const int64_t _stride_field_0 = int64_t(field->xStride());
143 const int64_t _stride_field_1 = int64_t(field->yStride());
144 const int64_t _stride_field_2 = int64_t(field->zStride());
145 const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride()));
146 dim3 _block(uint32_c(((128 < _size_field_0) ? 128 : _size_field_0)), uint32_c(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))), uint32_c(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))));
147 dim3 _grid(uint32_c(((_size_field_0) % (((128 < _size_field_0) ? 128 : _size_field_0)) == 0 ? (int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)) : ((int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))) + 1)), uint32_c(((_size_field_1) % (((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) == 0 ? (int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) : ((int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) + 1)), uint32_c(((_size_field_2) % (((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) == 0 ? (int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) : ((int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))) + 1)));
148 internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3);
149 break;
150 }
151
152 default:
153 return;
154 }
155}
156
157void PackInfoVecDoublePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
158 double *buffer = reinterpret_cast<double *>(byte_buffer);
159
160 auto field = block->getData<gpu::GPUField<double>>(fieldID);
161
162 CellInterval ci;
163 field->getGhostRegion(dir, ci, 1, false);
164 auto communciationDirection = stencil::inverseDir[dir];
165
166 switch (communciationDirection) {
167 case stencil::SW:
168 case stencil::BW:
169 case stencil::W:
170 case stencil::TW:
171 case stencil::NW:
172 case stencil::BS:
173 case stencil::S:
174 case stencil::TS:
175 case stencil::B:
176 case stencil::C:
177 case stencil::T:
178 case stencil::BN:
179 case stencil::N:
180 case stencil::TN:
181 case stencil::SE:
182 case stencil::BE:
183 case stencil::E:
184 case stencil::TE:
185 case stencil::NE: {
186 double *RESTRICT const _data_buffer = buffer;
187 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers()))
188 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers()))
189 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers()))
190 double *RESTRICT _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
191 WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
192 const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0);
193 WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
194 const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0);
195 WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
196 const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0);
197 const int64_t _stride_field_0 = int64_t(field->xStride());
198 const int64_t _stride_field_1 = int64_t(field->yStride());
199 const int64_t _stride_field_2 = int64_t(field->zStride());
200 const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride()));
201 dim3 _block(uint32_c(((128 < _size_field_0) ? 128 : _size_field_0)), uint32_c(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))), uint32_c(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))));
202 dim3 _grid(uint32_c(((_size_field_0) % (((128 < _size_field_0) ? 128 : _size_field_0)) == 0 ? (int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)) : ((int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))) + 1)), uint32_c(((_size_field_1) % (((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) == 0 ? (int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) : ((int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) + 1)), uint32_c(((_size_field_2) % (((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) == 0 ? (int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) : ((int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))) + 1)));
203 internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3);
204 break;
205 }
206
207 default:
208 return;
209 }
210}
211
212uint_t PackInfoVecDoublePrecisionCUDA::size(stencil::Direction dir, IBlock *block) {
213 auto field = block->getData<gpu::GPUField<double>>(fieldID);
214
215 CellInterval ci;
216 field->getGhostRegion(dir, ci, 1, false);
217
218 uint_t elementsPerCell = 0;
219
220 switch (dir) {
221 case stencil::SW:
222 case stencil::BW:
223 case stencil::W:
224 case stencil::TW:
225 case stencil::NW:
226 case stencil::BS:
227 case stencil::S:
228 case stencil::TS:
229 case stencil::B:
230 case stencil::C:
231 case stencil::T:
232 case stencil::BN:
233 case stencil::N:
234 case stencil::TN:
235 case stencil::SE:
236 case stencil::BE:
237 case stencil::E:
238 case stencil::TE:
239 case stencil::NE:
240 elementsPerCell = 3;
241 break;
242
243 default:
244 elementsPerCell = 0;
245 }
246 return ci.numCells() * elementsPerCell * sizeof(double);
247}
248
249} // namespace pystencils
250} // namespace walberla
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
uint_t size(stencil::Direction dir, IBlock *block) override
void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block, gpuStream_t stream) override
void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block, gpuStream_t stream) override
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:172
static FUNC_PREFIX double *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_field_1
static FUNC_PREFIX double *RESTRICT const int64_t const int64_t const int64_t const _size_field_2
static FUNC_PREFIX __launch_bounds__(256) void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT _data_buffer
static FUNC_PREFIX double *RESTRICT const int64_t const int64_t const _size_field_1
static FUNC_PREFIX double *RESTRICT const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_field_2
static FUNC_PREFIX double *RESTRICT const int64_t const int64_t const int64_t const int64_t const _stride_field_0
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_field_2
static FUNC_PREFIX __launch_bounds__(256) void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT const _data_buffer
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const _stride_field_0
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const _stride_field_1
static FUNC_PREFIX double *RESTRICT int64_t const int64_t const int64_t const _size_field_2
\file PackInfoPdfDoublePrecision.cpp \author pystencils