ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
CollideSweepSinglePrecisionThermalizedCUDA.h
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file CollideSweepSinglePrecisionThermalizedCUDA.h
//! \author pystencils
//======================================================================================================================

// kernel generated with pystencils v1.3.7, lbmpy v1.3.7, sympy v1.12.1,
// lbmpy_walberla/pystencils_walberla from waLBerla commit
// f36fa0a68bae59f0b516f6587ea8fa7c24a41141

#pragma once
#include "core/DataTypes.h"
#include "core/logging/Logging.h"

#include "gpu/GPUField.h"
#include "gpu/GPUWrapper.h"

#include "domain_decomposition/BlockDataID.h"
#include "domain_decomposition/IBlock.h"
#include "domain_decomposition/StructuredBlockStorage.h"
#include "field/SwapableCompare.h"

#include <functional>
#include <unordered_map>

#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif

#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wreorder"
#endif

namespace walberla {
namespace pystencils {
class CollideSweepSinglePrecisionThermalizedCUDA {
public:
  CollideSweepSinglePrecisionThermalizedCUDA(BlockDataID forceID_,
                                             BlockDataID pdfsID_, float kT,
                                             float omega_bulk, float omega_even,
                                             float omega_odd, float omega_shear,
                                             uint32_t seed, uint32_t time_step)
      : forceID(forceID_), pdfsID(pdfsID_), kT_(kT), omega_bulk_(omega_bulk),
        omega_even_(omega_even), omega_odd_(omega_odd),
        omega_shear_(omega_shear), seed_(seed), time_step_(time_step),
        block_offset_0_(uint32_t(0)), block_offset_1_(uint32_t(0)),
        block_offset_2_(uint32_t(0)), configured_(false) {}

  void run(IBlock *block, gpuStream_t stream = nullptr);

  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
                         const CellInterval &globalCellInterval,
                         cell_idx_t ghostLayers, IBlock *block,
                         gpuStream_t stream = nullptr);

  void operator()(IBlock *block, gpuStream_t stream = nullptr) {
    run(block, stream);
  }
  static std::function<void(IBlock *)> getSweep(
      const shared_ptr<CollideSweepSinglePrecisionThermalizedCUDA> &kernel) {
    return [kernel](IBlock *b) { kernel->run(b); };
  }

  static std::function<void(IBlock *, gpuStream_t)> getSweepOnCellInterval(
      const shared_ptr<CollideSweepSinglePrecisionThermalizedCUDA> &kernel,
      const shared_ptr<StructuredBlockStorage> &blocks,
      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {
    return [kernel, blocks, globalCellInterval,
            ghostLayers](IBlock *b, gpuStream_t stream = nullptr) {
      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b,
                                stream);
    };
  }

  std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->run(b, stream); };
  }

  std::function<void(IBlock *)>
  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,
                         const CellInterval &globalCellInterval,
                         cell_idx_t ghostLayers = 1,
                         gpuStream_t stream = nullptr) {
    return [this, blocks, globalCellInterval, ghostLayers, stream](IBlock *b) {
      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b,
                              stream);
    };
  }

  void configure(const shared_ptr<StructuredBlockStorage> &blocks,
                 IBlock *block) {
    Cell BlockCellBB = blocks->getBlockCellBB(*block).min();
    block_offset_0_ = uint32_t(BlockCellBB[0]);
    block_offset_1_ = uint32_t(BlockCellBB[1]);
    block_offset_2_ = uint32_t(BlockCellBB[2]);
    configured_ = true;
  }

  inline uint32_t getBlock_offset_0() const { return block_offset_0_; }
  inline uint32_t getBlock_offset_1() const { return block_offset_1_; }
  inline uint32_t getBlock_offset_2() const { return block_offset_2_; }
  inline float getKt() const { return kT_; }
  inline float getOmega_bulk() const { return omega_bulk_; }
  inline float getOmega_even() const { return omega_even_; }
  inline float getOmega_odd() const { return omega_odd_; }
  inline float getOmega_shear() const { return omega_shear_; }
  inline uint32_t getSeed() const { return seed_; }
  inline uint32_t getTime_step() const { return time_step_; }
  inline void setBlock_offset_0(const uint32_t value) {
    block_offset_0_ = value;
  }
  inline void setBlock_offset_1(const uint32_t value) {
    block_offset_1_ = value;
  }
  inline void setBlock_offset_2(const uint32_t value) {
    block_offset_2_ = value;
  }
  inline void setKt(const float value) { kT_ = value; }
  inline void setOmega_bulk(const float value) { omega_bulk_ = value; }
  inline void setOmega_even(const float value) { omega_even_ = value; }
  inline void setOmega_odd(const float value) { omega_odd_ = value; }
  inline void setOmega_shear(const float value) { omega_shear_ = value; }
  inline void setSeed(const uint32_t value) { seed_ = value; }
  inline void setTime_step(const uint32_t value) { time_step_ = value; }

private:
  BlockDataID forceID;
  BlockDataID pdfsID;
  uint32_t block_offset_0_;
  uint32_t block_offset_1_;
  uint32_t block_offset_2_;
  float kT_;
  float omega_bulk_;
  float omega_even_;
  float omega_odd_;
  float omega_shear_;
  uint32_t seed_;
  uint32_t time_step_;

  bool configured_;
};

} // namespace pystencils
} // namespace walberla

#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \
    (defined WALBERLA_CXX_COMPILER_IS_CLANG)
#pragma GCC diagnostic pop
#endif
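For orientation, here is a minimal usage sketch of this generated sweep. It assumes a waLBerla setup that already provides a StructuredBlockForest `blocks`, the BlockDataIDs `forceFieldID` and `pdfFieldID` of the GPU force and PDF fields, and a SweepTimeloop `timeloop`; those names are illustrative placeholders, not part of this header, and the registration line follows the common `timeloop.add() << Sweep(...)` pattern from waLBerla's timeloop module. The sweep is constructed from the two field IDs, the thermal energy kT, the relaxation rates, and an RNG seed; configure() is called once per block to record that block's global cell offsets; the resulting functor is then registered with the timeloop.

// Sketch only: `blocks`, `forceFieldID`, `pdfFieldID` and `timeloop` are
// assumed to exist elsewhere; the parameter values are arbitrary placeholders.
auto collide = std::make_shared<
    walberla::pystencils::CollideSweepSinglePrecisionThermalizedCUDA>(
    forceFieldID, pdfFieldID,
    /* kT          */ 0.001f,
    /* omega_bulk  */ 1.0f,
    /* omega_even  */ 1.0f,
    /* omega_odd   */ 1.0f,
    /* omega_shear */ 1.0f,
    /* seed        */ 42u,
    /* time_step   */ 0u);

// configure() stores each block's global cell offset (see configure() above).
for (auto &block : *blocks)
  collide->configure(blocks, &block);

// Register as a per-block sweep; getSweep() wraps run() in a std::function.
timeloop.add() << Sweep(
    walberla::pystencils::CollideSweepSinglePrecisionThermalizedCUDA::getSweep(collide),
    "collide (thermalized, single precision, CUDA)");

Since the collision is thermalized, the time_step handed to the kernel would normally be advanced every iteration, presumably so that the generated random numbers differ between steps; the setTime_step() setter above can be used for this, but how it is driven is outside the scope of this header.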