ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
StreamCollideSweepThermalizedSinglePrecisionCUDA.cu
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \file StreamCollideSweepThermalizedSinglePrecisionCUDA.cu
17//! \author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.3.7+13.gdfd203a, lbmpy v1.3.7+10.gd3f6236, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit c69cb11d6a95d32b2280544d3d9abde1fe5fdbb5
21
22#include <cmath>
23
25#include "core/DataTypes.h"
26#include "core/Macros.h"
27
28#include "philox_rand.h"
29
30#define FUNC_PREFIX __global__
31
32#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
33#pragma GCC diagnostic push
34#pragma GCC diagnostic ignored "-Wfloat-equal"
35#pragma GCC diagnostic ignored "-Wshadow"
36#pragma GCC diagnostic ignored "-Wconversion"
37#pragma GCC diagnostic ignored "-Wunused-variable"
38#endif
39
40#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
41#pragma warning push
42#pragma warning(disable : 1599)
43#endif
44
45using namespace std;
46
47namespace walberla {
48namespace pystencils {
49
50namespace internal_streamcollidesweepthermalizedsingleprecisioncuda_streamcollidesweepthermalizedsingleprecisioncuda {
51static FUNC_PREFIX __launch_bounds__(256) void streamcollidesweepthermalizedsingleprecisioncuda_streamcollidesweepthermalizedsingleprecisioncuda(float *RESTRICT const _data_force, float *RESTRICT const _data_pdfs, float *RESTRICT _data_pdfs_tmp, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3, uint32_t block_offset_0, uint32_t block_offset_1, uint32_t block_offset_2, float kT, float omega_bulk, float omega_even, float omega_odd, float omega_shear, uint32_t seed, uint32_t time_step) {
52 if (blockDim.x * blockIdx.x + threadIdx.x + 1 < _size_force_0 - 1 && blockDim.y * blockIdx.y + threadIdx.y + 1 < _size_force_1 - 1 && blockDim.z * blockIdx.z + threadIdx.z + 1 < _size_force_2 - 1) {
53 const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x + 1;
54 const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y + 1;
55 const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z + 1;
56
57 float random_3_0{};
58 float random_3_1{};
59 float random_3_2{};
60 float random_3_3{};
61 if (kT > 0.) {
62 philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 3, seed, random_3_0, random_3_1, random_3_2, random_3_3);
63 }
64
65 float random_2_0{};
66 float random_2_1{};
67 float random_2_2{};
68 float random_2_3{};
69 if (kT > 0.) {
70 philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 2, seed, random_2_0, random_2_1, random_2_2, random_2_3);
71 }
72
73 float random_1_0{};
74 float random_1_1{};
75 float random_1_2{};
76 float random_1_3{};
77 if (kT > 0.) {
78 philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 1, seed, random_1_0, random_1_1, random_1_2, random_1_3);
79 }
80
81 float random_0_0{};
82 float random_0_1{};
83 float random_0_2{};
84 float random_0_3{};
85 if (kT > 0.) {
86 philox_float4(time_step, block_offset_0 + ctr_0, block_offset_1 + ctr_1, block_offset_2 + ctr_2, 0, seed, random_0_0, random_0_1, random_0_2, random_0_3);
87 }
89 const float xi_3 = xi_2 + _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 14 * _stride_pdfs_3];
94 const float xi_8 = xi_7 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3];
99 const float xi_14 = -_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 11 * _stride_pdfs_3];
100 const float xi_15 = -_data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 14 * _stride_pdfs_3];
102 const float xi_20 = omega_bulk * 0.5f;
103 const float xi_21 = 0.16666666666666666f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3];
104 const float xi_22 = 0.083333333333333329f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3];
105 const float xi_33 = 0.16666666666666666f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
106 const float xi_34 = 0.083333333333333329f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
107 const float xi_39 = 0.16666666666666666f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
108 const float xi_40 = 0.083333333333333329f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
109 const float xi_47 = omega_shear * 0.041666666666666664f;
110 const float xi_51 = omega_bulk * 0.041666666666666664f;
111 const float xi_58 = 0.25f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3];
112 const float xi_62 = omega_shear * 0.125f;
113 const float xi_63 = xi_62 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3];
114 const float xi_97 = 3.7416573867739413f;
115 const float xi_98 = random_3_0 - 0.5f;
116 const float xi_100 = 5.4772255750516612f;
117 const float xi_101 = random_3_2 - 0.5f;
118 const float xi_103 = random_1_1 - 0.5f;
119 const float xi_104 = 2.4494897427831779f;
120 const float xi_107 = 8.3666002653407556f;
121 const float xi_108 = random_3_1 - 0.5f;
122 const float xi_112 = -_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2];
123 const float xi_118 = xi_11 + xi_3;
125 const float xi_124 = 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3];
126 const float xi_125 = 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 14 * _stride_pdfs_3];
127 const float xi_126 = 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3] + 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3];
128 const float xi_129 = -_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3];
129 const float xi_133 = random_0_1 - 0.5f;
130 const float xi_137 = -_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3];
131 const float xi_138 = -_data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3];
132 const float xi_139 = -_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 5 * _stride_pdfs_3];
133 const float xi_140 = -_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3];
134 const float xi_145 = -_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 15 * _stride_pdfs_3];
135 const float xi_146 = xi_14 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 12 * _stride_pdfs_3];
136 const float xi_147 = -_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3];
137 const float xi_148 = xi_147 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3];
138 const float xi_149 = xi_145 + xi_146 + xi_148 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 16 * _stride_pdfs_3];
139 const float xi_150 = omega_odd * 0.25f;
140 const float xi_151 = random_2_3 - 0.5f;
141 const float xi_155 = -_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 16 * _stride_pdfs_3];
142 const float xi_156 = -_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 12 * _stride_pdfs_3];
143 const float xi_157 = 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3];
144 const float xi_158 = -2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] + 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3];
145 const float xi_159 = xi_148 + xi_155 + xi_156 - xi_157 + xi_158 + xi_5 + 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3];
146 const float xi_160 = omega_odd * 0.083333333333333329f;
147 const float xi_161 = xi_159 * xi_160;
148 const float xi_162 = random_1_2 - 0.5f;
149 const float xi_173 = 1.7320508075688772f;
150 const float xi_174 = random_0_0 - 0.5f;
151 const float xi_185 = xi_15 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3];
152 const float xi_186 = xi_138 + xi_185;
153 const float xi_187 = xi_186 - _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3];
154 const float xi_188 = random_2_1 - 0.5f;
155 const float xi_189 = -_data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3];
156 const float xi_190 = -xi_157 - xi_158 - xi_186 - xi_189 - xi_7 + 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3];
157 const float xi_191 = xi_160 * xi_190;
158 const float xi_192 = random_2_0 - 0.5f;
159 const float xi_198 = xi_145 + xi_155 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 11 * _stride_pdfs_3];
160 const float xi_199 = -xi_139 - xi_198 - _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 12 * _stride_pdfs_3] - _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3];
161 const float xi_200 = random_2_2 - 0.5f;
162 const float xi_201 = -xi_124 - xi_125 + xi_126 + xi_140 + xi_198 + xi_6;
163 const float xi_202 = xi_160 * xi_201;
164 const float xi_203 = random_1_3 - 0.5f;
165 const float xi_217 = omega_shear * 0.25f;
166 const float xi_223 = omega_odd * 0.041666666666666664f;
167 const float xi_224 = xi_159 * xi_223;
168 const float xi_226 = omega_odd * 0.125f;
169 const float xi_227 = xi_149 * xi_226;
170 const float xi_233 = xi_201 * xi_223;
171 const float xi_234 = xi_199 * xi_226;
172 const float xi_254 = xi_187 * xi_226;
173 const float xi_255 = xi_190 * xi_223;
174 const float rr_0 = 0.0f;
175 const float xi_23 = rr_0 * xi_22;
176 const float xi_35 = rr_0 * xi_34;
177 const float xi_41 = rr_0 * xi_40;
178 const float xi_45 = rr_0 * 0.041666666666666664f;
179 const float xi_46 = xi_45 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
180 const float xi_50 = xi_45 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3];
181 const float xi_72 = xi_45 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
182 const float vel0Term = xi_3 + _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3];
183 const float vel1Term = xi_4 + xi_5;
184 const float vel2Term = xi_6 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3];
185 const float delta_rho = vel0Term + vel1Term + vel2Term + xi_8 + xi_9 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2];
186 const float rho = delta_rho + 1.0f;
187 const float xi_95 = kT * rho;
188 const float xi_96 = powf(xi_95 * (1.0f - ((-omega_even + 1.0f) * (-omega_even + 1.0f))), 0.5f);
189 const float xi_99 = xi_96 * xi_97 * xi_98;
190 const float xi_102 = xi_100 * xi_101 * xi_96;
191 const float xi_105 = powf(xi_95 * (1.0f - ((-omega_bulk + 1.0f) * (-omega_bulk + 1.0f))), 0.5f);
192 const float xi_106 = xi_103 * xi_104 * xi_105;
193 const float xi_109 = xi_107 * xi_108 * xi_96;
194 const float xi_131 = xi_99 * 0.11904761904761904f;
195 const float xi_134 = powf(xi_95 * (1.0f - ((-omega_shear + 1.0f) * (-omega_shear + 1.0f))), 0.5f);
196 const float xi_135 = xi_134 * 0.5f;
197 const float xi_136 = xi_133 * xi_135;
198 const float xi_152 = powf(xi_95 * (1.0f - ((-omega_odd + 1.0f) * (-omega_odd + 1.0f))), 0.5f);
199 const float xi_153 = xi_152 * 1.4142135623730951f;
200 const float xi_154 = xi_153 * 0.5f;
201 const float xi_163 = xi_104 * xi_152;
202 const float xi_164 = xi_163 * 0.16666666666666666f;
203 const float xi_165 = xi_162 * xi_164;
204 const float xi_166 = xi_161 + xi_165;
205 const float xi_167 = xi_149 * xi_150 + xi_151 * xi_154 + xi_166;
206 const float xi_169 = xi_102 * 0.10000000000000001f;
207 const float xi_175 = xi_134 * xi_173 * xi_174;
208 const float xi_176 = xi_175 * 0.16666666666666666f;
209 const float xi_184 = xi_109 * 0.071428571428571425f;
210 const float xi_193 = xi_164 * xi_192;
211 const float xi_194 = xi_191 + xi_193;
212 const float xi_195 = xi_150 * xi_187 + xi_154 * xi_188 + xi_194;
213 const float xi_197 = xi_109 * 0.042857142857142858f;
214 const float xi_204 = xi_164 * xi_203;
215 const float xi_205 = xi_202 + xi_204;
216 const float xi_206 = xi_150 * xi_199 + xi_154 * xi_200 + xi_205;
217 const float xi_207 = xi_133 * xi_134 * 0.25f;
218 const float xi_210 = xi_99 * 0.083333333333333329f;
219 const float xi_214 = -xi_191 - xi_193;
220 const float xi_215 = xi_135 * (random_0_2 - 0.5f);
221 const float xi_222 = xi_135 * (random_1_0 - 0.5f);
222 const float xi_228 = xi_163 * 0.083333333333333329f;
223 const float xi_229 = xi_162 * xi_228;
224 const float xi_230 = xi_153 * 0.25f;
225 const float xi_231 = xi_151 * xi_230;
226 const float xi_235 = xi_203 * xi_228;
227 const float xi_236 = xi_200 * xi_230;
228 const float xi_237 = -xi_233 + xi_234 - xi_235 + xi_236;
229 const float xi_239 = xi_109 * 0.014285714285714285f;
230 const float xi_241 = xi_99 * 0.023809523809523808f;
231 const float xi_244 = xi_233 - xi_234 + xi_235 - xi_236;
232 const float xi_246 = -xi_207;
233 const float xi_249 = xi_109 * 0.035714285714285712f;
234 const float xi_251 = xi_135 * (random_0_3 - 0.5f);
235 const float xi_256 = xi_188 * xi_230;
236 const float xi_257 = xi_192 * xi_228;
237 const float xi_258 = -xi_254 + xi_255 - xi_256 + xi_257;
238 const float xi_260 = xi_254 - xi_255 + xi_256 - xi_257;
239 const float xi_0 = ((1.0f) / (rho));
240 const float xi_10 = xi_0 * 0.5f;
241 const float u_0 = xi_0 * (vel0Term - xi_11 - xi_8) + xi_10 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
242 const float xi_17 = u_0 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
243 const float xi_28 = xi_17 * 0.16666666666666666f;
244 const float xi_29 = -xi_28;
245 const float xi_30 = xi_17 * 0.083333333333333329f;
246 const float xi_31 = omega_shear * xi_30 + xi_29;
247 const float xi_48 = xi_17 * xi_47 + xi_29;
248 const float xi_49 = xi_34 - xi_46 + xi_48;
249 const float xi_52 = xi_17 * xi_51;
250 const float xi_59 = u_0 * xi_58;
251 const float xi_64 = u_0 * xi_63;
252 const float xi_68 = -xi_34 + xi_46 + xi_48;
253 const float xi_75 = omega_shear * u_0 * -0.083333333333333329f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
254 const float xi_85 = u_0 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
255 const float xi_86 = xi_85 * 0.25f;
256 const float xi_89 = xi_62 * xi_85;
257 const float xi_111 = u_0 * u_0;
258 const float u_1 = xi_0 * (vel1Term - xi_12 - xi_13 - xi_9) + xi_10 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3];
259 const float xi_18 = u_1 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3];
260 const float xi_26 = xi_18 * 0.16666666666666666f;
261 const float xi_36 = omega_shear * u_1 * -0.083333333333333329f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + _stride_force_3];
262 const float xi_42 = -xi_26;
263 const float xi_43 = xi_18 * 0.083333333333333329f;
264 const float xi_53 = xi_18 * xi_51;
265 const float xi_60 = u_1 * 0.25f;
266 const float xi_61 = xi_60 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
267 const float xi_65 = u_1 * xi_62;
268 const float xi_66 = xi_65 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
269 const float xi_67 = xi_59 + xi_61 - xi_64 - xi_66;
270 const float xi_69 = -xi_59 - xi_61 + xi_64 + xi_66;
271 const float xi_77 = xi_60 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
272 const float xi_79 = xi_65 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
273 const float xi_110 = rho * (u_1 * u_1);
274 const float xi_117 = -xi_110;
275 const float xi_216 = rho * u_1;
276 const float xi_218 = xi_217 * (u_0 * xi_216 + xi_12 - _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]);
277 const float xi_219 = -xi_215 - xi_218;
278 const float xi_220 = xi_215 + xi_218;
279 const float u_2 = xi_0 * (vel2Term - xi_14 - xi_15 - xi_16 - _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3] - _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 15 * _stride_pdfs_3] - _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3]) + xi_10 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
280 const float xi_19 = u_2 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
281 const float xi_24 = xi_19 * 0.16666666666666666f;
282 const float xi_25 = -xi_24;
283 const float xi_27 = xi_19 * 0.083333333333333329f;
284 const float xi_32 = -omega_shear * xi_26 + omega_shear * xi_27 + xi_18 * 0.33333333333333331f + xi_25 + xi_31;
285 const float xi_37 = omega_shear * u_2 * -0.083333333333333329f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2 + 2 * _stride_force_3];
286 const float xi_38 = omega_shear * xi_28 + u_0 * -0.33333333333333331f * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2] + xi_24 + xi_26 + xi_36 + xi_37;
287 const float xi_44 = -omega_shear * xi_24 + omega_shear * xi_43 + xi_19 * 0.33333333333333331f + xi_31 + xi_42;
288 const float xi_54 = xi_19 * xi_51;
289 const float xi_55 = xi_18 * xi_47 + xi_42 + xi_52 + xi_53 + xi_54;
290 const float xi_56 = -xi_22 + xi_50 + xi_55;
291 const float xi_57 = xi_27 + xi_37 + xi_56;
292 const float xi_70 = xi_22 - xi_50 + xi_55;
293 const float xi_71 = xi_27 + xi_37 + xi_70;
294 const float xi_73 = xi_19 * xi_47 + xi_25;
295 const float xi_74 = -xi_40 + xi_72 + xi_73;
296 const float xi_76 = xi_30 + xi_56 + xi_75;
297 const float xi_78 = u_2 * xi_58;
298 const float xi_80 = u_2 * xi_63;
299 const float xi_81 = -xi_77 - xi_78 + xi_79 + xi_80;
300 const float xi_82 = xi_30 + xi_70 + xi_75;
301 const float xi_83 = xi_77 + xi_78 - xi_79 - xi_80;
302 const float xi_84 = xi_36 + xi_43 + xi_52 + xi_53 + xi_54 + xi_74;
303 const float xi_87 = u_2 * _data_force[_stride_force_0 * ctr_0 + _stride_force_1 * ctr_1 + _stride_force_2 * ctr_2];
304 const float xi_88 = xi_87 * 0.25f;
305 const float xi_90 = xi_62 * xi_87;
306 const float xi_91 = xi_86 + xi_88 - xi_89 - xi_90;
307 const float xi_92 = -xi_86 - xi_88 + xi_89 + xi_90;
308 const float xi_93 = xi_40 - xi_72 + xi_73;
309 const float xi_94 = xi_36 + xi_43 + xi_52 + xi_53 + xi_54 + xi_93;
310 const float xi_113 = rho * (u_2 * u_2);
311 const float xi_114 = xi_112 + xi_113 * 0.66666666666666663f + 3.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3] + 3.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 5 * _stride_pdfs_3];
312 const float xi_115 = rho * xi_111 * 1.6666666666666667f + xi_110 * 0.66666666666666663f + xi_114 - 3.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 16 * _stride_pdfs_3] - 3.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 12 * _stride_pdfs_3] - 3.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 15 * _stride_pdfs_3] - 3.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 11 * _stride_pdfs_3] + 3.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] + 3.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3];
313 const float xi_116 = omega_even * xi_115;
314 const float xi_119 = rho * xi_111 - xi_112 + xi_113 - xi_117 - xi_118 - xi_13 - xi_16 - xi_5;
315 const float xi_120 = omega_bulk * xi_119;
316 const float xi_122 = xi_110 * 2.3333333333333335f + xi_114 + xi_121 - 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] - 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3] - 5.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3] - 5.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3] - 5.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3] - 5.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 14 * _stride_pdfs_3];
317 const float xi_123 = omega_even * xi_122;
318 const float xi_127 = xi_112 + xi_113 * 3.0f + xi_121 + xi_124 + xi_125 + xi_126 - 4.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3] - 4.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 5 * _stride_pdfs_3] - 7.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] - 7.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] - 7.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] - 7.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] + 5.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] + 5.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3];
319 const float xi_128 = omega_even * xi_127;
320 const float xi_130 = xi_128 * 0.01984126984126984f;
321 const float xi_132 = xi_130 + xi_131;
322 const float xi_141 = xi_113 + xi_139 + xi_140 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3];
323 const float xi_142 = omega_shear * (-xi_117 - xi_137 - xi_138 - xi_141 - xi_15 - xi_2 - xi_4 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3] - _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3]);
324 const float xi_143 = xi_142 * 0.125f;
325 const float xi_144 = -xi_136 - xi_143;
326 const float xi_168 = xi_116 * 0.050000000000000003f;
327 const float xi_170 = rho * xi_111 * 2.0f - xi_110 - xi_118 - xi_129 - xi_141 - xi_147 - 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3] - 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3] + 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 16 * _stride_pdfs_3] + 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 12 * _stride_pdfs_3] + 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 15 * _stride_pdfs_3] + 2.0f * _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 11 * _stride_pdfs_3] - _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3];
328 const float xi_171 = omega_shear * xi_170;
329 const float xi_172 = xi_171 * 0.041666666666666664f;
330 const float xi_177 = xi_172 + xi_176;
331 const float xi_178 = xi_168 + xi_169 + xi_177;
332 const float xi_179 = -xi_130 - xi_131;
333 const float xi_180 = xi_136 + xi_143;
334 const float xi_181 = -xi_172 - xi_176;
335 const float xi_182 = -xi_168 - xi_169 + xi_181;
336 const float xi_183 = xi_123 * 0.035714285714285712f;
337 const float xi_196 = xi_123 * 0.021428571428571429f;
338 const float xi_208 = xi_142 * 0.0625f;
339 const float xi_209 = xi_128 * 0.013888888888888888f;
340 const float xi_211 = xi_106 * 0.083333333333333329f + xi_120 * 0.041666666666666664f;
341 const float xi_212 = xi_171 * 0.020833333333333332f + xi_175 * 0.083333333333333329f + xi_211;
342 const float xi_213 = xi_166 + xi_207 + xi_208 + xi_209 + xi_210 + xi_212;
343 const float xi_221 = -xi_161 - xi_165 + xi_207 + xi_208 + xi_209 + xi_210 + xi_212;
344 const float xi_225 = xi_217 * (u_2 * xi_216 + xi_146 + xi_155 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 15 * _stride_pdfs_3]);
345 const float xi_232 = xi_222 - xi_224 + xi_225 + xi_227 - xi_229 + xi_231;
346 const float xi_238 = xi_123 * 0.0071428571428571426f;
347 const float xi_240 = xi_128 * 0.003968253968253968f;
348 const float xi_242 = -xi_240 - xi_241;
349 const float xi_243 = xi_102 * 0.050000000000000003f + xi_116 * 0.025000000000000001f + xi_181 + xi_211 - xi_238 - xi_239 + xi_242;
350 const float xi_245 = omega_bulk * xi_119 * -0.041666666666666664f + omega_even * xi_115 * -0.025000000000000001f + xi_100 * xi_101 * xi_96 * -0.050000000000000003f + xi_103 * xi_104 * xi_105 * -0.083333333333333329f + xi_177 + xi_238 + xi_239 + xi_240 + xi_241;
351 const float xi_247 = -xi_208;
352 const float xi_248 = xi_123 * 0.017857142857142856f;
353 const float xi_250 = xi_205 + xi_212 + xi_242 + xi_246 + xi_247 + xi_248 + xi_249;
354 const float xi_252 = xi_217 * (rho * u_0 * u_2 + xi_137 + xi_185 + _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3]);
355 const float xi_253 = -xi_251 - xi_252;
356 const float xi_259 = xi_251 + xi_252;
357 const float xi_261 = xi_222 + xi_224 + xi_225 - xi_227 + xi_229 - xi_231;
358 const float xi_262 = -xi_202 - xi_204 + xi_212 + xi_242 + xi_246 + xi_247 + xi_248 + xi_249;
359 const float forceTerm_0 = xi_17 * xi_20 - xi_17 + xi_18 * xi_20 - xi_18 + xi_19 * xi_20 - xi_19;
360 const float forceTerm_1 = xi_21 - xi_23 + xi_32;
361 const float forceTerm_2 = -xi_21 + xi_23 + xi_32;
362 const float forceTerm_3 = -xi_33 + xi_35 - xi_38;
363 const float forceTerm_4 = xi_33 - xi_35 - xi_38;
364 const float forceTerm_5 = xi_39 - xi_41 + xi_44;
365 const float forceTerm_6 = -xi_39 + xi_41 + xi_44;
366 const float forceTerm_7 = -xi_49 - xi_57 - xi_67;
367 const float forceTerm_8 = -xi_57 - xi_68 - xi_69;
368 const float forceTerm_9 = -xi_49 - xi_69 - xi_71;
369 const float forceTerm_10 = -xi_67 - xi_68 - xi_71;
370 const float forceTerm_11 = -xi_74 - xi_76 - xi_81;
371 const float forceTerm_12 = -xi_74 - xi_82 - xi_83;
372 const float forceTerm_13 = -xi_49 - xi_84 - xi_91;
373 const float forceTerm_14 = -xi_68 - xi_84 - xi_92;
374 const float forceTerm_15 = -xi_76 - xi_83 - xi_93;
375 const float forceTerm_16 = -xi_81 - xi_82 - xi_93;
376 const float forceTerm_17 = -xi_49 - xi_92 - xi_94;
377 const float forceTerm_18 = -xi_68 - xi_91 - xi_94;
378 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2] = forceTerm_0 + xi_102 * 0.20000000000000001f - xi_106 + xi_109 * 0.085714285714285715f + xi_116 * 0.10000000000000001f + xi_120 * -0.5f + xi_123 * 0.042857142857142858f + xi_128 * 0.023809523809523808f + xi_99 * 0.14285714285714285f + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2];
379 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + _stride_pdfs_tmp_3] = forceTerm_1 + omega_even * xi_122 * 0.014285714285714285f + xi_107 * xi_108 * xi_96 * 0.028571428571428571f - xi_129 - xi_132 - xi_144 - xi_167 - xi_178;
380 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 2 * _stride_pdfs_tmp_3] = forceTerm_2 + xi_109 * 0.028571428571428571f + xi_123 * 0.014285714285714285f + xi_167 + xi_179 + xi_180 + xi_182 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3];
381 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 3 * _stride_pdfs_tmp_3] = forceTerm_3 + xi_171 * 0.083333333333333329f + xi_175 * 0.33333333333333331f + xi_179 - xi_183 - xi_184 + xi_195 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3];
382 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 4 * _stride_pdfs_tmp_3] = forceTerm_4 + omega_shear * xi_170 * 0.083333333333333329f - xi_132 + xi_134 * xi_173 * xi_174 * 0.33333333333333331f - xi_183 - xi_184 - xi_189 - xi_195;
383 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 5 * _stride_pdfs_tmp_3] = forceTerm_5 + omega_even * xi_127 * 0.015873015873015872f - xi_139 - xi_178 - xi_180 - xi_196 - xi_197 - xi_206 + xi_96 * xi_97 * xi_98 * 0.095238095238095233f;
384 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 6 * _stride_pdfs_tmp_3] = forceTerm_6 + xi_128 * 0.015873015873015872f + xi_144 + xi_182 - xi_196 - xi_197 + xi_206 + xi_99 * 0.095238095238095233f + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3];
385 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 7 * _stride_pdfs_tmp_3] = forceTerm_7 + xi_213 + xi_214 + xi_219 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3];
386 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 8 * _stride_pdfs_tmp_3] = forceTerm_8 + xi_194 + xi_213 + xi_220 + _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3];
387 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 9 * _stride_pdfs_tmp_3] = forceTerm_9 + xi_214 + xi_220 + xi_221 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3];
388 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 10 * _stride_pdfs_tmp_3] = forceTerm_10 + xi_194 + xi_219 + xi_221 + _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3];
389 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 11 * _stride_pdfs_tmp_3] = forceTerm_11 + xi_232 + xi_237 + xi_243 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 11 * _stride_pdfs_3];
390 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 12 * _stride_pdfs_tmp_3] = forceTerm_12 - xi_156 - xi_232 - xi_244 - xi_245;
391 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 13 * _stride_pdfs_tmp_3] = forceTerm_13 + xi_250 + xi_253 + xi_258 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3];
392 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 14 * _stride_pdfs_tmp_3] = forceTerm_14 + xi_250 + xi_259 + xi_260 + _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 14 * _stride_pdfs_3];
393 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 15 * _stride_pdfs_tmp_3] = forceTerm_15 - xi_145 - xi_237 - xi_245 - xi_261;
394 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 16 * _stride_pdfs_tmp_3] = forceTerm_16 + xi_243 + xi_244 + xi_261 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 16 * _stride_pdfs_3];
395 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 17 * _stride_pdfs_tmp_3] = forceTerm_17 + xi_258 + xi_259 + xi_262 + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3];
396 _data_pdfs_tmp[_stride_pdfs_tmp_0 * ctr_0 + _stride_pdfs_tmp_1 * ctr_1 + _stride_pdfs_tmp_2 * ctr_2 + 18 * _stride_pdfs_tmp_3] = forceTerm_18 + xi_253 + xi_260 + xi_262 + _data_pdfs[_stride_pdfs_0 * ctr_0 - _stride_pdfs_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3];
397 }
398}
399} // namespace internal_streamcollidesweepthermalizedsingleprecisioncuda_streamcollidesweepthermalizedsingleprecisioncuda
400
402 if (!this->configured_)
403 WALBERLA_ABORT("This Sweep contains a configure function that needs to be called manually")
404
405 auto pdfs = block->getData<gpu::GPUField<float>>(pdfsID);
406 auto force = block->getData<gpu::GPUField<float>>(forceID);
407 gpu::GPUField<float> *pdfs_tmp;
408 {
409 if (cache_pdfs_.find(block) == cache_pdfs_.end()) {
410 pdfs_tmp = pdfs->cloneUninitialized();
411 cache_pdfs_[block] = pdfs_tmp;
412 } else {
413 pdfs_tmp = cache_pdfs_[block];
414 }
415 }
416
417 auto &seed = this->seed_;
418 auto &omega_shear = this->omega_shear_;
419 auto &kT = this->kT_;
420 auto &block_offset_0 = this->block_offset_0_;
421 auto &omega_bulk = this->omega_bulk_;
422 auto &omega_odd = this->omega_odd_;
423 auto &time_step = this->time_step_;
424 auto &block_offset_2 = this->block_offset_2_;
425 auto &block_offset_1 = this->block_offset_1_;
426 auto &omega_even = this->omega_even_;
427 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(force->nrOfGhostLayers()))
428 float *RESTRICT const _data_force = force->dataAt(-1, -1, -1, 0);
429 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
430 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()))
431 float *RESTRICT const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0);
432 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
433 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()))
434 float *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0);
435 WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx)
436 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(force->xSize()) + 2))
437 const int64_t _size_force_0 = int64_t(int64_c(force->xSize()) + 2);
438 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
439 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(force->ySize()) + 2))
440 const int64_t _size_force_1 = int64_t(int64_c(force->ySize()) + 2);
441 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
442 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(force->zSize()) + 2))
443 const int64_t _size_force_2 = int64_t(int64_c(force->zSize()) + 2);
444 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
445 const int64_t _stride_force_0 = int64_t(force->xStride());
446 const int64_t _stride_force_1 = int64_t(force->yStride());
447 const int64_t _stride_force_2 = int64_t(force->zStride());
448 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
449 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
450 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
451 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
452 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
453 const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
454 const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
455 const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
456 const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
457 dim3 _block(uint32_c(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)), uint32_c(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))), uint32_c(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))));
458 dim3 _grid(uint32_c(((_size_force_0 - 2) % (((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)) == 0 ? (int64_t)(_size_force_0 - 2) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)) : ((int64_t)(_size_force_0 - 2) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))) + 1)), uint32_c(((_size_force_1 - 2) % (((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))) == 0 ? (int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))) : ((int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) + 1)), uint32_c(((_size_force_2 - 2) % (((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 
128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))) == 0 ? (int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 
64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))) : ((int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? 
_size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))))) + 1)));
459 internal_streamcollidesweepthermalizedsingleprecisioncuda_streamcollidesweepthermalizedsingleprecisioncuda::streamcollidesweepthermalizedsingleprecisioncuda_streamcollidesweepthermalizedsingleprecisioncuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _data_pdfs_tmp, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
460 pdfs->swapDataPointers(pdfs_tmp);
461}
462
463void StreamCollideSweepThermalizedSinglePrecisionCUDA::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream) {
464 if (!this->configured_)
465 WALBERLA_ABORT("This Sweep contains a configure function that needs to be called manually")
466
467 CellInterval ci = globalCellInterval;
468 CellInterval blockBB = blocks->getBlockCellBB(*block);
469 blockBB.expand(ghostLayers);
470 ci.intersect(blockBB);
471 blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
472 if (ci.empty())
473 return;
474
475 auto pdfs = block->getData<gpu::GPUField<float>>(pdfsID);
476 auto force = block->getData<gpu::GPUField<float>>(forceID);
477 gpu::GPUField<float> *pdfs_tmp;
478 {
479 if (cache_pdfs_.find(block) == cache_pdfs_.end()) {
480 pdfs_tmp = pdfs->cloneUninitialized();
481 cache_pdfs_[block] = pdfs_tmp;
482 } else {
483 pdfs_tmp = cache_pdfs_[block];
484 }
485 }
486
487 auto &seed = this->seed_;
488 auto &omega_shear = this->omega_shear_;
489 auto &kT = this->kT_;
490 auto &block_offset_0 = this->block_offset_0_;
491 auto &omega_bulk = this->omega_bulk_;
492 auto &omega_odd = this->omega_odd_;
493 auto &time_step = this->time_step_;
494 auto &block_offset_2 = this->block_offset_2_;
495 auto &block_offset_1 = this->block_offset_1_;
496 auto &omega_even = this->omega_even_;
497 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(force->nrOfGhostLayers()))
498 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(force->nrOfGhostLayers()))
499 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(force->nrOfGhostLayers()))
500 float *RESTRICT const _data_force = force->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
501 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
502 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
503 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
504 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()))
505 float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
506 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx)
507 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
508 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
509 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()))
510 float *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
511 WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx)
512 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 2))
513 const int64_t _size_force_0 = int64_t(int64_c(ci.xSize()) + 2);
514 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
515 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 2))
516 const int64_t _size_force_1 = int64_t(int64_c(ci.ySize()) + 2);
517 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
518 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 2))
519 const int64_t _size_force_2 = int64_t(int64_c(ci.zSize()) + 2);
520 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx)
521 const int64_t _stride_force_0 = int64_t(force->xStride());
522 const int64_t _stride_force_1 = int64_t(force->yStride());
523 const int64_t _stride_force_2 = int64_t(force->zStride());
524 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
525 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
526 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
527 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
528 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
529 const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
530 const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
531 const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
532 const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
533 dim3 _block(uint32_c(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)), uint32_c(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))), uint32_c(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))));
534 dim3 _grid(uint32_c(((_size_force_0 - 2) % (((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)) == 0 ? (int64_t)(_size_force_0 - 2) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)) : ((int64_t)(_size_force_0 - 2) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))) + 1)), uint32_c(((_size_force_1 - 2) % (((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))) == 0 ? (int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))) : ((int64_t)(_size_force_1 - 2) / (int64_t)(((1024 < ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))) ? 1024 : ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) + 1)), uint32_c(((_size_force_2 - 2) % (((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 
128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))) == 0 ? (int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 
64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))))) : ((int64_t)(_size_force_2 - 2) / (int64_t)(((64 < ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))) ? 64 : ((_size_force_2 - 2 < ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? _size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2))))))) ? _size_force_2 - 2 : ((int64_t)(256) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2) * ((_size_force_1 - 2 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))) ? 
_size_force_1 - 2 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_force_0 - 2) ? 128 : _size_force_0 - 2)))))))))) + 1)));
535 internal_streamcollidesweepthermalizedsingleprecisioncuda_streamcollidesweepthermalizedsingleprecisioncuda::streamcollidesweepthermalizedsingleprecisioncuda_streamcollidesweepthermalizedsingleprecisioncuda<<<_grid, _block, 0, stream>>>(_data_force, _data_pdfs, _data_pdfs_tmp, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, block_offset_0, block_offset_1, block_offset_2, kT, omega_bulk, omega_even, omega_odd, omega_shear, seed, time_step);
536 pdfs->swapDataPointers(pdfs_tmp);
537}
538
539} // namespace pystencils
540} // namespace walberla
541
542#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
543#pragma GCC diagnostic pop
544#endif
545
546#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
547#pragma warning pop
548#endif
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
void runOnCellInterval(const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:176
QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3, uint32 key0, uint32 key1, float &rnd1, float &rnd2, float &rnd3, float &rnd4)
STL namespace.
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float float omega_bulk
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_tmp_3
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float float float float omega_odd
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_tmp_2
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_3
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_0
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_2
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t block_offset_0
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_tmp_1
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t block_offset_1
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float float float float float omega_shear
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float float float float float uint32_t seed
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float float float omega_even
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_3
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_tmp_0
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const _stride_force_0
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_pdfs_1
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_2
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t float kT
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const _stride_force_1
static FUNC_PREFIX float *RESTRICT const float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const uint32_t uint32_t uint32_t block_offset_2
static FUNC_PREFIX __launch_bounds__(256) void streamcollidesweepthermalizedsingleprecisioncuda_streamcollidesweepthermalizedsingleprecisioncuda(float *RESTRICT const _data_force
\file PackInfoPdfDoublePrecision.cpp \author pystencils