ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
StreamSweepSinglePrecision.cpp
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \file StreamSweepSinglePrecision.cpp
17//! \ingroup lbm
18//! \author lbmpy
19//======================================================================================================================
20
21// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 4d10e7f2358fc4a4f7e99195d0f67f0b759ecb6f
22
23#include <cmath>
24
26#include "core/DataTypes.h"
27#include "core/Macros.h"
28
29#define FUNC_PREFIX
30
31#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
32#pragma GCC diagnostic push
33#pragma GCC diagnostic ignored "-Wfloat-equal"
34#pragma GCC diagnostic ignored "-Wshadow"
35#pragma GCC diagnostic ignored "-Wconversion"
36#pragma GCC diagnostic ignored "-Wunused-variable"
37#endif
38
39#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
40#pragma warning push
41#pragma warning(disable : 1599)
42#endif
43
44using namespace std;
45
46namespace walberla {
47namespace pystencils {
48
49namespace internal_streamsweepsingleprecision_streamsweepsingleprecision {
//! Streaming sweep over a 19-population single-precision PDF field.
//!
//! For every cell with index 1 .. size-2 along each axis the kernel
//!  - gathers the 19 populations from the cell itself and its neighbours
//!    (each population q is read from a fixed neighbour offset),
//!  - accumulates the density and the three momentum-density components,
//!  - stores the velocity  u = (momentum + 0.5 * force) / rho,
//!  - stores the gathered populations, unchanged, into the temporary PDF field.
//!
//! The first and last index along each axis are never written; they are only
//! read as neighbours, so the data pointers have to address a region that is
//! one cell larger than the updated one on every side.
//!
//! \param _data_force      force field, 3 components, read only
//! \param _data_pdfs       source populations, 19 components, read only
//! \param _data_pdfs_tmp   destination populations, 19 components, written
//! \param _data_velocity   velocity field, 3 components, written
//! \param _size_force_0    extent along axis 0 including the one-cell halo (likewise _1, _2)
//! \param _stride_*_0..2   element strides of the respective field along the spatial axes
//! \param _stride_*_3      element stride of the respective field between components
static FUNC_PREFIX void streamsweepsingleprecision_streamsweepsingleprecision(float *RESTRICT const _data_force, float *RESTRICT const _data_pdfs, float *RESTRICT _data_pdfs_tmp, float *RESTRICT _data_velocity, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3, int64_t const _stride_velocity_0, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3) {
  // Exclusive upper bounds of the updated region.
  const int64_t end_x = _size_force_0 - 1;
  const int64_t end_y = _size_force_1 - 1;
  const int64_t end_z = _size_force_2 - 1;

  for (int64_t z = 1; z < end_z; ++z) {
    for (int64_t y = 1; y < end_y; ++y) {
      for (int64_t x = 1; x < end_x; ++x) {
        // Addresses of component 0 of the current cell in each field.
        const float *const src = _data_pdfs + _stride_pdfs_2 * z + _stride_pdfs_1 * y + _stride_pdfs_0 * x;
        const float *const frc = _data_force + _stride_force_2 * z + _stride_force_1 * y + _stride_force_0 * x;
        float *const vel = _data_velocity + _stride_velocity_2 * z + _stride_velocity_1 * y + _stride_velocity_0 * x;
        float *const dst = _data_pdfs_tmp + _stride_pdfs_tmp_2 * z + _stride_pdfs_tmp_1 * y + _stride_pdfs_tmp_0 * x;

        // Reads population q from the cell displaced by (dx, dy, dz).
        const auto pull = [&](const int64_t q, const int64_t dx, const int64_t dy, const int64_t dz) -> float {
          return src[_stride_pdfs_3 * q + _stride_pdfs_0 * dx + _stride_pdfs_1 * dy + _stride_pdfs_2 * dz];
        };

        // Gather all 19 populations before anything is written.
        const float f[19] = {
            pull(0, 0, 0, 0),
            pull(1, 0, -1, 0),
            pull(2, 0, 1, 0),
            pull(3, 1, 0, 0),
            pull(4, -1, 0, 0),
            pull(5, 0, 0, -1),
            pull(6, 0, 0, 1),
            pull(7, 1, -1, 0),
            pull(8, -1, -1, 0),
            pull(9, 1, 1, 0),
            pull(10, -1, 1, 0),
            pull(11, 0, -1, -1),
            pull(12, 0, 1, -1),
            pull(13, 1, 0, -1),
            pull(14, -1, 0, -1),
            pull(15, 0, -1, 1),
            pull(16, 0, 1, 1),
            pull(17, 1, 0, 1),
            pull(18, -1, 0, 1),
        };

        // Partial sums shared between the density and the momentum components.
        const float vel0Term = f[10] + f[14] + f[18] + f[4] + f[8];
        const float vel1Term = f[1] + f[11] + f[15] + f[7];
        const float vel2Term = f[12] + f[13] + f[5];

        const float momdensity_0 = -f[13] - f[17] - f[3] - f[7] - f[9] + vel0Term;
        const float momdensity_1 = -f[10] - f[12] - f[16] - f[2] + f[8] - f[9] + vel1Term;
        const float momdensity_2 = f[11] + f[14] - f[15] - f[16] - f[17] - f[18] - f[6] + vel2Term;
        const float rho = f[0] + f[16] + f[17] + f[2] + f[3] + f[6] + f[9] + vel0Term + vel1Term + vel2Term;

        // Velocity including half of the force contribution.
        const float inv_rho = 1.0f / rho;
        const float half_inv_rho = 0.5f * inv_rho;
        vel[0] = momdensity_0 * inv_rho + half_inv_rho * frc[0];
        vel[_stride_velocity_3] = momdensity_1 * inv_rho + half_inv_rho * frc[_stride_force_3];
        vel[2 * _stride_velocity_3] = momdensity_2 * inv_rho + half_inv_rho * frc[2 * _stride_force_3];

        // Store the gathered populations in the destination field.
        for (int64_t q = 0; q < 19; ++q) {
          dst[_stride_pdfs_tmp_3 * q] = f[q];
        }
      }
    }
  }
}
197} // namespace internal_streamsweepsingleprecision_streamsweepsingleprecision
198
200 auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
201 auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
202 auto velocity = block->getData<field::GhostLayerField<float, 3>>(velocityID);
203 field::GhostLayerField<float, 19> *pdfs_tmp;
204 {
205 // Getting temporary field pdfs_tmp
206 auto it = cache_pdfs_.find(pdfs);
207 if (it != cache_pdfs_.end()) {
208 pdfs_tmp = *it;
209 } else {
210 pdfs_tmp = pdfs->cloneUninitialized();
211 cache_pdfs_.insert(pdfs_tmp);
212 }
213 }
214
215 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(force->nrOfGhostLayers()));
216 float *RESTRICT const _data_force = force->dataAt(-1, -1, -1, 0);
217 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
218 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs->nrOfGhostLayers()));
219 float *RESTRICT const _data_pdfs = pdfs->dataAt(-1, -1, -1, 0);
220 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
221 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(pdfs_tmp->nrOfGhostLayers()));
222 float *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(-1, -1, -1, 0);
223 WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx);
224 WALBERLA_ASSERT_GREATER_EQUAL(-1, -int_c(velocity->nrOfGhostLayers()));
225 float *RESTRICT _data_velocity = velocity->dataAt(-1, -1, -1, 0);
226 WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
227 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(force->xSize()) + 2));
228 const int64_t _size_force_0 = int64_t(cell_idx_c(force->xSize()) + 2);
229 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
230 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(force->ySize()) + 2));
231 const int64_t _size_force_1 = int64_t(cell_idx_c(force->ySize()) + 2);
232 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
233 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(force->zSize()) + 2));
234 const int64_t _size_force_2 = int64_t(cell_idx_c(force->zSize()) + 2);
235 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
236 const int64_t _stride_force_0 = int64_t(force->xStride());
237 const int64_t _stride_force_1 = int64_t(force->yStride());
238 const int64_t _stride_force_2 = int64_t(force->zStride());
239 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
240 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
241 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
242 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
243 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
244 const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
245 const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
246 const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
247 const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
248 const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
249 const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
250 const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
251 const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
252 internal_streamsweepsingleprecision_streamsweepsingleprecision::streamsweepsingleprecision_streamsweepsingleprecision(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
253 pdfs->swapDataPointers(pdfs_tmp);
254}
255
256void StreamSweepSinglePrecision::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block) {
257 CellInterval ci = globalCellInterval;
258 CellInterval blockBB = blocks->getBlockCellBB(*block);
259 blockBB.expand(ghostLayers);
260 ci.intersect(blockBB);
261 blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
262 if (ci.empty())
263 return;
264
265 auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
266 auto force = block->getData<field::GhostLayerField<float, 3>>(forceID);
267 auto velocity = block->getData<field::GhostLayerField<float, 3>>(velocityID);
268 field::GhostLayerField<float, 19> *pdfs_tmp;
269 {
270 // Getting temporary field pdfs_tmp
271 auto it = cache_pdfs_.find(pdfs);
272 if (it != cache_pdfs_.end()) {
273 pdfs_tmp = *it;
274 } else {
275 pdfs_tmp = pdfs->cloneUninitialized();
276 cache_pdfs_.insert(pdfs_tmp);
277 }
278 }
279
280 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(force->nrOfGhostLayers()));
281 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(force->nrOfGhostLayers()));
282 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(force->nrOfGhostLayers()));
283 float *RESTRICT const _data_force = force->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
284 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
285 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
286 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
287 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
288 float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
289 WALBERLA_ASSERT_EQUAL(pdfs->layout(), field::fzyx);
290 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
291 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
292 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
293 float *RESTRICT _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
294 WALBERLA_ASSERT_EQUAL(pdfs_tmp->layout(), field::fzyx);
295 WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(velocity->nrOfGhostLayers()));
296 WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(velocity->nrOfGhostLayers()));
297 WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(velocity->nrOfGhostLayers()));
298 float *RESTRICT _data_velocity = velocity->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
299 WALBERLA_ASSERT_EQUAL(velocity->layout(), field::fzyx);
300 WALBERLA_ASSERT_GREATER_EQUAL(force->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 2));
301 const int64_t _size_force_0 = int64_t(cell_idx_c(ci.xSize()) + 2);
302 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
303 WALBERLA_ASSERT_GREATER_EQUAL(force->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 2));
304 const int64_t _size_force_1 = int64_t(cell_idx_c(ci.ySize()) + 2);
305 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
306 WALBERLA_ASSERT_GREATER_EQUAL(force->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 2));
307 const int64_t _size_force_2 = int64_t(cell_idx_c(ci.zSize()) + 2);
308 WALBERLA_ASSERT_EQUAL(force->layout(), field::fzyx);
309 const int64_t _stride_force_0 = int64_t(force->xStride());
310 const int64_t _stride_force_1 = int64_t(force->yStride());
311 const int64_t _stride_force_2 = int64_t(force->zStride());
312 const int64_t _stride_force_3 = int64_t(1 * int64_t(force->fStride()));
313 const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
314 const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
315 const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
316 const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
317 const int64_t _stride_pdfs_tmp_0 = int64_t(pdfs_tmp->xStride());
318 const int64_t _stride_pdfs_tmp_1 = int64_t(pdfs_tmp->yStride());
319 const int64_t _stride_pdfs_tmp_2 = int64_t(pdfs_tmp->zStride());
320 const int64_t _stride_pdfs_tmp_3 = int64_t(1 * int64_t(pdfs_tmp->fStride()));
321 const int64_t _stride_velocity_0 = int64_t(velocity->xStride());
322 const int64_t _stride_velocity_1 = int64_t(velocity->yStride());
323 const int64_t _stride_velocity_2 = int64_t(velocity->zStride());
324 const int64_t _stride_velocity_3 = int64_t(1 * int64_t(velocity->fStride()));
325 internal_streamsweepsingleprecision_streamsweepsingleprecision::streamsweepsingleprecision_streamsweepsingleprecision(_data_force, _data_pdfs, _data_pdfs_tmp, _data_velocity, _size_force_0, _size_force_1, _size_force_2, _stride_force_0, _stride_force_1, _stride_force_2, _stride_force_3, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, _stride_pdfs_tmp_0, _stride_pdfs_tmp_1, _stride_pdfs_tmp_2, _stride_pdfs_tmp_3, _stride_velocity_0, _stride_velocity_1, _stride_velocity_2, _stride_velocity_3);
326 pdfs->swapDataPointers(pdfs_tmp);
327}
328
329} // namespace pystencils
330} // namespace walberla
331
332#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
333#pragma GCC diagnostic pop
334#endif
335
336#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)
337#pragma warning pop
338#endif
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \ingroup lbm \author lbmpy
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
__global__ float * force
void runOnCellInterval(const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block)
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:174
static FUNC_PREFIX void streamsweepsingleprecision_streamsweepsingleprecision(float *RESTRICT const _data_force, float *RESTRICT const _data_pdfs, float *RESTRICT _data_pdfs_tmp, float *RESTRICT _data_velocity, int64_t const _size_force_0, int64_t const _size_force_1, int64_t const _size_force_2, int64_t const _stride_force_0, int64_t const _stride_force_1, int64_t const _stride_force_2, int64_t const _stride_force_3, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t const _stride_pdfs_tmp_0, int64_t const _stride_pdfs_tmp_1, int64_t const _stride_pdfs_tmp_2, int64_t const _stride_pdfs_tmp_3, int64_t const _stride_velocity_0, int64_t const _stride_velocity_1, int64_t const _stride_velocity_2, int64_t const _stride_velocity_3)
static Utils::Vector3d velocity(Particle const &p_ref, Particle const &p_vs)
Velocity of the virtual site.
Definition relative.cpp:64