52static FUNC_PREFIX void streamsweepsingleprecisionavx_streamsweepsingleprecisionavx(
float *
RESTRICT const _data_force,
float *
RESTRICT const _data_pdfs,
float *
RESTRICT _data_pdfs_tmp,
float *
RESTRICT _data_velocity, int64_t
const _size_force_0, int64_t
const _size_force_1, int64_t
const _size_force_2, int64_t
const _stride_force_1, int64_t
const _stride_force_2, int64_t
const _stride_force_3, int64_t
const _stride_pdfs_1, int64_t
const _stride_pdfs_2, int64_t
const _stride_pdfs_3, int64_t
const _stride_pdfs_tmp_1, int64_t
const _stride_pdfs_tmp_2, int64_t
const _stride_pdfs_tmp_3, int64_t
const _stride_velocity_1, int64_t
const _stride_velocity_2, int64_t
const _stride_velocity_3) {
53 for (int64_t ctr_2 = 1; ctr_2 < _size_force_2 - 1; ctr_2 += 1) {
54 float *
RESTRICT _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2 * ctr_2;
55 float *
RESTRICT _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3;
56 float *
RESTRICT _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3;
57 float *
RESTRICT _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3;
58 float *
RESTRICT _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3;
59 float *
RESTRICT _data_pdfs_2m1_35 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 5 * _stride_pdfs_3;
60 float *
RESTRICT _data_pdfs_21_36 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 6 * _stride_pdfs_3;
61 float *
RESTRICT _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3;
62 float *
RESTRICT _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3;
63 float *
RESTRICT _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3;
64 float *
RESTRICT _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3;
65 float *
RESTRICT _data_pdfs_2m1_311 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 11 * _stride_pdfs_3;
66 float *
RESTRICT _data_pdfs_2m1_312 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 12 * _stride_pdfs_3;
67 float *
RESTRICT _data_pdfs_2m1_313 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 13 * _stride_pdfs_3;
68 float *
RESTRICT _data_pdfs_2m1_314 = _data_pdfs + _stride_pdfs_2 * ctr_2 - _stride_pdfs_2 + 14 * _stride_pdfs_3;
69 float *
RESTRICT _data_pdfs_21_315 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 15 * _stride_pdfs_3;
70 float *
RESTRICT _data_pdfs_21_316 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 16 * _stride_pdfs_3;
71 float *
RESTRICT _data_pdfs_21_317 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 17 * _stride_pdfs_3;
72 float *
RESTRICT _data_pdfs_21_318 = _data_pdfs + _stride_pdfs_2 * ctr_2 + _stride_pdfs_2 + 18 * _stride_pdfs_3;
73 float *
RESTRICT _data_force_20_30 = _data_force + _stride_force_2 * ctr_2;
74 float *
RESTRICT _data_force_20_31 = _data_force + _stride_force_2 * ctr_2 + _stride_force_3;
75 float *
RESTRICT _data_force_20_32 = _data_force + _stride_force_2 * ctr_2 + 2 * _stride_force_3;
76 float *
RESTRICT _data_velocity_20_30 = _data_velocity + _stride_velocity_2 * ctr_2;
77 float *
RESTRICT _data_velocity_20_31 = _data_velocity + _stride_velocity_2 * ctr_2 + _stride_velocity_3;
78 float *
RESTRICT _data_velocity_20_32 = _data_velocity + _stride_velocity_2 * ctr_2 + 2 * _stride_velocity_3;
79 float *
RESTRICT _data_pdfs_tmp_20_30 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2;
80 float *
RESTRICT _data_pdfs_tmp_20_31 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + _stride_pdfs_tmp_3;
81 float *
RESTRICT _data_pdfs_tmp_20_32 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 2 * _stride_pdfs_tmp_3;
82 float *
RESTRICT _data_pdfs_tmp_20_33 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 3 * _stride_pdfs_tmp_3;
83 float *
RESTRICT _data_pdfs_tmp_20_34 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 4 * _stride_pdfs_tmp_3;
84 float *
RESTRICT _data_pdfs_tmp_20_35 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 5 * _stride_pdfs_tmp_3;
85 float *
RESTRICT _data_pdfs_tmp_20_36 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 6 * _stride_pdfs_tmp_3;
86 float *
RESTRICT _data_pdfs_tmp_20_37 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 7 * _stride_pdfs_tmp_3;
87 float *
RESTRICT _data_pdfs_tmp_20_38 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 8 * _stride_pdfs_tmp_3;
88 float *
RESTRICT _data_pdfs_tmp_20_39 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 9 * _stride_pdfs_tmp_3;
89 float *
RESTRICT _data_pdfs_tmp_20_310 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 10 * _stride_pdfs_tmp_3;
90 float *
RESTRICT _data_pdfs_tmp_20_311 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 11 * _stride_pdfs_tmp_3;
91 float *
RESTRICT _data_pdfs_tmp_20_312 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 12 * _stride_pdfs_tmp_3;
92 float *
RESTRICT _data_pdfs_tmp_20_313 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 13 * _stride_pdfs_tmp_3;
93 float *
RESTRICT _data_pdfs_tmp_20_314 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 14 * _stride_pdfs_tmp_3;
94 float *
RESTRICT _data_pdfs_tmp_20_315 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 15 * _stride_pdfs_tmp_3;
95 float *
RESTRICT _data_pdfs_tmp_20_316 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 16 * _stride_pdfs_tmp_3;
96 float *
RESTRICT _data_pdfs_tmp_20_317 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 17 * _stride_pdfs_tmp_3;
97 float *
RESTRICT _data_pdfs_tmp_20_318 = _data_pdfs_tmp + _stride_pdfs_tmp_2 * ctr_2 + 18 * _stride_pdfs_tmp_3;
98 for (int64_t ctr_1 = 1; ctr_1 < _size_force_1 - 1; ctr_1 += 1) {
99 float *
RESTRICT _data_pdfs_20_30_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_30;
100 float *
RESTRICT _data_pdfs_20_31_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_31;
101 float *
RESTRICT _data_pdfs_20_32_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_32;
102 float *
RESTRICT _data_pdfs_20_33_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_33;
103 float *
RESTRICT _data_pdfs_20_34_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_20_34;
104 float *
RESTRICT _data_pdfs_2m1_35_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_35;
105 float *
RESTRICT _data_pdfs_21_36_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_36;
106 float *
RESTRICT _data_pdfs_20_37_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_37;
107 float *
RESTRICT _data_pdfs_20_38_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_20_38;
108 float *
RESTRICT _data_pdfs_20_39_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_39;
109 float *
RESTRICT _data_pdfs_20_310_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_20_310;
110 float *
RESTRICT _data_pdfs_2m1_311_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_2m1_311;
111 float *
RESTRICT _data_pdfs_2m1_312_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_2m1_312;
112 float *
RESTRICT _data_pdfs_2m1_313_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_313;
113 float *
RESTRICT _data_pdfs_2m1_314_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_2m1_314;
114 float *
RESTRICT _data_pdfs_21_315_1m1 = _stride_pdfs_1 * ctr_1 - _stride_pdfs_1 + _data_pdfs_21_315;
115 float *
RESTRICT _data_pdfs_21_316_11 = _stride_pdfs_1 * ctr_1 + _stride_pdfs_1 + _data_pdfs_21_316;
116 float *
RESTRICT _data_pdfs_21_317_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_317;
117 float *
RESTRICT _data_pdfs_21_318_10 = _stride_pdfs_1 * ctr_1 + _data_pdfs_21_318;
118 float *
RESTRICT _data_force_20_30_10 = _stride_force_1 * ctr_1 + _data_force_20_30;
119 float *
RESTRICT _data_force_20_31_10 = _stride_force_1 * ctr_1 + _data_force_20_31;
120 float *
RESTRICT _data_force_20_32_10 = _stride_force_1 * ctr_1 + _data_force_20_32;
121 float *
RESTRICT _data_velocity_20_30_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_30;
122 float *
RESTRICT _data_velocity_20_31_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_31;
123 float *
RESTRICT _data_velocity_20_32_10 = _stride_velocity_1 * ctr_1 + _data_velocity_20_32;
124 float *
RESTRICT _data_pdfs_tmp_20_30_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_30;
125 float *
RESTRICT _data_pdfs_tmp_20_31_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_31;
126 float *
RESTRICT _data_pdfs_tmp_20_32_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_32;
127 float *
RESTRICT _data_pdfs_tmp_20_33_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_33;
128 float *
RESTRICT _data_pdfs_tmp_20_34_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_34;
129 float *
RESTRICT _data_pdfs_tmp_20_35_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_35;
130 float *
RESTRICT _data_pdfs_tmp_20_36_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_36;
131 float *
RESTRICT _data_pdfs_tmp_20_37_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_37;
132 float *
RESTRICT _data_pdfs_tmp_20_38_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_38;
133 float *
RESTRICT _data_pdfs_tmp_20_39_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_39;
134 float *
RESTRICT _data_pdfs_tmp_20_310_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_310;
135 float *
RESTRICT _data_pdfs_tmp_20_311_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_311;
136 float *
RESTRICT _data_pdfs_tmp_20_312_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_312;
137 float *
RESTRICT _data_pdfs_tmp_20_313_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_313;
138 float *
RESTRICT _data_pdfs_tmp_20_314_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_314;
139 float *
RESTRICT _data_pdfs_tmp_20_315_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_315;
140 float *
RESTRICT _data_pdfs_tmp_20_316_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_316;
141 float *
RESTRICT _data_pdfs_tmp_20_317_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_317;
142 float *
RESTRICT _data_pdfs_tmp_20_318_10 = _stride_pdfs_tmp_1 * ctr_1 + _data_pdfs_tmp_20_318;
144 for (int64_t ctr_0 = 1; ctr_0 < (int64_t)((_size_force_0 - 2) / (8)) * (8) + 1; ctr_0 += 8) {
145 const __m256 streamed_0 = _mm256_load_ps(&_data_pdfs_20_30_10[ctr_0]);
146 const __m256 streamed_1 = _mm256_load_ps(&_data_pdfs_20_31_1m1[ctr_0]);
147 const __m256 streamed_2 = _mm256_load_ps(&_data_pdfs_20_32_11[ctr_0]);
148 const __m256 streamed_3 = _mm256_loadu_ps(&_data_pdfs_20_33_10[ctr_0 + 1]);
149 const __m256 streamed_4 = _mm256_loadu_ps(&_data_pdfs_20_34_10[ctr_0 - 1]);
150 const __m256 streamed_5 = _mm256_load_ps(&_data_pdfs_2m1_35_10[ctr_0]);
151 const __m256 streamed_6 = _mm256_load_ps(&_data_pdfs_21_36_10[ctr_0]);
152 const __m256 streamed_7 = _mm256_loadu_ps(&_data_pdfs_20_37_1m1[ctr_0 + 1]);
153 const __m256 streamed_8 = _mm256_loadu_ps(&_data_pdfs_20_38_1m1[ctr_0 - 1]);
154 const __m256 streamed_9 = _mm256_loadu_ps(&_data_pdfs_20_39_11[ctr_0 + 1]);
155 const __m256 streamed_10 = _mm256_loadu_ps(&_data_pdfs_20_310_11[ctr_0 - 1]);
156 const __m256 streamed_11 = _mm256_load_ps(&_data_pdfs_2m1_311_1m1[ctr_0]);
157 const __m256 streamed_12 = _mm256_load_ps(&_data_pdfs_2m1_312_11[ctr_0]);
158 const __m256 streamed_13 = _mm256_loadu_ps(&_data_pdfs_2m1_313_10[ctr_0 + 1]);
159 const __m256 streamed_14 = _mm256_loadu_ps(&_data_pdfs_2m1_314_10[ctr_0 - 1]);
160 const __m256 streamed_15 = _mm256_load_ps(&_data_pdfs_21_315_1m1[ctr_0]);
161 const __m256 streamed_16 = _mm256_load_ps(&_data_pdfs_21_316_11[ctr_0]);
162 const __m256 streamed_17 = _mm256_loadu_ps(&_data_pdfs_21_317_10[ctr_0 + 1]);
163 const __m256 streamed_18 = _mm256_loadu_ps(&_data_pdfs_21_318_10[ctr_0 - 1]);
164 const __m256 vel0Term = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(streamed_10, streamed_14), streamed_18), streamed_4), streamed_8);
165 const __m256 momdensity_0 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(streamed_13, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(streamed_17, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_3, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_7, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_9, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), vel0Term);
166 const __m256 vel1Term = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(streamed_1, streamed_11), streamed_15), streamed_7);
167 const __m256 momdensity_1 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(streamed_10, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(streamed_12, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_16, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_2, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_9, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), streamed_8), vel1Term);
168 const __m256 vel2Term = _mm256_add_ps(_mm256_add_ps(streamed_12, streamed_13), streamed_5);
169 const __m256 rho = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(streamed_0, streamed_16), streamed_17), streamed_2), streamed_3), streamed_6), streamed_9), vel0Term), vel1Term), vel2Term);
170 const __m256 momdensity_2 = _mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(streamed_15, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f)), _mm256_mul_ps(streamed_16, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_17, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_18, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), _mm256_mul_ps(streamed_6, _mm256_set_ps(-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f))), streamed_11), streamed_14), vel2Term);
171 const __m256 u_0 = _mm256_add_ps(_mm256_mul_ps(momdensity_0, _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_mul_ps(_mm256_mul_ps(_mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f), _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_load_ps(&_data_force_20_30_10[ctr_0])));
172 const __m256 u_1 = _mm256_add_ps(_mm256_mul_ps(momdensity_1, _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_mul_ps(_mm256_mul_ps(_mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f), _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_load_ps(&_data_force_20_31_10[ctr_0])));
173 const __m256 u_2 = _mm256_add_ps(_mm256_mul_ps(momdensity_2, _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_mul_ps(_mm256_mul_ps(_mm256_set_ps(0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f), _mm256_div_ps(_mm256_set_ps(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f), rho)), _mm256_load_ps(&_data_force_20_32_10[ctr_0])));
174 _mm256_store_ps(&_data_velocity_20_30_10[ctr_0], u_0);
175 _mm256_store_ps(&_data_velocity_20_31_10[ctr_0], u_1);
176 _mm256_store_ps(&_data_velocity_20_32_10[ctr_0], u_2);
177 _mm256_store_ps(&_data_pdfs_tmp_20_30_10[ctr_0], streamed_0);
178 _mm256_store_ps(&_data_pdfs_tmp_20_31_10[ctr_0], streamed_1);
179 _mm256_store_ps(&_data_pdfs_tmp_20_32_10[ctr_0], streamed_2);
180 _mm256_store_ps(&_data_pdfs_tmp_20_33_10[ctr_0], streamed_3);
181 _mm256_store_ps(&_data_pdfs_tmp_20_34_10[ctr_0], streamed_4);
182 _mm256_store_ps(&_data_pdfs_tmp_20_35_10[ctr_0], streamed_5);
183 _mm256_store_ps(&_data_pdfs_tmp_20_36_10[ctr_0], streamed_6);
184 _mm256_store_ps(&_data_pdfs_tmp_20_37_10[ctr_0], streamed_7);
185 _mm256_store_ps(&_data_pdfs_tmp_20_38_10[ctr_0], streamed_8);
186 _mm256_store_ps(&_data_pdfs_tmp_20_39_10[ctr_0], streamed_9);
187 _mm256_store_ps(&_data_pdfs_tmp_20_310_10[ctr_0], streamed_10);
188 _mm256_store_ps(&_data_pdfs_tmp_20_311_10[ctr_0], streamed_11);
189 _mm256_store_ps(&_data_pdfs_tmp_20_312_10[ctr_0], streamed_12);
190 _mm256_store_ps(&_data_pdfs_tmp_20_313_10[ctr_0], streamed_13);
191 _mm256_store_ps(&_data_pdfs_tmp_20_314_10[ctr_0], streamed_14);
192 _mm256_store_ps(&_data_pdfs_tmp_20_315_10[ctr_0], streamed_15);
193 _mm256_store_ps(&_data_pdfs_tmp_20_316_10[ctr_0], streamed_16);
194 _mm256_store_ps(&_data_pdfs_tmp_20_317_10[ctr_0], streamed_17);
195 _mm256_store_ps(&_data_pdfs_tmp_20_318_10[ctr_0], streamed_18);
197 for (int64_t ctr_0 = (int64_t)((_size_force_0 - 2) / (8)) * (8) + 1; ctr_0 < _size_force_0 - 1; ctr_0 += 1) {
198 const float streamed_0 = _data_pdfs_20_30_10[ctr_0];
199 const float streamed_1 = _data_pdfs_20_31_1m1[ctr_0];
200 const float streamed_2 = _data_pdfs_20_32_11[ctr_0];
201 const float streamed_3 = _data_pdfs_20_33_10[ctr_0 + 1];
202 const float streamed_4 = _data_pdfs_20_34_10[ctr_0 - 1];
203 const float streamed_5 = _data_pdfs_2m1_35_10[ctr_0];
204 const float streamed_6 = _data_pdfs_21_36_10[ctr_0];
205 const float streamed_7 = _data_pdfs_20_37_1m1[ctr_0 + 1];
206 const float streamed_8 = _data_pdfs_20_38_1m1[ctr_0 - 1];
207 const float streamed_9 = _data_pdfs_20_39_11[ctr_0 + 1];
208 const float streamed_10 = _data_pdfs_20_310_11[ctr_0 - 1];
209 const float streamed_11 = _data_pdfs_2m1_311_1m1[ctr_0];
210 const float streamed_12 = _data_pdfs_2m1_312_11[ctr_0];
211 const float streamed_13 = _data_pdfs_2m1_313_10[ctr_0 + 1];
212 const float streamed_14 = _data_pdfs_2m1_314_10[ctr_0 - 1];
213 const float streamed_15 = _data_pdfs_21_315_1m1[ctr_0];
214 const float streamed_16 = _data_pdfs_21_316_11[ctr_0];
215 const float streamed_17 = _data_pdfs_21_317_10[ctr_0 + 1];
216 const float streamed_18 = _data_pdfs_21_318_10[ctr_0 - 1];
217 const float vel0Term = streamed_10 + streamed_14 + streamed_18 + streamed_4 + streamed_8;
218 const float momdensity_0 = streamed_13 * -1.0f + streamed_17 * -1.0f + streamed_3 * -1.0f + streamed_7 * -1.0f + streamed_9 * -1.0f + vel0Term;
219 const float vel1Term = streamed_1 + streamed_11 + streamed_15 + streamed_7;
220 const float momdensity_1 = streamed_10 * -1.0f + streamed_12 * -1.0f + streamed_16 * -1.0f + streamed_2 * -1.0f + streamed_8 + streamed_9 * -1.0f + vel1Term;
221 const float vel2Term = streamed_12 + streamed_13 + streamed_5;
222 const float rho = streamed_0 + streamed_16 + streamed_17 + streamed_2 + streamed_3 + streamed_6 + streamed_9 + vel0Term + vel1Term + vel2Term;
223 const float momdensity_2 = streamed_11 + streamed_14 + streamed_15 * -1.0f + streamed_16 * -1.0f + streamed_17 * -1.0f + streamed_18 * -1.0f + streamed_6 * -1.0f + vel2Term;
224 const float u_0 = momdensity_0 * ((1.0f) / (rho)) + 0.5f * ((1.0f) / (rho)) * _data_force_20_30_10[ctr_0];
225 const float u_1 = momdensity_1 * ((1.0f) / (rho)) + 0.5f * ((1.0f) / (rho)) * _data_force_20_31_10[ctr_0];
226 const float u_2 = momdensity_2 * ((1.0f) / (rho)) + 0.5f * ((1.0f) / (rho)) * _data_force_20_32_10[ctr_0];
227 _data_velocity_20_30_10[ctr_0] = u_0;
228 _data_velocity_20_31_10[ctr_0] = u_1;
229 _data_velocity_20_32_10[ctr_0] = u_2;
230 _data_pdfs_tmp_20_30_10[ctr_0] = streamed_0;
231 _data_pdfs_tmp_20_31_10[ctr_0] = streamed_1;
232 _data_pdfs_tmp_20_32_10[ctr_0] = streamed_2;
233 _data_pdfs_tmp_20_33_10[ctr_0] = streamed_3;
234 _data_pdfs_tmp_20_34_10[ctr_0] = streamed_4;
235 _data_pdfs_tmp_20_35_10[ctr_0] = streamed_5;
236 _data_pdfs_tmp_20_36_10[ctr_0] = streamed_6;
237 _data_pdfs_tmp_20_37_10[ctr_0] = streamed_7;
238 _data_pdfs_tmp_20_38_10[ctr_0] = streamed_8;
239 _data_pdfs_tmp_20_39_10[ctr_0] = streamed_9;
240 _data_pdfs_tmp_20_310_10[ctr_0] = streamed_10;
241 _data_pdfs_tmp_20_311_10[ctr_0] = streamed_11;
242 _data_pdfs_tmp_20_312_10[ctr_0] = streamed_12;
243 _data_pdfs_tmp_20_313_10[ctr_0] = streamed_13;
244 _data_pdfs_tmp_20_314_10[ctr_0] = streamed_14;
245 _data_pdfs_tmp_20_315_10[ctr_0] = streamed_15;
246 _data_pdfs_tmp_20_316_10[ctr_0] = streamed_16;
247 _data_pdfs_tmp_20_317_10[ctr_0] = streamed_17;
248 _data_pdfs_tmp_20_318_10[ctr_0] = streamed_18;