ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
FieldAccessorsSinglePrecisionCUDA.cu
1/*
2 * Copyright (C) 2023-2025 The ESPResSo project
3 * Copyright (C) 2020 The waLBerla project
4 *
5 * This file is part of ESPResSo.
6 *
7 * ESPResSo is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * ESPResSo is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21// kernel generated with pystencils v1.3.7, lbmpy v1.3.7+4.gc7d65a7, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit 0aab9c0af2335b1f6fec75deae06e514ccb233ab
22
23/**
24 * @file
25 * Lattice field accessors.
26 * Adapted from the waLBerla source file
27 * https://i10git.cs.fau.de/walberla/walberla/-/blob/a16141524c58ab88386e2a0f8fdd7c63c5edd704/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
28 */
29
30#include <core/DataTypes.h>
31#include <core/cell/Cell.h>
32#include <core/cell/CellInterval.h>
33#include <core/math/Matrix3.h>
34#include <core/math/Vector3.h>
35
36#include <field/iterators/IteratorMacros.h>
37
38#include <gpu/FieldAccessor.h>
39#include <gpu/FieldIndexing.h>
40#include <gpu/GPUField.h>
41#include <gpu/Kernel.h>
42
43#include <thrust/copy.h>
44#include <thrust/device_ptr.h>
45#include <thrust/device_vector.h>
46#include <thrust/functional.h>
47#include <thrust/transform.h>
48
49#include <array>
50#include <vector>
51
52#if defined(__NVCC__)
53#define RESTRICT __restrict__
54#elif defined(__clang__)
55#if defined(__CUDA__)
56#if defined(__CUDA_ARCH__)
57// clang compiling CUDA code in device mode
58#define RESTRICT __restrict__
59#else
60// clang compiling CUDA code in host mode
61#define RESTRICT __restrict__
62#endif
63#endif
64#elif defined(__GNUC__) or defined(__GNUG__)
65#define RESTRICT __restrict__
66#elif defined(_MSC_VER)
67#define RESTRICT __restrict
68#else
69#define RESTRICT
70#endif
71
72// LCOV_EXCL_START
73/** @brief Get linear index of flattened data with original layout @c fzyx. */
74static __forceinline__ __device__ uint getLinearIndex(uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim, uint fOffset) {
75 auto const x = threadIdx.x;
76 auto const y = blockIdx.x;
77 auto const z = blockIdx.y;
78 auto const f = blockIdx.z;
79 auto const ySize = gridDim.x;
80 auto const zSize = gridDim.y;
81 auto const fSize = fOffset;
82 return f +
83 z * fSize +
84 y * fSize * zSize +
85 x * fSize * zSize * ySize;
86}
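// Worked example (illustration added for clarity, not generated code): the sum
// above gives f stride 1, z stride fSize, y stride fSize*zSize and
// x stride fSize*zSize*ySize. For instance, with ySize = 4, zSize = 4 and
// fOffset = 19, the thread at (x=1, y=2, z=3, f=5) writes to offset
//   5 + 3*19 + 2*19*4 + 1*19*4*4 = 518.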
87
88/** @brief Rescale values in a device vector by some constant. */
89struct algo_rescale {
90 float m_scale_factor;
91 algo_rescale(float scale_factor) : m_scale_factor{scale_factor} {}
92
93 __host__ __device__ constexpr float operator()(float const &x) const {
94 return x * m_scale_factor;
95 }
96};
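// Usage sketch (illustration added for clarity, not generated code): the
// functor is meant for thrust::transform, e.g. to rescale a device vector
// in place:
//   thrust::transform(dev_data.begin(), dev_data.end(), dev_data.begin(),
//                     algo_rescale{0.5f});
// The PressureTensor accessors below use this pattern to convert values from
// lattice units to simulation units.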
97// LCOV_EXCL_STOP
98
99// helper to compute the CUDA launch grid (ceiling division along x)
100static dim3 calculate_dim_grid(uint const threads_x,
101 uint const blocks_per_grid_y,
102 uint const threads_per_block) {
103 auto const threads_y = threads_per_block;
104 auto const blocks_per_grid_x = (threads_x + threads_y - 1u) / threads_y;
105 auto const dim_grid = dim3{blocks_per_grid_x, blocks_per_grid_y, 1u};
106 return dim_grid;
107}
108
109
110namespace walberla {
111namespace lbm {
112namespace accessor {
113
114namespace Population {
115// LCOV_EXCL_START
116__global__ void kernel_get(
117 gpu::FieldAccessor<float> pdf,
118 float *RESTRICT pop) {
119 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u);
120 pdf.set(blockIdx, threadIdx);
121 pop += offset;
122 if (pdf.isValidPosition()) {
123 pop[0u] = pdf.get(0u);
124 pop[1u] = pdf.get(1u);
125 pop[2u] = pdf.get(2u);
126 pop[3u] = pdf.get(3u);
127 pop[4u] = pdf.get(4u);
128 pop[5u] = pdf.get(5u);
129 pop[6u] = pdf.get(6u);
130 pop[7u] = pdf.get(7u);
131 pop[8u] = pdf.get(8u);
132 pop[9u] = pdf.get(9u);
133 pop[10u] = pdf.get(10u);
134 pop[11u] = pdf.get(11u);
135 pop[12u] = pdf.get(12u);
136 pop[13u] = pdf.get(13u);
137 pop[14u] = pdf.get(14u);
138 pop[15u] = pdf.get(15u);
139 pop[16u] = pdf.get(16u);
140 pop[17u] = pdf.get(17u);
141 pop[18u] = pdf.get(18u);
142 }
143}
144
145__global__ void kernel_set(
146 gpu::FieldAccessor<float> pdf,
147 float const *RESTRICT pop) {
148 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u);
149 pdf.set(blockIdx, threadIdx);
150 pop += offset;
151 if (pdf.isValidPosition()) {
152 pdf.get(0u) = pop[0u];
153 pdf.get(1u) = pop[1u];
154 pdf.get(2u) = pop[2u];
155 pdf.get(3u) = pop[3u];
156 pdf.get(4u) = pop[4u];
157 pdf.get(5u) = pop[5u];
158 pdf.get(6u) = pop[6u];
159 pdf.get(7u) = pop[7u];
160 pdf.get(8u) = pop[8u];
161 pdf.get(9u) = pop[9u];
162 pdf.get(10u) = pop[10u];
163 pdf.get(11u) = pop[11u];
164 pdf.get(12u) = pop[12u];
165 pdf.get(13u) = pop[13u];
166 pdf.get(14u) = pop[14u];
167 pdf.get(15u) = pop[15u];
168 pdf.get(16u) = pop[16u];
169 pdf.get(17u) = pop[17u];
170 pdf.get(18u) = pop[18u];
171 }
172}
173
174__global__ void kernel_broadcast(
175 gpu::FieldAccessor<float> pdf,
176 float const *RESTRICT pop) {
177 pdf.set(blockIdx, threadIdx);
178 if (pdf.isValidPosition()) {
179 pdf.get(0u) = pop[0u];
180 pdf.get(1u) = pop[1u];
181 pdf.get(2u) = pop[2u];
182 pdf.get(3u) = pop[3u];
183 pdf.get(4u) = pop[4u];
184 pdf.get(5u) = pop[5u];
185 pdf.get(6u) = pop[6u];
186 pdf.get(7u) = pop[7u];
187 pdf.get(8u) = pop[8u];
188 pdf.get(9u) = pop[9u];
189 pdf.get(10u) = pop[10u];
190 pdf.get(11u) = pop[11u];
191 pdf.get(12u) = pop[12u];
192 pdf.get(13u) = pop[13u];
193 pdf.get(14u) = pop[14u];
194 pdf.get(15u) = pop[15u];
195 pdf.get(16u) = pop[16u];
196 pdf.get(17u) = pop[17u];
197 pdf.get(18u) = pop[18u];
198 }
199}
200
201__global__ void kernel_set_vel(
202 gpu::FieldAccessor<float> pdf,
203 gpu::FieldAccessor<float> velocity,
204 gpu::FieldAccessor<float> force,
205 float const *RESTRICT pop) {
206 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u);
207 pdf.set(blockIdx, threadIdx);
208 velocity.set(blockIdx, threadIdx);
209 force.set(blockIdx, threadIdx);
210 pop += offset;
211 if (pdf.isValidPosition()) {
212 const float f_0 = pdf.get(0u) = pop[0u];
213 const float f_1 = pdf.get(1u) = pop[1u];
214 const float f_2 = pdf.get(2u) = pop[2u];
215 const float f_3 = pdf.get(3u) = pop[3u];
216 const float f_4 = pdf.get(4u) = pop[4u];
217 const float f_5 = pdf.get(5u) = pop[5u];
218 const float f_6 = pdf.get(6u) = pop[6u];
219 const float f_7 = pdf.get(7u) = pop[7u];
220 const float f_8 = pdf.get(8u) = pop[8u];
221 const float f_9 = pdf.get(9u) = pop[9u];
222 const float f_10 = pdf.get(10u) = pop[10u];
223 const float f_11 = pdf.get(11u) = pop[11u];
224 const float f_12 = pdf.get(12u) = pop[12u];
225 const float f_13 = pdf.get(13u) = pop[13u];
226 const float f_14 = pdf.get(14u) = pop[14u];
227 const float f_15 = pdf.get(15u) = pop[15u];
228 const float f_16 = pdf.get(16u) = pop[16u];
229 const float f_17 = pdf.get(17u) = pop[17u];
230 const float f_18 = pdf.get(18u) = pop[18u];
231 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
232 const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
233 const float vel1Term = f_1 + f_11 + f_15 + f_7;
234 const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
235 const float vel2Term = f_12 + f_13 + f_5;
236 const float delta_rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
237 const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
238 const float rho = delta_rho + 1;
239 const float md_0 = force.get(0) * 0.50000000000000000f + momdensity_0;
240 const float md_1 = force.get(1) * 0.50000000000000000f + momdensity_1;
241 const float md_2 = force.get(2) * 0.50000000000000000f + momdensity_2;
242 const float rho_inv = float{1} / rho;
243 velocity.get(0u) = md_0 * rho_inv;
244 velocity.get(1u) = md_1 * rho_inv;
245 velocity.get(2u) = md_2 * rho_inv;
246 }
247}
248// LCOV_EXCL_STOP
249
250std::array<float, 19u> get(
251 gpu::GPUField<float> const *pdf_field,
252 Cell const &cell) {
253 CellInterval ci(cell, cell);
254 thrust::device_vector<float> dev_data(19u);
255 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
256 auto kernel = gpu::make_kernel(kernel_get);
257 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
258 kernel.addParam(dev_data_ptr);
259 kernel();
260 std::array<float, 19u> pop;
261 thrust::copy(dev_data.begin(), dev_data.end(), pop.data());
262 return pop;
263}
264
265void set(
266 gpu::GPUField<float> *pdf_field,
267 std::array<float, 19u> const &pop,
268 Cell const &cell) {
269 thrust::device_vector<float> dev_data(pop.begin(), pop.end());
270 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
271 CellInterval ci(cell, cell);
272 auto kernel = gpu::make_kernel(kernel_set);
273 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
274 kernel.addParam(const_cast<const float *>(dev_data_ptr));
275 kernel();
276}
277
278void set(
279 gpu::GPUField<float> *pdf_field,
280 gpu::GPUField<float> *velocity_field,
281 gpu::GPUField<float> const *force_field,
282 std::array<float, 19u> const &pop,
283 Cell const &cell) {
284 thrust::device_vector<float> dev_data(pop.begin(), pop.end());
285 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
286 CellInterval ci(cell, cell);
287 auto kernel = gpu::make_kernel(kernel_set_vel);
288 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
289 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
290 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
291 kernel.addParam(const_cast<const float *>(dev_data_ptr));
292 kernel();
293}
294
295void initialize(
296 gpu::GPUField<float> *pdf_field,
297 std::array<float, 19u> const &pop) {
298 CellInterval ci = pdf_field->xyzSizeWithGhostLayer();
299 thrust::device_vector<float> dev_data(pop.begin(), pop.end());
300 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
301 auto kernel = gpu::make_kernel(kernel_broadcast);
302 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
303 kernel.addParam(const_cast<const float *>(dev_data_ptr));
304 kernel();
305}
306
307std::vector<float> get(
308 gpu::GPUField<float> const *pdf_field,
309 CellInterval const &ci) {
310 thrust::device_vector<float> dev_data(ci.numCells() * 19u);
311 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
312 auto kernel = gpu::make_kernel(kernel_get);
313 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
314 kernel.addParam(dev_data_ptr);
315 kernel();
316 std::vector<float> out(ci.numCells() * 19u);
317 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
318 return out;
319}
320
321void set(
322 gpu::GPUField<float> *pdf_field,
323 std::vector<float> const &values,
324 CellInterval const &ci) {
325 thrust::device_vector<float> dev_data(values.begin(), values.end());
326 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
327 auto kernel = gpu::make_kernel(kernel_set);
328 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
329 kernel.addParam(const_cast<const float *>(dev_data_ptr));
330 kernel();
331}
332
333void set(
334 gpu::GPUField<float> *pdf_field,
335 gpu::GPUField<float> *velocity_field,
336 gpu::GPUField<float> const *force_field,
337 std::vector<float> const &values,
338 CellInterval const &ci) {
339 thrust::device_vector<float> dev_data(values.begin(), values.end());
340 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
341 auto kernel = gpu::make_kernel(kernel_set_vel);
342 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
343 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
344 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
345 kernel.addParam(const_cast<const float *>(dev_data_ptr));
346 kernel();
347}
348} // namespace Population
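// Usage sketch (illustration added for clarity, variable names are assumed):
// on the host side, the 19 populations of a single cell can be read, modified
// and written back through this namespace, e.g.
//   auto pop = walberla::lbm::accessor::Population::get(pdf_field, Cell{1, 2, 3});
//   pop[0u] += 0.01f;
//   walberla::lbm::accessor::Population::set(pdf_field, pop, Cell{1, 2, 3});
// where pdf_field points to a gpu::GPUField<float> with 19 values per cell;
// each call stages the data through a temporary thrust::device_vector.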
349
350namespace Vector {
351// LCOV_EXCL_START
352__global__ void kernel_get(
353 gpu::FieldAccessor<float> vec,
354 float *u_out) {
355 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
356 vec.set(blockIdx, threadIdx);
357 u_out += offset;
358 if (vec.isValidPosition()) {
359 u_out[0u] = vec.get(0u);
360 u_out[1u] = vec.get(1u);
361 u_out[2u] = vec.get(2u);
362 }
363}
364
365__global__ void kernel_set(
366 gpu::FieldAccessor<float> vec,
367 float const *RESTRICT u_in) {
368 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
369 vec.set(blockIdx, threadIdx);
370 u_in += offset;
371 if (vec.isValidPosition()) {
372 vec.get(0u) = u_in[0u];
373 vec.get(1u) = u_in[1u];
374 vec.get(2u) = u_in[2u];
375 }
376}
377
378__global__ void kernel_broadcast(
379 gpu::FieldAccessor<float> vec,
380 float const *RESTRICT u_in) {
381 vec.set(blockIdx, threadIdx);
382 if (vec.isValidPosition()) {
383 vec.get(0u) = u_in[0u];
384 vec.get(1u) = u_in[1u];
385 vec.get(2u) = u_in[2u];
386 }
387}
388
389__global__ void kernel_add(
390 gpu::FieldAccessor<float> vec,
391 float const *RESTRICT u_in) {
392 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
393 vec.set(blockIdx, threadIdx);
394 u_in += offset;
395 if (vec.isValidPosition()) {
396 vec.get(0u) += u_in[0u];
397 vec.get(1u) += u_in[1u];
398 vec.get(2u) += u_in[2u];
399 }
400}
401
402__global__ void kernel_broadcast_add(
403 gpu::FieldAccessor<float> vec,
404 float const *RESTRICT u_in) {
405 vec.set(blockIdx, threadIdx);
406 if (vec.isValidPosition()) {
407 vec.get(0u) += u_in[0u];
408 vec.get(1u) += u_in[1u];
409 vec.get(2u) += u_in[2u];
410 }
411}
412
413__global__ void kernel_set_from_list(
414 gpu::FieldAccessor<float> vec,
415 int const *RESTRICT const indices,
416 float const *RESTRICT const values,
417 uint length) {
418
419 uint index = blockIdx.y * gridDim.x * blockDim.x +
420 blockDim.x * blockIdx.x + threadIdx.x;
421
422 vec.set({0u, 0u, 0u}, {0u, 0u, 0u});
423 if (vec.isValidPosition() and index < length) {
424 auto const array_index = index * uint(3u);
425 auto const cx = indices[array_index + 0u];
426 auto const cy = indices[array_index + 1u];
427 auto const cz = indices[array_index + 2u];
428#pragma unroll
429 for (uint cf = 0u; cf < 3u; ++cf) {
430 vec.getNeighbor(cx, cy, cz, cf) = values[array_index + cf];
431 }
432 }
433}
434// LCOV_EXCL_STOP
435
436Vector3<float> get(
437 gpu::GPUField<float> const *vec_field,
438 Cell const &cell) {
439 CellInterval ci(cell, cell);
440 thrust::device_vector<float> dev_data(3u);
441 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
442 auto kernel = gpu::make_kernel(kernel_get);
443 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
444 kernel.addParam(dev_data_ptr);
445 kernel();
446 Vector3<float> vec;
447 thrust::copy(dev_data.begin(), dev_data.end(), vec.data());
448 return vec;
449}
450
451void set(
452 gpu::GPUField<float> *vec_field,
453 Vector3<float> const &vec,
454 Cell const &cell) {
455 CellInterval ci(cell, cell);
456 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
457 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
458 auto kernel = gpu::make_kernel(kernel_set);
459 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
460 kernel.addParam(const_cast<const float *>(dev_data_ptr));
461 kernel();
462}
463
464void add(
465 gpu::GPUField<float> *vec_field,
466 Vector3<float> const &vec,
467 Cell const &cell) {
468 CellInterval ci(cell, cell);
469 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
470 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
471 auto kernel = gpu::make_kernel(kernel_add);
472 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
473 kernel.addParam(const_cast<const float *>(dev_data_ptr));
474 kernel();
475}
476
477void initialize(
478 gpu::GPUField<float> *vec_field,
479 Vector3<float> const &vec) {
480 CellInterval ci = vec_field->xyzSizeWithGhostLayer();
481 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
482 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
483 auto kernel = gpu::make_kernel(kernel_broadcast);
484 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
485 kernel.addParam(const_cast<const float *>(dev_data_ptr));
486 kernel();
487}
488
489void add_to_all(
490 gpu::GPUField<float> *vec_field,
491 Vector3<float> const &vec) {
492 CellInterval ci = vec_field->xyzSizeWithGhostLayer();
493 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
494 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
495 auto kernel = gpu::make_kernel(kernel_broadcast_add);
496 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
497 kernel.addParam(const_cast<const float *>(dev_data_ptr));
498 kernel();
499}
500
501std::vector<float> get(
502 gpu::GPUField<float> const *vec_field,
503 CellInterval const &ci) {
504 thrust::device_vector<float> dev_data(ci.numCells() * 3u);
505 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
506 auto kernel = gpu::make_kernel(kernel_get);
507 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
508 kernel.addParam(dev_data_ptr);
509 kernel();
510 std::vector<float> out(ci.numCells() * 3u);
511 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
512 return out;
513}
514
515void set(
516 gpu::GPUField<float> *vec_field,
517 std::vector<float> const &values,
518 CellInterval const &ci) {
519 thrust::device_vector<float> dev_data(values.begin(), values.end());
520 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
521 auto kernel = gpu::make_kernel(kernel_set);
522 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
523 kernel.addParam(const_cast<const float *>(dev_data_ptr));
524 kernel();
525}
526
527void set_from_list(
528 gpu::GPUField<float> const *field,
529 thrust::device_vector<int> const &indices,
530 thrust::device_vector<float> const &values,
531 uint gl) {
532 auto const dev_idx_ptr = thrust::raw_pointer_cast(indices.data());
533 auto const dev_val_ptr = thrust::raw_pointer_cast(values.data());
534
535 auto const threads_per_block = uint(64u);
536 auto const length = static_cast<uint>(indices.size() / 3ul);
537 auto const dim_grid = calculate_dim_grid(length, 4u, threads_per_block);
538 kernel_set_from_list<<<dim_grid, threads_per_block, 0u, nullptr>>>(
539 gpu::FieldIndexing<float>::withGhostLayerXYZ(*field, gl).gpuAccess(),
540 dev_idx_ptr, dev_val_ptr, length);
541}
542} // namespace Vector
543
544namespace Interpolation {
545// LCOV_EXCL_START
546/** @brief Calculate interpolation weights. */
547static __forceinline__ __device__ void calculate_weights(
548 float const *RESTRICT const pos,
549 int *RESTRICT const corner,
550 float *RESTRICT const weights,
551 uint gl) {
552#pragma unroll
553 for (int dim = 0; dim < 3; ++dim) {
554 auto const fractional_index = pos[dim] - float{0.5};
555 auto const nmp = floorf(fractional_index);
556 auto const distance = fractional_index - nmp - float{0.5};
557 corner[dim] = __float2int_rn(nmp) + static_cast<int>(gl);
558 weights[dim * 2 + 0] = float{0.5} - distance;
559 weights[dim * 2 + 1] = float{0.5} + distance;
560 }
561}
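// Worked example (illustration added for clarity, not generated code): cell
// centers sit at 0.5, 1.5, ... in lattice units, so for a coordinate
// pos = 1.3 the code above yields fractional_index = 0.8, nmp = 0,
// distance = 0.3, hence weight 0.2 for the lower neighbor and 0.8 for the
// upper neighbor; combining all three axes gives the usual trilinear
// interpolation weights over 8 corner cells.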
562
563__global__ void kernel_get_rho(
564 gpu::FieldAccessor<float> pdf,
565 float const *RESTRICT const pos,
566 float *RESTRICT const rho_out,
567 float const density,
568 uint n_pos,
569 uint gl) {
570
571 uint pos_index = blockIdx.y * gridDim.x * blockDim.x +
572 blockDim.x * blockIdx.x + threadIdx.x;
573
574 pdf.set({0u, 0u, 0u}, {0u, 0u, 0u});
575 if (pdf.isValidPosition() and pos_index < n_pos) {
576 auto const array_offset = pos_index * uint(3u);
577 int corner[3];
578 float weights[3][2];
579 calculate_weights(pos + array_offset, corner, &weights[0][0], gl);
580#pragma unroll
581 for (int i = 0; i < 2; i++) {
582 auto const cx = corner[0] + i;
583 auto const wx = weights[0][i];
584#pragma unroll
585 for (int j = 0; j < 2; j++) {
586 auto const cy = corner[1] + j;
587 auto const wxy = wx * weights[1][j];
588#pragma unroll
589 for (int k = 0; k < 2; k++) {
590 auto const cz = corner[2] + k;
591 auto const weight = wxy * weights[2][k];
592 float const f_0 = pdf.getNeighbor(cx, cy, cz, 0u);
593 float const f_1 = pdf.getNeighbor(cx, cy, cz, 1u);
594 float const f_2 = pdf.getNeighbor(cx, cy, cz, 2u);
595 float const f_3 = pdf.getNeighbor(cx, cy, cz, 3u);
596 float const f_4 = pdf.getNeighbor(cx, cy, cz, 4u);
597 float const f_5 = pdf.getNeighbor(cx, cy, cz, 5u);
598 float const f_6 = pdf.getNeighbor(cx, cy, cz, 6u);
599 float const f_7 = pdf.getNeighbor(cx, cy, cz, 7u);
600 float const f_8 = pdf.getNeighbor(cx, cy, cz, 8u);
601 float const f_9 = pdf.getNeighbor(cx, cy, cz, 9u);
602 float const f_10 = pdf.getNeighbor(cx, cy, cz, 10u);
603 float const f_11 = pdf.getNeighbor(cx, cy, cz, 11u);
604 float const f_12 = pdf.getNeighbor(cx, cy, cz, 12u);
605 float const f_13 = pdf.getNeighbor(cx, cy, cz, 13u);
606 float const f_14 = pdf.getNeighbor(cx, cy, cz, 14u);
607 float const f_15 = pdf.getNeighbor(cx, cy, cz, 15u);
608 float const f_16 = pdf.getNeighbor(cx, cy, cz, 16u);
609 float const f_17 = pdf.getNeighbor(cx, cy, cz, 17u);
610 float const f_18 = pdf.getNeighbor(cx, cy, cz, 18u);
611 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
612 const float vel1Term = f_1 + f_11 + f_15 + f_7;
613 const float vel2Term = f_12 + f_13 + f_5;
614 const float delta_rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
615 const float rho = density * (delta_rho + 1);
616 rho_out[pos_index] += weight * rho;
617 }
618 }
619 }
620 }
621}
622
623__global__ void kernel_get_vel(
624 gpu::FieldAccessor<float> vel,
625 float const *RESTRICT const pos,
626 float *RESTRICT const vel_out,
627 uint n_pos,
628 uint gl) {
629
630 uint pos_index = blockIdx.y * gridDim.x * blockDim.x +
631 blockDim.x * blockIdx.x + threadIdx.x;
632
633 vel.set({0u, 0u, 0u}, {0u, 0u, 0u});
634 if (vel.isValidPosition() and pos_index < n_pos) {
635 auto const array_offset = pos_index * uint(3u);
636 int corner[3];
637 float weights[3][2];
638 calculate_weights(pos + array_offset, corner, &weights[0][0], gl);
639#pragma unroll
640 for (int i = 0; i < 2; i++) {
641 auto const cx = corner[0] + i;
642 auto const wx = weights[0][i];
643#pragma unroll
644 for (int j = 0; j < 2; j++) {
645 auto const cy = corner[1] + j;
646 auto const wxy = wx * weights[1][j];
647#pragma unroll
648 for (int k = 0; k < 2; k++) {
649 auto const cz = corner[2] + k;
650 auto const weight = wxy * weights[2][k];
651#pragma unroll
652 for (uint cf = 0u; cf < 3u; ++cf) {
653 vel_out[array_offset + cf] += weight * vel.getNeighbor(cx, cy, cz, cf);
654 }
655 }
656 }
657 }
658 }
659}
660
661__global__ void kernel_add_force(
662 gpu::FieldAccessor<float> force,
663 float const *RESTRICT const pos,
664 float const *RESTRICT const forces,
665 uint n_pos,
666 uint gl) {
667
668 uint pos_index = blockIdx.y * gridDim.x * blockDim.x +
669 blockDim.x * blockIdx.x + threadIdx.x;
670
671 force.set({0u, 0u, 0u}, {0u, 0u, 0u});
672 if (force.isValidPosition() and pos_index < n_pos) {
673 auto const array_offset = pos_index * uint(3u);
674 int corner[3];
675 float weights[3][2];
676 calculate_weights(pos + array_offset, corner, &weights[0][0], gl);
677#pragma unroll
678 for (int i = 0; i < 2; i++) {
679 auto const cx = corner[0] + i;
680 auto const wx = weights[0][i];
681#pragma unroll
682 for (int j = 0; j < 2; j++) {
683 auto const cy = corner[1] + j;
684 auto const wxy = wx * weights[1][j];
685#pragma unroll
686 for (int k = 0; k < 2; k++) {
687 auto const cz = corner[2] + k;
688 auto const weight = wxy * weights[2][k];
689#pragma unroll
690 for (uint cf = 0u; cf < 3u; ++cf) {
691 atomicAdd(&force.getNeighbor(cx, cy, cz, cf),
692 weight * forces[array_offset + cf]);
693 }
694 }
695 }
696 }
697 }
698}
699// LCOV_EXCL_STOP
700
701std::vector<float>
702get_rho(
703 gpu::GPUField<float> const *field,
704 std::vector<float> const &pos,
705 float const density,
706 uint gl) {
707 thrust::device_vector<float> dev_pos(pos.begin(), pos.end());
708 thrust::device_vector<float> dev_rho(pos.size() / 3ul);
709 auto const dev_pos_ptr = thrust::raw_pointer_cast(dev_pos.data());
710 auto const dev_rho_ptr = thrust::raw_pointer_cast(dev_rho.data());
711
712 auto const threads_per_block = uint(64u);
713 auto const n_pos = static_cast<uint>(pos.size() / 3ul);
714 auto const dim_grid = calculate_dim_grid(n_pos, 4u, threads_per_block);
715 kernel_get_rho<<<dim_grid, threads_per_block, 0u, nullptr>>>(
716 gpu::FieldIndexing<float>::withGhostLayerXYZ(*field, gl).gpuAccess(),
717 dev_pos_ptr, dev_rho_ptr, density, n_pos, gl);
718
719 std::vector<float> out(dev_rho.size());
720 thrust::copy(dev_rho.begin(), dev_rho.end(), out.data());
721 return out;
722}
723
724std::vector<float>
725get_vel(
726 gpu::GPUField<float> const *field,
727 std::vector<float> const &pos,
728 uint gl) {
729 thrust::device_vector<float> dev_pos(pos.begin(), pos.end());
730 thrust::device_vector<float> dev_vel(pos.size());
731 auto const dev_pos_ptr = thrust::raw_pointer_cast(dev_pos.data());
732 auto const dev_vel_ptr = thrust::raw_pointer_cast(dev_vel.data());
733
734 auto const threads_per_block = uint(64u);
735 auto const n_pos = static_cast<uint>(pos.size() / 3ul);
736 auto const dim_grid = calculate_dim_grid(n_pos, 4u, threads_per_block);
737 kernel_get_vel<<<dim_grid, threads_per_block, 0u, nullptr>>>(
738 gpu::FieldIndexing<float>::withGhostLayerXYZ(*field, gl).gpuAccess(),
739 dev_pos_ptr, dev_vel_ptr, n_pos, gl);
740
741 std::vector<float> out(dev_vel.size());
742 thrust::copy(dev_vel.begin(), dev_vel.end(), out.data());
743 return out;
744}
745
746void add_force(
747 gpu::GPUField<float> const *field,
748 std::vector<float> const &pos,
749 std::vector<float> const &forces,
750 uint gl) {
751 thrust::device_vector<float> dev_pos(pos.begin(), pos.end());
752 thrust::device_vector<float> dev_for(forces.begin(), forces.end());
753 auto const dev_pos_ptr = thrust::raw_pointer_cast(dev_pos.data());
754 auto const dev_for_ptr = thrust::raw_pointer_cast(dev_for.data());
755
756 auto const threads_per_block = uint(64u);
757 auto const n_pos = static_cast<uint>(pos.size() / 3ul);
758 auto const dim_grid = calculate_dim_grid(n_pos, 4u, threads_per_block);
759 kernel_add_force<<<dim_grid, threads_per_block, 0u, nullptr>>>(
760 gpu::FieldIndexing<float>::withGhostLayerXYZ(*field, gl).gpuAccess(),
761 dev_pos_ptr, dev_for_ptr, n_pos, gl);
762}
763} // namespace Interpolation
764
765namespace Equilibrium {
766// LCOV_EXCL_START
767__device__ void kernel_set_device(
768 gpu::FieldAccessor<float> pdf,
769 float const *RESTRICT const u,
770 float rho) {
771
772 float delta_rho = rho - float{1};
773
774 pdf.get(0u) = delta_rho * 0.33333333333333331f + rho * -0.33333333333333331f * (u[0] * u[0]) + rho * -0.33333333333333331f * (u[1] * u[1]) + rho * -0.33333333333333331f * (u[2] * u[2]);
775 pdf.get(1u) = delta_rho * 0.055555555555555552f + rho * -0.16666666666666666f * (u[0] * u[0]) + rho * -0.16666666666666666f * (u[2] * u[2]) + rho * 0.16666666666666666f * u[1] + rho * 0.16666666666666666f * (u[1] * u[1]);
776 pdf.get(2u) = delta_rho * 0.055555555555555552f + rho * -0.16666666666666666f * u[1] + rho * -0.16666666666666666f * (u[0] * u[0]) + rho * -0.16666666666666666f * (u[2] * u[2]) + rho * 0.16666666666666666f * (u[1] * u[1]);
777 pdf.get(3u) = delta_rho * 0.055555555555555552f + rho * -0.16666666666666666f * u[0] + rho * -0.16666666666666666f * (u[1] * u[1]) + rho * -0.16666666666666666f * (u[2] * u[2]) + rho * 0.16666666666666666f * (u[0] * u[0]);
778 pdf.get(4u) = delta_rho * 0.055555555555555552f + rho * -0.16666666666666666f * (u[1] * u[1]) + rho * -0.16666666666666666f * (u[2] * u[2]) + rho * 0.16666666666666666f * u[0] + rho * 0.16666666666666666f * (u[0] * u[0]);
779 pdf.get(5u) = delta_rho * 0.055555555555555552f + rho * -0.16666666666666666f * (u[0] * u[0]) + rho * -0.16666666666666666f * (u[1] * u[1]) + rho * 0.16666666666666666f * u[2] + rho * 0.16666666666666666f * (u[2] * u[2]);
780 pdf.get(6u) = delta_rho * 0.055555555555555552f + rho * -0.16666666666666666f * u[2] + rho * -0.16666666666666666f * (u[0] * u[0]) + rho * -0.16666666666666666f * (u[1] * u[1]) + rho * 0.16666666666666666f * (u[2] * u[2]);
781 pdf.get(7u) = delta_rho * 0.027777777777777776f + rho * -0.083333333333333329f * u[0] + rho * -0.25f * u[0] * u[1] + rho * 0.083333333333333329f * u[1] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[1] * u[1]);
782 pdf.get(8u) = delta_rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] + rho * 0.083333333333333329f * u[1] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.25f * u[0] * u[1];
783 pdf.get(9u) = delta_rho * 0.027777777777777776f + rho * -0.083333333333333329f * u[0] + rho * -0.083333333333333329f * u[1] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.25f * u[0] * u[1];
784 pdf.get(10u) = delta_rho * 0.027777777777777776f + rho * -0.083333333333333329f * u[1] + rho * -0.25f * u[0] * u[1] + rho * 0.083333333333333329f * u[0] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[1] * u[1]);
785 pdf.get(11u) = delta_rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[1] + rho * 0.083333333333333329f * u[2] + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[1] * u[2];
786 pdf.get(12u) = delta_rho * 0.027777777777777776f + rho * -0.083333333333333329f * u[1] + rho * -0.25f * u[1] * u[2] + rho * 0.083333333333333329f * u[2] + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.083333333333333329f * (u[2] * u[2]);
787 pdf.get(13u) = delta_rho * 0.027777777777777776f + rho * -0.083333333333333329f * u[0] + rho * -0.25f * u[0] * u[2] + rho * 0.083333333333333329f * u[2] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[2] * u[2]);
788 pdf.get(14u) = delta_rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] + rho * 0.083333333333333329f * u[2] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[0] * u[2];
789 pdf.get(15u) = delta_rho * 0.027777777777777776f + rho * -0.083333333333333329f * u[2] + rho * -0.25f * u[1] * u[2] + rho * 0.083333333333333329f * u[1] + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.083333333333333329f * (u[2] * u[2]);
790 pdf.get(16u) = delta_rho * 0.027777777777777776f + rho * -0.083333333333333329f * u[1] + rho * -0.083333333333333329f * u[2] + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[1] * u[2];
791 pdf.get(17u) = delta_rho * 0.027777777777777776f + rho * -0.083333333333333329f * u[0] + rho * -0.083333333333333329f * u[2] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[0] * u[2];
792 pdf.get(18u) = delta_rho * 0.027777777777777776f + rho * -0.083333333333333329f * u[2] + rho * -0.25f * u[0] * u[2] + rho * 0.083333333333333329f * u[0] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[2] * u[2]);
793}
794// LCOV_EXCL_STOP
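// Note (illustration added for clarity): the decimal coefficients above are
// the D3Q19 lattice weights w_0 = 1/3, w_1..6 = 1/18 and w_7..18 = 1/36
// written out by lbmpy; the kernel writes second-order equilibrium
// populations for the given velocity u and density rho = 1 + delta_rho.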
795} // namespace Equilibrium
796
797namespace Density {
798// LCOV_EXCL_START
799__global__ void kernel_get(
800 gpu::FieldAccessor<float> pdf,
801 float *RESTRICT rho_out,
802 float const density) {
803 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u);
804 pdf.set(blockIdx, threadIdx);
805 rho_out += offset;
806 if (pdf.isValidPosition()) {
807 float const f_0 = pdf.get(0u);
808 float const f_1 = pdf.get(1u);
809 float const f_2 = pdf.get(2u);
810 float const f_3 = pdf.get(3u);
811 float const f_4 = pdf.get(4u);
812 float const f_5 = pdf.get(5u);
813 float const f_6 = pdf.get(6u);
814 float const f_7 = pdf.get(7u);
815 float const f_8 = pdf.get(8u);
816 float const f_9 = pdf.get(9u);
817 float const f_10 = pdf.get(10u);
818 float const f_11 = pdf.get(11u);
819 float const f_12 = pdf.get(12u);
820 float const f_13 = pdf.get(13u);
821 float const f_14 = pdf.get(14u);
822 float const f_15 = pdf.get(15u);
823 float const f_16 = pdf.get(16u);
824 float const f_17 = pdf.get(17u);
825 float const f_18 = pdf.get(18u);
826 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
827 const float vel1Term = f_1 + f_11 + f_15 + f_7;
828 const float vel2Term = f_12 + f_13 + f_5;
829 const float delta_rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
830 const float rho = density * (delta_rho + 1);
831 rho_out[0u] = rho;
832 }
833}
834
835__global__ void kernel_set(
836 gpu::FieldAccessor<float> pdf,
837 float const *RESTRICT rho_in,
838 float const density) {
839 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u);
840 pdf.set(blockIdx, threadIdx);
841 rho_in += offset;
842 if (pdf.isValidPosition()) {
843 float const f_0 = pdf.get(0u);
844 float const f_1 = pdf.get(1u);
845 float const f_2 = pdf.get(2u);
846 float const f_3 = pdf.get(3u);
847 float const f_4 = pdf.get(4u);
848 float const f_5 = pdf.get(5u);
849 float const f_6 = pdf.get(6u);
850 float const f_7 = pdf.get(7u);
851 float const f_8 = pdf.get(8u);
852 float const f_9 = pdf.get(9u);
853 float const f_10 = pdf.get(10u);
854 float const f_11 = pdf.get(11u);
855 float const f_12 = pdf.get(12u);
856 float const f_13 = pdf.get(13u);
857 float const f_14 = pdf.get(14u);
858 float const f_15 = pdf.get(15u);
859 float const f_16 = pdf.get(16u);
860 float const f_17 = pdf.get(17u);
861 float const f_18 = pdf.get(18u);
862 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
863 const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
864 const float vel1Term = f_1 + f_11 + f_15 + f_7;
865 const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
866 const float vel2Term = f_12 + f_13 + f_5;
867 const float delta_rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
868 const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
869 const float rho = delta_rho + 1;
870
871 // calculate current velocity (before density change)
872 float const rho_inv = float{1} / rho;
873 float const u_old[3] = {momdensity_0 * rho_inv, momdensity_1 * rho_inv, momdensity_2 * rho_inv};
874
875 Equilibrium::kernel_set_device(pdf, u_old, rho_in[0u] / density);
876 }
877}
878// LCOV_EXCL_STOP
879
880float get(
881 gpu::GPUField<float> const *pdf_field,
882 float const density,
883 Cell const &cell) {
884 CellInterval ci(cell, cell);
885 thrust::device_vector<float> dev_data(1u);
886 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
887 auto kernel = gpu::make_kernel(kernel_get);
888 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
889 kernel.addParam(dev_data_ptr);
890 kernel.addParam(density);
891 kernel();
892 float rho{};
893 thrust::copy(dev_data.begin(), dev_data.end(), &rho);
894 return rho;
895}
896
897std::vector<float> get(
898 gpu::GPUField<float> const *pdf_field,
899 float const density,
900 CellInterval const &ci) {
901 thrust::device_vector<float> dev_data(ci.numCells());
902 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
903 auto kernel = gpu::make_kernel(kernel_get);
904 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
905 kernel.addParam(dev_data_ptr);
906 kernel.addParam(density);
907 kernel();
908 std::vector<float> out(dev_data.size());
909 thrust::copy(dev_data.begin(), dev_data.end(), out.begin());
910 return out;
911}
912
913void set(
914 gpu::GPUField<float> *pdf_field,
915 float const rho,
916 float const density,
917 Cell const &cell) {
918 CellInterval ci(cell, cell);
919 thrust::device_vector<float> dev_data(1u, rho);
920 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
921 auto kernel = gpu::make_kernel(kernel_set);
922 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
923 kernel.addParam(const_cast<const float *>(dev_data_ptr));
924 kernel.addParam(density);
925 kernel();
926}
927
928void set(
929 gpu::GPUField<float> *pdf_field,
930 std::vector<float> const &values,
931 float const density,
932 CellInterval const &ci) {
933 thrust::device_vector<float> dev_data(values.begin(), values.end());
934 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
935 auto kernel = gpu::make_kernel(kernel_set);
936 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
937 kernel.addParam(const_cast<const float *>(dev_data_ptr));
938 kernel.addParam(density);
939 kernel();
940}
941} // namespace Density
942
943namespace Velocity {
944// LCOV_EXCL_START
945__global__ void kernel_get(
946 gpu::FieldAccessor<float> pdf,
947 gpu::FieldAccessor<float> force,
948 float *RESTRICT u_out) {
949 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
950 pdf.set(blockIdx, threadIdx);
951 force.set(blockIdx, threadIdx);
952 u_out += offset;
953 if (pdf.isValidPosition()) {
954 float const f_0 = pdf.get(0u);
955 float const f_1 = pdf.get(1u);
956 float const f_2 = pdf.get(2u);
957 float const f_3 = pdf.get(3u);
958 float const f_4 = pdf.get(4u);
959 float const f_5 = pdf.get(5u);
960 float const f_6 = pdf.get(6u);
961 float const f_7 = pdf.get(7u);
962 float const f_8 = pdf.get(8u);
963 float const f_9 = pdf.get(9u);
964 float const f_10 = pdf.get(10u);
965 float const f_11 = pdf.get(11u);
966 float const f_12 = pdf.get(12u);
967 float const f_13 = pdf.get(13u);
968 float const f_14 = pdf.get(14u);
969 float const f_15 = pdf.get(15u);
970 float const f_16 = pdf.get(16u);
971 float const f_17 = pdf.get(17u);
972 float const f_18 = pdf.get(18u);
973 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
974 const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
975 const float vel1Term = f_1 + f_11 + f_15 + f_7;
976 const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
977 const float vel2Term = f_12 + f_13 + f_5;
978 const float delta_rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
979 const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
980 const float rho = delta_rho + 1;
981 const float md_0 = force.get(0) * 0.50000000000000000f + momdensity_0;
982 const float md_1 = force.get(1) * 0.50000000000000000f + momdensity_1;
983 const float md_2 = force.get(2) * 0.50000000000000000f + momdensity_2;
984 auto const rho_inv = float{1} / rho;
985 u_out[0u] = md_0 * rho_inv;
986 u_out[1u] = md_1 * rho_inv;
987 u_out[2u] = md_2 * rho_inv;
988 }
989}
990
991__global__ void kernel_set(
992 gpu::FieldAccessor<float> pdf,
993 gpu::FieldAccessor<float> velocity,
994 gpu::FieldAccessor<float> force,
995 float const *RESTRICT u_in) {
996 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
997 pdf.set(blockIdx, threadIdx);
998 velocity.set(blockIdx, threadIdx);
999 force.set(blockIdx, threadIdx);
1000 u_in += offset;
1001 if (pdf.isValidPosition()) {
1002 float const f_0 = pdf.get(0u);
1003 float const f_1 = pdf.get(1u);
1004 float const f_2 = pdf.get(2u);
1005 float const f_3 = pdf.get(3u);
1006 float const f_4 = pdf.get(4u);
1007 float const f_5 = pdf.get(5u);
1008 float const f_6 = pdf.get(6u);
1009 float const f_7 = pdf.get(7u);
1010 float const f_8 = pdf.get(8u);
1011 float const f_9 = pdf.get(9u);
1012 float const f_10 = pdf.get(10u);
1013 float const f_11 = pdf.get(11u);
1014 float const f_12 = pdf.get(12u);
1015 float const f_13 = pdf.get(13u);
1016 float const f_14 = pdf.get(14u);
1017 float const f_15 = pdf.get(15u);
1018 float const f_16 = pdf.get(16u);
1019 float const f_17 = pdf.get(17u);
1020 float const f_18 = pdf.get(18u);
1021 float const *RESTRICT const u = u_in;
1022 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
1023 const float vel1Term = f_1 + f_11 + f_15 + f_7;
1024 const float vel2Term = f_12 + f_13 + f_5;
1025 const float delta_rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
1026 const float rho = delta_rho + 1;
1027 const float u_0 = -force.get(0) * 0.50000000000000000f / rho + u[0];
1028 const float u_1 = -force.get(1) * 0.50000000000000000f / rho + u[1];
1029 const float u_2 = -force.get(2) * 0.50000000000000000f / rho + u[2];
1030 velocity.get(0u) = u_in[0u];
1031 velocity.get(1u) = u_in[1u];
1032 velocity.get(2u) = u_in[2u];
1033
1034 float u_new[3] = {u_0, u_1, u_2};
1035
1036 Equilibrium::kernel_set_device(pdf, u_new, rho);
1037 }
1038}
1039// LCOV_EXCL_STOP
1040
1041Vector3<float> get(
1042 gpu::GPUField<float> const *pdf_field,
1043 gpu::GPUField<float> const *force_field,
1044 Cell const &cell) {
1045 CellInterval ci(cell, cell);
1046 thrust::device_vector<float> dev_data(3u);
1047 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1048 auto kernel = gpu::make_kernel(kernel_get);
1049 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1050 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
1051 kernel.addParam(dev_data_ptr);
1052 kernel();
1053 Vector3<float> vec;
1054 thrust::copy(dev_data.begin(), dev_data.end(), vec.data());
1055 return vec;
1056}
1057
1058std::vector<float> get(
1059 gpu::GPUField<float> const *pdf_field,
1060 gpu::GPUField<float> const *force_field,
1061 CellInterval const &ci) {
1062 thrust::device_vector<float> dev_data(3u);
1063 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1064 auto kernel = gpu::make_kernel(kernel_get);
1065 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1066 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
1067 kernel.addParam(dev_data_ptr);
1068 kernel();
1069 std::vector<float> out(dev_data.size());
1070 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
1071 return out;
1072}
1073
1074void set(
1075 gpu::GPUField<float> *pdf_field,
1076 gpu::GPUField<float> *velocity_field,
1077 gpu::GPUField<float> const *force_field,
1078 Vector3<float> const &u,
1079 Cell const &cell) {
1080 CellInterval ci(cell, cell);
1081 thrust::device_vector<float> dev_data(u.data(), u.data() + 3u);
1082 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1083 auto kernel = gpu::make_kernel(kernel_set);
1084 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1085 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
1086 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
1087 kernel.addParam(const_cast<const float *>(dev_data_ptr));
1088 kernel();
1089}
1090
1091void set(
1092 gpu::GPUField<float> *pdf_field,
1093 gpu::GPUField<float> *velocity_field,
1094 gpu::GPUField<float> const *force_field,
1095 std::vector<float> const &values,
1096 CellInterval const &ci) {
1097 thrust::device_vector<float> dev_data(values.begin(), values.end());
1098 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1099 auto kernel = gpu::make_kernel(kernel_set);
1100 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1101 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
1102 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
1103 kernel.addParam(const_cast<const float *>(dev_data_ptr));
1104 kernel();
1105}
1106} // namespace Velocity
1107
1108namespace Force {
1109// LCOV_EXCL_START
1110__global__ void kernel_set(
1111 gpu::FieldAccessor<float> pdf,
1112 gpu::FieldAccessor<float> velocity,
1113 gpu::FieldAccessor<float> force,
1114 float const *RESTRICT f_in,
1115 float const density) {
1116 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
1117 pdf.set(blockIdx, threadIdx);
1118 velocity.set(blockIdx, threadIdx);
1119 force.set(blockIdx, threadIdx);
1120 f_in += offset;
1121 if (pdf.isValidPosition()) {
1122 float const f_0 = pdf.get(0u);
1123 float const f_1 = pdf.get(1u);
1124 float const f_2 = pdf.get(2u);
1125 float const f_3 = pdf.get(3u);
1126 float const f_4 = pdf.get(4u);
1127 float const f_5 = pdf.get(5u);
1128 float const f_6 = pdf.get(6u);
1129 float const f_7 = pdf.get(7u);
1130 float const f_8 = pdf.get(8u);
1131 float const f_9 = pdf.get(9u);
1132 float const f_10 = pdf.get(10u);
1133 float const f_11 = pdf.get(11u);
1134 float const f_12 = pdf.get(12u);
1135 float const f_13 = pdf.get(13u);
1136 float const f_14 = pdf.get(14u);
1137 float const f_15 = pdf.get(15u);
1138 float const f_16 = pdf.get(16u);
1139 float const f_17 = pdf.get(17u);
1140 float const f_18 = pdf.get(18u);
1141 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
1142 const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
1143 const float vel1Term = f_1 + f_11 + f_15 + f_7;
1144 const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
1145 const float vel2Term = f_12 + f_13 + f_5;
1146 const float delta_rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
1147 const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
1148 const float rho = delta_rho + 1;
1149 const float md_0 = momdensity_0 + f_in[0u] * 0.50000000000000000f / density;
1150 const float md_1 = momdensity_1 + f_in[1u] * 0.50000000000000000f / density;
1151 const float md_2 = momdensity_2 + f_in[2u] * 0.50000000000000000f / density;
1152 auto const rho_inv = float{1} / rho;
1153 auto const density_inv = float{1} / density;
1154
1155 force.get(0u) = f_in[0u] * density_inv;
1156 force.get(1u) = f_in[1u] * density_inv;
1157 force.get(2u) = f_in[2u] * density_inv;
1158
1159 velocity.get(0u) = md_0 * rho_inv;
1160 velocity.get(1u) = md_1 * rho_inv;
1161 velocity.get(2u) = md_2 * rho_inv;
1162 }
1163}
1164// LCOV_EXCL_STOP
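// Note (illustration added for clarity): the f_in * 0.5 terms above implement
// the usual half-force correction of force-coupled LB schemes: the stored
// velocity is u = (momdensity + F/2) / rho, so a force applied during a time
// step contributes half of its impulse to the velocity reported for that step.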
1165
1166void set(gpu::GPUField<float> const *pdf_field,
1167 gpu::GPUField<float> *velocity_field,
1168 gpu::GPUField<float> *force_field,
1169 Vector3<float> const &u,
1170 float const density,
1171 Cell const &cell) {
1172 CellInterval ci(cell, cell);
1173 thrust::device_vector<float> dev_data(u.data(), u.data() + 3u);
1174 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1175 auto kernel = gpu::make_kernel(kernel_set);
1176 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1177 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
1178 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
1179 kernel.addParam(const_cast<const float *>(dev_data_ptr));
1180 kernel.addParam(density);
1181 kernel();
1182}
1183
1184void set(gpu::GPUField<float> const *pdf_field,
1185 gpu::GPUField<float> *velocity_field,
1186 gpu::GPUField<float> *force_field,
1187 std::vector<float> const &values,
1188 float const density,
1189 CellInterval const &ci) {
1190 thrust::device_vector<float> dev_data(values.begin(), values.end());
1191 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1192 auto kernel = gpu::make_kernel(kernel_set);
1193 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1194 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
1195 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
1196 kernel.addParam(const_cast<const float *>(dev_data_ptr));
1197 kernel.addParam(density);
1198 kernel();
1199}
1200} // namespace Force
1201
1202namespace MomentumDensity {
1203// LCOV_EXCL_START
1204__global__ void kernel_get(
1205 gpu::FieldAccessor<float> pdf,
1206 gpu::FieldAccessor<float> force,
1207 float *RESTRICT out,
1208 float const density) {
1209 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
1210 pdf.set(blockIdx, threadIdx);
1211 force.set(blockIdx, threadIdx);
1212 out += offset;
1213 if (pdf.isValidPosition()) {
1214 float const f_1 = pdf.get(1u);
1215 float const f_2 = pdf.get(2u);
1216 float const f_3 = pdf.get(3u);
1217 float const f_4 = pdf.get(4u);
1218 float const f_5 = pdf.get(5u);
1219 float const f_6 = pdf.get(6u);
1220 float const f_7 = pdf.get(7u);
1221 float const f_8 = pdf.get(8u);
1222 float const f_9 = pdf.get(9u);
1223 float const f_10 = pdf.get(10u);
1224 float const f_11 = pdf.get(11u);
1225 float const f_12 = pdf.get(12u);
1226 float const f_13 = pdf.get(13u);
1227 float const f_14 = pdf.get(14u);
1228 float const f_15 = pdf.get(15u);
1229 float const f_16 = pdf.get(16u);
1230 float const f_17 = pdf.get(17u);
1231 float const f_18 = pdf.get(18u);
1232 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
1233 const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
1234 const float vel1Term = f_1 + f_11 + f_15 + f_7;
1235 const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
1236 const float vel2Term = f_12 + f_13 + f_5;
1237 const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
1238 const float md_0 = force.get(0) * 0.50000000000000000f + momdensity_0;
1239 const float md_1 = force.get(1) * 0.50000000000000000f + momdensity_1;
1240 const float md_2 = force.get(2) * 0.50000000000000000f + momdensity_2;
1241 out[0u] = md_0 * density;
1242 out[1u] = md_1 * density;
1243 out[2u] = md_2 * density;
1244 }
1245}
1246// LCOV_EXCL_STOP
1247
1248Vector3<float> reduce(
1249 gpu::GPUField<float> const *pdf_field,
1250 gpu::GPUField<float> const *force_field,
1251 float const density) {
1252 auto const ci = pdf_field->xyzSize();
1253 thrust::device_vector<float> dev_data(3u * ci.numCells());
1254 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1255 auto kernel = gpu::make_kernel(kernel_get);
1256 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1257 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
1258 kernel.addParam(dev_data_ptr);
1259 kernel.addParam(density);
1260 kernel();
1261 std::vector<float> out(dev_data.size());
1262 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
1263 Vector3<float> mom(float{0});
1264 for (auto i = uint_t{0u}; i < 3u * ci.numCells(); i += 3u) {
1265 mom[0u] += out[i + 0u];
1266 mom[1u] += out[i + 1u];
1267 mom[2u] += out[i + 2u];
1268 }
1269 return mom;
1270}
1271} // namespace MomentumDensity
1272
1273namespace PressureTensor {
1274// LCOV_EXCL_START
1275__global__ void kernel_get(
1276 gpu::FieldAccessor<float> pdf,
1277 float *RESTRICT p_out) {
1278 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 9u);
1279 pdf.set(blockIdx, threadIdx);
1280 p_out += offset;
1281 if (pdf.isValidPosition()) {
1282 float const f_1 = pdf.getNeighbor(0, -1, 0, 1u);
1283 float const f_2 = pdf.getNeighbor(0, 1, 0, 2u);
1284 float const f_3 = pdf.getNeighbor(1, 0, 0, 3u);
1285 float const f_4 = pdf.getNeighbor(-1, 0, 0, 4u);
1286 float const f_5 = pdf.getNeighbor(0, 0, -1, 5u);
1287 float const f_6 = pdf.getNeighbor(0, 0, 1, 6u);
1288 float const f_7 = pdf.getNeighbor(1, -1, 0, 7u);
1289 float const f_8 = pdf.getNeighbor(-1, -1, 0, 8u);
1290 float const f_9 = pdf.getNeighbor(1, 1, 0, 9u);
1291 float const f_10 = pdf.getNeighbor(-1, 1, 0, 10u);
1292 float const f_11 = pdf.getNeighbor(0, -1, -1, 11u);
1293 float const f_12 = pdf.getNeighbor(0, 1, -1, 12u);
1294 float const f_13 = pdf.getNeighbor(1, 0, -1, 13u);
1295 float const f_14 = pdf.getNeighbor(-1, 0, -1, 14u);
1296 float const f_15 = pdf.getNeighbor(0, -1, 1, 15u);
1297 float const f_16 = pdf.getNeighbor(0, 1, 1, 16u);
1298 float const f_17 = pdf.getNeighbor(1, 0, 1, 17u);
1299 float const f_18 = pdf.getNeighbor(-1, 0, 1, 18u);
1300 const float p_0 = f_10 + f_13 + f_14 + f_17 + f_18 + f_3 + f_4 + f_7 + f_8 + f_9 + 0.33333333333333333f;
1301 const float p_1 = -f_10 - f_7 + f_8 + f_9;
1302 const float p_2 = -f_13 + f_14 + f_17 - f_18;
1303 const float p_3 = -f_10 - f_7 + f_8 + f_9;
1304 const float p_4 = f_1 + f_10 + f_11 + f_12 + f_15 + f_16 + f_2 + f_7 + f_8 + f_9 + 0.33333333333333333f;
1305 const float p_5 = f_11 - f_12 - f_15 + f_16;
1306 const float p_6 = -f_13 + f_14 + f_17 - f_18;
1307 const float p_7 = f_11 - f_12 - f_15 + f_16;
1308 const float p_8 = f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6 + 0.33333333333333333f;
1309 p_out[0u] = p_0;
1310 p_out[1u] = p_1;
1311 p_out[2u] = p_2;
1312 p_out[3u] = p_3;
1313 p_out[4u] = p_4;
1314 p_out[5u] = p_5;
1315 p_out[6u] = p_6;
1316 p_out[7u] = p_7;
1317 p_out[8u] = p_8;
1318 }
1319}
1320// LCOV_EXCL_STOP
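// Note (illustration added for clarity): the kernel above assembles the 3x3
// pressure tensor row by row (p_0 .. p_8); it is symmetric by construction
// (p_1 == p_3, p_2 == p_6, p_5 == p_7), and the diagonal entries carry the
// constant 1/3, i.e. the background contribution rho_0 * c_s^2 of the D3Q19
// lattice with c_s^2 = 1/3 in lattice units.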
1321
1322Matrix3<float> get(
1323 gpu::GPUField<float> const *pdf_field,
1324 float const density,
1325 Cell const &cell) {
1326 CellInterval ci(cell, cell);
1327 thrust::device_vector<float> dev_data(9u);
1328 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1329 auto kernel = gpu::make_kernel(kernel_get);
1330 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1331 kernel.addParam(dev_data_ptr);
1332 kernel();
1333 Matrix3<float> out;
1334 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
1335 return out * density;
1336}
1337
1338std::vector<float> get(
1339 gpu::GPUField<float> const *pdf_field,
1340 float const density,
1341 CellInterval const &ci) {
1342 thrust::device_vector<float> dev_data(9u * ci.numCells());
1343 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1344 auto kernel = gpu::make_kernel(kernel_get);
1345 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1346 kernel.addParam(dev_data_ptr);
1347 kernel();
1348 std::vector<float> out(dev_data.size());
1349 thrust::transform(dev_data.begin(), dev_data.end(), dev_data.begin(),
1350 algo_rescale(static_cast<float>(density)));
1351 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
1352 return out;
1353}
1354
1355Matrix3<float> reduce(
1356 gpu::GPUField<float> const *pdf_field,
1357 float const density) {
1358 auto const ci = pdf_field->xyzSize();
1359 thrust::device_vector<float> dev_data(9u * ci.numCells());
1360 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1361 auto kernel = gpu::make_kernel(kernel_get);
1362 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1363 kernel.addParam(dev_data_ptr);
1364 kernel();
1365 std::vector<float> out(dev_data.size());
1366 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
1367 Matrix3<float> pressureTensor(float{0});
1368 for (auto i = uint_t{0u}; i < 9u * ci.numCells(); i += 9u) {
1369 pressureTensor[0u] += out[i + 0u];
1370 pressureTensor[1u] += out[i + 1u];
1371 pressureTensor[2u] += out[i + 2u];
1372
1373 pressureTensor[3u] += out[i + 3u];
1374 pressureTensor[4u] += out[i + 4u];
1375 pressureTensor[5u] += out[i + 5u];
1376
1377 pressureTensor[6u] += out[i + 6u];
1378 pressureTensor[7u] += out[i + 7u];
1379 pressureTensor[8u] += out[i + 8u];
1380 }
1381 return pressureTensor * density;
1382}
1383} // namespace PressureTensor
1384
1385} // namespace accessor
1386} // namespace lbm
1387} // namespace walberla