ESPResSo — Extensible Simulation Package for Research on Soft Matter Systems

FieldAccessorsDoublePrecisionCUDA.cu
/*
 * Copyright (C) 2023-2024 The ESPResSo project
 * Copyright (C) 2020 The waLBerla project
 *
 * This file is part of ESPResSo.
 *
 * ESPResSo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * ESPResSo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit 0c8b4b926c6979288fd8a6846d02ec0870e1fe41

/**
 * @file
 * Lattice field accessors.
 * Adapted from the waLBerla source file
 * https://i10git.cs.fau.de/walberla/walberla/-/blob/a16141524c58ab88386e2a0f8fdd7c63c5edd704/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
 */

#include <core/DataTypes.h>
#include <core/cell/Cell.h>
#include <core/cell/CellInterval.h>
#include <core/math/Matrix3.h>
#include <core/math/Vector3.h>

#include <field/iterators/IteratorMacros.h>

#include <gpu/FieldAccessor.h>
#include <gpu/FieldIndexing.h>
#include <gpu/GPUField.h>
#include <gpu/Kernel.h>

#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>

#include <array>
#include <cassert> // for assert() in calculate_dim_grid below
#include <vector>

#if defined(__NVCC__)
#define RESTRICT __restrict__
#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#pragma nv_diagnostic push
#pragma nv_diag_suppress 177 // unused variable
#else
#pragma push
#pragma diag_suppress 177 // unused variable
#endif
#elif defined(__clang__)
#if defined(__CUDA__)
#if defined(__CUDA_ARCH__)
// clang compiling CUDA code in device mode
#define RESTRICT __restrict__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wunused-parameter"
#else
// clang compiling CUDA code in host mode
#define RESTRICT __restrict__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wunused-parameter"
#endif
#endif
#elif defined(__GNUC__) or defined(__GNUG__)
#define RESTRICT __restrict__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#elif defined(_MSC_VER)
#define RESTRICT __restrict
#else
#define RESTRICT
#endif

/** @brief Get linear index of flattened data with original layout @c fzyx. */
static __forceinline__ __device__ uint getLinearIndex(uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim, uint fOffset) {
  auto const x = threadIdx.x;
  auto const y = blockIdx.x;
  auto const z = blockIdx.y;
  auto const f = blockIdx.z;
  auto const ySize = gridDim.x;
  auto const zSize = gridDim.y;
  auto const fSize = fOffset;
  return f +
         z * fSize +
         y * fSize * zSize +
         x * fSize * zSize * ySize;
}
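// Worked example (illustrative, not part of the generated source): for a
// launch with gridDim = (4, 4, 19), blockDim = (4, 1, 1) and fOffset = 19,
// the thread with threadIdx.x = 1 in block (2, 3, 5) addresses element
// 5 + 3 * 19 + 2 * 19 * 4 + 1 * 19 * 4 * 4 = 518 of the flattened buffer.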

namespace walberla {
namespace lbm {
namespace accessor {

namespace Population {
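// Accessors for the per-cell population arrays; the hard-coded stride of 19
// corresponds to the 19 discrete velocities of a D3Q19 lattice.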
__global__ void kernel_get_interval(
    gpu::FieldAccessor<double> pdf,
    double *RESTRICT const pop) {
  pdf.set(blockIdx, threadIdx);
  if (pdf.isValidPosition()) {
    uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u);
    pop[offset + 0u] = pdf.get(0);
    pop[offset + 1u] = pdf.get(1);
    pop[offset + 2u] = pdf.get(2);
    pop[offset + 3u] = pdf.get(3);
    pop[offset + 4u] = pdf.get(4);
    pop[offset + 5u] = pdf.get(5);
    pop[offset + 6u] = pdf.get(6);
    pop[offset + 7u] = pdf.get(7);
    pop[offset + 8u] = pdf.get(8);
    pop[offset + 9u] = pdf.get(9);
    pop[offset + 10u] = pdf.get(10);
    pop[offset + 11u] = pdf.get(11);
    pop[offset + 12u] = pdf.get(12);
    pop[offset + 13u] = pdf.get(13);
    pop[offset + 14u] = pdf.get(14);
    pop[offset + 15u] = pdf.get(15);
    pop[offset + 16u] = pdf.get(16);
    pop[offset + 17u] = pdf.get(17);
    pop[offset + 18u] = pdf.get(18);
  }
}

__global__ void kernel_get(
    gpu::FieldAccessor<double> pdf,
    double *RESTRICT const pop) {
  pdf.set(blockIdx, threadIdx);
  if (pdf.isValidPosition()) {
    pop[0u] = pdf.get(0);
    pop[1u] = pdf.get(1);
    pop[2u] = pdf.get(2);
    pop[3u] = pdf.get(3);
    pop[4u] = pdf.get(4);
    pop[5u] = pdf.get(5);
    pop[6u] = pdf.get(6);
    pop[7u] = pdf.get(7);
    pop[8u] = pdf.get(8);
    pop[9u] = pdf.get(9);
    pop[10u] = pdf.get(10);
    pop[11u] = pdf.get(11);
    pop[12u] = pdf.get(12);
    pop[13u] = pdf.get(13);
    pop[14u] = pdf.get(14);
    pop[15u] = pdf.get(15);
    pop[16u] = pdf.get(16);
    pop[17u] = pdf.get(17);
    pop[18u] = pdf.get(18);
  }
}

__global__ void kernel_set_interval(
    gpu::FieldAccessor<double> pdf,
    double const *RESTRICT const pop) {
  pdf.set(blockIdx, threadIdx);
  if (pdf.isValidPosition()) {
    uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u);
    pdf.get(0) = pop[offset + 0u];
    pdf.get(1) = pop[offset + 1u];
    pdf.get(2) = pop[offset + 2u];
    pdf.get(3) = pop[offset + 3u];
    pdf.get(4) = pop[offset + 4u];
    pdf.get(5) = pop[offset + 5u];
    pdf.get(6) = pop[offset + 6u];
    pdf.get(7) = pop[offset + 7u];
    pdf.get(8) = pop[offset + 8u];
    pdf.get(9) = pop[offset + 9u];
    pdf.get(10) = pop[offset + 10u];
    pdf.get(11) = pop[offset + 11u];
    pdf.get(12) = pop[offset + 12u];
    pdf.get(13) = pop[offset + 13u];
    pdf.get(14) = pop[offset + 14u];
    pdf.get(15) = pop[offset + 15u];
    pdf.get(16) = pop[offset + 16u];
    pdf.get(17) = pop[offset + 17u];
    pdf.get(18) = pop[offset + 18u];
  }
}

__global__ void kernel_set(
    gpu::FieldAccessor<double> pdf,
    double const *RESTRICT const pop) {
  pdf.set(blockIdx, threadIdx);
  if (pdf.isValidPosition()) {
    pdf.get(0) = pop[0u];
    pdf.get(1) = pop[1u];
    pdf.get(2) = pop[2u];
    pdf.get(3) = pop[3u];
    pdf.get(4) = pop[4u];
    pdf.get(5) = pop[5u];
    pdf.get(6) = pop[6u];
    pdf.get(7) = pop[7u];
    pdf.get(8) = pop[8u];
    pdf.get(9) = pop[9u];
    pdf.get(10) = pop[10u];
    pdf.get(11) = pop[11u];
    pdf.get(12) = pop[12u];
    pdf.get(13) = pop[13u];
    pdf.get(14) = pop[14u];
    pdf.get(15) = pop[15u];
    pdf.get(16) = pop[16u];
    pdf.get(17) = pop[17u];
    pdf.get(18) = pop[18u];
  }
}

std::array<double, 19u> get(
    gpu::GPUField<double> const *pdf_field,
    Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(19u, double{0});
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();
  std::array<double, 19u> pop;
  thrust::copy(dev_data.begin(), dev_data.end(), pop.data());
  return pop;
}
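// Usage sketch (assumption: a fully initialized gpu::GPUField<double> named
// pdf_field holding 19 populations per cell):
//   auto const pop = walberla::lbm::accessor::Population::get(&pdf_field, Cell{0, 0, 0});
//   walberla::lbm::accessor::Population::set(&pdf_field, pop, Cell{0, 0, 0});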

void set(
    gpu::GPUField<double> *pdf_field,
    std::array<double, 19u> const &pop,
    Cell const &cell) {
  thrust::device_vector<double> dev_data(pop.data(), pop.data() + 19u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  CellInterval ci(cell, cell);
  auto kernel = gpu::make_kernel(kernel_set);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}

void initialize(
    gpu::GPUField<double> *pdf_field,
    std::array<double, 19u> const &pop) {
  CellInterval ci = pdf_field->xyzSizeWithGhostLayer();
  thrust::device_vector<double> dev_data(pop.data(), pop.data() + 19u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_set);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}

std::vector<double> get(
    gpu::GPUField<double> const *pdf_field,
    CellInterval const &ci) {
  thrust::device_vector<double> dev_data(ci.numCells() * 19u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get_interval);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();
  std::vector<double> out(ci.numCells() * 19u);
  thrust::copy(dev_data.begin(), dev_data.end(), out.data());
  return out;
}

void set(
    gpu::GPUField<double> *pdf_field,
    std::vector<double> const &values,
    CellInterval const &ci) {
  thrust::device_vector<double> dev_data(values.begin(), values.end());
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_set_interval);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}
} // namespace Population

namespace Vector {
__global__ void kernel_get_interval(
    gpu::FieldAccessor<double> vec,
    double *const out) {
  vec.set(blockIdx, threadIdx);
  if (vec.isValidPosition()) {
    uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
    out[offset + 0u] = vec.get(0);
    out[offset + 1u] = vec.get(1);
    out[offset + 2u] = vec.get(2);
  }
}

__global__ void kernel_get(
    gpu::FieldAccessor<double> vec,
    double *const out) {
  vec.set(blockIdx, threadIdx);
  if (vec.isValidPosition()) {
    out[0u] = vec.get(0);
    out[1u] = vec.get(1);
    out[2u] = vec.get(2);
  }
}

__global__ void kernel_set_interval(
    gpu::FieldAccessor<double> vec,
    double const *RESTRICT const u) {
  vec.set(blockIdx, threadIdx);
  if (vec.isValidPosition()) {
    uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
    vec.get(0) = u[offset + 0u];
    vec.get(1) = u[offset + 1u];
    vec.get(2) = u[offset + 2u];
  }
}

__global__ void kernel_set(
    gpu::FieldAccessor<double> vec,
    const double *RESTRICT const u) {
  vec.set(blockIdx, threadIdx);
  if (vec.isValidPosition()) {
    vec.get(0) = u[0u];
    vec.get(1) = u[1u];
    vec.get(2) = u[2u];
  }
}

__global__ void kernel_add_interval(
    gpu::FieldAccessor<double> vec,
    double const *RESTRICT const u) {
  vec.set(blockIdx, threadIdx);
  if (vec.isValidPosition()) {
    uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
    vec.get(0) += u[offset + 0u];
    vec.get(1) += u[offset + 1u];
    vec.get(2) += u[offset + 2u];
  }
}

__global__ void kernel_add(
    gpu::FieldAccessor<double> vec,
    double const *RESTRICT const u) {
  vec.set(blockIdx, threadIdx);
  if (vec.isValidPosition()) {
    vec.get(0) += u[0u];
    vec.get(1) += u[1u];
    vec.get(2) += u[2u];
  }
}

Vector3<double> get(
    gpu::GPUField<double> const *vec_field,
    Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(3u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();
  Vector3<double> vec;
  thrust::copy(dev_data.begin(), dev_data.end(), vec.data());
  return vec;
}
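// Usage sketch (assumption: a gpu::GPUField<double> named vec_field holding
// 3 values per cell, e.g. a force or velocity field):
//   auto const v = walberla::lbm::accessor::Vector::get(&vec_field, Cell{0, 0, 0});
//   walberla::lbm::accessor::Vector::add(&vec_field, Vector3<double>{1., 0., 0.}, Cell{0, 0, 0});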

void set(
    gpu::GPUField<double> *vec_field,
    Vector3<double> const &vec,
    Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(vec.data(), vec.data() + 3u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_set);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}

void add(
    gpu::GPUField<double> *vec_field,
    Vector3<double> const &vec,
    Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(vec.data(), vec.data() + 3u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_add);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}

void initialize(
    gpu::GPUField<double> *vec_field,
    Vector3<double> const &vec) {
  CellInterval ci = vec_field->xyzSizeWithGhostLayer();
  thrust::device_vector<double> dev_data(vec.data(), vec.data() + 3u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_set);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}

void add_to_all(
    gpu::GPUField<double> *vec_field,
    Vector3<double> const &vec) {
  CellInterval ci = vec_field->xyzSizeWithGhostLayer();
  thrust::device_vector<double> dev_data(vec.data(), vec.data() + 3u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_add);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}

std::vector<double> get(
    gpu::GPUField<double> const *vec_field,
    CellInterval const &ci) {
  thrust::device_vector<double> dev_data(ci.numCells() * 3u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get_interval);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();
  std::vector<double> out(ci.numCells() * 3u);
  thrust::copy(dev_data.begin(), dev_data.end(), out.data());
  return out;
}

void set(
    gpu::GPUField<double> *vec_field,
    std::vector<double> const &values,
    CellInterval const &ci) {
  thrust::device_vector<double> dev_data(values.begin(), values.end());
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_set_interval);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}
} // namespace Vector

namespace Interpolation {
/** @brief Calculate interpolation weights. */
static __forceinline__ __device__ void calculate_weights(
    double const *RESTRICT const pos,
    int *RESTRICT const corner,
    double *RESTRICT const weights,
    uint gl) {
#pragma unroll
  for (int dim = 0; dim < 3; ++dim) {
    auto const fractional_index = pos[dim] - double{0.5};
    auto const nmp = floor(fractional_index); // double-precision floor (floorf would truncate to float)
    auto const distance = fractional_index - nmp - double{0.5};
    corner[dim] = __double2int_rn(nmp) + static_cast<int>(gl);
    weights[dim * 2 + 0] = double{0.5} - distance;
    weights[dim * 2 + 1] = double{0.5} + distance;
  }
}
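// Worked example (illustrative): pos[dim] = 2.3 with gl = 1 gives
// fractional_index = 1.8, nmp = 1, distance = 0.3, corner[dim] = 2, and
// weights {0.2, 0.8} for the lower and upper neighboring cells.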

__global__ void kernel_get(
    gpu::FieldAccessor<double> vec,
    double const *RESTRICT const pos,
    double *RESTRICT const vel,
    uint n_pos,
    uint gl) {

  uint pos_index = blockIdx.y * gridDim.x * blockDim.x +
                   blockDim.x * blockIdx.x + threadIdx.x;

  vec.set({0u, 0u, 0u}, {0u, 0u, 0u});
  if (vec.isValidPosition() and pos_index < n_pos) {
    auto const array_offset = pos_index * uint(3u);
    int corner[3];
    double weights[3][2];
    calculate_weights(pos + array_offset, corner, &weights[0][0], gl);
#pragma unroll
    for (int i = 0; i < 2; i++) {
      auto const cx = corner[0] + i;
      auto const wx = weights[0][i];
#pragma unroll
      for (int j = 0; j < 2; j++) {
        auto const cy = corner[1] + j;
        auto const wxy = wx * weights[1][j];
#pragma unroll
        for (int k = 0; k < 2; k++) {
          auto const cz = corner[2] + k;
          auto const weight = wxy * weights[2][k];
          vel[array_offset + 0u] += weight * vec.getNeighbor(cx, cy, cz, 0);
          vel[array_offset + 1u] += weight * vec.getNeighbor(cx, cy, cz, 1);
          vel[array_offset + 2u] += weight * vec.getNeighbor(cx, cy, cz, 2);
        }
      }
    }
  }
}

__global__ void kernel_set(
    gpu::FieldAccessor<double> vec,
    double const *RESTRICT const pos,
    double const *RESTRICT const forces,
    uint n_pos,
    uint gl) {

  uint pos_index = blockIdx.y * gridDim.x * blockDim.x +
                   blockDim.x * blockIdx.x + threadIdx.x;

  vec.set({0u, 0u, 0u}, {0u, 0u, 0u});
  if (vec.isValidPosition() and pos_index < n_pos) {
    auto const array_offset = pos_index * uint(3u);
    int corner[3];
    double weights[3][2];
    calculate_weights(pos + array_offset, corner, &weights[0][0], gl);
#pragma unroll
    for (int i = 0; i < 2; i++) {
      auto const cx = corner[0] + i;
      auto const wx = weights[0][i];
#pragma unroll
      for (int j = 0; j < 2; j++) {
        auto const cy = corner[1] + j;
        auto const wxy = wx * weights[1][j];
#pragma unroll
        for (int k = 0; k < 2; k++) {
          auto const cz = corner[2] + k;
          auto const weight = wxy * weights[2][k];
          atomicAdd(&vec.getNeighbor(cx, cy, cz, 0),
                    weight * forces[array_offset + 0u]);
          atomicAdd(&vec.getNeighbor(cx, cy, cz, 1),
                    weight * forces[array_offset + 1u]);
          atomicAdd(&vec.getNeighbor(cx, cy, cz, 2),
                    weight * forces[array_offset + 2u]);
        }
      }
    }
  }
}

static dim3 calculate_dim_grid(uint const threads_x,
                               uint const blocks_per_grid_y,
                               uint const threads_per_block) {
  assert(threads_x >= 1u);
  assert(blocks_per_grid_y >= 1u);
  assert(threads_per_block >= 1u);
  auto const threads_y = threads_per_block * blocks_per_grid_y;
  auto const blocks_per_grid_x = (threads_x + threads_y - 1) / threads_y;
  return make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1);
}
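// Worked example (illustrative): threads_x = 1000 positions,
// blocks_per_grid_y = 4 and threads_per_block = 64 give threads_y = 256 and
// blocks_per_grid_x = (1000 + 255) / 256 = 4, i.e. a 4 x 4 x 1 grid of
// 64-thread blocks covering 1024 >= 1000 threads.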

std::vector<double>
get(
    gpu::GPUField<double> const *vec_field,
    std::vector<double> const &pos,
    uint gl) {
  thrust::device_vector<double> dev_pos(pos.begin(), pos.end());
  thrust::device_vector<double> dev_vel(pos.size());
  auto const dev_pos_ptr = thrust::raw_pointer_cast(dev_pos.data());
  auto const dev_vel_ptr = thrust::raw_pointer_cast(dev_vel.data());

  auto const threads_per_block = uint(64u);
  auto const n_pos = static_cast<uint>(pos.size() / 3ul);
  auto const dim_grid = calculate_dim_grid(n_pos, 4u, threads_per_block);
  kernel_get<<<dim_grid, threads_per_block, 0u, nullptr>>>(
      gpu::FieldIndexing<double>::withGhostLayerXYZ(*vec_field, gl).gpuAccess(),
      dev_pos_ptr, dev_vel_ptr, n_pos, gl);

  std::vector<double> out(pos.size());
  thrust::copy(dev_vel.begin(), dev_vel.end(), out.data());
  return out;
}
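// Usage sketch (assumption: positions given in lattice units, flattened as
// xyz triplets, with gl ghost layers on the interpolated field):
//   std::vector<double> pos = {1.2, 3.4, 5.6};
//   auto const vel = walberla::lbm::accessor::Interpolation::get(&vec_field, pos, 1u);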

void set(
    gpu::GPUField<double> const *vec_field,
    std::vector<double> const &pos,
    std::vector<double> const &forces,
    uint gl) {
  thrust::device_vector<double> dev_pos(pos.begin(), pos.end());
  thrust::device_vector<double> dev_for(forces.begin(), forces.end());
  auto const dev_pos_ptr = thrust::raw_pointer_cast(dev_pos.data());
  auto const dev_for_ptr = thrust::raw_pointer_cast(dev_for.data());

  auto const threads_per_block = uint(64u);
  auto const n_pos = static_cast<uint>(pos.size() / 3ul);
  auto const dim_grid = calculate_dim_grid(n_pos, 4u, threads_per_block);
  kernel_set<<<dim_grid, threads_per_block, 0u, nullptr>>>(
      gpu::FieldIndexing<double>::withGhostLayerXYZ(*vec_field, gl).gpuAccess(),
      dev_pos_ptr, dev_for_ptr, n_pos, gl);
}
} // namespace Interpolation

namespace Equilibrium {
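// Reading aid (not part of the generated source): the expressions below are
// the second-order equilibrium populations of the D3Q19 lattice; the
// constants 0.333..., 0.0555... and 0.0277... are the lattice weights
// 1/3, 1/18 and 1/36.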
__device__ void kernel_set_device(
    gpu::FieldAccessor<double> pdf,
    double const *RESTRICT const u,
    double rho) {

  pdf.get(0) = rho * -0.33333333333333331 * (u[0] * u[0]) + rho * -0.33333333333333331 * (u[1] * u[1]) + rho * -0.33333333333333331 * (u[2] * u[2]) + rho * 0.33333333333333331;
  pdf.get(1) = rho * -0.16666666666666666 * (u[0] * u[0]) + rho * -0.16666666666666666 * (u[2] * u[2]) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * u[1] + rho * 0.16666666666666666 * (u[1] * u[1]);
  pdf.get(2) = rho * -0.16666666666666666 * u[1] + rho * -0.16666666666666666 * (u[0] * u[0]) + rho * -0.16666666666666666 * (u[2] * u[2]) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u[1] * u[1]);
  pdf.get(3) = rho * -0.16666666666666666 * u[0] + rho * -0.16666666666666666 * (u[1] * u[1]) + rho * -0.16666666666666666 * (u[2] * u[2]) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u[0] * u[0]);
  pdf.get(4) = rho * -0.16666666666666666 * (u[1] * u[1]) + rho * -0.16666666666666666 * (u[2] * u[2]) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * u[0] + rho * 0.16666666666666666 * (u[0] * u[0]);
  pdf.get(5) = rho * -0.16666666666666666 * (u[0] * u[0]) + rho * -0.16666666666666666 * (u[1] * u[1]) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * u[2] + rho * 0.16666666666666666 * (u[2] * u[2]);
  pdf.get(6) = rho * -0.16666666666666666 * u[2] + rho * -0.16666666666666666 * (u[0] * u[0]) + rho * -0.16666666666666666 * (u[1] * u[1]) + rho * 0.055555555555555552 + rho * 0.16666666666666666 * (u[2] * u[2]);
  pdf.get(7) = rho * -0.083333333333333329 * u[0] + rho * -0.25 * u[0] * u[1] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[1] + rho * 0.083333333333333329 * (u[0] * u[0]) + rho * 0.083333333333333329 * (u[1] * u[1]);
  pdf.get(8) = rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] + rho * 0.083333333333333329 * u[1] + rho * 0.083333333333333329 * (u[0] * u[0]) + rho * 0.083333333333333329 * (u[1] * u[1]) + rho * 0.25 * u[0] * u[1];
  pdf.get(9) = rho * -0.083333333333333329 * u[0] + rho * -0.083333333333333329 * u[1] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u[0] * u[0]) + rho * 0.083333333333333329 * (u[1] * u[1]) + rho * 0.25 * u[0] * u[1];
  pdf.get(10) = rho * -0.083333333333333329 * u[1] + rho * -0.25 * u[0] * u[1] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] + rho * 0.083333333333333329 * (u[0] * u[0]) + rho * 0.083333333333333329 * (u[1] * u[1]);
  pdf.get(11) = rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[1] + rho * 0.083333333333333329 * u[2] + rho * 0.083333333333333329 * (u[1] * u[1]) + rho * 0.083333333333333329 * (u[2] * u[2]) + rho * 0.25 * u[1] * u[2];
  pdf.get(12) = rho * -0.083333333333333329 * u[1] + rho * -0.25 * u[1] * u[2] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[2] + rho * 0.083333333333333329 * (u[1] * u[1]) + rho * 0.083333333333333329 * (u[2] * u[2]);
  pdf.get(13) = rho * -0.083333333333333329 * u[0] + rho * -0.25 * u[0] * u[2] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[2] + rho * 0.083333333333333329 * (u[0] * u[0]) + rho * 0.083333333333333329 * (u[2] * u[2]);
  pdf.get(14) = rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] + rho * 0.083333333333333329 * u[2] + rho * 0.083333333333333329 * (u[0] * u[0]) + rho * 0.083333333333333329 * (u[2] * u[2]) + rho * 0.25 * u[0] * u[2];
  pdf.get(15) = rho * -0.083333333333333329 * u[2] + rho * -0.25 * u[1] * u[2] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[1] + rho * 0.083333333333333329 * (u[1] * u[1]) + rho * 0.083333333333333329 * (u[2] * u[2]);
  pdf.get(16) = rho * -0.083333333333333329 * u[1] + rho * -0.083333333333333329 * u[2] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u[1] * u[1]) + rho * 0.083333333333333329 * (u[2] * u[2]) + rho * 0.25 * u[1] * u[2];
  pdf.get(17) = rho * -0.083333333333333329 * u[0] + rho * -0.083333333333333329 * u[2] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u[0] * u[0]) + rho * 0.083333333333333329 * (u[2] * u[2]) + rho * 0.25 * u[0] * u[2];
  pdf.get(18) = rho * -0.083333333333333329 * u[2] + rho * -0.25 * u[0] * u[2] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] + rho * 0.083333333333333329 * (u[0] * u[0]) + rho * 0.083333333333333329 * (u[2] * u[2]);
}
} // namespace Equilibrium

namespace Density {
__global__ void kernel_get(
    gpu::FieldAccessor<double> pdf,
    double *RESTRICT const out) {
  pdf.set(blockIdx, threadIdx);
  if (pdf.isValidPosition()) {
    uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(1u));
    double const f_0 = pdf.get(0);
    double const f_1 = pdf.get(1);
    double const f_2 = pdf.get(2);
    double const f_3 = pdf.get(3);
    double const f_4 = pdf.get(4);
    double const f_5 = pdf.get(5);
    double const f_6 = pdf.get(6);
    double const f_7 = pdf.get(7);
    double const f_8 = pdf.get(8);
    double const f_9 = pdf.get(9);
    double const f_10 = pdf.get(10);
    double const f_11 = pdf.get(11);
    double const f_12 = pdf.get(12);
    double const f_13 = pdf.get(13);
    double const f_14 = pdf.get(14);
    double const f_15 = pdf.get(15);
    double const f_16 = pdf.get(16);
    double const f_17 = pdf.get(17);
    double const f_18 = pdf.get(18);
    const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
    const double vel1Term = f_1 + f_11 + f_15 + f_7;
    const double vel2Term = f_12 + f_13 + f_5;
    const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
    out[offset] = rho;
  }
}

__global__ void kernel_set(
    gpu::FieldAccessor<double> pdf,
    double const *RESTRICT const rho_in) {
  pdf.set(blockIdx, threadIdx);
  if (pdf.isValidPosition()) {
    uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(1u));
    double const f_0 = pdf.get(0);
    double const f_1 = pdf.get(1);
    double const f_2 = pdf.get(2);
    double const f_3 = pdf.get(3);
    double const f_4 = pdf.get(4);
    double const f_5 = pdf.get(5);
    double const f_6 = pdf.get(6);
    double const f_7 = pdf.get(7);
    double const f_8 = pdf.get(8);
    double const f_9 = pdf.get(9);
    double const f_10 = pdf.get(10);
    double const f_11 = pdf.get(11);
    double const f_12 = pdf.get(12);
    double const f_13 = pdf.get(13);
    double const f_14 = pdf.get(14);
    double const f_15 = pdf.get(15);
    double const f_16 = pdf.get(16);
    double const f_17 = pdf.get(17);
    double const f_18 = pdf.get(18);
    const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
    const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
    const double vel1Term = f_1 + f_11 + f_15 + f_7;
    const double momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
    const double vel2Term = f_12 + f_13 + f_5;
    const double momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
    const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;

    // calculate current velocity (before density change)
    double const conversion = double(1) / rho;
    double const u_old[3] = {momdensity_0 * conversion, momdensity_1 * conversion, momdensity_2 * conversion};

    Equilibrium::kernel_set_device(pdf, u_old, rho_in[offset]);
  }
}

double get(
    gpu::GPUField<double> const *pdf_field,
    Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(1u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();
  double rho = dev_data[0u];
  return rho;
}

void set(
    gpu::GPUField<double> *pdf_field,
    const double rho,
    Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(1u, rho);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_set);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}

std::vector<double> get(
    gpu::GPUField<double> const *pdf_field,
    CellInterval const &ci) {
  thrust::device_vector<double> dev_data(ci.numCells());
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();
  std::vector<double> out(ci.numCells());
  thrust::copy(dev_data.begin(), dev_data.end(), out.begin());
  return out;
}

void set(
    gpu::GPUField<double> *pdf_field,
    std::vector<double> const &values,
    CellInterval const &ci) {
  thrust::device_vector<double> dev_data(values.begin(), values.end());
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_set);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}
} // namespace Density

namespace Velocity {
__global__ void kernel_set(
    gpu::FieldAccessor<double> pdf,
    gpu::FieldAccessor<double> force,
    double const *RESTRICT const u_in) {
  pdf.set(blockIdx, threadIdx);
  force.set(blockIdx, threadIdx);
  if (pdf.isValidPosition()) {
    uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(3u));
    uint const bufsize = 3u;
    double const *RESTRICT const u = u_in + bufsize * offset;
    double const f_0 = pdf.get(0);
    double const f_1 = pdf.get(1);
    double const f_2 = pdf.get(2);
    double const f_3 = pdf.get(3);
    double const f_4 = pdf.get(4);
    double const f_5 = pdf.get(5);
    double const f_6 = pdf.get(6);
    double const f_7 = pdf.get(7);
    double const f_8 = pdf.get(8);
    double const f_9 = pdf.get(9);
    double const f_10 = pdf.get(10);
    double const f_11 = pdf.get(11);
    double const f_12 = pdf.get(12);
    double const f_13 = pdf.get(13);
    double const f_14 = pdf.get(14);
    double const f_15 = pdf.get(15);
    double const f_16 = pdf.get(16);
    double const f_17 = pdf.get(17);
    double const f_18 = pdf.get(18);
    const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
    const double vel1Term = f_1 + f_11 + f_15 + f_7;
    const double vel2Term = f_12 + f_13 + f_5;
    const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
    const double u_0 = -force.get(0) * 0.50000000000000000 / rho + u[0];
    const double u_1 = -force.get(1) * 0.50000000000000000 / rho + u[1];
    const double u_2 = -force.get(2) * 0.50000000000000000 / rho + u[2];
    double u_new[3] = {u_0, u_1, u_2};

    Equilibrium::kernel_set_device(pdf, u_new, rho);
  }
}

void set(
    gpu::GPUField<double> *pdf_field,
    gpu::GPUField<double> *force_field,
    Vector3<double> const &u,
    Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(u.data(), u.data() + 3u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_set);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*force_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}
} // namespace Velocity

namespace MomentumDensity {
__global__ void kernel_sum(
    gpu::FieldAccessor<double> pdf,
    gpu::FieldAccessor<double> force,
    double *RESTRICT const out) {
  pdf.set(blockIdx, threadIdx);
  force.set(blockIdx, threadIdx);
  if (pdf.isValidPosition()) {
    uint const bufsize = 3u;
    uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, bufsize);
    double const f_0 = pdf.get(0);
    double const f_1 = pdf.get(1);
    double const f_2 = pdf.get(2);
    double const f_3 = pdf.get(3);
    double const f_4 = pdf.get(4);
    double const f_5 = pdf.get(5);
    double const f_6 = pdf.get(6);
    double const f_7 = pdf.get(7);
    double const f_8 = pdf.get(8);
    double const f_9 = pdf.get(9);
    double const f_10 = pdf.get(10);
    double const f_11 = pdf.get(11);
    double const f_12 = pdf.get(12);
    double const f_13 = pdf.get(13);
    double const f_14 = pdf.get(14);
    double const f_15 = pdf.get(15);
    double const f_16 = pdf.get(16);
    double const f_17 = pdf.get(17);
    double const f_18 = pdf.get(18);
    const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
    const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
    const double vel1Term = f_1 + f_11 + f_15 + f_7;
    const double momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
    const double vel2Term = f_12 + f_13 + f_5;
    const double momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
    const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
    const double md_0 = force.get(0) * 0.50000000000000000 + momdensity_0;
    const double md_1 = force.get(1) * 0.50000000000000000 + momdensity_1;
    const double md_2 = force.get(2) * 0.50000000000000000 + momdensity_2;
    out[bufsize * offset + 0u] += md_0;
    out[bufsize * offset + 1u] += md_1;
    out[bufsize * offset + 2u] += md_2;
  }
}

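// Reading aid (not part of the generated source): reduce() launches
// kernel_sum once per cell via WALBERLA_FOR_ALL_CELLS_XYZ and accumulates the
// per-cell momentum density into one 3-component device buffer before
// copying it back to the host.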
Vector3<double> reduce(
    gpu::GPUField<double> const *pdf_field,
    gpu::GPUField<double> const *force_field) {
  thrust::device_vector<double> dev_data(3u, double{0});
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, {
    Cell cell(x, y, z);
    CellInterval ci(cell, cell);
    auto kernel = gpu::make_kernel(kernel_sum);
    kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
    kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*force_field, ci));
    kernel.addParam(dev_data_ptr);
    kernel();
  });
  Vector3<double> mom(double{0});
  thrust::copy(dev_data.begin(), dev_data.begin() + 3u, mom.data());
  return mom;
}
} // namespace MomentumDensity

namespace PressureTensor {
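// Reading aid (not part of the generated source): the nine values written
// per cell form the symmetric pressure tensor in row-major order
// (p_0..p_2 = first row, p_3..p_5 = second row, p_6..p_8 = third row);
// note that p_1 == p_3, p_2 == p_6 and p_5 == p_7.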
__global__ void kernel_get(
    gpu::FieldAccessor<double> pdf,
    double *RESTRICT const out) {
  pdf.set(blockIdx, threadIdx);
  if (pdf.isValidPosition()) {
    uint const bufsize = 9u;
    uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, bufsize);
    double const f_0 = pdf.get(0);
    double const f_1 = pdf.get(1);
    double const f_2 = pdf.get(2);
    double const f_3 = pdf.get(3);
    double const f_4 = pdf.get(4);
    double const f_5 = pdf.get(5);
    double const f_6 = pdf.get(6);
    double const f_7 = pdf.get(7);
    double const f_8 = pdf.get(8);
    double const f_9 = pdf.get(9);
    double const f_10 = pdf.get(10);
    double const f_11 = pdf.get(11);
    double const f_12 = pdf.get(12);
    double const f_13 = pdf.get(13);
    double const f_14 = pdf.get(14);
    double const f_15 = pdf.get(15);
    double const f_16 = pdf.get(16);
    double const f_17 = pdf.get(17);
    double const f_18 = pdf.get(18);
    const double p_0 = f_10 + f_13 + f_14 + f_17 + f_18 + f_3 + f_4 + f_7 + f_8 + f_9;
    const double p_1 = -f_10 - f_7 + f_8 + f_9;
    const double p_2 = -f_13 + f_14 + f_17 - f_18;
    const double p_3 = -f_10 - f_7 + f_8 + f_9;
    const double p_4 = f_1 + f_10 + f_11 + f_12 + f_15 + f_16 + f_2 + f_7 + f_8 + f_9;
    const double p_5 = f_11 - f_12 - f_15 + f_16;
    const double p_6 = -f_13 + f_14 + f_17 - f_18;
    const double p_7 = f_11 - f_12 - f_15 + f_16;
    const double p_8 = f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6;
    out[bufsize * offset + 0u] = p_0;
    out[bufsize * offset + 1u] = p_1;
    out[bufsize * offset + 2u] = p_2;

    out[bufsize * offset + 3u] = p_3;
    out[bufsize * offset + 4u] = p_4;
    out[bufsize * offset + 5u] = p_5;

    out[bufsize * offset + 6u] = p_6;
    out[bufsize * offset + 7u] = p_7;
    out[bufsize * offset + 8u] = p_8;
  }
}

Matrix3<double> get(
    gpu::GPUField<double> const *pdf_field,
    Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(9u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*pdf_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();
  Matrix3<double> out;
  thrust::copy(dev_data.begin(), dev_data.begin() + 9u, out.data());
  return out;
}
} // namespace PressureTensor

} // namespace accessor
} // namespace lbm
} // namespace walberla