ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
EK_FieldAccessors_single_precision_CUDA.cu
1/*
2 * Copyright (C) 2023-2025 The ESPResSo project
3 * Copyright (C) 2020 The waLBerla project
4 *
5 * This file is part of ESPResSo.
6 *
7 * ESPResSo is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * ESPResSo is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21// kernel generated with pystencils v1.3.7, lbmpy v1.3.7, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit c69cb11d6a95d32b2280544d3d9abde1fe5fdbb5
22
23/*
24 * Lattice field accessors.
25 * Adapted from the waLBerla source file
26 * https://i10git.cs.fau.de/walberla/walberla/-/blob/a16141524c58ab88386e2a0f8fdd7c63c5edd704/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
27 */
28
29#include <core/DataTypes.h>
30#include <core/cell/Cell.h>
31#include <core/cell/CellInterval.h>
32#include <core/math/Vector3.h>
33
34#include <field/iterators/IteratorMacros.h>
35
36#include <gpu/FieldAccessor.h>
37#include <gpu/FieldIndexing.h>
38#include <gpu/GPUField.h>
39#include <gpu/Kernel.h>
40
41#include <thrust/device_ptr.h>
42#include <thrust/device_vector.h>
43
44#include <array>
45#include <vector>
46
47#if defined(__NVCC__)
48#define RESTRICT __restrict__
49#elif defined(__clang__)
50#if defined(__CUDA__)
51#if defined(__CUDA_ARCH__)
52// clang compiling CUDA code in device mode
53#define RESTRICT __restrict__
54#else
55// clang compiling CUDA code in host mode
56#define RESTRICT __restrict__
57#endif
58#endif
59#elif defined(__GNUC__) or defined(__GNUG__)
60#define RESTRICT __restrict__
61#elif defined(_MSC_VER)
62#define RESTRICT __restrict
63#else
64#define RESTRICT
65#endif
66
67/** @brief Get linear index of flattened data with original layout @c fzyx. */
68static __forceinline__ __device__ uint getLinearIndex(uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim, uint fOffset) {
69  auto const x = threadIdx.x;
70 auto const y = blockIdx.x;
71 auto const z = blockIdx.y;
72 auto const f = blockIdx.z;
73 auto const ySize = gridDim.x;
74 auto const zSize = gridDim.y;
75 auto const fSize = fOffset;
76 return f +
77 z * fSize +
78 y * fSize * zSize +
79 x * fSize * zSize * ySize;
80}
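// Illustration (not part of the generated source): with fOffset = 3 and a launch
// where gridDim = (ySize, zSize, 3), the thread at threadIdx.x = x, blockIdx = (y, z, f)
// receives index f + 3*z + 3*zSize*y + 3*zSize*ySize*x, i.e. the f component varies
// fastest and x slowest, matching the fzyx layout mentioned in the brief above.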
81
82namespace walberla {
83namespace ek {
84namespace accessor {
85
86namespace Scalar {
87// LCOV_EXCL_START
88__global__ void kernel_get(
89    gpu::FieldAccessor<float> scalar_field,
90 float *out) {
91 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u);
92  scalar_field.set(blockIdx, threadIdx);
93  out += offset;
94 if (scalar_field.isValidPosition()) {
95 out[0u] = scalar_field.get(0u);
96 }
97}
98
99__global__ void kernel_set(
100    gpu::FieldAccessor<float> scalar_field,
101 float const *RESTRICT in) {
102 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u);
103  scalar_field.set(blockIdx, threadIdx);
104  in += offset;
105 if (scalar_field.isValidPosition()) {
106 scalar_field.get(0u) = in[0u];
107 }
108}
109
110__global__ void kernel_broadcast(
111    gpu::FieldAccessor<float> scalar_field,
112 float const in) {
113  scalar_field.set(blockIdx, threadIdx);
114  if (scalar_field.isValidPosition()) {
115 scalar_field.get(0u) = in;
116 }
117}
118
119__global__ void kernel_add(
120    gpu::FieldAccessor<float> scalar_field,
121 float const *RESTRICT in) {
122 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u);
123  scalar_field.set(blockIdx, threadIdx);
124  in += offset;
125 if (scalar_field.isValidPosition()) {
126 scalar_field.get(0u) += in[0u];
127 }
128}
129
130__global__ void kernel_broadcast_add(
131    gpu::FieldAccessor<float> scalar_field,
132 float const in) {
133  scalar_field.set(blockIdx, threadIdx);
134  if (scalar_field.isValidPosition()) {
135 scalar_field.get(0u) += in;
136 }
137}
138// LCOV_EXCL_STOP
139
140float get(
141 gpu::GPUField<float> const *scalar_field,
142 Cell const &cell) {
143 CellInterval ci(cell, cell);
144 thrust::device_vector<float> dev_data(1u);
145 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
146 auto kernel = gpu::make_kernel(kernel_get);
147 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*scalar_field, ci));
148 kernel.addParam(dev_data_ptr);
149 kernel();
150 float result{};
151 thrust::copy(dev_data.begin(), dev_data.end(), &result);
152 return result;
153}
154
155void set(
156 gpu::GPUField<float> *scalar_field,
157 float const value,
158 Cell const &cell) {
159 CellInterval ci(cell, cell);
160 auto kernel = gpu::make_kernel(kernel_broadcast);
161 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*scalar_field, ci));
162 kernel.addParam(value);
163 kernel();
164}
165
166void add(
167 gpu::GPUField<float> *scalar_field,
168 float const value,
169 Cell const &cell) {
170 CellInterval ci(cell, cell);
171 auto kernel = gpu::make_kernel(kernel_add);
172 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*scalar_field, ci));
173 kernel.addParam(value);
174 kernel();
175}
176
177void initialize(
178    gpu::GPUField<float> *scalar_field,
179 float const value) {
180 CellInterval ci = scalar_field->xyzSizeWithGhostLayer();
181 auto kernel = gpu::make_kernel(kernel_broadcast);
182 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*scalar_field, ci));
183 kernel.addParam(value);
184 kernel();
185}
186
187void add_to_all(
188    gpu::GPUField<float> *scalar_field,
189    float const value) {
190 CellInterval ci = scalar_field->xyzSizeWithGhostLayer();
191 auto kernel = gpu::make_kernel(kernel_broadcast_add);
192 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*scalar_field, ci));
193 kernel.addParam(value);
194 kernel();
195}
196
197std::vector<float> get(
198 gpu::GPUField<float> const *scalar_field,
199 CellInterval const &ci) {
200 thrust::device_vector<float> dev_data(ci.numCells() * 1u);
201 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
202 auto kernel = gpu::make_kernel(kernel_get);
203 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*scalar_field, ci));
204 kernel.addParam(dev_data_ptr);
205 kernel();
206 std::vector<float> out(ci.numCells());
207 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
208 return out;
209}
210
211void set(
212 gpu::GPUField<float> *scalar_field,
213 std::vector<float> const &values,
214 CellInterval const &ci) {
215 thrust::device_vector<float> dev_data(values.begin(), values.end());
216 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
217 auto kernel = gpu::make_kernel(kernel_set);
218 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*scalar_field, ci));
219 kernel.addParam(const_cast<const float *>(dev_data_ptr));
220 kernel();
221}
222} // namespace Scalar
223
224namespace Vector {
225// LCOV_EXCL_START
226__global__ void kernel_get(
227    gpu::FieldAccessor<float> vec,
228 float *u_out) {
229 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
230 vec.set(blockIdx, threadIdx);
231 u_out += offset;
232 if (vec.isValidPosition()) {
233 u_out[0u] = vec.get(0u);
234 u_out[1u] = vec.get(1u);
235 u_out[2u] = vec.get(2u);
236 }
237}
238
239__global__ void kernel_set(
240    gpu::FieldAccessor<float> vec,
241 float const *RESTRICT u_in) {
242 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
243 vec.set(blockIdx, threadIdx);
244 u_in += offset;
245 if (vec.isValidPosition()) {
246 vec.get(0u) = u_in[0u];
247 vec.get(1u) = u_in[1u];
248 vec.get(2u) = u_in[2u];
249 }
250}
251
252__global__ void kernel_broadcast(
253    gpu::FieldAccessor<float> vec,
254 float const *RESTRICT u_in) {
255 vec.set(blockIdx, threadIdx);
256 if (vec.isValidPosition()) {
257 vec.get(0u) = u_in[0u];
258 vec.get(1u) = u_in[1u];
259 vec.get(2u) = u_in[2u];
260 }
261}
262
263__global__ void kernel_add(
264    gpu::FieldAccessor<float> vec,
265 float const *RESTRICT u_in) {
266 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
267 vec.set(blockIdx, threadIdx);
268 u_in += offset;
269 if (vec.isValidPosition()) {
270 vec.get(0u) += u_in[0u];
271 vec.get(1u) += u_in[1u];
272 vec.get(2u) += u_in[2u];
273 }
274}
275
276__global__ void kernel_broadcast_add(
277    gpu::FieldAccessor<float> vec,
278 float const *RESTRICT u_in) {
279 vec.set(blockIdx, threadIdx);
280 if (vec.isValidPosition()) {
281 vec.get(0u) += u_in[0u];
282 vec.get(1u) += u_in[1u];
283 vec.get(2u) += u_in[2u];
284 }
285}
286// LCOV_EXCL_STOP
287
288Vector3<float> get(
289    gpu::GPUField<float> const *vec_field,
290 Cell const &cell) {
291 CellInterval ci(cell, cell);
292 thrust::device_vector<float> dev_data(3u);
293 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
294 auto kernel = gpu::make_kernel(kernel_get);
295 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
296 kernel.addParam(dev_data_ptr);
297 kernel();
298  Vector3<float> vec;
299  thrust::copy(dev_data.begin(), dev_data.end(), vec.data());
300 return vec;
301}
302
303void set(
304 gpu::GPUField<float> *vec_field,
305 Vector3<float> const &vec,
306 Cell const &cell) {
307 CellInterval ci(cell, cell);
308 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
309 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
310 auto kernel = gpu::make_kernel(kernel_set);
311 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
312 kernel.addParam(const_cast<const float *>(dev_data_ptr));
313 kernel();
314}
315
316void add(
317 gpu::GPUField<float> *vec_field,
318 Vector3<float> const &vec,
319 Cell const &cell) {
320 CellInterval ci(cell, cell);
321 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
322 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
323 auto kernel = gpu::make_kernel(kernel_add);
324 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
325 kernel.addParam(const_cast<const float *>(dev_data_ptr));
326 kernel();
327}
328
329void initialize(
330    gpu::GPUField<float> *vec_field,
331 Vector3<float> const &vec) {
332 CellInterval ci = vec_field->xyzSizeWithGhostLayer();
333 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
334 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
335 auto kernel = gpu::make_kernel(kernel_broadcast);
336 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
337 kernel.addParam(const_cast<const float *>(dev_data_ptr));
338 kernel();
339}
340
341void add_to_all(
342    gpu::GPUField<float> *vec_field,
343 Vector3<float> const &vec) {
344 CellInterval ci = vec_field->xyzSizeWithGhostLayer();
345 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
346 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
347 auto kernel = gpu::make_kernel(kernel_broadcast_add);
348 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
349 kernel.addParam(const_cast<const float *>(dev_data_ptr));
350 kernel();
351}
352
353std::vector<float> get(
354 gpu::GPUField<float> const *vec_field,
355 CellInterval const &ci) {
356 thrust::device_vector<float> dev_data(ci.numCells() * 3u);
357 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
358 auto kernel = gpu::make_kernel(kernel_get);
359 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
360 kernel.addParam(dev_data_ptr);
361 kernel();
362 std::vector<float> out(ci.numCells() * 3u);
363 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
364 return out;
365}
366
367void set(
368 gpu::GPUField<float> *vec_field,
369 std::vector<float> const &values,
370 CellInterval const &ci) {
371 thrust::device_vector<float> dev_data(values.begin(), values.end());
372 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
373 auto kernel = gpu::make_kernel(kernel_set);
374 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
375 kernel.addParam(const_cast<const float *>(dev_data_ptr));
376 kernel();
377}
378} // namespace Vector
379
380namespace Flux {
381// LCOV_EXCL_START
382__global__ void kernel_get(
383    gpu::FieldAccessor<float> flux_field,
384 float *j_out) {
385 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 13u);
386  flux_field.set(blockIdx, threadIdx);
387  j_out += offset;
388 if (flux_field.isValidPosition()) {
389 j_out[0u] = flux_field.get(0u);
390 j_out[1u] = flux_field.get(1u);
391 j_out[2u] = flux_field.get(2u);
392 j_out[3u] = flux_field.get(3u);
393 j_out[4u] = flux_field.get(4u);
394 j_out[5u] = flux_field.get(5u);
395 j_out[6u] = flux_field.get(6u);
396 j_out[7u] = flux_field.get(7u);
397 j_out[8u] = flux_field.get(8u);
398 j_out[9u] = flux_field.get(9u);
399 j_out[10u] = flux_field.get(10u);
400 j_out[11u] = flux_field.get(11u);
401 j_out[12u] = flux_field.get(12u);
402 }
403}
404
405__global__ void kernel_broadcast(
406    gpu::FieldAccessor<float> flux_field,
407 float const *RESTRICT j_in) {
408  flux_field.set(blockIdx, threadIdx);
409  if (flux_field.isValidPosition()) {
410 flux_field.get(0u) = j_in[0u];
411 flux_field.get(1u) = j_in[1u];
412 flux_field.get(2u) = j_in[2u];
413 flux_field.get(3u) = j_in[3u];
414 flux_field.get(4u) = j_in[4u];
415 flux_field.get(5u) = j_in[5u];
416 flux_field.get(6u) = j_in[6u];
417 flux_field.get(7u) = j_in[7u];
418 flux_field.get(8u) = j_in[8u];
419 flux_field.get(9u) = j_in[9u];
420 flux_field.get(10u) = j_in[10u];
421 flux_field.get(11u) = j_in[11u];
422 flux_field.get(12u) = j_in[12u];
423 }
424}
425
426__global__ void kernel_get_vector(
427    gpu::FieldAccessor<float> flux_field,
428 float *j_out) {
429 auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
430  flux_field.set(blockIdx, threadIdx);
431  j_out += offset;
432 if (flux_field.isValidPosition()) {
433 j_out[0u] = float(0.0);
434 j_out[1u] = float(0.0);
435 j_out[2u] = float(0.0);
436
437 int cx = 0;
438 int cy = 0;
439 int cz = 0;
440 float add_flux;
441
442 cx = 0;
443 cy = 1;
444 cz = 0;
445 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 1u);
446 j_out[1u] += add_flux * 1;
447 add_flux = float(0.5) * flux_field.get(1u);
448 j_out[1u] += add_flux * -1;
449 add_flux = float(0.5) * flux_field.get(0u);
450 j_out[0u] += add_flux * -1;
451 cx = 1;
452 cy = 0;
453 cz = 0;
454 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 0u);
455 j_out[0u] += add_flux * 1;
456 cx = 0;
457 cy = 0;
458 cz = 1;
459 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 2u);
460 j_out[2u] += add_flux * 1;
461 add_flux = float(0.5) * flux_field.get(2u);
462 j_out[2u] += add_flux * -1;
463 add_flux = float(0.5) * flux_field.get(4u);
464 j_out[0u] += add_flux * -1;
465 j_out[1u] += add_flux * 1;
466 cx = 1;
467 cy = 1;
468 cz = 0;
469 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 3u);
470 j_out[0u] += add_flux * 1;
471 j_out[1u] += add_flux * 1;
472 add_flux = float(0.5) * flux_field.get(3u);
473 j_out[0u] += add_flux * -1;
474 j_out[1u] += add_flux * -1;
475 cx = 1;
476 cy = -1;
477 cz = 0;
478 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 4u);
479 j_out[0u] += add_flux * 1;
480 j_out[1u] += add_flux * -1;
481 cx = 0;
482 cy = 1;
483 cz = 1;
484 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 7u);
485 j_out[1u] += add_flux * 1;
486 j_out[2u] += add_flux * 1;
487 add_flux = float(0.5) * flux_field.get(8u);
488 j_out[1u] += add_flux * -1;
489 j_out[2u] += add_flux * 1;
490 add_flux = float(0.5) * flux_field.get(6u);
491 j_out[0u] += add_flux * -1;
492 j_out[2u] += add_flux * 1;
493 cx = 1;
494 cy = 0;
495 cz = 1;
496 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 5u);
497 j_out[0u] += add_flux * 1;
498 j_out[2u] += add_flux * 1;
499 cx = 0;
500 cy = 1;
501 cz = -1;
502 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 8u);
503 j_out[1u] += add_flux * 1;
504 j_out[2u] += add_flux * -1;
505 add_flux = float(0.5) * flux_field.get(7u);
506 j_out[1u] += add_flux * -1;
507 j_out[2u] += add_flux * -1;
508 add_flux = float(0.5) * flux_field.get(5u);
509 j_out[0u] += add_flux * -1;
510 j_out[2u] += add_flux * -1;
511 cx = 1;
512 cy = 0;
513 cz = -1;
514 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 6u);
515 j_out[0u] += add_flux * 1;
516 j_out[2u] += add_flux * -1;
517 cx = 1;
518 cy = 1;
519 cz = 1;
520 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 9u);
521 j_out[0u] += add_flux * 1;
522 j_out[1u] += add_flux * 1;
523 j_out[2u] += add_flux * 1;
524 add_flux = float(0.5) * flux_field.get(12u);
525 j_out[0u] += add_flux * -1;
526 j_out[1u] += add_flux * 1;
527 j_out[2u] += add_flux * 1;
528 cx = 1;
529 cy = -1;
530 cz = 1;
531 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 11u);
532 j_out[0u] += add_flux * 1;
533 j_out[1u] += add_flux * -1;
534 j_out[2u] += add_flux * 1;
535 add_flux = float(0.5) * flux_field.get(10u);
536 j_out[0u] += add_flux * -1;
537 j_out[1u] += add_flux * -1;
538 j_out[2u] += add_flux * 1;
539 cx = 1;
540 cy = 1;
541 cz = -1;
542 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 10u);
543 j_out[0u] += add_flux * 1;
544 j_out[1u] += add_flux * 1;
545 j_out[2u] += add_flux * -1;
546 add_flux = float(0.5) * flux_field.get(11u);
547 j_out[0u] += add_flux * -1;
548 j_out[1u] += add_flux * 1;
549 j_out[2u] += add_flux * -1;
550 cx = 1;
551 cy = -1;
552 cz = -1;
553 add_flux = float(-0.5) * flux_field.getNeighbor(cx, cy, cz, 12u);
554 j_out[0u] += add_flux * 1;
555 j_out[1u] += add_flux * -1;
556 j_out[2u] += add_flux * -1;
557 add_flux = float(0.5) * flux_field.get(9u);
558 j_out[0u] += add_flux * -1;
559 j_out[1u] += add_flux * -1;
560 j_out[2u] += add_flux * -1;
561 }
562}
563// LCOV_EXCL_STOP
564
565std::array<float, 13> get(
566 gpu::GPUField<float> const *flux_field,
567 Cell const &cell) {
568 CellInterval ci(cell, cell);
569 thrust::device_vector<float> dev_data(13u);
570 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
571 auto kernel = gpu::make_kernel(kernel_get);
572 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*flux_field, ci));
573 kernel.addParam(dev_data_ptr);
574 kernel();
575 std::array<float, 13> vec;
576 thrust::copy(dev_data.begin(), dev_data.end(), vec.data());
577 return vec;
578}
579
580void initialize(
581    gpu::GPUField<float> *flux_field,
582 std::array<float, 13> const &flux) {
583 CellInterval ci = flux_field->xyzSizeWithGhostLayer();
584 thrust::device_vector<float> dev_data(flux.data(), flux.data() + 13u);
585 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
586 auto kernel = gpu::make_kernel(kernel_broadcast);
587 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*flux_field, ci));
588 kernel.addParam(const_cast<const float *>(dev_data_ptr));
589 kernel();
590}
591
592std::vector<float> get(
593 gpu::GPUField<float> const *flux_field,
594 CellInterval const &ci) {
595 thrust::device_vector<float> dev_data(ci.numCells() * 13u);
596 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
597 auto kernel = gpu::make_kernel(kernel_get);
598 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*flux_field, ci));
599 kernel.addParam(dev_data_ptr);
600 kernel();
601 std::vector<float> out(ci.numCells() * 13u);
602 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
603 return out;
604}
605
606Vector3<float> get_vector(
607    gpu::GPUField<float> const *flux_field,
608 Cell const &cell) {
609 CellInterval ci(cell, cell);
610 thrust::device_vector<float> dev_data(3u);
611 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
612 auto kernel = gpu::make_kernel(kernel_get_vector);
613 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*flux_field, ci));
614 kernel.addParam(dev_data_ptr);
615 kernel();
616  Vector3<float> vec;
617  thrust::copy(dev_data.begin(), dev_data.end(), vec.data());
618 return vec;
619}
620
621std::vector<float> get_vector(
622 gpu::GPUField<float> const *flux_field,
623 CellInterval const &ci) {
624 thrust::device_vector<float> dev_data(ci.numCells() * 3u);
625 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
626 auto kernel = gpu::make_kernel(kernel_get_vector);
627 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*flux_field, ci));
628 kernel.addParam(dev_data_ptr);
629 kernel();
630 std::vector<float> out(ci.numCells() * 3u);
631 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
632 return out;
633}
634} // namespace Flux
635
636} // namespace accessor
637} // namespace ek
638} // namespace walberla
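For orientation, here is a minimal host-side usage sketch. It is not part of the generated file; it assumes the declarations from the corresponding accessor header are visible and that an already-allocated walberla::gpu::GPUField<float> exists (the name density_field is hypothetical). It exercises the Scalar accessors defined above.

#include <core/cell/Cell.h>
#include <gpu/GPUField.h>

void example(walberla::gpu::GPUField<float> *density_field) {
  using namespace walberla;
  Cell const cell{1, 2, 3};
  // write a value into a single cell through the device kernel, then read it back
  ek::accessor::Scalar::set(density_field, 0.5f, cell);
  float const value = ek::accessor::Scalar::get(density_field, cell);
  // broadcast-add a constant to every cell, ghost layers included
  ek::accessor::Scalar::add_to_all(density_field, 0.1f);
  static_cast<void>(value);
}

Each of these host functions launches one of the __global__ kernels above over a one-cell or whole-domain CellInterval and stages data through a thrust::device_vector, so they are convenience accessors for setup, testing, and observables rather than per-cell hot-path operations.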