30#include <core/DataTypes.h>
31#include <core/cell/Cell.h>
32#include <core/cell/CellInterval.h>
33#include <core/math/Matrix3.h>
34#include <core/math/Vector3.h>
36#include <field/iterators/IteratorMacros.h>
38#include <gpu/FieldAccessor.h>
39#include <gpu/FieldIndexing.h>
40#include <gpu/GPUField.h>
41#include <gpu/Kernel.h>
43#include <thrust/device_ptr.h>
44#include <thrust/device_vector.h>
50#define RESTRICT __restrict__
51#pragma nv_diagnostic push
52#pragma nv_diag_suppress 177
53#elif defined(__clang__)
55#if defined(__CUDA_ARCH__)
57#define RESTRICT __restrict__
58#pragma clang diagnostic push
59#pragma clang diagnostic ignored "-Wunused-variable"
62#define RESTRICT __restrict__
63#pragma clang diagnostic push
64#pragma clang diagnostic ignored "-Wunused-variable"
67#elif defined(__GNUC__) or defined(__GNUG__)
68#define RESTRICT __restrict__
69#pragma GCC diagnostic push
70#pragma GCC diagnostic ignored "-Wunused-variable"
71#elif defined(_MSC_VER)
72#define RESTRICT __restrict
78static __forceinline__ __device__ uint
getLinearIndex(uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim, uint fOffset) {
79 auto const x = threadIdx.x;
80 auto const y = blockIdx.x;
81 auto const z = blockIdx.y;
82 auto const f = blockIdx.z;
83 auto const ySize = gridDim.x;
84 auto const zSize = gridDim.y;
85 auto const fSize = fOffset;
89 x * fSize * zSize * ySize;
99 gpu::FieldAccessor<float> pdf,
101 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u);
102 pdf.set(blockIdx, threadIdx);
104 if (pdf.isValidPosition()) {
105 pop[0u] = pdf.get(0u);
106 pop[1u] = pdf.get(1u);
107 pop[2u] = pdf.get(2u);
108 pop[3u] = pdf.get(3u);
109 pop[4u] = pdf.get(4u);
110 pop[5u] = pdf.get(5u);
111 pop[6u] = pdf.get(6u);
112 pop[7u] = pdf.get(7u);
113 pop[8u] = pdf.get(8u);
114 pop[9u] = pdf.get(9u);
115 pop[10u] = pdf.get(10u);
116 pop[11u] = pdf.get(11u);
117 pop[12u] = pdf.get(12u);
118 pop[13u] = pdf.get(13u);
119 pop[14u] = pdf.get(14u);
120 pop[15u] = pdf.get(15u);
121 pop[16u] = pdf.get(16u);
122 pop[17u] = pdf.get(17u);
123 pop[18u] = pdf.get(18u);
128 gpu::FieldAccessor<float> pdf,
130 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u);
131 pdf.set(blockIdx, threadIdx);
133 if (pdf.isValidPosition()) {
134 pdf.get(0u) = pop[0u];
135 pdf.get(1u) = pop[1u];
136 pdf.get(2u) = pop[2u];
137 pdf.get(3u) = pop[3u];
138 pdf.get(4u) = pop[4u];
139 pdf.get(5u) = pop[5u];
140 pdf.get(6u) = pop[6u];
141 pdf.get(7u) = pop[7u];
142 pdf.get(8u) = pop[8u];
143 pdf.get(9u) = pop[9u];
144 pdf.get(10u) = pop[10u];
145 pdf.get(11u) = pop[11u];
146 pdf.get(12u) = pop[12u];
147 pdf.get(13u) = pop[13u];
148 pdf.get(14u) = pop[14u];
149 pdf.get(15u) = pop[15u];
150 pdf.get(16u) = pop[16u];
151 pdf.get(17u) = pop[17u];
152 pdf.get(18u) = pop[18u];
157 gpu::FieldAccessor<float> pdf,
159 pdf.set(blockIdx, threadIdx);
160 if (pdf.isValidPosition()) {
161 pdf.get(0u) = pop[0u];
162 pdf.get(1u) = pop[1u];
163 pdf.get(2u) = pop[2u];
164 pdf.get(3u) = pop[3u];
165 pdf.get(4u) = pop[4u];
166 pdf.get(5u) = pop[5u];
167 pdf.get(6u) = pop[6u];
168 pdf.get(7u) = pop[7u];
169 pdf.get(8u) = pop[8u];
170 pdf.get(9u) = pop[9u];
171 pdf.get(10u) = pop[10u];
172 pdf.get(11u) = pop[11u];
173 pdf.get(12u) = pop[12u];
174 pdf.get(13u) = pop[13u];
175 pdf.get(14u) = pop[14u];
176 pdf.get(15u) = pop[15u];
177 pdf.get(16u) = pop[16u];
178 pdf.get(17u) = pop[17u];
179 pdf.get(18u) = pop[18u];
184 gpu::FieldAccessor<float> pdf,
186 gpu::FieldAccessor<float> force,
188 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u);
189 pdf.set(blockIdx, threadIdx);
191 force.set(blockIdx, threadIdx);
193 if (pdf.isValidPosition()) {
194 const float f_0 = pdf.get(0u) = pop[0u];
195 const float f_1 = pdf.get(1u) = pop[1u];
196 const float f_2 = pdf.get(2u) = pop[2u];
197 const float f_3 = pdf.get(3u) = pop[3u];
198 const float f_4 = pdf.get(4u) = pop[4u];
199 const float f_5 = pdf.get(5u) = pop[5u];
200 const float f_6 = pdf.get(6u) = pop[6u];
201 const float f_7 = pdf.get(7u) = pop[7u];
202 const float f_8 = pdf.get(8u) = pop[8u];
203 const float f_9 = pdf.get(9u) = pop[9u];
204 const float f_10 = pdf.get(10u) = pop[10u];
205 const float f_11 = pdf.get(11u) = pop[11u];
206 const float f_12 = pdf.get(12u) = pop[12u];
207 const float f_13 = pdf.get(13u) = pop[13u];
208 const float f_14 = pdf.get(14u) = pop[14u];
209 const float f_15 = pdf.get(15u) = pop[15u];
210 const float f_16 = pdf.get(16u) = pop[16u];
211 const float f_17 = pdf.get(17u) = pop[17u];
212 const float f_18 = pdf.get(18u) = pop[18u];
213 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
214 const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
215 const float vel1Term = f_1 + f_11 + f_15 + f_7;
216 const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
217 const float vel2Term = f_12 + f_13 + f_5;
218 const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
219 const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
220 const float md_0 = force.get(0) * 0.50000000000000000f + momdensity_0;
221 const float md_1 = force.get(1) * 0.50000000000000000f + momdensity_1;
222 const float md_2 = force.get(2) * 0.50000000000000000f + momdensity_2;
223 const float rho_inv =
float{1} / rho;
231std::array<float, 19u>
get(
232 gpu::GPUField<float>
const *pdf_field,
234 CellInterval ci(cell, cell);
235 thrust::device_vector<float> dev_data(19u);
236 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
238 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
239 kernel.addParam(dev_data_ptr);
241 std::array<float, 19u> pop;
242 thrust::copy(dev_data.begin(), dev_data.end(), pop.data());
247 gpu::GPUField<float> *pdf_field,
248 std::array<float, 19u>
const &pop,
250 thrust::device_vector<float> dev_data(pop.begin(), pop.end());
251 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
252 CellInterval ci(cell, cell);
254 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
255 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
260 gpu::GPUField<float> *pdf_field,
261 gpu::GPUField<float> *velocity_field,
262 gpu::GPUField<float>
const *force_field,
263 std::array<float, 19u>
const &pop,
265 thrust::device_vector<float> dev_data(pop.begin(), pop.end());
266 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
267 CellInterval ci(cell, cell);
269 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
270 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
271 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
272 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
277 gpu::GPUField<float> *pdf_field,
278 std::array<float, 19u>
const &pop) {
279 CellInterval ci = pdf_field->xyzSizeWithGhostLayer();
280 thrust::device_vector<float> dev_data(pop.begin(), pop.end());
281 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
283 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
284 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
289 gpu::GPUField<float>
const *pdf_field,
290 CellInterval
const &ci) {
291 thrust::device_vector<float> dev_data(ci.numCells() * 19u);
292 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
294 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
295 kernel.addParam(dev_data_ptr);
297 std::vector<float> out(ci.numCells() * 19u);
298 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
303 gpu::GPUField<float> *pdf_field,
304 std::vector<float>
const &values,
305 CellInterval
const &ci) {
306 thrust::device_vector<float> dev_data(values.begin(), values.end());
307 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
309 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
310 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
315 gpu::GPUField<float> *pdf_field,
316 gpu::GPUField<float> *velocity_field,
317 gpu::GPUField<float>
const *force_field,
318 std::vector<float>
const &values,
319 CellInterval
const &ci) {
320 thrust::device_vector<float> dev_data(values.begin(), values.end());
321 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
323 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
324 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
325 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
326 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
334 gpu::FieldAccessor<float> vec,
336 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
337 vec.set(blockIdx, threadIdx);
339 if (vec.isValidPosition()) {
340 u_out[0u] = vec.get(0u);
341 u_out[1u] = vec.get(1u);
342 u_out[2u] = vec.get(2u);
347 gpu::FieldAccessor<float> vec,
349 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
350 vec.set(blockIdx, threadIdx);
352 if (vec.isValidPosition()) {
353 vec.get(0u) = u_in[0u];
354 vec.get(1u) = u_in[1u];
355 vec.get(2u) = u_in[2u];
360 gpu::FieldAccessor<float> vec,
362 vec.set(blockIdx, threadIdx);
363 if (vec.isValidPosition()) {
364 vec.get(0u) = u_in[0u];
365 vec.get(1u) = u_in[1u];
366 vec.get(2u) = u_in[2u];
371 gpu::FieldAccessor<float> vec,
373 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
374 vec.set(blockIdx, threadIdx);
376 if (vec.isValidPosition()) {
377 vec.get(0u) += u_in[0u];
378 vec.get(1u) += u_in[1u];
379 vec.get(2u) += u_in[2u];
384 gpu::FieldAccessor<float> vec,
386 vec.set(blockIdx, threadIdx);
387 if (vec.isValidPosition()) {
388 vec.get(0u) += u_in[0u];
389 vec.get(1u) += u_in[1u];
390 vec.get(2u) += u_in[2u];
396 gpu::GPUField<float>
const *vec_field,
398 CellInterval ci(cell, cell);
399 thrust::device_vector<float> dev_data(3u);
400 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
402 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
403 kernel.addParam(dev_data_ptr);
406 thrust::copy(dev_data.begin(), dev_data.end(), vec.data());
411 gpu::GPUField<float> *vec_field,
412 Vector3<float>
const &vec,
414 CellInterval ci(cell, cell);
415 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
416 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
418 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
419 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
424 gpu::GPUField<float> *vec_field,
425 Vector3<float>
const &vec,
427 CellInterval ci(cell, cell);
428 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
429 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
431 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
432 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
437 gpu::GPUField<float> *vec_field,
438 Vector3<float>
const &vec) {
439 CellInterval ci = vec_field->xyzSizeWithGhostLayer();
440 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
441 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
443 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
444 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
449 gpu::GPUField<float> *vec_field,
450 Vector3<float>
const &vec) {
451 CellInterval ci = vec_field->xyzSizeWithGhostLayer();
452 thrust::device_vector<float> dev_data(vec.data(), vec.data() + 3u);
453 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
455 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
456 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
461 gpu::GPUField<float>
const *vec_field,
462 CellInterval
const &ci) {
463 thrust::device_vector<float> dev_data(ci.numCells() * 3u);
464 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
466 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
467 kernel.addParam(dev_data_ptr);
469 std::vector<float> out(ci.numCells() * 3u);
470 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
475 gpu::GPUField<float> *vec_field,
476 std::vector<float>
const &values,
477 CellInterval
const &ci) {
478 thrust::device_vector<float> dev_data(values.begin(), values.end());
479 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
481 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*vec_field, ci));
482 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
487namespace Interpolation {
496 for (
int dim = 0; dim < 3; ++dim) {
497 auto const fractional_index = pos[dim] -
float{0.5};
498 auto const nmp = floorf(fractional_index);
499 auto const distance = fractional_index - nmp -
float{0.5};
500 corner[dim] = __float2int_rn(nmp) +
static_cast<int>(gl);
501 weights[dim * 2 + 0] =
float{0.5} - distance;
502 weights[dim * 2 + 1] =
float{0.5} + distance;
507 gpu::FieldAccessor<float> vec,
513 uint pos_index = blockIdx.y * gridDim.x * blockDim.x +
514 blockDim.x * blockIdx.x + threadIdx.x;
516 vec.set({0u, 0u, 0u}, {0u, 0u, 0u});
517 if (vec.isValidPosition() and pos_index < n_pos) {
518 auto const array_offset = pos_index * uint(3u);
523 for (
int i = 0; i < 2; i++) {
524 auto const cx = corner[0] + i;
525 auto const wx = weights[0][i];
527 for (
int j = 0; j < 2; j++) {
528 auto const cy = corner[1] + j;
529 auto const wxy = wx * weights[1][j];
531 for (
int k = 0; k < 2; k++) {
532 auto const cz = corner[2] + k;
533 auto const weight = wxy * weights[2][k];
534 vel[array_offset + 0u] +=
weight * vec.getNeighbor(cx, cy, cz, 0u);
535 vel[array_offset + 1u] +=
weight * vec.getNeighbor(cx, cy, cz, 1u);
536 vel[array_offset + 2u] +=
weight * vec.getNeighbor(cx, cy, cz, 2u);
544 gpu::FieldAccessor<float> vec,
550 uint pos_index = blockIdx.y * gridDim.x * blockDim.x +
551 blockDim.x * blockIdx.x + threadIdx.x;
553 vec.set({0u, 0u, 0u}, {0u, 0u, 0u});
554 if (vec.isValidPosition() and pos_index < n_pos) {
555 auto const array_offset = pos_index * uint(3u);
560 for (
int i = 0; i < 2; i++) {
561 auto const cx = corner[0] + i;
562 auto const wx = weights[0][i];
564 for (
int j = 0; j < 2; j++) {
565 auto const cy = corner[1] + j;
566 auto const wxy = wx * weights[1][j];
568 for (
int k = 0; k < 2; k++) {
569 auto const cz = corner[2] + k;
570 auto const weight = wxy * weights[2][k];
571 atomicAdd(&vec.getNeighbor(cx, cy, cz, 0u),
572 weight * forces[array_offset + 0u]);
573 atomicAdd(&vec.getNeighbor(cx, cy, cz, 1u),
574 weight * forces[array_offset + 1u]);
575 atomicAdd(&vec.getNeighbor(cx, cy, cz, 2u),
576 weight * forces[array_offset + 2u]);
585 uint
const blocks_per_grid_y,
586 uint
const threads_per_block) {
587 assert(threads_x >= 1u);
588 assert(blocks_per_grid_y >= 1u);
589 assert(threads_per_block >= 1u);
590 auto const threads_y = threads_per_block * blocks_per_grid_y;
591 auto const blocks_per_grid_x = (threads_x + threads_y - 1) / threads_y;
592 return make_uint3(blocks_per_grid_x, blocks_per_grid_y, 1);
597 gpu::GPUField<float>
const *vec_field,
598 std::vector<float>
const &pos,
600 thrust::device_vector<float> dev_pos(pos.begin(), pos.end());
601 thrust::device_vector<float> dev_vel(pos.size());
602 auto const dev_pos_ptr = thrust::raw_pointer_cast(dev_pos.data());
603 auto const dev_vel_ptr = thrust::raw_pointer_cast(dev_vel.data());
605 auto const threads_per_block = uint(64u);
606 auto const n_pos =
static_cast<uint
>(pos.size() / 3ul);
608 kernel_get<<<dim_grid, threads_per_block, 0u, nullptr>>>(
609 gpu::FieldIndexing<float>::withGhostLayerXYZ(*vec_field, gl).gpuAccess(),
610 dev_pos_ptr, dev_vel_ptr, n_pos, gl);
612 std::vector<float> out(pos.size());
613 thrust::copy(dev_vel.begin(), dev_vel.end(), out.data());
618 gpu::GPUField<float>
const *vec_field,
619 std::vector<float>
const &pos,
620 std::vector<float>
const &forces,
622 thrust::device_vector<float> dev_pos(pos.begin(), pos.end());
623 thrust::device_vector<float> dev_for(forces.begin(), forces.end());
624 auto const dev_pos_ptr = thrust::raw_pointer_cast(dev_pos.data());
625 auto const dev_for_ptr = thrust::raw_pointer_cast(dev_for.data());
627 auto const threads_per_block = uint(64u);
628 auto const n_pos =
static_cast<uint
>(pos.size() / 3ul);
630 kernel_set<<<dim_grid, threads_per_block, 0u, nullptr>>>(
631 gpu::FieldIndexing<float>::withGhostLayerXYZ(*vec_field, gl).gpuAccess(),
632 dev_pos_ptr, dev_for_ptr, n_pos, gl);
636namespace Equilibrium {
639 gpu::FieldAccessor<float> pdf,
643 pdf.get(0u) = rho * -0.33333333333333331f * (u[0] * u[0]) + rho * -0.33333333333333331f * (u[1] * u[1]) + rho * -0.33333333333333331f * (u[2] * u[2]) + rho * 0.33333333333333331f;
644 pdf.get(1u) = rho * -0.16666666666666666f * (u[0] * u[0]) + rho * -0.16666666666666666f * (u[2] * u[2]) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * u[1] + rho * 0.16666666666666666f * (u[1] * u[1]);
645 pdf.get(2u) = rho * -0.16666666666666666f * u[1] + rho * -0.16666666666666666f * (u[0] * u[0]) + rho * -0.16666666666666666f * (u[2] * u[2]) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u[1] * u[1]);
646 pdf.get(3u) = rho * -0.16666666666666666f * u[0] + rho * -0.16666666666666666f * (u[1] * u[1]) + rho * -0.16666666666666666f * (u[2] * u[2]) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u[0] * u[0]);
647 pdf.get(4u) = rho * -0.16666666666666666f * (u[1] * u[1]) + rho * -0.16666666666666666f * (u[2] * u[2]) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * u[0] + rho * 0.16666666666666666f * (u[0] * u[0]);
648 pdf.get(5u) = rho * -0.16666666666666666f * (u[0] * u[0]) + rho * -0.16666666666666666f * (u[1] * u[1]) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * u[2] + rho * 0.16666666666666666f * (u[2] * u[2]);
649 pdf.get(6u) = rho * -0.16666666666666666f * u[2] + rho * -0.16666666666666666f * (u[0] * u[0]) + rho * -0.16666666666666666f * (u[1] * u[1]) + rho * 0.055555555555555552f + rho * 0.16666666666666666f * (u[2] * u[2]);
650 pdf.get(7u) = rho * -0.083333333333333329f * u[0] + rho * -0.25f * u[0] * u[1] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[1] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[1] * u[1]);
651 pdf.get(8u) = rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] + rho * 0.083333333333333329f * u[1] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.25f * u[0] * u[1];
652 pdf.get(9u) = rho * -0.083333333333333329f * u[0] + rho * -0.083333333333333329f * u[1] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.25f * u[0] * u[1];
653 pdf.get(10u) = rho * -0.083333333333333329f * u[1] + rho * -0.25f * u[0] * u[1] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[1] * u[1]);
654 pdf.get(11u) = rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[1] + rho * 0.083333333333333329f * u[2] + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[1] * u[2];
655 pdf.get(12u) = rho * -0.083333333333333329f * u[1] + rho * -0.25f * u[1] * u[2] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[2] + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.083333333333333329f * (u[2] * u[2]);
656 pdf.get(13u) = rho * -0.083333333333333329f * u[0] + rho * -0.25f * u[0] * u[2] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[2] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[2] * u[2]);
657 pdf.get(14u) = rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] + rho * 0.083333333333333329f * u[2] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[0] * u[2];
658 pdf.get(15u) = rho * -0.083333333333333329f * u[2] + rho * -0.25f * u[1] * u[2] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[1] + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.083333333333333329f * (u[2] * u[2]);
659 pdf.get(16u) = rho * -0.083333333333333329f * u[1] + rho * -0.083333333333333329f * u[2] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u[1] * u[1]) + rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[1] * u[2];
660 pdf.get(17u) = rho * -0.083333333333333329f * u[0] + rho * -0.083333333333333329f * u[2] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[0] * u[2];
661 pdf.get(18u) = rho * -0.083333333333333329f * u[2] + rho * -0.25f * u[0] * u[2] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[2] * u[2]);
669 gpu::FieldAccessor<float> pdf,
671 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u);
672 pdf.set(blockIdx, threadIdx);
674 if (pdf.isValidPosition()) {
675 float const f_0 = pdf.get(0u);
676 float const f_1 = pdf.get(1u);
677 float const f_2 = pdf.get(2u);
678 float const f_3 = pdf.get(3u);
679 float const f_4 = pdf.get(4u);
680 float const f_5 = pdf.get(5u);
681 float const f_6 = pdf.get(6u);
682 float const f_7 = pdf.get(7u);
683 float const f_8 = pdf.get(8u);
684 float const f_9 = pdf.get(9u);
685 float const f_10 = pdf.get(10u);
686 float const f_11 = pdf.get(11u);
687 float const f_12 = pdf.get(12u);
688 float const f_13 = pdf.get(13u);
689 float const f_14 = pdf.get(14u);
690 float const f_15 = pdf.get(15u);
691 float const f_16 = pdf.get(16u);
692 float const f_17 = pdf.get(17u);
693 float const f_18 = pdf.get(18u);
694 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
695 const float vel1Term = f_1 + f_11 + f_15 + f_7;
696 const float vel2Term = f_12 + f_13 + f_5;
697 const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
703 gpu::FieldAccessor<float> pdf,
705 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u);
706 pdf.set(blockIdx, threadIdx);
708 if (pdf.isValidPosition()) {
709 float const f_0 = pdf.get(0u);
710 float const f_1 = pdf.get(1u);
711 float const f_2 = pdf.get(2u);
712 float const f_3 = pdf.get(3u);
713 float const f_4 = pdf.get(4u);
714 float const f_5 = pdf.get(5u);
715 float const f_6 = pdf.get(6u);
716 float const f_7 = pdf.get(7u);
717 float const f_8 = pdf.get(8u);
718 float const f_9 = pdf.get(9u);
719 float const f_10 = pdf.get(10u);
720 float const f_11 = pdf.get(11u);
721 float const f_12 = pdf.get(12u);
722 float const f_13 = pdf.get(13u);
723 float const f_14 = pdf.get(14u);
724 float const f_15 = pdf.get(15u);
725 float const f_16 = pdf.get(16u);
726 float const f_17 = pdf.get(17u);
727 float const f_18 = pdf.get(18u);
728 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
729 const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
730 const float vel1Term = f_1 + f_11 + f_15 + f_7;
731 const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
732 const float vel2Term = f_12 + f_13 + f_5;
733 const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
734 const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
737 float const rho_inv =
float{1} / rho;
738 float const u_old[3] = {momdensity_0 * rho_inv, momdensity_1 * rho_inv, momdensity_2 * rho_inv};
746 gpu::GPUField<float>
const *pdf_field,
748 CellInterval ci(cell, cell);
749 thrust::device_vector<float> dev_data(1u);
750 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
752 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
753 kernel.addParam(dev_data_ptr);
755 float rho = dev_data[0u];
760 gpu::GPUField<float>
const *pdf_field,
761 CellInterval
const &ci) {
762 thrust::device_vector<float> dev_data(ci.numCells());
763 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
765 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
766 kernel.addParam(dev_data_ptr);
768 std::vector<float> out(dev_data.size());
769 thrust::copy(dev_data.begin(), dev_data.end(), out.begin());
774 gpu::GPUField<float> *pdf_field,
777 CellInterval ci(cell, cell);
778 thrust::device_vector<float> dev_data(1u, rho);
779 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
781 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
782 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
787 gpu::GPUField<float> *pdf_field,
788 std::vector<float>
const &values,
789 CellInterval
const &ci) {
790 thrust::device_vector<float> dev_data(values.begin(), values.end());
791 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
793 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
794 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
802 gpu::FieldAccessor<float> pdf,
803 gpu::FieldAccessor<float> force,
805 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
806 pdf.set(blockIdx, threadIdx);
807 force.set(blockIdx, threadIdx);
809 if (pdf.isValidPosition()) {
810 float const f_0 = pdf.get(0u);
811 float const f_1 = pdf.get(1u);
812 float const f_2 = pdf.get(2u);
813 float const f_3 = pdf.get(3u);
814 float const f_4 = pdf.get(4u);
815 float const f_5 = pdf.get(5u);
816 float const f_6 = pdf.get(6u);
817 float const f_7 = pdf.get(7u);
818 float const f_8 = pdf.get(8u);
819 float const f_9 = pdf.get(9u);
820 float const f_10 = pdf.get(10u);
821 float const f_11 = pdf.get(11u);
822 float const f_12 = pdf.get(12u);
823 float const f_13 = pdf.get(13u);
824 float const f_14 = pdf.get(14u);
825 float const f_15 = pdf.get(15u);
826 float const f_16 = pdf.get(16u);
827 float const f_17 = pdf.get(17u);
828 float const f_18 = pdf.get(18u);
829 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
830 const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
831 const float vel1Term = f_1 + f_11 + f_15 + f_7;
832 const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
833 const float vel2Term = f_12 + f_13 + f_5;
834 const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
835 const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
836 const float md_0 = force.get(0) * 0.50000000000000000f + momdensity_0;
837 const float md_1 = force.get(1) * 0.50000000000000000f + momdensity_1;
838 const float md_2 = force.get(2) * 0.50000000000000000f + momdensity_2;
839 auto const rho_inv =
float{1} / rho;
840 u_out[0u] = md_0 * rho_inv;
841 u_out[1u] = md_1 * rho_inv;
842 u_out[2u] = md_2 * rho_inv;
847 gpu::FieldAccessor<float> pdf,
849 gpu::FieldAccessor<float> force,
851 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
852 pdf.set(blockIdx, threadIdx);
854 force.set(blockIdx, threadIdx);
856 if (pdf.isValidPosition()) {
857 float const f_0 = pdf.get(0u);
858 float const f_1 = pdf.get(1u);
859 float const f_2 = pdf.get(2u);
860 float const f_3 = pdf.get(3u);
861 float const f_4 = pdf.get(4u);
862 float const f_5 = pdf.get(5u);
863 float const f_6 = pdf.get(6u);
864 float const f_7 = pdf.get(7u);
865 float const f_8 = pdf.get(8u);
866 float const f_9 = pdf.get(9u);
867 float const f_10 = pdf.get(10u);
868 float const f_11 = pdf.get(11u);
869 float const f_12 = pdf.get(12u);
870 float const f_13 = pdf.get(13u);
871 float const f_14 = pdf.get(14u);
872 float const f_15 = pdf.get(15u);
873 float const f_16 = pdf.get(16u);
874 float const f_17 = pdf.get(17u);
875 float const f_18 = pdf.get(18u);
876 float const *
RESTRICT const u = u_in;
877 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
878 const float vel1Term = f_1 + f_11 + f_15 + f_7;
879 const float vel2Term = f_12 + f_13 + f_5;
880 const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
881 const float u_0 = -force.get(0) * 0.50000000000000000f / rho + u[0];
882 const float u_1 = -force.get(1) * 0.50000000000000000f / rho + u[1];
883 const float u_2 = -force.get(2) * 0.50000000000000000f / rho + u[2];
888 float u_new[3] = {u_0, u_1, u_2};
896 gpu::GPUField<float>
const *pdf_field,
897 gpu::GPUField<float>
const *force_field,
899 CellInterval ci(cell, cell);
900 thrust::device_vector<float> dev_data(3u);
901 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
903 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
904 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
905 kernel.addParam(dev_data_ptr);
908 thrust::copy(dev_data.begin(), dev_data.end(), vec.data());
913 gpu::GPUField<float>
const *pdf_field,
914 gpu::GPUField<float>
const *force_field,
915 CellInterval
const &ci) {
916 thrust::device_vector<float> dev_data(3u);
917 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
919 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
920 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
921 kernel.addParam(dev_data_ptr);
923 std::vector<float> out(dev_data.size());
924 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
929 gpu::GPUField<float> *pdf_field,
930 gpu::GPUField<float> *velocity_field,
931 gpu::GPUField<float>
const *force_field,
932 Vector3<float>
const &u,
934 CellInterval ci(cell, cell);
935 thrust::device_vector<float> dev_data(u.data(), u.data() + 3u);
936 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
938 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
939 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
940 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
941 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
946 gpu::GPUField<float> *pdf_field,
947 gpu::GPUField<float> *velocity_field,
948 gpu::GPUField<float>
const *force_field,
949 std::vector<float>
const &values,
950 CellInterval
const &ci) {
951 thrust::device_vector<float> dev_data(values.begin(), values.end());
952 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
954 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
955 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
956 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
957 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
965 gpu::FieldAccessor<float> pdf,
967 gpu::FieldAccessor<float> force,
969 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
970 pdf.set(blockIdx, threadIdx);
972 force.set(blockIdx, threadIdx);
974 if (pdf.isValidPosition()) {
975 float const f_0 = pdf.get(0u);
976 float const f_1 = pdf.get(1u);
977 float const f_2 = pdf.get(2u);
978 float const f_3 = pdf.get(3u);
979 float const f_4 = pdf.get(4u);
980 float const f_5 = pdf.get(5u);
981 float const f_6 = pdf.get(6u);
982 float const f_7 = pdf.get(7u);
983 float const f_8 = pdf.get(8u);
984 float const f_9 = pdf.get(9u);
985 float const f_10 = pdf.get(10u);
986 float const f_11 = pdf.get(11u);
987 float const f_12 = pdf.get(12u);
988 float const f_13 = pdf.get(13u);
989 float const f_14 = pdf.get(14u);
990 float const f_15 = pdf.get(15u);
991 float const f_16 = pdf.get(16u);
992 float const f_17 = pdf.get(17u);
993 float const f_18 = pdf.get(18u);
994 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
995 const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
996 const float vel1Term = f_1 + f_11 + f_15 + f_7;
997 const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
998 const float vel2Term = f_12 + f_13 + f_5;
999 const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
1000 const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
1001 const float md_0 = f_in[0u] * 0.50000000000000000f + momdensity_0;
1002 const float md_1 = f_in[1u] * 0.50000000000000000f + momdensity_1;
1003 const float md_2 = f_in[2u] * 0.50000000000000000f + momdensity_2;
1004 auto const rho_inv =
float{1} / rho;
1006 force.get(0u) = f_in[0u];
1007 force.get(1u) = f_in[1u];
1008 force.get(2u) = f_in[2u];
1017void set(gpu::GPUField<float>
const *pdf_field,
1018 gpu::GPUField<float> *velocity_field,
1019 gpu::GPUField<float> *force_field,
1020 Vector3<float>
const &u,
1022 CellInterval ci(cell, cell);
1023 thrust::device_vector<float> dev_data(u.data(), u.data() + 3u);
1024 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1026 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1027 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
1028 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
1029 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
1033void set(gpu::GPUField<float>
const *pdf_field,
1034 gpu::GPUField<float> *velocity_field,
1035 gpu::GPUField<float> *force_field,
1036 std::vector<float>
const &values,
1037 CellInterval
const &ci) {
1038 thrust::device_vector<float> dev_data(values.begin(), values.end());
1039 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1041 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1042 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*velocity_field, ci));
1043 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
1044 kernel.addParam(
const_cast<const float *
>(dev_data_ptr));
1049namespace MomentumDensity {
1052 gpu::FieldAccessor<float> pdf,
1053 gpu::FieldAccessor<float> force,
1055 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u);
1056 pdf.set(blockIdx, threadIdx);
1057 force.set(blockIdx, threadIdx);
1059 if (pdf.isValidPosition()) {
1060 float const f_0 = pdf.get(0u);
1061 float const f_1 = pdf.get(1u);
1062 float const f_2 = pdf.get(2u);
1063 float const f_3 = pdf.get(3u);
1064 float const f_4 = pdf.get(4u);
1065 float const f_5 = pdf.get(5u);
1066 float const f_6 = pdf.get(6u);
1067 float const f_7 = pdf.get(7u);
1068 float const f_8 = pdf.get(8u);
1069 float const f_9 = pdf.get(9u);
1070 float const f_10 = pdf.get(10u);
1071 float const f_11 = pdf.get(11u);
1072 float const f_12 = pdf.get(12u);
1073 float const f_13 = pdf.get(13u);
1074 float const f_14 = pdf.get(14u);
1075 float const f_15 = pdf.get(15u);
1076 float const f_16 = pdf.get(16u);
1077 float const f_17 = pdf.get(17u);
1078 float const f_18 = pdf.get(18u);
1079 const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8;
1080 const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term;
1081 const float vel1Term = f_1 + f_11 + f_15 + f_7;
1082 const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term;
1083 const float vel2Term = f_12 + f_13 + f_5;
1084 const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term;
1085 const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term;
1086 const float md_0 = force.get(0) * 0.50000000000000000f + momdensity_0;
1087 const float md_1 = force.get(1) * 0.50000000000000000f + momdensity_1;
1088 const float md_2 = force.get(2) * 0.50000000000000000f + momdensity_2;
1097 gpu::GPUField<float>
const *pdf_field,
1098 gpu::GPUField<float>
const *force_field) {
1099 auto const ci = pdf_field->xyzSize();
1100 thrust::device_vector<float> dev_data(3u * ci.numCells());
1101 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1103 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1104 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*force_field, ci));
1105 kernel.addParam(dev_data_ptr);
1107 std::vector<float> out(dev_data.size());
1108 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
1109 Vector3<float> mom(
float{0});
1110 for (
auto i = uint_t{0u}; i < 3u * ci.numCells(); i += 3u) {
1111 mom[0u] += out[i + 0u];
1112 mom[1u] += out[i + 1u];
1113 mom[2u] += out[i + 2u];
1119namespace PressureTensor {
1122 gpu::FieldAccessor<float> pdf,
1124 auto const offset =
getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 9u);
1125 pdf.set(blockIdx, threadIdx);
1127 if (pdf.isValidPosition()) {
1128 float const f_0 = pdf.get(0u);
1129 float const f_1 = pdf.get(1u);
1130 float const f_2 = pdf.get(2u);
1131 float const f_3 = pdf.get(3u);
1132 float const f_4 = pdf.get(4u);
1133 float const f_5 = pdf.get(5u);
1134 float const f_6 = pdf.get(6u);
1135 float const f_7 = pdf.get(7u);
1136 float const f_8 = pdf.get(8u);
1137 float const f_9 = pdf.get(9u);
1138 float const f_10 = pdf.get(10u);
1139 float const f_11 = pdf.get(11u);
1140 float const f_12 = pdf.get(12u);
1141 float const f_13 = pdf.get(13u);
1142 float const f_14 = pdf.get(14u);
1143 float const f_15 = pdf.get(15u);
1144 float const f_16 = pdf.get(16u);
1145 float const f_17 = pdf.get(17u);
1146 float const f_18 = pdf.get(18u);
1147 const float p_0 = f_10 + f_13 + f_14 + f_17 + f_18 + f_3 + f_4 + f_7 + f_8 + f_9;
1148 const float p_1 = -f_10 - f_7 + f_8 + f_9;
1149 const float p_2 = -f_13 + f_14 + f_17 - f_18;
1150 const float p_3 = -f_10 - f_7 + f_8 + f_9;
1151 const float p_4 = f_1 + f_10 + f_11 + f_12 + f_15 + f_16 + f_2 + f_7 + f_8 + f_9;
1152 const float p_5 = f_11 - f_12 - f_15 + f_16;
1153 const float p_6 = -f_13 + f_14 + f_17 - f_18;
1154 const float p_7 = f_11 - f_12 - f_15 + f_16;
1155 const float p_8 = f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6;
1170 gpu::GPUField<float>
const *pdf_field,
1172 CellInterval ci(cell, cell);
1173 thrust::device_vector<float> dev_data(9u);
1174 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1176 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1177 kernel.addParam(dev_data_ptr);
1180 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
1185 gpu::GPUField<float>
const *pdf_field,
1186 CellInterval
const &ci) {
1187 thrust::device_vector<float> dev_data(9u * ci.numCells());
1188 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1190 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1191 kernel.addParam(dev_data_ptr);
1193 std::vector<float> out(dev_data.size());
1194 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
1199 gpu::GPUField<float>
const *pdf_field) {
1200 auto const ci = pdf_field->xyzSize();
1201 thrust::device_vector<float> dev_data(9u * ci.numCells());
1202 auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
1204 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*pdf_field, ci));
1205 kernel.addParam(dev_data_ptr);
1207 std::vector<float> out(dev_data.size());
1208 thrust::copy(dev_data.begin(), dev_data.end(), out.data());
1209 Matrix3<float> pressureTensor(
float{0});
1210 for (
auto i = uint_t{0u}; i < 9u * ci.numCells(); i += 9u) {
1211 pressureTensor[0u] += out[i + 0u];
1212 pressureTensor[1u] += out[i + 1u];
1213 pressureTensor[2u] += out[i + 2u];
1215 pressureTensor[3u] += out[i + 3u];
1216 pressureTensor[4u] += out[i + 4u];
1217 pressureTensor[5u] += out[i + 5u];
1219 pressureTensor[6u] += out[i + 6u];
1220 pressureTensor[7u] += out[i + 7u];
1221 pressureTensor[8u] += out[i + 8u];
1223 return pressureTensor;
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
static __forceinline__ __device__ uint getLinearIndex(uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim, uint fOffset)
Get linear index of flattened data with original layout fzyx.
static double weight(int type, double r_cut, double k, double r)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, double *RESTRICT rho_out)
void set(GhostLayerField< double, uint_t{19u}> *pdf_field, double const rho_in, Cell const &cell)
double get(GhostLayerField< double, uint_t{19u}> const *pdf_field, Cell const &cell)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, double const *RESTRICT rho_in)
__device__ void kernel_set_device(gpu::FieldAccessor< double > pdf, double const *RESTRICT const u, double rho)
void set(GhostLayerField< double, uint_t{19u}> const *pdf_field, GhostLayerField< double, uint_t{3u}> *velocity_field, GhostLayerField< double, uint_t{3u}> *force_field, Vector3< double > const &force, Cell const &cell)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > velocity, gpu::FieldAccessor< double > force, double const *RESTRICT f_in)
__global__ void kernel_get(gpu::FieldAccessor< double > vec, double const *RESTRICT const pos, double *RESTRICT const vel, uint n_pos, uint gl)
static dim3 calculate_dim_grid(uint const threads_x, uint const blocks_per_grid_y, uint const threads_per_block)
std::vector< double > get(gpu::GPUField< double > const *vec_field, std::vector< double > const &pos, uint gl)
__global__ void kernel_set(gpu::FieldAccessor< double > vec, double const *RESTRICT const pos, double const *RESTRICT const forces, uint n_pos, uint gl)
void set(gpu::GPUField< double > const *vec_field, std::vector< double > const &pos, std::vector< double > const &forces, uint gl)
static __forceinline__ __device__ void calculate_weights(double const *RESTRICT const pos, int *RESTRICT const corner, double *RESTRICT const weights, uint gl)
Calculate interpolation weights.
auto reduce(GhostLayerField< double, uint_t{19u}> const *pdf_field, GhostLayerField< double, uint_t{3u}> const *force_field)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > force, double *RESTRICT out)
__global__ void kernel_set_vel(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > velocity, gpu::FieldAccessor< double > force, double const *RESTRICT pop)
__global__ void kernel_broadcast(gpu::FieldAccessor< double > pdf, double const *RESTRICT pop)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, double const *RESTRICT pop)
void set(GhostLayerField< double, uint_t{19u}> *pdf_field, std::array< double, 19u > const &pop, Cell const &cell)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, double *RESTRICT pop)
auto get(GhostLayerField< double, uint_t{19u}> const *pdf_field, Cell const &cell)
void initialize(GhostLayerField< double, uint_t{19u}> *pdf_field, std::array< double, 19u > const &pop)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, double *RESTRICT p_out)
auto get(GhostLayerField< double, uint_t{19u}> const *pdf_field, Cell const &cell)
auto reduce(GhostLayerField< double, uint_t{19u}> const *pdf_field)
void initialize(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec)
__global__ void kernel_get(gpu::FieldAccessor< double > vec, double *u_out)
__global__ void kernel_broadcast(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void add(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec, Cell const &cell)
__global__ void kernel_set(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void set(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec, Cell const &cell)
auto get(GhostLayerField< double, uint_t{3u}> const *vec_field, Cell const &cell)
__global__ void kernel_broadcast_add(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void add_to_all(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec)
__global__ void kernel_add(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void set(GhostLayerField< double, uint_t{19u}> *pdf_field, GhostLayerField< double, uint_t{3u}> *velocity_field, GhostLayerField< double, uint_t{3u}> const *force_field, Vector3< double > const &u, Cell const &cell)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > force, double *RESTRICT u_out)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > velocity, gpu::FieldAccessor< double > force, double const *RESTRICT u_in)
auto get(GhostLayerField< double, uint_t{19u}> const *pdf_field, GhostLayerField< double, uint_t{3u}> const *force_field, Cell const &cell)
\file PackInfoPdfDoublePrecision.cpp \author pystencils
static Utils::Vector3d velocity(Particle const &p_ref, Particle const &p_vs)
Velocity of the virtual site.