30#include <core/DataTypes.h>
31#include <core/cell/Cell.h>
32#include <core/cell/CellInterval.h>
33#include <core/math/Matrix3.h>
34#include <core/math/Vector3.h>
36#include <field/iterators/IteratorMacros.h>
38#include <gpu/FieldAccessor.h>
39#include <gpu/FieldIndexing.h>
40#include <gpu/GPUField.h>
41#include <gpu/Kernel.h>
43#include <thrust/copy.h>
44#include <thrust/device_ptr.h>
45#include <thrust/device_vector.h>
46#include <thrust/functional.h>
47#include <thrust/transform.h>
53#define RESTRICT __restrict__
54#elif defined(__clang__)
56#if defined(__CUDA_ARCH__)
58#define RESTRICT __restrict__
61#define RESTRICT __restrict__
64#elif defined(__GNUC__) or defined(__GNUG__)
65#define RESTRICT __restrict__
66#elif defined(_MSC_VER)
67#define RESTRICT __restrict
114namespace Population {
117 gpu::FieldAccessor<float>
pdf,
122 if (
pdf.isValidPosition()) {
146 gpu::FieldAccessor<float>
pdf,
151 if (
pdf.isValidPosition()) {
175 gpu::FieldAccessor<float>
pdf,
178 if (
pdf.isValidPosition()) {
202 gpu::FieldAccessor<float>
pdf,
204 gpu::FieldAccessor<float> force,
211 if (
pdf.isValidPosition()) {
250std::array<float, 19u>
get(
253 CellInterval
ci(cell, cell);
254 thrust::device_vector<float>
dev_data(19u);
257 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
260 std::array<float, 19u>
pop;
267 std::array<float, 19u>
const &
pop,
271 CellInterval
ci(cell, cell);
273 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
274 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
282 std::array<float, 19u>
const &
pop,
286 CellInterval
ci(cell, cell);
288 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
289 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
velocity_field,
ci));
290 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
force_field,
ci));
291 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
297 std::array<float, 19u>
const &
pop) {
298 CellInterval
ci =
pdf_field->xyzSizeWithGhostLayer();
302 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
303 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
309 CellInterval
const &
ci) {
310 thrust::device_vector<float>
dev_data(
ci.numCells() * 19u);
313 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
316 std::vector<float>
out(
ci.numCells() * 19u);
323 std::vector<float>
const &
values,
324 CellInterval
const &
ci) {
328 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
329 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
337 std::vector<float>
const &
values,
338 CellInterval
const &
ci) {
342 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
343 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
velocity_field,
ci));
344 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
force_field,
ci));
345 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
353 gpu::FieldAccessor<float>
vec,
358 if (
vec.isValidPosition()) {
366 gpu::FieldAccessor<float>
vec,
371 if (
vec.isValidPosition()) {
379 gpu::FieldAccessor<float>
vec,
382 if (
vec.isValidPosition()) {
390 gpu::FieldAccessor<float>
vec,
395 if (
vec.isValidPosition()) {
403 gpu::FieldAccessor<float>
vec,
406 if (
vec.isValidPosition()) {
414 gpu::FieldAccessor<float>
vec,
423 if (
vec.isValidPosition()
and index < length) {
424 auto const array_index = index *
uint(3u);
426 auto const cy =
indices[array_index + 1u];
427 auto const cz =
indices[array_index + 2u];
439 CellInterval
ci(cell, cell);
440 thrust::device_vector<float>
dev_data(3u);
443 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
vec_field,
ci));
455 CellInterval
ci(cell, cell);
456 thrust::device_vector<float>
dev_data(
vec.data(),
vec.data() + 3u);
459 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
vec_field,
ci));
460 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
468 CellInterval
ci(cell, cell);
469 thrust::device_vector<float>
dev_data(
vec.data(),
vec.data() + 3u);
472 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
vec_field,
ci));
473 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
480 CellInterval
ci =
vec_field->xyzSizeWithGhostLayer();
481 thrust::device_vector<float>
dev_data(
vec.data(),
vec.data() + 3u);
484 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
vec_field,
ci));
485 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
492 CellInterval
ci =
vec_field->xyzSizeWithGhostLayer();
493 thrust::device_vector<float>
dev_data(
vec.data(),
vec.data() + 3u);
496 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
vec_field,
ci));
497 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
503 CellInterval
const &
ci) {
504 thrust::device_vector<float>
dev_data(
ci.numCells() * 3u);
507 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
vec_field,
ci));
510 std::vector<float>
out(
ci.numCells() * 3u);
517 std::vector<float>
const &
values,
518 CellInterval
const &
ci) {
522 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
vec_field,
ci));
523 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
528 gpu::GPUField<float>
const *field,
529 thrust::device_vector<int>
const &
indices,
530 thrust::device_vector<float>
const &
values,
536 auto const length =
static_cast<uint>(
indices.size() / 3ul);
539 gpu::FieldIndexing<float>::withGhostLayerXYZ(*field,
gl).gpuAccess(),
544namespace Interpolation {
553 for (
int dim = 0; dim < 3; ++dim) {
558 weights[dim * 2 + 0] =
float{0.5} - distance;
559 weights[dim * 2 + 1] =
float{0.5} + distance;
564 gpu::FieldAccessor<float>
pdf,
581 for (
int i = 0; i < 2; i++) {
582 auto const cx = corner[0] + i;
583 auto const wx = weights[0][i];
585 for (
int j = 0;
j < 2;
j++) {
586 auto const cy = corner[1] +
j;
587 auto const wxy =
wx * weights[1][
j];
589 for (
int k = 0; k < 2; k++) {
590 auto const cz = corner[2] + k;
624 gpu::FieldAccessor<float> vel,
633 vel.set({0
u, 0
u, 0
u}, {0
u, 0
u, 0
u});
640 for (
int i = 0; i < 2; i++) {
641 auto const cx = corner[0] + i;
642 auto const wx = weights[0][i];
644 for (
int j = 0;
j < 2;
j++) {
645 auto const cy = corner[1] +
j;
646 auto const wxy =
wx * weights[1][
j];
648 for (
int k = 0; k < 2; k++) {
649 auto const cz = corner[2] + k;
662 gpu::FieldAccessor<float> force,
671 force.set({0
u, 0
u, 0
u}, {0
u, 0
u, 0
u});
678 for (
int i = 0; i < 2; i++) {
679 auto const cx = corner[0] + i;
680 auto const wx = weights[0][i];
682 for (
int j = 0;
j < 2;
j++) {
683 auto const cy = corner[1] +
j;
684 auto const wxy =
wx * weights[1][
j];
686 for (
int k = 0; k < 2; k++) {
687 auto const cz = corner[2] + k;
703 gpu::GPUField<float>
const *field,
704 std::vector<float>
const &pos,
707 thrust::device_vector<float>
dev_pos(pos.begin(), pos.end());
708 thrust::device_vector<float>
dev_rho(pos.size() / 3ul);
713 auto const n_pos =
static_cast<uint>(pos.size() / 3ul);
716 gpu::FieldIndexing<float>::withGhostLayerXYZ(*field,
gl).gpuAccess(),
726 gpu::GPUField<float>
const *field,
727 std::vector<float>
const &pos,
729 thrust::device_vector<float>
dev_pos(pos.begin(), pos.end());
730 thrust::device_vector<float>
dev_vel(pos.size());
735 auto const n_pos =
static_cast<uint>(pos.size() / 3ul);
738 gpu::FieldIndexing<float>::withGhostLayerXYZ(*field,
gl).gpuAccess(),
747 gpu::GPUField<float>
const *field,
748 std::vector<float>
const &pos,
749 std::vector<float>
const &forces,
751 thrust::device_vector<float>
dev_pos(pos.begin(), pos.end());
752 thrust::device_vector<float>
dev_for(forces.begin(), forces.end());
757 auto const n_pos =
static_cast<uint>(pos.size() / 3ul);
760 gpu::FieldIndexing<float>::withGhostLayerXYZ(*field,
gl).gpuAccess(),
765namespace Equilibrium {
768 gpu::FieldAccessor<float>
pdf,
774 pdf.get(0
u) =
delta_rho * 0.33333333333333331f +
rho * -0.33333333333333331f * (
u[0] *
u[0]) +
rho * -0.33333333333333331f * (
u[1] *
u[1]) +
rho * -0.33333333333333331f * (
u[2] *
u[2]);
775 pdf.get(1u) =
delta_rho * 0.055555555555555552f +
rho * -0.16666666666666666f * (
u[0] *
u[0]) +
rho * -0.16666666666666666f * (
u[2] *
u[2]) +
rho * 0.16666666666666666f *
u[1] +
rho * 0.16666666666666666f * (
u[1] *
u[1]);
776 pdf.get(2u) =
delta_rho * 0.055555555555555552f +
rho * -0.16666666666666666f *
u[1] +
rho * -0.16666666666666666f * (
u[0] *
u[0]) +
rho * -0.16666666666666666f * (
u[2] *
u[2]) +
rho * 0.16666666666666666f * (
u[1] *
u[1]);
777 pdf.get(3u) =
delta_rho * 0.055555555555555552f +
rho * -0.16666666666666666f *
u[0] +
rho * -0.16666666666666666f * (
u[1] *
u[1]) +
rho * -0.16666666666666666f * (
u[2] *
u[2]) +
rho * 0.16666666666666666f * (
u[0] *
u[0]);
778 pdf.get(4u) =
delta_rho * 0.055555555555555552f +
rho * -0.16666666666666666f * (
u[1] *
u[1]) +
rho * -0.16666666666666666f * (
u[2] *
u[2]) +
rho * 0.16666666666666666f *
u[0] +
rho * 0.16666666666666666f * (
u[0] *
u[0]);
779 pdf.get(5u) =
delta_rho * 0.055555555555555552f +
rho * -0.16666666666666666f * (
u[0] *
u[0]) +
rho * -0.16666666666666666f * (
u[1] *
u[1]) +
rho * 0.16666666666666666f *
u[2] +
rho * 0.16666666666666666f * (
u[2] *
u[2]);
780 pdf.get(6u) =
delta_rho * 0.055555555555555552f +
rho * -0.16666666666666666f *
u[2] +
rho * -0.16666666666666666f * (
u[0] *
u[0]) +
rho * -0.16666666666666666f * (
u[1] *
u[1]) +
rho * 0.16666666666666666f * (
u[2] *
u[2]);
781 pdf.get(7u) =
delta_rho * 0.027777777777777776f +
rho * -0.083333333333333329f *
u[0] +
rho * -0.25f *
u[0] *
u[1] +
rho * 0.083333333333333329f *
u[1] +
rho * 0.083333333333333329f * (
u[0] *
u[0]) +
rho * 0.083333333333333329f * (
u[1] *
u[1]);
782 pdf.get(8u) =
delta_rho * 0.027777777777777776f +
rho * 0.083333333333333329f *
u[0] +
rho * 0.083333333333333329f *
u[1] +
rho * 0.083333333333333329f * (
u[0] *
u[0]) +
rho * 0.083333333333333329f * (
u[1] *
u[1]) +
rho * 0.25f *
u[0] *
u[1];
783 pdf.get(9u) =
delta_rho * 0.027777777777777776f +
rho * -0.083333333333333329f *
u[0] +
rho * -0.083333333333333329f *
u[1] +
rho * 0.083333333333333329f * (
u[0] *
u[0]) +
rho * 0.083333333333333329f * (
u[1] *
u[1]) +
rho * 0.25f *
u[0] *
u[1];
784 pdf.get(10u) =
delta_rho * 0.027777777777777776f +
rho * -0.083333333333333329f *
u[1] +
rho * -0.25f *
u[0] *
u[1] +
rho * 0.083333333333333329f *
u[0] +
rho * 0.083333333333333329f * (
u[0] *
u[0]) +
rho * 0.083333333333333329f * (
u[1] *
u[1]);
785 pdf.get(11u) =
delta_rho * 0.027777777777777776f +
rho * 0.083333333333333329f *
u[1] +
rho * 0.083333333333333329f *
u[2] +
rho * 0.083333333333333329f * (
u[1] *
u[1]) +
rho * 0.083333333333333329f * (
u[2] *
u[2]) +
rho * 0.25f *
u[1] *
u[2];
786 pdf.get(12u) =
delta_rho * 0.027777777777777776f +
rho * -0.083333333333333329f *
u[1] +
rho * -0.25f *
u[1] *
u[2] +
rho * 0.083333333333333329f *
u[2] +
rho * 0.083333333333333329f * (
u[1] *
u[1]) +
rho * 0.083333333333333329f * (
u[2] *
u[2]);
787 pdf.get(13u) =
delta_rho * 0.027777777777777776f +
rho * -0.083333333333333329f *
u[0] +
rho * -0.25f *
u[0] *
u[2] +
rho * 0.083333333333333329f *
u[2] +
rho * 0.083333333333333329f * (
u[0] *
u[0]) +
rho * 0.083333333333333329f * (
u[2] *
u[2]);
788 pdf.get(14u) =
delta_rho * 0.027777777777777776f +
rho * 0.083333333333333329f *
u[0] +
rho * 0.083333333333333329f *
u[2] +
rho * 0.083333333333333329f * (
u[0] *
u[0]) +
rho * 0.083333333333333329f * (
u[2] *
u[2]) +
rho * 0.25f *
u[0] *
u[2];
789 pdf.get(15u) =
delta_rho * 0.027777777777777776f +
rho * -0.083333333333333329f *
u[2] +
rho * -0.25f *
u[1] *
u[2] +
rho * 0.083333333333333329f *
u[1] +
rho * 0.083333333333333329f * (
u[1] *
u[1]) +
rho * 0.083333333333333329f * (
u[2] *
u[2]);
790 pdf.get(16u) =
delta_rho * 0.027777777777777776f +
rho * -0.083333333333333329f *
u[1] +
rho * -0.083333333333333329f *
u[2] +
rho * 0.083333333333333329f * (
u[1] *
u[1]) +
rho * 0.083333333333333329f * (
u[2] *
u[2]) +
rho * 0.25f *
u[1] *
u[2];
791 pdf.get(17u) =
delta_rho * 0.027777777777777776f +
rho * -0.083333333333333329f *
u[0] +
rho * -0.083333333333333329f *
u[2] +
rho * 0.083333333333333329f * (
u[0] *
u[0]) +
rho * 0.083333333333333329f * (
u[2] *
u[2]) +
rho * 0.25f *
u[0] *
u[2];
792 pdf.get(18u) =
delta_rho * 0.027777777777777776f +
rho * -0.083333333333333329f *
u[2] +
rho * -0.25f *
u[0] *
u[2] +
rho * 0.083333333333333329f *
u[0] +
rho * 0.083333333333333329f * (
u[0] *
u[0]) +
rho * 0.083333333333333329f * (
u[2] *
u[2]);
800 gpu::FieldAccessor<float>
pdf,
806 if (
pdf.isValidPosition()) {
808 float const f_1 =
pdf.get(1u);
809 float const f_2 =
pdf.get(2u);
810 float const f_3 =
pdf.get(3u);
811 float const f_4 =
pdf.get(4u);
812 float const f_5 =
pdf.get(5u);
813 float const f_6 =
pdf.get(6u);
814 float const f_7 =
pdf.get(7u);
815 float const f_8 =
pdf.get(8u);
816 float const f_9 =
pdf.get(9u);
817 float const f_10 =
pdf.get(10u);
818 float const f_11 =
pdf.get(11u);
819 float const f_12 =
pdf.get(12u);
820 float const f_13 =
pdf.get(13u);
821 float const f_14 =
pdf.get(14u);
822 float const f_15 =
pdf.get(15u);
823 float const f_16 =
pdf.get(16u);
824 float const f_17 =
pdf.get(17u);
825 float const f_18 =
pdf.get(18u);
836 gpu::FieldAccessor<float>
pdf,
842 if (
pdf.isValidPosition()) {
844 float const f_1 =
pdf.get(1u);
845 float const f_2 =
pdf.get(2u);
846 float const f_3 =
pdf.get(3u);
847 float const f_4 =
pdf.get(4u);
848 float const f_5 =
pdf.get(5u);
849 float const f_6 =
pdf.get(6u);
850 float const f_7 =
pdf.get(7u);
851 float const f_8 =
pdf.get(8u);
852 float const f_9 =
pdf.get(9u);
853 float const f_10 =
pdf.get(10u);
854 float const f_11 =
pdf.get(11u);
855 float const f_12 =
pdf.get(12u);
856 float const f_13 =
pdf.get(13u);
857 float const f_14 =
pdf.get(14u);
858 float const f_15 =
pdf.get(15u);
859 float const f_16 =
pdf.get(16u);
860 float const f_17 =
pdf.get(17u);
861 float const f_18 =
pdf.get(18u);
884 CellInterval
ci(cell, cell);
885 thrust::device_vector<float>
dev_data(1u);
888 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
900 CellInterval
const &
ci) {
901 thrust::device_vector<float>
dev_data(
ci.numCells());
904 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
918 CellInterval
ci(cell, cell);
922 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
923 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
930 std::vector<float>
const &
values,
932 CellInterval
const &
ci) {
936 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
937 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
946 gpu::FieldAccessor<float>
pdf,
947 gpu::FieldAccessor<float> force,
953 if (
pdf.isValidPosition()) {
955 float const f_1 =
pdf.get(1u);
956 float const f_2 =
pdf.get(2u);
957 float const f_3 =
pdf.get(3u);
958 float const f_4 =
pdf.get(4u);
959 float const f_5 =
pdf.get(5u);
960 float const f_6 =
pdf.get(6u);
961 float const f_7 =
pdf.get(7u);
962 float const f_8 =
pdf.get(8u);
963 float const f_9 =
pdf.get(9u);
964 float const f_10 =
pdf.get(10u);
965 float const f_11 =
pdf.get(11u);
966 float const f_12 =
pdf.get(12u);
967 float const f_13 =
pdf.get(13u);
968 float const f_14 =
pdf.get(14u);
969 float const f_15 =
pdf.get(15u);
970 float const f_16 =
pdf.get(16u);
971 float const f_17 =
pdf.get(17u);
972 float const f_18 =
pdf.get(18u);
992 gpu::FieldAccessor<float>
pdf,
994 gpu::FieldAccessor<float> force,
1001 if (
pdf.isValidPosition()) {
1002 float const f_0 =
pdf.get(0
u);
1003 float const f_1 =
pdf.get(1u);
1004 float const f_2 =
pdf.get(2u);
1005 float const f_3 =
pdf.get(3u);
1006 float const f_4 =
pdf.get(4u);
1007 float const f_5 =
pdf.get(5u);
1008 float const f_6 =
pdf.get(6u);
1009 float const f_7 =
pdf.get(7u);
1010 float const f_8 =
pdf.get(8u);
1011 float const f_9 =
pdf.get(9u);
1012 float const f_10 =
pdf.get(10u);
1013 float const f_11 =
pdf.get(11u);
1014 float const f_12 =
pdf.get(12u);
1015 float const f_13 =
pdf.get(13u);
1016 float const f_14 =
pdf.get(14u);
1017 float const f_15 =
pdf.get(15u);
1018 float const f_16 =
pdf.get(16u);
1019 float const f_17 =
pdf.get(17u);
1020 float const f_18 =
pdf.get(18u);
1027 const float u_0 = -force.get(0) * 0.50000000000000000f /
rho +
u[0];
1028 const float u_1 = -force.get(1) * 0.50000000000000000f /
rho +
u[1];
1029 const float u_2 = -force.get(2) * 0.50000000000000000f /
rho +
u[2];
1045 CellInterval
ci(cell, cell);
1046 thrust::device_vector<float>
dev_data(3u);
1049 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
1050 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
force_field,
ci));
1061 CellInterval
const &
ci) {
1062 thrust::device_vector<float>
dev_data(3u);
1065 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
1066 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
force_field,
ci));
1080 CellInterval
ci(cell, cell);
1081 thrust::device_vector<float>
dev_data(
u.data(),
u.data() + 3u);
1084 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
1085 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
velocity_field,
ci));
1086 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
force_field,
ci));
1087 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
1095 std::vector<float>
const &
values,
1096 CellInterval
const &
ci) {
1100 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
1101 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
velocity_field,
ci));
1102 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
force_field,
ci));
1103 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
1111 gpu::FieldAccessor<float>
pdf,
1112 gpu::FieldAccessor<float>
velocity,
1113 gpu::FieldAccessor<float> force,
1121 if (
pdf.isValidPosition()) {
1122 float const f_0 =
pdf.get(0
u);
1123 float const f_1 =
pdf.get(1u);
1124 float const f_2 =
pdf.get(2u);
1125 float const f_3 =
pdf.get(3u);
1126 float const f_4 =
pdf.get(4u);
1127 float const f_5 =
pdf.get(5u);
1128 float const f_6 =
pdf.get(6u);
1129 float const f_7 =
pdf.get(7u);
1130 float const f_8 =
pdf.get(8u);
1131 float const f_9 =
pdf.get(9u);
1132 float const f_10 =
pdf.get(10u);
1133 float const f_11 =
pdf.get(11u);
1134 float const f_12 =
pdf.get(12u);
1135 float const f_13 =
pdf.get(13u);
1136 float const f_14 =
pdf.get(14u);
1137 float const f_15 =
pdf.get(15u);
1138 float const f_16 =
pdf.get(16u);
1139 float const f_17 =
pdf.get(17u);
1140 float const f_18 =
pdf.get(18u);
1172 CellInterval
ci(cell, cell);
1173 thrust::device_vector<float>
dev_data(
u.data(),
u.data() + 3u);
1176 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
1177 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
velocity_field,
ci));
1178 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
force_field,
ci));
1179 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
1187 std::vector<float>
const &
values,
1189 CellInterval
const &
ci) {
1193 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
1194 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
velocity_field,
ci));
1195 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
force_field,
ci));
1196 kernel.addParam(
const_cast<const float *
>(
dev_data_ptr));
1202namespace MomentumDensity {
1205 gpu::FieldAccessor<float>
pdf,
1206 gpu::FieldAccessor<float> force,
1213 if (
pdf.isValidPosition()) {
1214 float const f_1 =
pdf.get(1u);
1215 float const f_2 =
pdf.get(2u);
1216 float const f_3 =
pdf.get(3u);
1217 float const f_4 =
pdf.get(4u);
1218 float const f_5 =
pdf.get(5u);
1219 float const f_6 =
pdf.get(6u);
1220 float const f_7 =
pdf.get(7u);
1221 float const f_8 =
pdf.get(8u);
1222 float const f_9 =
pdf.get(9u);
1223 float const f_10 =
pdf.get(10u);
1224 float const f_11 =
pdf.get(11u);
1225 float const f_12 =
pdf.get(12u);
1226 float const f_13 =
pdf.get(13u);
1227 float const f_14 =
pdf.get(14u);
1228 float const f_15 =
pdf.get(15u);
1229 float const f_16 =
pdf.get(16u);
1230 float const f_17 =
pdf.get(17u);
1231 float const f_18 =
pdf.get(18u);
1253 thrust::device_vector<float>
dev_data(3u *
ci.numCells());
1256 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
1257 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
force_field,
ci));
1264 for (
auto i =
uint_t{0
u}; i < 3u *
ci.numCells(); i += 3u) {
1273namespace PressureTensor {
1276 gpu::FieldAccessor<float>
pdf,
1281 if (
pdf.isValidPosition()) {
1282 float const f_1 =
pdf.getNeighbor(0, -1, 0, 1u);
1283 float const f_2 =
pdf.getNeighbor(0, 1, 0, 2u);
1284 float const f_3 =
pdf.getNeighbor(1, 0, 0, 3u);
1285 float const f_4 =
pdf.getNeighbor(-1, 0, 0, 4u);
1286 float const f_5 =
pdf.getNeighbor(0, 0, -1, 5u);
1287 float const f_6 =
pdf.getNeighbor(0, 0, 1, 6u);
1288 float const f_7 =
pdf.getNeighbor(1, -1, 0, 7u);
1289 float const f_8 =
pdf.getNeighbor(-1, -1, 0, 8u);
1290 float const f_9 =
pdf.getNeighbor(1, 1, 0, 9u);
1291 float const f_10 =
pdf.getNeighbor(-1, 1, 0, 10u);
1292 float const f_11 =
pdf.getNeighbor(0, -1, -1, 11u);
1293 float const f_12 =
pdf.getNeighbor(0, 1, -1, 12u);
1294 float const f_13 =
pdf.getNeighbor(1, 0, -1, 13u);
1295 float const f_14 =
pdf.getNeighbor(-1, 0, -1, 14u);
1296 float const f_15 =
pdf.getNeighbor(0, -1, 1, 15u);
1297 float const f_16 =
pdf.getNeighbor(0, 1, 1, 16u);
1298 float const f_17 =
pdf.getNeighbor(1, 0, 1, 17u);
1299 float const f_18 =
pdf.getNeighbor(-1, 0, 1, 18u);
1326 CellInterval
ci(cell, cell);
1327 thrust::device_vector<float>
dev_data(9u);
1330 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
1341 CellInterval
const &
ci) {
1342 thrust::device_vector<float>
dev_data(9u *
ci.numCells());
1345 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
1359 thrust::device_vector<float>
dev_data(9u *
ci.numCells());
1362 kernel.addFieldIndexingParam(gpu::FieldIndexing<float>::interval(*
pdf_field,
ci));
1368 for (
auto i =
uint_t{0
u}; i < 9u *
ci.numCells(); i += 9u) {
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
static __forceinline__ __device__ uint getLinearIndex(uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim, uint fOffset)
Get linear index of flattened data with original layout fzyx.
static dim3 calculate_dim_grid(uint const threads_x, uint const blocks_per_grid_y, uint const threads_per_block)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double weight(int type, double r_cut, double k, double r)
void set(GhostLayerField< double, uint_t{19u}> *pdf_field, double const rho_in, double const density, Cell const &cell)
double get(GhostLayerField< double, uint_t{19u}> const *pdf_field, double const density, Cell const &cell)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, double *RESTRICT rho_out, double const density)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, double const *RESTRICT rho_in, double const density)
__device__ void kernel_set_device(gpu::FieldAccessor< double > pdf, double const *RESTRICT const u, double rho)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > velocity, gpu::FieldAccessor< double > force, double const *RESTRICT f_in, double const density)
void set(GhostLayerField< double, uint_t{19u}> const *pdf_field, GhostLayerField< double, uint_t{3u}> *velocity_field, GhostLayerField< double, uint_t{3u}> *force_field, Vector3< double > const &force, double const density, Cell const &cell)
__global__ void kernel_get_rho(gpu::FieldAccessor< double > pdf, double const *RESTRICT const pos, double *RESTRICT const rho_out, double const density, uint n_pos, uint gl)
void add_force(gpu::GPUField< double > const *field, std::vector< double > const &pos, std::vector< double > const &forces, uint gl)
__global__ void kernel_add_force(gpu::FieldAccessor< double > force, double const *RESTRICT const pos, double const *RESTRICT const forces, uint n_pos, uint gl)
std::vector< double > get_rho(gpu::GPUField< double > const *field, std::vector< double > const &pos, double const density, uint gl)
__global__ void kernel_get_vel(gpu::FieldAccessor< double > vel, double const *RESTRICT const pos, double *RESTRICT const vel_out, uint n_pos, uint gl)
std::vector< double > get_vel(gpu::GPUField< double > const *field, std::vector< double > const &pos, uint gl)
static __forceinline__ __device__ void calculate_weights(double const *RESTRICT const pos, int *RESTRICT const corner, double *RESTRICT const weights, uint gl)
Calculate interpolation weights.
auto reduce(GhostLayerField< double, uint_t{19u}> const *pdf_field, GhostLayerField< double, uint_t{3u}> const *force_field, double const density)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > force, double *RESTRICT out, double const density)
__global__ void kernel_set_vel(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > velocity, gpu::FieldAccessor< double > force, double const *RESTRICT pop)
__global__ void kernel_broadcast(gpu::FieldAccessor< double > pdf, double const *RESTRICT pop)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, double const *RESTRICT pop)
void set(GhostLayerField< double, uint_t{19u}> *pdf_field, std::array< double, 19u > const &pop, Cell const &cell)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, double *RESTRICT pop)
auto get(GhostLayerField< double, uint_t{19u}> const *pdf_field, Cell const &cell)
void initialize(GhostLayerField< double, uint_t{19u}> *pdf_field, std::array< double, 19u > const &pop)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, double *RESTRICT p_out)
auto get(GhostLayerField< double, uint_t{19u}> const *pdf_field, double const density, Cell const &cell)
auto reduce(GhostLayerField< double, uint_t{19u}> const *pdf_field, double const density)
void initialize(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec)
__global__ void kernel_get(gpu::FieldAccessor< double > vec, double *u_out)
void set_from_list(gpu::GPUField< double > const *field, thrust::device_vector< int > const &indices, thrust::device_vector< double > const &values, uint gl)
__global__ void kernel_broadcast(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void add(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec, Cell const &cell)
__global__ void kernel_set(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void set(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec, Cell const &cell)
__global__ void kernel_set_from_list(gpu::FieldAccessor< double > vec, int const *RESTRICT const indices, double const *RESTRICT const values, uint length)
auto get(GhostLayerField< double, uint_t{3u}> const *vec_field, Cell const &cell)
__global__ void kernel_broadcast_add(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void add_to_all(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec)
__global__ void kernel_add(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void set(GhostLayerField< double, uint_t{19u}> *pdf_field, GhostLayerField< double, uint_t{3u}> *velocity_field, GhostLayerField< double, uint_t{3u}> const *force_field, Vector3< double > const &u, Cell const &cell)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > force, double *RESTRICT u_out)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > velocity, gpu::FieldAccessor< double > force, double const *RESTRICT u_in)
auto get(GhostLayerField< double, uint_t{19u}> const *pdf_field, GhostLayerField< double, uint_t{3u}> const *force_field, Cell const &cell)
\file PackInfoPdfDoublePrecision.cpp \author pystencils
static Utils::Vector3d velocity(Particle const &p_ref, Particle const &p_vs)
Velocity of the virtual site.
Rescale values in a device vector by some constant.
__host__ __device__ constexpr float operator()(float const &x) const
algo_rescale(float scale_factor)