30#include <core/DataTypes.h>
31#include <core/cell/Cell.h>
32#include <core/cell/CellInterval.h>
33#include <core/math/Matrix3.h>
34#include <core/math/Vector3.h>
36#include <field/iterators/IteratorMacros.h>
38#include <gpu/FieldAccessor.h>
39#include <gpu/FieldIndexing.h>
40#include <gpu/GPUField.h>
41#include <gpu/Kernel.h>
43#include <thrust/copy.h>
44#include <thrust/device_ptr.h>
45#include <thrust/device_vector.h>
46#include <thrust/functional.h>
47#include <thrust/transform.h>
53#define RESTRICT __restrict__
54#elif defined(__clang__)
56#if defined(__CUDA_ARCH__)
58#define RESTRICT __restrict__
61#define RESTRICT __restrict__
64#elif defined(__GNUC__) or defined(__GNUG__)
65#define RESTRICT __restrict__
66#elif defined(_MSC_VER)
67#define RESTRICT __restrict
114namespace Population {
117 gpu::FieldAccessor<double>
pdf,
122 if (
pdf.isValidPosition()) {
146 gpu::FieldAccessor<double>
pdf,
151 if (
pdf.isValidPosition()) {
175 gpu::FieldAccessor<double>
pdf,
178 if (
pdf.isValidPosition()) {
202 gpu::FieldAccessor<double>
pdf,
203 gpu::FieldAccessor<double>
velocity,
204 gpu::FieldAccessor<double> force,
211 if (
pdf.isValidPosition()) {
213 const double f_1 =
pdf.get(1u) =
pop[1u];
214 const double f_2 =
pdf.get(2u) =
pop[2u];
215 const double f_3 =
pdf.get(3u) =
pop[3u];
216 const double f_4 =
pdf.get(4u) =
pop[4u];
217 const double f_5 =
pdf.get(5u) =
pop[5u];
218 const double f_6 =
pdf.get(6u) =
pop[6u];
219 const double f_7 =
pdf.get(7u) =
pop[7u];
220 const double f_8 =
pdf.get(8u) =
pop[8u];
221 const double f_9 =
pdf.get(9u) =
pop[9u];
250std::array<double, 19u>
get(
253 CellInterval
ci(cell, cell);
254 thrust::device_vector<double>
dev_data(19u);
257 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
260 std::array<double, 19u>
pop;
267 std::array<double, 19u>
const &
pop,
271 CellInterval
ci(cell, cell);
273 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
274 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
282 std::array<double, 19u>
const &
pop,
286 CellInterval
ci(cell, cell);
288 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
289 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
velocity_field,
ci));
290 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
force_field,
ci));
291 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
297 std::array<double, 19u>
const &
pop) {
298 CellInterval
ci =
pdf_field->xyzSizeWithGhostLayer();
302 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
303 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
309 CellInterval
const &
ci) {
310 thrust::device_vector<double>
dev_data(
ci.numCells() * 19u);
313 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
316 std::vector<double>
out(
ci.numCells() * 19u);
323 std::vector<double>
const &
values,
324 CellInterval
const &
ci) {
328 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
329 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
337 std::vector<double>
const &
values,
338 CellInterval
const &
ci) {
342 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
343 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
velocity_field,
ci));
344 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
force_field,
ci));
345 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
353 gpu::FieldAccessor<double>
vec,
358 if (
vec.isValidPosition()) {
366 gpu::FieldAccessor<double>
vec,
371 if (
vec.isValidPosition()) {
379 gpu::FieldAccessor<double>
vec,
382 if (
vec.isValidPosition()) {
390 gpu::FieldAccessor<double>
vec,
395 if (
vec.isValidPosition()) {
403 gpu::FieldAccessor<double>
vec,
406 if (
vec.isValidPosition()) {
414 gpu::FieldAccessor<double>
vec,
423 if (
vec.isValidPosition()
and index < length) {
424 auto const array_index = index *
uint(3u);
426 auto const cy =
indices[array_index + 1u];
427 auto const cz =
indices[array_index + 2u];
439 CellInterval
ci(cell, cell);
440 thrust::device_vector<double>
dev_data(3u);
443 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
vec_field,
ci));
455 CellInterval
ci(cell, cell);
456 thrust::device_vector<double>
dev_data(
vec.data(),
vec.data() + 3u);
459 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
vec_field,
ci));
460 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
468 CellInterval
ci(cell, cell);
469 thrust::device_vector<double>
dev_data(
vec.data(),
vec.data() + 3u);
472 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
vec_field,
ci));
473 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
480 CellInterval
ci =
vec_field->xyzSizeWithGhostLayer();
481 thrust::device_vector<double>
dev_data(
vec.data(),
vec.data() + 3u);
484 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
vec_field,
ci));
485 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
492 CellInterval
ci =
vec_field->xyzSizeWithGhostLayer();
493 thrust::device_vector<double>
dev_data(
vec.data(),
vec.data() + 3u);
496 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
vec_field,
ci));
497 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
503 CellInterval
const &
ci) {
504 thrust::device_vector<double>
dev_data(
ci.numCells() * 3u);
507 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
vec_field,
ci));
510 std::vector<double>
out(
ci.numCells() * 3u);
517 std::vector<double>
const &
values,
518 CellInterval
const &
ci) {
522 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
vec_field,
ci));
523 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
528 gpu::GPUField<double>
const *field,
529 thrust::device_vector<int>
const &
indices,
530 thrust::device_vector<double>
const &
values,
536 auto const length =
static_cast<uint>(
indices.size() / 3ul);
539 gpu::FieldIndexing<double>::withGhostLayerXYZ(*field,
gl).gpuAccess(),
544namespace Interpolation {
553 for (
int dim = 0; dim < 3; ++dim) {
558 weights[dim * 2 + 0] =
double{0.5} - distance;
559 weights[dim * 2 + 1] =
double{0.5} + distance;
564 gpu::FieldAccessor<double>
pdf,
578 double weights[3][2];
581 for (
int i = 0; i < 2; i++) {
582 auto const cx = corner[0] + i;
583 auto const wx = weights[0][i];
585 for (
int j = 0;
j < 2;
j++) {
586 auto const cy = corner[1] +
j;
587 auto const wxy =
wx * weights[1][
j];
589 for (
int k = 0; k < 2; k++) {
590 auto const cz = corner[2] + k;
624 gpu::FieldAccessor<double> vel,
633 vel.set({0
u, 0
u, 0
u}, {0
u, 0
u, 0
u});
637 double weights[3][2];
640 for (
int i = 0; i < 2; i++) {
641 auto const cx = corner[0] + i;
642 auto const wx = weights[0][i];
644 for (
int j = 0;
j < 2;
j++) {
645 auto const cy = corner[1] +
j;
646 auto const wxy =
wx * weights[1][
j];
648 for (
int k = 0; k < 2; k++) {
649 auto const cz = corner[2] + k;
662 gpu::FieldAccessor<double> force,
664 double const *
RESTRICT const forces,
671 force.set({0
u, 0
u, 0
u}, {0
u, 0
u, 0
u});
675 double weights[3][2];
678 for (
int i = 0; i < 2; i++) {
679 auto const cx = corner[0] + i;
680 auto const wx = weights[0][i];
682 for (
int j = 0;
j < 2;
j++) {
683 auto const cy = corner[1] +
j;
684 auto const wxy =
wx * weights[1][
j];
686 for (
int k = 0; k < 2; k++) {
687 auto const cz = corner[2] + k;
703 gpu::GPUField<double>
const *field,
704 std::vector<double>
const &pos,
707 thrust::device_vector<double>
dev_pos(pos.begin(), pos.end());
708 thrust::device_vector<double>
dev_rho(pos.size() / 3ul);
713 auto const n_pos =
static_cast<uint>(pos.size() / 3ul);
716 gpu::FieldIndexing<double>::withGhostLayerXYZ(*field,
gl).gpuAccess(),
726 gpu::GPUField<double>
const *field,
727 std::vector<double>
const &pos,
729 thrust::device_vector<double>
dev_pos(pos.begin(), pos.end());
730 thrust::device_vector<double>
dev_vel(pos.size());
735 auto const n_pos =
static_cast<uint>(pos.size() / 3ul);
738 gpu::FieldIndexing<double>::withGhostLayerXYZ(*field,
gl).gpuAccess(),
747 gpu::GPUField<double>
const *field,
748 std::vector<double>
const &pos,
749 std::vector<double>
const &forces,
751 thrust::device_vector<double>
dev_pos(pos.begin(), pos.end());
752 thrust::device_vector<double>
dev_for(forces.begin(), forces.end());
757 auto const n_pos =
static_cast<uint>(pos.size() / 3ul);
760 gpu::FieldIndexing<double>::withGhostLayerXYZ(*field,
gl).gpuAccess(),
765namespace Equilibrium {
768 gpu::FieldAccessor<double>
pdf,
774 pdf.get(0
u) =
delta_rho * 0.33333333333333331 +
rho * -0.33333333333333331 * (
u[0] *
u[0]) +
rho * -0.33333333333333331 * (
u[1] *
u[1]) +
rho * -0.33333333333333331 * (
u[2] *
u[2]);
775 pdf.get(1u) =
delta_rho * 0.055555555555555552 +
rho * -0.16666666666666666 * (
u[0] *
u[0]) +
rho * -0.16666666666666666 * (
u[2] *
u[2]) +
rho * 0.16666666666666666 *
u[1] +
rho * 0.16666666666666666 * (
u[1] *
u[1]);
776 pdf.get(2u) =
delta_rho * 0.055555555555555552 +
rho * -0.16666666666666666 *
u[1] +
rho * -0.16666666666666666 * (
u[0] *
u[0]) +
rho * -0.16666666666666666 * (
u[2] *
u[2]) +
rho * 0.16666666666666666 * (
u[1] *
u[1]);
777 pdf.get(3u) =
delta_rho * 0.055555555555555552 +
rho * -0.16666666666666666 *
u[0] +
rho * -0.16666666666666666 * (
u[1] *
u[1]) +
rho * -0.16666666666666666 * (
u[2] *
u[2]) +
rho * 0.16666666666666666 * (
u[0] *
u[0]);
778 pdf.get(4u) =
delta_rho * 0.055555555555555552 +
rho * -0.16666666666666666 * (
u[1] *
u[1]) +
rho * -0.16666666666666666 * (
u[2] *
u[2]) +
rho * 0.16666666666666666 *
u[0] +
rho * 0.16666666666666666 * (
u[0] *
u[0]);
779 pdf.get(5u) =
delta_rho * 0.055555555555555552 +
rho * -0.16666666666666666 * (
u[0] *
u[0]) +
rho * -0.16666666666666666 * (
u[1] *
u[1]) +
rho * 0.16666666666666666 *
u[2] +
rho * 0.16666666666666666 * (
u[2] *
u[2]);
780 pdf.get(6u) =
delta_rho * 0.055555555555555552 +
rho * -0.16666666666666666 *
u[2] +
rho * -0.16666666666666666 * (
u[0] *
u[0]) +
rho * -0.16666666666666666 * (
u[1] *
u[1]) +
rho * 0.16666666666666666 * (
u[2] *
u[2]);
781 pdf.get(7u) =
delta_rho * 0.027777777777777776 +
rho * -0.083333333333333329 *
u[0] +
rho * -0.25 *
u[0] *
u[1] +
rho * 0.083333333333333329 *
u[1] +
rho * 0.083333333333333329 * (
u[0] *
u[0]) +
rho * 0.083333333333333329 * (
u[1] *
u[1]);
782 pdf.get(8u) =
delta_rho * 0.027777777777777776 +
rho * 0.083333333333333329 *
u[0] +
rho * 0.083333333333333329 *
u[1] +
rho * 0.083333333333333329 * (
u[0] *
u[0]) +
rho * 0.083333333333333329 * (
u[1] *
u[1]) +
rho * 0.25 *
u[0] *
u[1];
783 pdf.get(9u) =
delta_rho * 0.027777777777777776 +
rho * -0.083333333333333329 *
u[0] +
rho * -0.083333333333333329 *
u[1] +
rho * 0.083333333333333329 * (
u[0] *
u[0]) +
rho * 0.083333333333333329 * (
u[1] *
u[1]) +
rho * 0.25 *
u[0] *
u[1];
784 pdf.get(10u) =
delta_rho * 0.027777777777777776 +
rho * -0.083333333333333329 *
u[1] +
rho * -0.25 *
u[0] *
u[1] +
rho * 0.083333333333333329 *
u[0] +
rho * 0.083333333333333329 * (
u[0] *
u[0]) +
rho * 0.083333333333333329 * (
u[1] *
u[1]);
785 pdf.get(11u) =
delta_rho * 0.027777777777777776 +
rho * 0.083333333333333329 *
u[1] +
rho * 0.083333333333333329 *
u[2] +
rho * 0.083333333333333329 * (
u[1] *
u[1]) +
rho * 0.083333333333333329 * (
u[2] *
u[2]) +
rho * 0.25 *
u[1] *
u[2];
786 pdf.get(12u) =
delta_rho * 0.027777777777777776 +
rho * -0.083333333333333329 *
u[1] +
rho * -0.25 *
u[1] *
u[2] +
rho * 0.083333333333333329 *
u[2] +
rho * 0.083333333333333329 * (
u[1] *
u[1]) +
rho * 0.083333333333333329 * (
u[2] *
u[2]);
787 pdf.get(13u) =
delta_rho * 0.027777777777777776 +
rho * -0.083333333333333329 *
u[0] +
rho * -0.25 *
u[0] *
u[2] +
rho * 0.083333333333333329 *
u[2] +
rho * 0.083333333333333329 * (
u[0] *
u[0]) +
rho * 0.083333333333333329 * (
u[2] *
u[2]);
788 pdf.get(14u) =
delta_rho * 0.027777777777777776 +
rho * 0.083333333333333329 *
u[0] +
rho * 0.083333333333333329 *
u[2] +
rho * 0.083333333333333329 * (
u[0] *
u[0]) +
rho * 0.083333333333333329 * (
u[2] *
u[2]) +
rho * 0.25 *
u[0] *
u[2];
789 pdf.get(15u) =
delta_rho * 0.027777777777777776 +
rho * -0.083333333333333329 *
u[2] +
rho * -0.25 *
u[1] *
u[2] +
rho * 0.083333333333333329 *
u[1] +
rho * 0.083333333333333329 * (
u[1] *
u[1]) +
rho * 0.083333333333333329 * (
u[2] *
u[2]);
790 pdf.get(16u) =
delta_rho * 0.027777777777777776 +
rho * -0.083333333333333329 *
u[1] +
rho * -0.083333333333333329 *
u[2] +
rho * 0.083333333333333329 * (
u[1] *
u[1]) +
rho * 0.083333333333333329 * (
u[2] *
u[2]) +
rho * 0.25 *
u[1] *
u[2];
791 pdf.get(17u) =
delta_rho * 0.027777777777777776 +
rho * -0.083333333333333329 *
u[0] +
rho * -0.083333333333333329 *
u[2] +
rho * 0.083333333333333329 * (
u[0] *
u[0]) +
rho * 0.083333333333333329 * (
u[2] *
u[2]) +
rho * 0.25 *
u[0] *
u[2];
792 pdf.get(18u) =
delta_rho * 0.027777777777777776 +
rho * -0.083333333333333329 *
u[2] +
rho * -0.25 *
u[0] *
u[2] +
rho * 0.083333333333333329 *
u[0] +
rho * 0.083333333333333329 * (
u[0] *
u[0]) +
rho * 0.083333333333333329 * (
u[2] *
u[2]);
800 gpu::FieldAccessor<double>
pdf,
806 if (
pdf.isValidPosition()) {
807 double const f_0 =
pdf.get(0
u);
808 double const f_1 =
pdf.get(1u);
809 double const f_2 =
pdf.get(2u);
810 double const f_3 =
pdf.get(3u);
811 double const f_4 =
pdf.get(4u);
812 double const f_5 =
pdf.get(5u);
813 double const f_6 =
pdf.get(6u);
814 double const f_7 =
pdf.get(7u);
815 double const f_8 =
pdf.get(8u);
816 double const f_9 =
pdf.get(9u);
817 double const f_10 =
pdf.get(10u);
818 double const f_11 =
pdf.get(11u);
819 double const f_12 =
pdf.get(12u);
820 double const f_13 =
pdf.get(13u);
821 double const f_14 =
pdf.get(14u);
822 double const f_15 =
pdf.get(15u);
823 double const f_16 =
pdf.get(16u);
824 double const f_17 =
pdf.get(17u);
825 double const f_18 =
pdf.get(18u);
836 gpu::FieldAccessor<double>
pdf,
842 if (
pdf.isValidPosition()) {
843 double const f_0 =
pdf.get(0
u);
844 double const f_1 =
pdf.get(1u);
845 double const f_2 =
pdf.get(2u);
846 double const f_3 =
pdf.get(3u);
847 double const f_4 =
pdf.get(4u);
848 double const f_5 =
pdf.get(5u);
849 double const f_6 =
pdf.get(6u);
850 double const f_7 =
pdf.get(7u);
851 double const f_8 =
pdf.get(8u);
852 double const f_9 =
pdf.get(9u);
853 double const f_10 =
pdf.get(10u);
854 double const f_11 =
pdf.get(11u);
855 double const f_12 =
pdf.get(12u);
856 double const f_13 =
pdf.get(13u);
857 double const f_14 =
pdf.get(14u);
858 double const f_15 =
pdf.get(15u);
859 double const f_16 =
pdf.get(16u);
860 double const f_17 =
pdf.get(17u);
861 double const f_18 =
pdf.get(18u);
884 CellInterval
ci(cell, cell);
885 thrust::device_vector<double>
dev_data(1u);
888 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
900 CellInterval
const &
ci) {
901 thrust::device_vector<double>
dev_data(
ci.numCells());
904 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
918 CellInterval
ci(cell, cell);
922 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
923 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
930 std::vector<double>
const &
values,
932 CellInterval
const &
ci) {
936 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
937 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
946 gpu::FieldAccessor<double>
pdf,
947 gpu::FieldAccessor<double> force,
953 if (
pdf.isValidPosition()) {
954 double const f_0 =
pdf.get(0
u);
955 double const f_1 =
pdf.get(1u);
956 double const f_2 =
pdf.get(2u);
957 double const f_3 =
pdf.get(3u);
958 double const f_4 =
pdf.get(4u);
959 double const f_5 =
pdf.get(5u);
960 double const f_6 =
pdf.get(6u);
961 double const f_7 =
pdf.get(7u);
962 double const f_8 =
pdf.get(8u);
963 double const f_9 =
pdf.get(9u);
964 double const f_10 =
pdf.get(10u);
965 double const f_11 =
pdf.get(11u);
966 double const f_12 =
pdf.get(12u);
967 double const f_13 =
pdf.get(13u);
968 double const f_14 =
pdf.get(14u);
969 double const f_15 =
pdf.get(15u);
970 double const f_16 =
pdf.get(16u);
971 double const f_17 =
pdf.get(17u);
972 double const f_18 =
pdf.get(18u);
992 gpu::FieldAccessor<double>
pdf,
993 gpu::FieldAccessor<double>
velocity,
994 gpu::FieldAccessor<double> force,
1001 if (
pdf.isValidPosition()) {
1002 double const f_0 =
pdf.get(0
u);
1003 double const f_1 =
pdf.get(1u);
1004 double const f_2 =
pdf.get(2u);
1005 double const f_3 =
pdf.get(3u);
1006 double const f_4 =
pdf.get(4u);
1007 double const f_5 =
pdf.get(5u);
1008 double const f_6 =
pdf.get(6u);
1009 double const f_7 =
pdf.get(7u);
1010 double const f_8 =
pdf.get(8u);
1011 double const f_9 =
pdf.get(9u);
1012 double const f_10 =
pdf.get(10u);
1013 double const f_11 =
pdf.get(11u);
1014 double const f_12 =
pdf.get(12u);
1015 double const f_13 =
pdf.get(13u);
1016 double const f_14 =
pdf.get(14u);
1017 double const f_15 =
pdf.get(15u);
1018 double const f_16 =
pdf.get(16u);
1019 double const f_17 =
pdf.get(17u);
1020 double const f_18 =
pdf.get(18u);
1027 const double u_0 = -force.get(0) * 0.50000000000000000 /
rho +
u[0];
1028 const double u_1 = -force.get(1) * 0.50000000000000000 /
rho +
u[1];
1029 const double u_2 = -force.get(2) * 0.50000000000000000 /
rho +
u[2];
1045 CellInterval
ci(cell, cell);
1046 thrust::device_vector<double>
dev_data(3u);
1049 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
1050 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
force_field,
ci));
1061 CellInterval
const &
ci) {
1062 thrust::device_vector<double>
dev_data(3u);
1065 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
1066 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
force_field,
ci));
1080 CellInterval
ci(cell, cell);
1081 thrust::device_vector<double>
dev_data(
u.data(),
u.data() + 3u);
1084 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
1085 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
velocity_field,
ci));
1086 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
force_field,
ci));
1087 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
1095 std::vector<double>
const &
values,
1096 CellInterval
const &
ci) {
1100 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
1101 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
velocity_field,
ci));
1102 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
force_field,
ci));
1103 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
1111 gpu::FieldAccessor<double>
pdf,
1112 gpu::FieldAccessor<double>
velocity,
1113 gpu::FieldAccessor<double> force,
1121 if (
pdf.isValidPosition()) {
1122 double const f_0 =
pdf.get(0
u);
1123 double const f_1 =
pdf.get(1u);
1124 double const f_2 =
pdf.get(2u);
1125 double const f_3 =
pdf.get(3u);
1126 double const f_4 =
pdf.get(4u);
1127 double const f_5 =
pdf.get(5u);
1128 double const f_6 =
pdf.get(6u);
1129 double const f_7 =
pdf.get(7u);
1130 double const f_8 =
pdf.get(8u);
1131 double const f_9 =
pdf.get(9u);
1132 double const f_10 =
pdf.get(10u);
1133 double const f_11 =
pdf.get(11u);
1134 double const f_12 =
pdf.get(12u);
1135 double const f_13 =
pdf.get(13u);
1136 double const f_14 =
pdf.get(14u);
1137 double const f_15 =
pdf.get(15u);
1138 double const f_16 =
pdf.get(16u);
1139 double const f_17 =
pdf.get(17u);
1140 double const f_18 =
pdf.get(18u);
1172 CellInterval
ci(cell, cell);
1173 thrust::device_vector<double>
dev_data(
u.data(),
u.data() + 3u);
1176 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
1177 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
velocity_field,
ci));
1178 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
force_field,
ci));
1179 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
1187 std::vector<double>
const &
values,
1189 CellInterval
const &
ci) {
1193 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
1194 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
velocity_field,
ci));
1195 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
force_field,
ci));
1196 kernel.addParam(
const_cast<const double *
>(
dev_data_ptr));
1202namespace MomentumDensity {
1205 gpu::FieldAccessor<double>
pdf,
1206 gpu::FieldAccessor<double> force,
1213 if (
pdf.isValidPosition()) {
1214 double const f_1 =
pdf.get(1u);
1215 double const f_2 =
pdf.get(2u);
1216 double const f_3 =
pdf.get(3u);
1217 double const f_4 =
pdf.get(4u);
1218 double const f_5 =
pdf.get(5u);
1219 double const f_6 =
pdf.get(6u);
1220 double const f_7 =
pdf.get(7u);
1221 double const f_8 =
pdf.get(8u);
1222 double const f_9 =
pdf.get(9u);
1223 double const f_10 =
pdf.get(10u);
1224 double const f_11 =
pdf.get(11u);
1225 double const f_12 =
pdf.get(12u);
1226 double const f_13 =
pdf.get(13u);
1227 double const f_14 =
pdf.get(14u);
1228 double const f_15 =
pdf.get(15u);
1229 double const f_16 =
pdf.get(16u);
1230 double const f_17 =
pdf.get(17u);
1231 double const f_18 =
pdf.get(18u);
1253 thrust::device_vector<double>
dev_data(3u *
ci.numCells());
1256 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
1257 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
force_field,
ci));
1264 for (
auto i =
uint_t{0
u}; i < 3u *
ci.numCells(); i += 3u) {
1273namespace PressureTensor {
1276 gpu::FieldAccessor<double>
pdf,
1281 if (
pdf.isValidPosition()) {
1282 double const f_1 =
pdf.getNeighbor(0, -1, 0, 1u);
1283 double const f_2 =
pdf.getNeighbor(0, 1, 0, 2u);
1284 double const f_3 =
pdf.getNeighbor(1, 0, 0, 3u);
1285 double const f_4 =
pdf.getNeighbor(-1, 0, 0, 4u);
1286 double const f_5 =
pdf.getNeighbor(0, 0, -1, 5u);
1287 double const f_6 =
pdf.getNeighbor(0, 0, 1, 6u);
1288 double const f_7 =
pdf.getNeighbor(1, -1, 0, 7u);
1289 double const f_8 =
pdf.getNeighbor(-1, -1, 0, 8u);
1290 double const f_9 =
pdf.getNeighbor(1, 1, 0, 9u);
1291 double const f_10 =
pdf.getNeighbor(-1, 1, 0, 10u);
1292 double const f_11 =
pdf.getNeighbor(0, -1, -1, 11u);
1293 double const f_12 =
pdf.getNeighbor(0, 1, -1, 12u);
1294 double const f_13 =
pdf.getNeighbor(1, 0, -1, 13u);
1295 double const f_14 =
pdf.getNeighbor(-1, 0, -1, 14u);
1296 double const f_15 =
pdf.getNeighbor(0, -1, 1, 15u);
1297 double const f_16 =
pdf.getNeighbor(0, 1, 1, 16u);
1298 double const f_17 =
pdf.getNeighbor(1, 0, 1, 17u);
1299 double const f_18 =
pdf.getNeighbor(-1, 0, 1, 18u);
1326 CellInterval
ci(cell, cell);
1327 thrust::device_vector<double>
dev_data(9u);
1330 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
1341 CellInterval
const &
ci) {
1342 thrust::device_vector<double>
dev_data(9u *
ci.numCells());
1345 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
1359 thrust::device_vector<double>
dev_data(9u *
ci.numCells());
1362 kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*
pdf_field,
ci));
1368 for (
auto i =
uint_t{0
u}; i < 9u *
ci.numCells(); i += 9u) {
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
static __forceinline__ __device__ uint getLinearIndex(uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim, uint fOffset)
Get linear index of flattened data with original layout fzyx.
static __forceinline__ __device__ uint getLinearIndex(uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim, uint fOffset)
Get linear index of flattened data with original layout fzyx.
static dim3 calculate_dim_grid(uint const threads_x, uint const blocks_per_grid_y, uint const threads_per_block)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double weight(int type, double r_cut, double k, double r)
void set(GhostLayerField< double, uint_t{19u}> *pdf_field, double const rho_in, double const density, Cell const &cell)
double get(GhostLayerField< double, uint_t{19u}> const *pdf_field, double const density, Cell const &cell)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, double *RESTRICT rho_out, double const density)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, double const *RESTRICT rho_in, double const density)
__device__ void kernel_set_device(gpu::FieldAccessor< double > pdf, double const *RESTRICT const u, double rho)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > velocity, gpu::FieldAccessor< double > force, double const *RESTRICT f_in, double const density)
void set(GhostLayerField< double, uint_t{19u}> const *pdf_field, GhostLayerField< double, uint_t{3u}> *velocity_field, GhostLayerField< double, uint_t{3u}> *force_field, Vector3< double > const &force, double const density, Cell const &cell)
__global__ void kernel_get_rho(gpu::FieldAccessor< double > pdf, double const *RESTRICT const pos, double *RESTRICT const rho_out, double const density, uint n_pos, uint gl)
void add_force(gpu::GPUField< double > const *field, std::vector< double > const &pos, std::vector< double > const &forces, uint gl)
__global__ void kernel_add_force(gpu::FieldAccessor< double > force, double const *RESTRICT const pos, double const *RESTRICT const forces, uint n_pos, uint gl)
std::vector< double > get_rho(gpu::GPUField< double > const *field, std::vector< double > const &pos, double const density, uint gl)
__global__ void kernel_get_vel(gpu::FieldAccessor< double > vel, double const *RESTRICT const pos, double *RESTRICT const vel_out, uint n_pos, uint gl)
std::vector< double > get_vel(gpu::GPUField< double > const *field, std::vector< double > const &pos, uint gl)
static __forceinline__ __device__ void calculate_weights(double const *RESTRICT const pos, int *RESTRICT const corner, double *RESTRICT const weights, uint gl)
Calculate interpolation weights.
auto reduce(GhostLayerField< double, uint_t{19u}> const *pdf_field, GhostLayerField< double, uint_t{3u}> const *force_field, double const density)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > force, double *RESTRICT out, double const density)
__global__ void kernel_set_vel(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > velocity, gpu::FieldAccessor< double > force, double const *RESTRICT pop)
__global__ void kernel_broadcast(gpu::FieldAccessor< double > pdf, double const *RESTRICT pop)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, double const *RESTRICT pop)
void set(GhostLayerField< double, uint_t{19u}> *pdf_field, std::array< double, 19u > const &pop, Cell const &cell)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, double *RESTRICT pop)
auto get(GhostLayerField< double, uint_t{19u}> const *pdf_field, Cell const &cell)
void initialize(GhostLayerField< double, uint_t{19u}> *pdf_field, std::array< double, 19u > const &pop)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, double *RESTRICT p_out)
auto get(GhostLayerField< double, uint_t{19u}> const *pdf_field, double const density, Cell const &cell)
auto reduce(GhostLayerField< double, uint_t{19u}> const *pdf_field, double const density)
void initialize(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec)
__global__ void kernel_get(gpu::FieldAccessor< double > vec, double *u_out)
void set_from_list(gpu::GPUField< double > const *field, thrust::device_vector< int > const &indices, thrust::device_vector< double > const &values, uint gl)
__global__ void kernel_broadcast(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void add(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec, Cell const &cell)
__global__ void kernel_set(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void set(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec, Cell const &cell)
__global__ void kernel_set_from_list(gpu::FieldAccessor< double > vec, int const *RESTRICT const indices, double const *RESTRICT const values, uint length)
auto get(GhostLayerField< double, uint_t{3u}> const *vec_field, Cell const &cell)
__global__ void kernel_broadcast_add(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void add_to_all(GhostLayerField< double, uint_t{3u}> *vec_field, Vector3< double > const &vec)
__global__ void kernel_add(gpu::FieldAccessor< double > vec, double const *RESTRICT u_in)
void set(GhostLayerField< double, uint_t{19u}> *pdf_field, GhostLayerField< double, uint_t{3u}> *velocity_field, GhostLayerField< double, uint_t{3u}> const *force_field, Vector3< double > const &u, Cell const &cell)
__global__ void kernel_get(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > force, double *RESTRICT u_out)
__global__ void kernel_set(gpu::FieldAccessor< double > pdf, gpu::FieldAccessor< double > velocity, gpu::FieldAccessor< double > force, double const *RESTRICT u_in)
auto get(GhostLayerField< double, uint_t{19u}> const *pdf_field, GhostLayerField< double, uint_t{3u}> const *force_field, Cell const &cell)
\file PackInfoPdfDoublePrecision.cpp \author pystencils
static Utils::Vector3d velocity(Particle const &p_ref, Particle const &p_vs)
Velocity of the virtual site.
Rescale values in a device vector by some constant.
algo_rescale(double scale_factor)
__host__ __device__ constexpr double operator()(double const &x) const