#include <core/DataTypes.h>
#include <core/cell/Cell.h>
#include <core/cell/CellInterval.h>
#include <core/math/Vector3.h>

#include <field/iterators/IteratorMacros.h>

#include <gpu/FieldAccessor.h>
#include <gpu/FieldIndexing.h>
#include <gpu/GPUField.h>
#include <gpu/Kernel.h>

#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#if defined(__NVCC__)
#define RESTRICT __restrict__
#elif defined(__clang__)
#if defined(__CUDA__)
#if defined(__CUDA_ARCH__)
// clang compiling CUDA code in device mode
#define RESTRICT __restrict__
#else
#define RESTRICT __restrict__
#endif
#endif
#elif defined(__GNUC__) or defined(__GNUG__)
#define RESTRICT __restrict__
#elif defined(_MSC_VER)
#define RESTRICT __restrict
#else
#define RESTRICT
#endif
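// Usage sketch (hypothetical helper, not part of the generated file): RESTRICT
// expands to the compiler-specific no-aliasing qualifier selected above, and is
// applied to raw pointer arguments such as the kernel parameters declared at
// the end of this file.
static inline void axpy_sketch(double *RESTRICT y, double const *RESTRICT x,
                               double a, unsigned int n) {
  // with the no-aliasing guarantee the compiler may vectorize more aggressively
  for (unsigned int i = 0u; i < n; ++i) {
    y[i] += a * x[i];
  }
}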
// scalar field (1 component): single-cell getter (excerpt)
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(1u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
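// A minimal sketch of the accessor the excerpt above belongs to, assuming the
// kernel_get declaration listed at the end of this file, waLBerla's
// gpu::make_kernel helper and a gpu::GPUField argument; everything not visible
// in the excerpt is reconstructed, not taken verbatim from the generated code.
double get_sketch(gpu::GPUField<double> const *scalar_field, Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(1u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();            // launch over the single-cell interval
  return dev_data[0u]; // implicit device-to-host copy of the scalar value
}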
// single-cell setter (excerpt): the scalar value is passed to the kernel by value
  CellInterval ci(cell, cell);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  kernel.addParam(value);
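// Sketch of the enclosing setter, assuming it broadcasts the value through the
// kernel_broadcast declaration listed at the end of this file; gpu::make_kernel
// and the final launch are reconstructed.
void set_sketch(gpu::GPUField<double> *scalar_field, double const &value, Cell const &cell) {
  CellInterval ci(cell, cell);
  auto kernel = gpu::make_kernel(kernel_broadcast);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  kernel.addParam(value); // scalar passed by value, no device buffer needed
  kernel();
}
// The add() accessor excerpted next follows the same pattern, presumably with
// kernel_broadcast_add accumulating into the cell instead of overwriting it.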
// single-cell add (excerpt)
  CellInterval ci(cell, cell);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  kernel.addParam(value);
// whole-field initializer (excerpt of the wrapped signature and kernel setup)
    double const value) {
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  kernel.addParam(value);
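// Sketch of the whole-field initializer, assuming the interval spans the field
// including its ghost layers (as the vector-field variant further down does via
// xyzSizeWithGhostLayer()) and the value is broadcast with kernel_broadcast.
void initialize_sketch(gpu::GPUField<double> *scalar_field, double const value) {
  CellInterval ci = scalar_field->xyzSizeWithGhostLayer();
  auto kernel = gpu::make_kernel(kernel_broadcast);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  kernel.addParam(value);
  kernel();
}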
// add_to_all (excerpt)
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  kernel.addParam(value);
// getter over a cell interval (excerpt)
    CellInterval const &ci) {
  thrust::device_vector<double> dev_data(ci.numCells() * 1u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  // ...
  std::vector<double> out(ci.numCells());
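// Sketch of the interval getter, assuming kernel_get fills one value per cell
// and the device buffer is copied back with thrust::copy; names not visible in
// the excerpt above are reconstructed.
std::vector<double> get_interval_sketch(gpu::GPUField<double> const *scalar_field,
                                        CellInterval const &ci) {
  thrust::device_vector<double> dev_data(ci.numCells() * 1u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();
  std::vector<double> out(ci.numCells());
  thrust::copy(dev_data.begin(), dev_data.end(), out.begin()); // device-to-host transfer
  return out;
}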
// setter over a cell interval (excerpt)
    std::vector<double> const &values,
    CellInterval const &ci) {
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
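// Sketch of the interval setter, assuming the host values are staged in a
// thrust::device_vector and handed to kernel_set as a raw const pointer,
// matching the const_cast visible above; the rest is reconstructed.
void set_interval_sketch(gpu::GPUField<double> *scalar_field,
                         std::vector<double> const &values, CellInterval const &ci) {
  thrust::device_vector<double> dev_data(values.begin(), values.end()); // host-to-device copy
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_set);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*scalar_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}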
// Fragments of the five __global__ kernels for the 3-component vector field
// (kernel_get, kernel_set, kernel_add, kernel_broadcast, kernel_broadcast_add;
// see the declaration list at the end of this file). Each of the five takes the
// field accessor by value and guards its body with the same validity check:
    gpu::FieldAccessor<double> vec,
    // ...
  if (vec.isValidPosition()) {
    // ...
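// Sketch of one such kernel (the getter), assuming the getLinearIndex helper
// listed at the end of this file and the set()/get(f) interface of waLBerla's
// gpu::FieldAccessor; the component indexing is reconstructed, not copied from
// the generated source.
__global__ void kernel_get_sketch(gpu::FieldAccessor<double> vec, double *u_out) {
  auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint_t{3u});
  vec.set(blockIdx, threadIdx); // bind the accessor to this thread's cell
  u_out += offset;              // each thread owns a 3-entry slot in the output
  if (vec.isValidPosition()) {
    u_out[0u] = vec.get(0u);
    u_out[1u] = vec.get(1u);
    u_out[2u] = vec.get(2u);
  }
}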
// vector field (3 components): single-cell getter (excerpt)
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(3u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
// single-cell setter (excerpt)
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(vec.data(), vec.data() + 3u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
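// Sketch of the single-cell vector setter, assuming the three Vector3 components
// are staged in a thrust::device_vector (as the constructor call above shows)
// and consumed by kernel_set; the make_kernel call and launch are reconstructed.
void set_vector_sketch(gpu::GPUField<double> *vec_field, Vector3<double> const &vec,
                       Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(vec.data(), vec.data() + 3u); // host-to-device copy
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_set);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}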
// single-cell add (excerpt)
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(vec.data(), vec.data() + 3u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
// whole-field initializer including ghost layers (excerpt)
  CellInterval ci = vec_field->xyzSizeWithGhostLayer();
  thrust::device_vector<double> dev_data(vec.data(), vec.data() + 3u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
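// Sketch of the whole-field vector initializer, assuming the three components
// are broadcast to every cell (ghost layers included, per the
// xyzSizeWithGhostLayer() call above) via kernel_broadcast.
void initialize_vector_sketch(gpu::GPUField<double> *vec_field, Vector3<double> const &vec) {
  CellInterval ci = vec_field->xyzSizeWithGhostLayer();
  thrust::device_vector<double> dev_data(vec.data(), vec.data() + 3u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_broadcast);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
  kernel();
}
// The add_to_all() excerpt that follows differs only in the kernel it launches
// (presumably kernel_broadcast_add).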
// add_to_all over the whole field including ghost layers (excerpt)
  CellInterval ci = vec_field->xyzSizeWithGhostLayer();
  thrust::device_vector<double> dev_data(vec.data(), vec.data() + 3u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
// getter over a cell interval (excerpt)
    CellInterval const &ci) {
  thrust::device_vector<double> dev_data(ci.numCells() * 3u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  // ...
  std::vector<double> out(ci.numCells() * 3u);
// setter over a cell interval (excerpt)
    std::vector<double> const &values,
    CellInterval const &ci) {
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*vec_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
// flux field (13 components): single-cell getter (excerpt)
std::array<double, 13> get(
    // ...
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(13u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*flux_field, ci));
  // ...
  std::array<double, 13> vec;
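// Sketch of the 13-component getter, assuming kernel_get fills one value per
// flux direction and the buffer is copied into the returned std::array; the
// parameter list and everything not visible in the excerpt are reconstructed.
std::array<double, 13> get_flux_sketch(gpu::GPUField<double> const *flux_field,
                                       Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(13u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*flux_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();
  std::array<double, 13> vec;
  thrust::copy(dev_data.begin(), dev_data.end(), vec.begin()); // 13 values back to the host
  return vec;
}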
// writes a 13-component flux value into the field (excerpt)
    std::array<double, 13> const &flux) {
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*flux_field, ci));
  kernel.addParam(const_cast<const double *>(dev_data_ptr));
// getter over a cell interval (excerpt)
    CellInterval const &ci) {
  thrust::device_vector<double> dev_data(ci.numCells() * 13u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*flux_field, ci));
  // ...
  std::vector<double> out(ci.numCells() * 13u);
// get_vector: single-cell 3-component read from the flux field (excerpt)
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(3u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*flux_field, ci));
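// Sketch of the host side of get_vector(), assuming kernel_get_vector reduces
// the 13 flux directions of the cell to a Cartesian 3-vector on the device
// (hence the 3-entry buffer above); the Vector3 return is reconstructed.
Vector3<double> get_vector_sketch(gpu::GPUField<double> const *flux_field, Cell const &cell) {
  CellInterval ci(cell, cell);
  thrust::device_vector<double> dev_data(3u);
  auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data());
  auto kernel = gpu::make_kernel(kernel_get_vector);
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*flux_field, ci));
  kernel.addParam(dev_data_ptr);
  kernel();
  return Vector3<double>(dev_data[0u], dev_data[1u], dev_data[2u]);
}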
// get_vector over a cell interval (excerpt)
    CellInterval const &ci) {
  thrust::device_vector<double> dev_data(ci.numCells() * 3u);
  // ...
  kernel.addFieldIndexingParam(gpu::FieldIndexing<double>::interval(*flux_field, ci));
  // ...
  std::vector<double> out(ci.numCells() * 3u);
// Declarations cross-referenced in this file:
//
// Helpers:
//   static __forceinline__ __device__ uint getLinearIndex(uint3 blockIdx, uint3 threadIdx,
//       uint3 gridDim, uint3 blockDim, uint fOffset)
//     Get linear index of flattened data with original layout fzyx.
//   cudaStream_t stream[1]
//     CUDA streams for parallel computing on CPU and GPU.
//
// Scalar field (1 component):
//   __global__ void kernel_get(gpu::FieldAccessor<double> scalar_field, double *out)
//   __global__ void kernel_set(gpu::FieldAccessor<double> scalar_field, double const *RESTRICT in)
//   __global__ void kernel_add(gpu::FieldAccessor<double> scalar_field, double const *RESTRICT in)
//   __global__ void kernel_broadcast(gpu::FieldAccessor<double> scalar_field, double const in)
//   __global__ void kernel_broadcast_add(gpu::FieldAccessor<double> scalar_field, double const in)
//   auto get(GhostLayerField<double, 1u> const *scalar_field, Cell const &cell)
//   void set(GhostLayerField<double, 1u> *scalar_field, double const &value, Cell const &cell)
//   void add(GhostLayerField<double, 1u> *scalar_field, double const &value, Cell const &cell)
//   void initialize(GhostLayerField<double, 1u> *scalar_field, double const &value)
//   void add_to_all(GhostLayerField<double, 1u> *scalar_field, double const &value)
//
// Vector field (3 components):
//   __global__ void kernel_get(gpu::FieldAccessor<double> vec, double *u_out)
//   __global__ void kernel_set(gpu::FieldAccessor<double> vec, double const *RESTRICT u_in)
//   __global__ void kernel_add(gpu::FieldAccessor<double> vec, double const *RESTRICT u_in)
//   __global__ void kernel_broadcast(gpu::FieldAccessor<double> vec, double const *RESTRICT u_in)
//   __global__ void kernel_broadcast_add(gpu::FieldAccessor<double> vec, double const *RESTRICT u_in)
//   auto get(GhostLayerField<double, uint_t{3u}> const *vec_field, Cell const &cell)
//   void set(GhostLayerField<double, uint_t{3u}> *vec_field, Vector3<double> const &vec, Cell const &cell)
//   void add(GhostLayerField<double, uint_t{3u}> *vec_field, Vector3<double> const &vec, Cell const &cell)
//   void initialize(GhostLayerField<double, uint_t{3u}> *vec_field, Vector3<double> const &vec)
//   void add_to_all(GhostLayerField<double, uint_t{3u}> *vec_field, Vector3<double> const &vec)
//
// Flux field (13 components):
//   __global__ void kernel_get(gpu::FieldAccessor<double> flux_field, double *j_out)
//   __global__ void kernel_get_vector(gpu::FieldAccessor<double> flux_field, double *j_out)
//   __global__ void kernel_broadcast(gpu::FieldAccessor<double> flux_field, double const *RESTRICT j_in)
//   auto get(GhostLayerField<double, uint_t{13u}> const *flux_field, Cell const &cell)
//   auto get_vector(GhostLayerField<double, uint_t{13u}> const *flux_field, Cell const &cell)
//   void initialize(GhostLayerField<double, uint_t{13u}> *flux_field, std::array<double, 13> const &values)