26#include <waLBerlaDefinitions.h>
43#if defined(__CUDACC__) and defined(WALBERLA_BUILD_WITH_CUDA)
62#include <domain_decomposition/BlockDataID.h>
72namespace ReactionKernelIndexedSelector {
74template <
typename FloatType =
double, std::
size_t N = 1>
struct KernelTrait {
75 using ReactionKernelIndexed =
76 pystencils::ReactionKernelIndexed_1_double_precision;
79template <>
struct KernelTrait<
double, 2> {
80 using ReactionKernelIndexed =
81 pystencils::ReactionKernelIndexed_2_double_precision;
84template <>
struct KernelTrait<
double, 3> {
85 using ReactionKernelIndexed =
86 pystencils::ReactionKernelIndexed_3_double_precision;
89template <>
struct KernelTrait<
double, 4> {
90 using ReactionKernelIndexed =
91 pystencils::ReactionKernelIndexed_4_double_precision;
94template <>
struct KernelTrait<
double, 5> {
95 using ReactionKernelIndexed =
96 pystencils::ReactionKernelIndexed_5_double_precision;
99template <>
struct KernelTrait<
float, 1> {
100 using ReactionKernelIndexed =
101 pystencils::ReactionKernelIndexed_1_single_precision;
104template <>
struct KernelTrait<
float, 2> {
105 using ReactionKernelIndexed =
106 pystencils::ReactionKernelIndexed_2_single_precision;
109template <>
struct KernelTrait<
float, 3> {
110 using ReactionKernelIndexed =
111 pystencils::ReactionKernelIndexed_3_single_precision;
114template <>
struct KernelTrait<
float, 4> {
115 using ReactionKernelIndexed =
116 pystencils::ReactionKernelIndexed_4_single_precision;
119template <>
struct KernelTrait<
float, 5> {
120 using ReactionKernelIndexed =
121 pystencils::ReactionKernelIndexed_5_single_precision;
124template <
typename FloatType,
class Reactant, std::size_t...
ints>
125auto get_kernel_impl(
const std::vector<std::shared_ptr<Reactant>> &
reactants,
127 std::index_sequence<ints...>
int_seq) {
128 auto kernel = std::make_shared<
129 typename KernelTrait<FloatType,
int_seq.size()>::ReactionKernelIndexed>(
131 walberla::BlockDataID(
137 std::function<
void(IBlock *)>
sweep = [kernel](IBlock *b) { kernel->run(b); };
141template <
typename FloatType,
class Reactant,
class...
Args>
142auto get_kernel_impl(
const std::vector<std::shared_ptr<Reactant>> &
reactants,
148 std::make_index_sequence<1>{});
152 std::make_index_sequence<2>{});
156 std::make_index_sequence<3>{});
160 std::make_index_sequence<4>{});
164 std::make_index_sequence<5>{});
167 throw std::runtime_error(
"reactions of this size are not implemented!");
172auto get_kernel(
const std::vector<std::shared_ptr<Reactant>> &
reactants,
175 const auto is_double_precision =
176 reactants[0]->get_species()->is_double_precision();
178 if (is_double_precision) {
185#if defined(__CUDACC__) and defined(WALBERLA_BUILD_WITH_CUDA)
187template <
typename FloatType =
double, std::
size_t N = 1>
190 pystencils::ReactionKernelIndexed_1_double_precision_CUDA;
195 pystencils::ReactionKernelIndexed_2_double_precision_CUDA;
200 pystencils::ReactionKernelIndexed_3_double_precision_CUDA;
205 pystencils::ReactionKernelIndexed_4_double_precision_CUDA;
210 pystencils::ReactionKernelIndexed_5_double_precision_CUDA;
215 pystencils::ReactionKernelIndexed_1_single_precision_CUDA;
220 pystencils::ReactionKernelIndexed_2_single_precision_CUDA;
225 pystencils::ReactionKernelIndexed_3_single_precision_CUDA;
230 pystencils::ReactionKernelIndexed_4_single_precision_CUDA;
235 pystencils::ReactionKernelIndexed_5_single_precision_CUDA;
238template <
typename FloatType,
class Reactant, std::size_t...
ints>
240 const std::vector<std::shared_ptr<Reactant>> &
reactants,
242 std::index_sequence<ints...>
int_seq) {
246 walberla::BlockDataID(
252 std::function<
void(IBlock *)>
sweep = [kernel](IBlock *b) { kernel->run(b); };
256template <
typename FloatType,
class Reactant,
class...
Args>
263 std::make_index_sequence<1>{});
267 std::make_index_sequence<2>{});
271 std::make_index_sequence<3>{});
275 std::make_index_sequence<4>{});
279 std::make_index_sequence<5>{});
282 throw std::runtime_error(
"reactions of this size are not implemented!");
290 const auto is_double_precision =
291 reactants[0]->get_species()->is_double_precision();
293 if (is_double_precision) {
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
\file PackInfoPdfDoublePrecision.cpp \author pystencils