26#include <waLBerlaDefinitions.h>
43#if defined(__CUDACC__) and defined(WALBERLA_BUILD_WITH_CUDA)
62#include <domain_decomposition/BlockDataID.h>
72namespace ReactionKernelBulkSelector {
74template <
typename FloatType =
double, std::
size_t N = 1>
struct KernelTrait {
75 using ReactionKernelBulk = pystencils::ReactionKernelBulk_1_double_precision;
78template <>
struct KernelTrait<
double, 2> {
79 using ReactionKernelBulk = pystencils::ReactionKernelBulk_2_double_precision;
82template <>
struct KernelTrait<
double, 3> {
83 using ReactionKernelBulk = pystencils::ReactionKernelBulk_3_double_precision;
86template <>
struct KernelTrait<
double, 4> {
87 using ReactionKernelBulk = pystencils::ReactionKernelBulk_4_double_precision;
90template <>
struct KernelTrait<
double, 5> {
91 using ReactionKernelBulk = pystencils::ReactionKernelBulk_5_double_precision;
94template <>
struct KernelTrait<
float, 1> {
95 using ReactionKernelBulk = pystencils::ReactionKernelBulk_1_single_precision;
98template <>
struct KernelTrait<
float, 2> {
99 using ReactionKernelBulk = pystencils::ReactionKernelBulk_2_single_precision;
102template <>
struct KernelTrait<
float, 3> {
103 using ReactionKernelBulk = pystencils::ReactionKernelBulk_3_single_precision;
106template <>
struct KernelTrait<
float, 4> {
107 using ReactionKernelBulk = pystencils::ReactionKernelBulk_4_single_precision;
110template <>
struct KernelTrait<
float, 5> {
111 using ReactionKernelBulk = pystencils::ReactionKernelBulk_5_single_precision;
114template <
typename FloatType,
class Reactant, std::size_t...
ints>
115auto get_kernel_impl(
const std::vector<std::shared_ptr<Reactant>> &
reactants,
117 std::index_sequence<ints...>
int_seq) {
118 auto kernel = std::make_shared<
119 typename KernelTrait<FloatType,
int_seq.size()>::ReactionKernelBulk>(
120 walberla::BlockDataID(
126 std::function<
void(IBlock *)>
sweep = [kernel](IBlock *b) { kernel->run(b); };
130template <
typename FloatType,
class Reactant,
class...
Args>
131auto get_kernel_impl(
const std::vector<std::shared_ptr<Reactant>> &
reactants,
137 std::make_index_sequence<1>{});
141 std::make_index_sequence<2>{});
145 std::make_index_sequence<3>{});
149 std::make_index_sequence<4>{});
153 std::make_index_sequence<5>{});
156 throw std::runtime_error(
"reactions of this size are not implemented!");
161auto get_kernel(
const std::vector<std::shared_ptr<Reactant>> &
reactants,
164 const auto is_double_precision =
165 reactants[0]->get_species()->is_double_precision();
167 if (is_double_precision) {
174#if defined(__CUDACC__) and defined(WALBERLA_BUILD_WITH_CUDA)
176template <
typename FloatType =
double, std::
size_t N = 1>
179 pystencils::ReactionKernelBulk_1_double_precision_CUDA;
184 pystencils::ReactionKernelBulk_2_double_precision_CUDA;
189 pystencils::ReactionKernelBulk_3_double_precision_CUDA;
194 pystencils::ReactionKernelBulk_4_double_precision_CUDA;
199 pystencils::ReactionKernelBulk_5_double_precision_CUDA;
204 pystencils::ReactionKernelBulk_1_single_precision_CUDA;
209 pystencils::ReactionKernelBulk_2_single_precision_CUDA;
214 pystencils::ReactionKernelBulk_3_single_precision_CUDA;
219 pystencils::ReactionKernelBulk_4_single_precision_CUDA;
224 pystencils::ReactionKernelBulk_5_single_precision_CUDA;
227template <
typename FloatType,
class Reactant, std::size_t...
ints>
229 const std::vector<std::shared_ptr<Reactant>> &
reactants,
233 walberla::BlockDataID(
239 std::function<
void(IBlock *)>
sweep = [kernel](IBlock *b) { kernel->run(b); };
243template <
typename FloatType,
class Reactant,
class...
Args>
250 std::make_index_sequence<1>{});
254 std::make_index_sequence<2>{});
258 std::make_index_sequence<3>{});
262 std::make_index_sequence<4>{});
266 std::make_index_sequence<5>{});
269 throw std::runtime_error(
"reactions of this size are not implemented!");
277 const auto is_double_precision =
278 reactants[0]->get_species()->is_double_precision();
280 if (is_double_precision) {
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
\file PackInfoPdfDoublePrecision.cpp \author pystencils