41#if defined(__CUDACC__)
60#include <domain_decomposition/BlockDataID.h>
70namespace ReactionKernelBulkSelector {
72template <
typename FloatType =
double, std::
size_t N = 1>
struct KernelTrait {
73 using ReactionKernelBulk = pystencils::ReactionKernelBulk_1_double_precision;
76template <>
struct KernelTrait<
double, 2> {
77 using ReactionKernelBulk = pystencils::ReactionKernelBulk_2_double_precision;
80template <>
struct KernelTrait<
double, 3> {
81 using ReactionKernelBulk = pystencils::ReactionKernelBulk_3_double_precision;
84template <>
struct KernelTrait<
double, 4> {
85 using ReactionKernelBulk = pystencils::ReactionKernelBulk_4_double_precision;
88template <>
struct KernelTrait<
double, 5> {
89 using ReactionKernelBulk = pystencils::ReactionKernelBulk_5_double_precision;
92template <>
struct KernelTrait<
float, 1> {
93 using ReactionKernelBulk = pystencils::ReactionKernelBulk_1_single_precision;
96template <>
struct KernelTrait<
float, 2> {
97 using ReactionKernelBulk = pystencils::ReactionKernelBulk_2_single_precision;
100template <>
struct KernelTrait<
float, 3> {
101 using ReactionKernelBulk = pystencils::ReactionKernelBulk_3_single_precision;
104template <>
struct KernelTrait<
float, 4> {
105 using ReactionKernelBulk = pystencils::ReactionKernelBulk_4_single_precision;
108template <>
struct KernelTrait<
float, 5> {
109 using ReactionKernelBulk = pystencils::ReactionKernelBulk_5_single_precision;
112template <
typename FloatType,
class Reactant, std::size_t...
ints>
113auto get_kernel_impl(
const std::vector<std::shared_ptr<Reactant>> &
reactants,
115 std::index_sequence<ints...>
int_seq) {
116 auto kernel = std::make_shared<
117 typename KernelTrait<FloatType,
int_seq.size()>::ReactionKernelBulk>(
118 walberla::BlockDataID(
124 std::function<
void(IBlock *)>
sweep = [kernel](IBlock *b) { kernel->run(b); };
128template <
typename FloatType,
class Reactant,
class...
Args>
129auto get_kernel_impl(
const std::vector<std::shared_ptr<Reactant>> &
reactants,
135 std::make_index_sequence<1>{});
139 std::make_index_sequence<2>{});
143 std::make_index_sequence<3>{});
147 std::make_index_sequence<4>{});
151 std::make_index_sequence<5>{});
154 throw std::runtime_error(
"reactions of this size are not implemented!");
159auto get_kernel(
const std::vector<std::shared_ptr<Reactant>> &
reactants,
162 const auto is_double_precision =
163 reactants[0]->get_species()->is_double_precision();
165 if (is_double_precision) {
172#if defined(__CUDACC__)
174template <
typename FloatType =
double, std::
size_t N = 1>
177 pystencils::ReactionKernelBulk_1_double_precision_CUDA;
182 pystencils::ReactionKernelBulk_2_double_precision_CUDA;
187 pystencils::ReactionKernelBulk_3_double_precision_CUDA;
192 pystencils::ReactionKernelBulk_4_double_precision_CUDA;
197 pystencils::ReactionKernelBulk_5_double_precision_CUDA;
202 pystencils::ReactionKernelBulk_1_single_precision_CUDA;
207 pystencils::ReactionKernelBulk_2_single_precision_CUDA;
212 pystencils::ReactionKernelBulk_3_single_precision_CUDA;
217 pystencils::ReactionKernelBulk_4_single_precision_CUDA;
222 pystencils::ReactionKernelBulk_5_single_precision_CUDA;
225template <
typename FloatType,
class Reactant, std::size_t...
ints>
227 const std::vector<std::shared_ptr<Reactant>> &
reactants,
231 walberla::BlockDataID(
237 std::function<
void(IBlock *)>
sweep = [kernel](IBlock *b) { kernel->run(b); };
241template <
typename FloatType,
class Reactant,
class...
Args>
248 std::make_index_sequence<1>{});
252 std::make_index_sequence<2>{});
256 std::make_index_sequence<3>{});
260 std::make_index_sequence<4>{});
264 std::make_index_sequence<5>{});
267 throw std::runtime_error(
"reactions of this size are not implemented!");
275 const auto is_double_precision =
276 reactants[0]->get_species()->is_double_precision();
278 if (is_double_precision) {
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
\file PackInfoPdfDoublePrecision.cpp \author pystencils