ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
ReactionKernelIndexed_4_single_precision_CUDA.h
/*
 * Copyright (C) 2022-2023 The ESPResSo project
 * Copyright (C) 2020-2023 The waLBerla project
 *
 * This file is part of ESPResSo.
 *
 * ESPResSo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * ESPResSo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

// kernel generated with pystencils v1.3.7+13.gdfd203a, lbmpy
// v1.3.7+10.gd3f6236, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from
// waLBerla commit e12db9965373887d86aab4aaaf4dd7b38fa588e8

/*
 * Boundary class.
 * Adapted from the waLBerla source file
 * https://i10git.cs.fau.de/walberla/walberla/-/blob/e12db9965373887d86aab4aaaf4dd7b38fa588e8/python/pystencils_walberla/templates/Boundary.tmpl.h
 */

#pragma once

#include <core/DataTypes.h>

#include <blockforest/StructuredBlockForest.h>
#include <core/debug/Debug.h>
#include <domain_decomposition/BlockDataID.h>
#include <domain_decomposition/IBlock.h>
#include <field/FlagField.h>
#include <gpu/FieldCopy.h>
#include <gpu/GPUField.h>
#include <gpu/GPUWrapper.h>

#include <cassert>
#include <functional>
#include <memory>
#include <vector>

#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wunused-parameter"
#elif defined(__GNUC__) or defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif

#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
using walberla::half;
#endif

namespace walberla {
namespace pystencils {
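/*
 * Generated indexed sweep that applies a reaction kernel to four
 * single-precision density fields (rho_0 ... rho_3) on the GPU. The kernel is
 * parametrized by per-species reaction orders, a rate coefficient and
 * per-species stoichiometric coefficients ("stoech"), and it visits only the
 * cells collected in the index vectors below.
 */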
class ReactionKernelIndexed_4_single_precision_CUDA {
public:
  struct IndexInfo {
    int32_t x;
    int32_t y;
    int32_t z;
    IndexInfo(int32_t x_, int32_t y_, int32_t z_) : x(x_), y(y_), z(z_) {}
    bool operator==(const IndexInfo &o) const {
      return x == o.x && y == o.y && z == o.z;
    }
  };
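  /*
   * Per-block container for the cell index lists: one host-side vector per
   * sweep type (ALL, INNER, OUTER) plus a device-side mirror that is
   * (re)allocated and filled by syncGPU().
   */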
  class IndexVectors {
  public:
    using CpuIndexVector = std::vector<IndexInfo>;

    enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };

    IndexVectors() = default;
    bool operator==(IndexVectors const &other) const {
      return other.cpuVectors_ == cpuVectors_;
    }
    ~IndexVectors() {
      for (auto &gpuVec : gpuVectors_) {
        if (gpuVec) {
          WALBERLA_GPU_CHECK(gpuFree(gpuVec));
        }
      }
    }
    CpuIndexVector &indexVector(Type t) { return cpuVectors_[t]; }
    IndexInfo *pointerCpu(Type t) {
      return cpuVectors_[t].empty() ? nullptr : cpuVectors_[t].data();
    }
    IndexInfo *pointerGpu(Type t) { return gpuVectors_[t]; }
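    // Release any previously allocated device buffers, then reallocate and
    // copy every non-empty host vector to the GPU.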
    void syncGPU() {
      for (auto &gpuVec : gpuVectors_)
        WALBERLA_GPU_CHECK(gpuFree(gpuVec));
      gpuVectors_.resize(cpuVectors_.size());

      WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
      for (size_t i = 0; i < cpuVectors_.size(); ++i) {
        auto &gpuVec = gpuVectors_[i];
        auto &cpuVec = cpuVectors_[i];
        if (cpuVec.empty()) {
          continue;
        }
        WALBERLA_GPU_CHECK(
            gpuMalloc(&gpuVec, sizeof(IndexInfo) * cpuVec.size()));
        WALBERLA_GPU_CHECK(gpuMemcpy(gpuVec, cpuVec.data(),
                                     sizeof(IndexInfo) * cpuVec.size(),
                                     gpuMemcpyHostToDevice));
      }
    }

  private:
    std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};

    using GpuIndexVector = IndexInfo *;
    std::vector<GpuIndexVector> gpuVectors_;
  };
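  /*
   * This overload registers a fresh IndexVectors container on every block of
   * the given block forest; the second overload below adopts an already
   * registered container via its BlockDataID.
   */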
  ReactionKernelIndexed_4_single_precision_CUDA(
      const std::shared_ptr<StructuredBlockForest> &blocks,
      BlockDataID rho_0ID_, BlockDataID rho_1ID_, BlockDataID rho_2ID_,
      BlockDataID rho_3ID_, float order_0, float order_1, float order_2,
      float order_3, float rate_coefficient, float stoech_0, float stoech_1,
      float stoech_2, float stoech_3)
      : rho_0ID(rho_0ID_), rho_1ID(rho_1ID_), rho_2ID(rho_2ID_),
        rho_3ID(rho_3ID_), order_0_(order_0), order_1_(order_1),
        order_2_(order_2), order_3_(order_3),
        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3) {
    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
      return new IndexVectors();
    };
    indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
        createIdxVector,
        "IndexField_ReactionKernelIndexed_4_single_precision_CUDA");
  }
  ReactionKernelIndexed_4_single_precision_CUDA(
      BlockDataID indexVectorID_, BlockDataID rho_0ID_, BlockDataID rho_1ID_,
      BlockDataID rho_2ID_, BlockDataID rho_3ID_, float order_0, float order_1,
      float order_2, float order_3, float rate_coefficient, float stoech_0,
      float stoech_1, float stoech_2, float stoech_3)
      : indexVectorID(indexVectorID_), rho_0ID(rho_0ID_), rho_1ID(rho_1ID_),
        rho_2ID(rho_2ID_), rho_3ID(rho_3ID_), order_0_(order_0),
        order_1_(order_1), order_2_(order_2), order_3_(order_3),
        rate_coefficient_(rate_coefficient), stoech_0_(stoech_0),
        stoech_1_(stoech_1), stoech_2_(stoech_2), stoech_3_(stoech_3) {}
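  // run() processes every registered cell; inner() and outer() process only
  // the INNER or OUTER subset, the usual waLBerla split that lets the outer
  // update overlap with communication.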
  void run(IBlock *block, gpuStream_t stream = nullptr);

  void operator()(IBlock *block, gpuStream_t stream = nullptr) {
    run(block, stream);
  }

  void inner(IBlock *block, gpuStream_t stream = nullptr);

  void outer(IBlock *block, gpuStream_t stream = nullptr);
  Vector3<real_t> getForce(IBlock * /*block*/) {
    WALBERLA_ABORT(
        "Boundary condition was not generated including force calculation.")
    return Vector3<real_t>(real_c(0.0));
  }
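  // The std::function objects returned below capture `this`; the kernel
  // instance must therefore outlive any time loop that uses them.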
  std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->run(b, stream); };
  }

  std::function<void(IBlock *)> getInnerSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->inner(b, stream); };
  }

  std::function<void(IBlock *)> getOuterSweep(gpuStream_t stream = nullptr) {
    return [this, stream](IBlock *b) { this->outer(b, stream); };
  }
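  /*
   * Populate the index vectors from a flag field: every cell carrying
   * boundaryFlagUID whose associated domain cell carries domainFlagUID is
   * recorded, partitioned into inner and outer cells, and mirrored on the
   * device via syncGPU().
   */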
  template <typename FlagField_T>
  void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
                         ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
                         FlagUID domainFlagUID) {
    for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
      fillFromFlagField<FlagField_T>(&*blockIt, flagFieldID, boundaryFlagUID,
                                     domainFlagUID);
  }
  template <typename FlagField_T>
  void fillFromFlagField(IBlock *block, ConstBlockDataID flagFieldID,
                         FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
    auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);

    auto *flagField = block->getData<FlagField_T>(flagFieldID);

    if (!(flagField->flagExists(boundaryFlagUID) and
          flagField->flagExists(domainFlagUID)))
      return;

    auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
    auto domainFlag = flagField->getFlag(domainFlagUID);

    auto inner = flagField->xyzSize();
    inner.expand(cell_idx_t(-1));

    indexVectorAll.clear();
    indexVectorInner.clear();
    indexVectorOuter.clear();

    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
         ++it) {
      if (!isFlagSet(it, boundaryFlag))
        continue;
      if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
                                   it.y() + cell_idx_c(0),
                                   it.z() + cell_idx_c(0)) &&
          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
        auto element = IndexInfo(it.x(), it.y(), it.z());

        indexVectorAll.emplace_back(element);
        if (inner.contains(it.x(), it.y(), it.z()))
          indexVectorInner.emplace_back(element);
        else
          indexVectorOuter.emplace_back(element);
      }
    }

    indexVectors->syncGPU();
  }
private:
  void run_impl(IBlock *block, IndexVectors::Type type,
                gpuStream_t stream = nullptr);

  BlockDataID indexVectorID;

public:
  BlockDataID rho_0ID;
  BlockDataID rho_1ID;
  BlockDataID rho_2ID;
  BlockDataID rho_3ID;
  float order_0_;
  float order_1_;
  float order_2_;
  float order_3_;
  float rate_coefficient_;
  float stoech_0_;
  float stoech_1_;
  float stoech_2_;
  float stoech_3_;
};
} // namespace pystencils
} // namespace walberla
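For orientation, a minimal usage sketch follows. It is not part of the
generated header: it assumes a standard waLBerla application in which the
block forest, a SweepTimeloop, the four GPU density fields and a flag field
are already set up, so the function name addReactionSweep, the flag UIDs
"reaction" and "domain", and all numeric reaction parameters are placeholders.

#include <blockforest/StructuredBlockForest.h>
#include <field/FlagField.h>
#include <timeloop/SweepTimeloop.h>

#include <memory>

#include "ReactionKernelIndexed_4_single_precision_CUDA.h"

using namespace walberla;

void addReactionSweep(const std::shared_ptr<StructuredBlockForest> &blocks,
                      SweepTimeloop &loop, BlockDataID rho_0,
                      BlockDataID rho_1, BlockDataID rho_2, BlockDataID rho_3,
                      ConstBlockDataID flagFieldID) {
  using FlagField_T = FlagField<uint8_t>;

  // Illustrative parameters only: first-order kinetics in every species,
  // rate coefficient 0.1, stoichiometry rho_0 + rho_1 -> rho_2 + rho_3.
  auto kernel = std::make_shared<
      pystencils::ReactionKernelIndexed_4_single_precision_CUDA>(
      blocks, rho_0, rho_1, rho_2, rho_3,
      /* orders */ 1.0f, 1.0f, 1.0f, 1.0f,
      /* rate_coefficient */ 0.1f,
      /* stoech */ -1.0f, -1.0f, 1.0f, 1.0f);

  // Collect the flagged cells once and mirror them in device memory.
  kernel->fillFromFlagField<FlagField_T>(blocks, flagFieldID,
                                         FlagUID("reaction"),
                                         FlagUID("domain"));

  // Wrapping the shared_ptr in the lambda keeps the kernel alive for the
  // lifetime of the time loop; the std::function returned by getSweep()
  // would capture a raw `this` pointer instead.
  loop.add() << Sweep([kernel](IBlock *b) { (*kernel)(b); },
                      "reaction kernel");
}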