dox/StreamCollideSweepLeesEdwardsDoublePrecisionCUDA_8h_source.html

//======================================================================================================================

//

//  This file is part of waLBerla. waLBerla is free software: you can

//  redistribute it and/or modify it under the terms of the GNU General Public

//  License as published by the Free Software Foundation, either version 3 of

//  the License, or (at your option) any later version.

//

//  waLBerla is distributed in the hope that it will be useful, but WITHOUT

//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

//  for more details.

//

//  You should have received a copy of the GNU General Public License along

//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.

//

//! \\file StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h

//! \\author pystencils

//======================================================================================================================


// kernel generated with pystencils v1.4+1.ge851f4e, lbmpy v1.4+1.ge9efe34,

// sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit

// 272d4a09ec35da50685afc9586645e1b9984b423


#pragma once

#include "core/DataTypes.h"

#include "core/logging/Logging.h"


#include "gpu/GPUField.h"

#include "gpu/GPUWrapper.h"


#include "domain_decomposition/BlockDataID.h"

#include "domain_decomposition/IBlock.h"

#include "domain_decomposition/StructuredBlockStorage.h"

#include "field/SwapableCompare.h"


#include <functional>

#include <unordered_map>


#ifdef __GNUC__

#define RESTRICT __restrict__

#else

#define RESTRICT

#endif


#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \

    (defined WALBERLA_CXX_COMPILER_IS_CLANG)

#pragma GCC diagnostic push

#pragma GCC diagnostic ignored "-Wunused-parameter"

#pragma GCC diagnostic ignored "-Wreorder"

#endif


namespace walberla {

namespace pystencils {


class StreamCollideSweepLeesEdwardsDoublePrecisionCUDA {

public:


  StreamCollideSweepLeesEdwardsDoublePrecisionCUDA(

      BlockDataID forceID_, BlockDataID pdfsID_, int64_t lebc_bot_index,

      int64_t lebc_top_index, double omega_bulk, double omega_even,

      double omega_odd, double omega_shear, double v_s)

      : forceID(forceID_), pdfsID(pdfsID_), lebc_bot_index_(lebc_bot_index),

        lebc_top_index_(lebc_top_index), omega_bulk_(omega_bulk),

        omega_even_(omega_even), omega_odd_(omega_odd),

        omega_shear_(omega_shear), v_s_(v_s) {}


  ~StreamCollideSweepLeesEdwardsDoublePrecisionCUDA() {

    for (auto p : cache_pdfs_) {

      delete p.second;

    }

  }


  void run(IBlock *block, gpuStream_t stream = nullptr);


  void runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,

                         const CellInterval &globalCellInterval,

                         cell_idx_t ghostLayers, IBlock *block,

                         gpuStream_t stream = nullptr);


  void operator()(IBlock *block, gpuStream_t stream = nullptr) {

    run(block, stream);

  }


  static std::function<void(IBlock *)>


  getSweep(const shared_ptr<StreamCollideSweepLeesEdwardsDoublePrecisionCUDA>

               &kernel) {

    return [kernel](IBlock *b) { kernel->run(b); };

  }


  static std::function<void(IBlock *, gpuStream_t)> getSweepOnCellInterval(

      const shared_ptr<StreamCollideSweepLeesEdwardsDoublePrecisionCUDA>

          &kernel,

      const shared_ptr<StructuredBlockStorage> &blocks,

      const CellInterval &globalCellInterval, cell_idx_t ghostLayers = 1) {

    return [kernel, blocks, globalCellInterval,

            ghostLayers](IBlock *b, gpuStream_t stream = nullptr) {

      kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b,

                                stream);

    };

  }


  std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {

    return [this, stream](IBlock *b) { this->run(b, stream); };

  }


  std::function<void(IBlock *)>


  getSweepOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks,

                         const CellInterval &globalCellInterval,

                         cell_idx_t ghostLayers = 1,

                         gpuStream_t stream = nullptr) {

    return [this, blocks, globalCellInterval, ghostLayers, stream](IBlock *b) {

      this->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b,

                              stream);

    };

  }


  void configure(const shared_ptr<StructuredBlockStorage> & /*blocks*/,

                 IBlock * /*block*/) {}


  inline int64_t getLebc_bot_index() const { return lebc_bot_index_; }

  inline int64_t getLebc_top_index() const { return lebc_top_index_; }

  inline double getOmega_bulk() const { return omega_bulk_; }

  inline double getOmega_even() const { return omega_even_; }

  inline double getOmega_odd() const { return omega_odd_; }

  inline double getOmega_shear() const { return omega_shear_; }

  inline double getV_s() const { return v_s_; }


  inline void setLebc_bot_index(const int64_t value) {

    lebc_bot_index_ = value;

  }


  inline void setLebc_top_index(const int64_t value) {

    lebc_top_index_ = value;

  }


  inline void setOmega_bulk(const double value) { omega_bulk_ = value; }

  inline void setOmega_even(const double value) { omega_even_ = value; }

  inline void setOmega_odd(const double value) { omega_odd_ = value; }

  inline void setOmega_shear(const double value) { omega_shear_ = value; }

  inline void setV_s(const double value) { v_s_ = value; }


private:

  BlockDataID forceID;

  BlockDataID pdfsID;

  int64_t lebc_bot_index_;

  int64_t lebc_top_index_;

  double omega_bulk_;

  double omega_even_;

  double omega_odd_;

  double omega_shear_;

  double v_s_;

  std::unordered_map<IBlock *, gpu::GPUField<double> *> cache_pdfs_;

};


} // namespace pystencils

} // namespace walberla


#if (defined WALBERLA_CXX_COMPILER_IS_GNU) ||                                  \

    (defined WALBERLA_CXX_COMPILER_IS_CLANG)

#pragma GCC diagnostic pop

#endif

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:55

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::configure
void configure(const shared_ptr< StructuredBlockStorage > &, IBlock *)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:116

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::~StreamCollideSweepLeesEdwardsDoublePrecisionCUDA
~StreamCollideSweepLeesEdwardsDoublePrecisionCUDA()
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:66

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getSweepOnCellInterval
static std::function< void(IBlock *, gpuStream_t)> getSweepOnCellInterval(const shared_ptr< StreamCollideSweepLeesEdwardsDoublePrecisionCUDA > &kernel, const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers=1)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:89

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::setLebc_bot_index
void setLebc_bot_index(const int64_t value)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:126

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getSweepOnCellInterval
std::function< void(IBlock *)> getSweepOnCellInterval(const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers=1, gpuStream_t stream=nullptr)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:106

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getOmega_bulk
double getOmega_bulk() const
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:121

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::setV_s
void setV_s(const double value)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:136

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::operator()
void operator()(IBlock *block, gpuStream_t stream=nullptr)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:79

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getOmega_shear
double getOmega_shear() const
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:124

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getSweep
static std::function< void(IBlock *)> getSweep(const shared_ptr< StreamCollideSweepLeesEdwardsDoublePrecisionCUDA > &kernel)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:84

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getLebc_top_index
int64_t getLebc_top_index() const
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:120

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::setOmega_odd
void setOmega_odd(const double value)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:134

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::setOmega_shear
void setOmega_shear(const double value)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:135

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getOmega_even
double getOmega_even() const
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:122

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::setOmega_bulk
void setOmega_bulk(const double value)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:132

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getLebc_bot_index
int64_t getLebc_bot_index() const
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:119

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getSweep
std::function< void(IBlock *)> getSweep(gpuStream_t stream=nullptr)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:101

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA
StreamCollideSweepLeesEdwardsDoublePrecisionCUDA(BlockDataID forceID_, BlockDataID pdfsID_, int64_t lebc_bot_index, int64_t lebc_top_index, double omega_bulk, double omega_even, double omega_odd, double omega_shear, double v_s)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:57

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::run
void run(IBlock *block, gpuStream_t stream=nullptr)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.cu:298

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::setOmega_even
void setOmega_even(const double value)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:133

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getOmega_odd
double getOmega_odd() const
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:123

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::setLebc_top_index
void setLebc_top_index(const int64_t value)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:129

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::runOnCellInterval
void runOnCellInterval(const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream=nullptr)
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.cu:355

walberla::pystencils::StreamCollideSweepLeesEdwardsDoublePrecisionCUDA::getV_s
double getV_s() const
Definition StreamCollideSweepLeesEdwardsDoublePrecisionCUDA.h:125

stream
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
Definition common_cuda.cu:34

block
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:177

walberla
\file PackInfoPdfDoublePrecision.cpp \author pystencils
Definition EKWalberla.hpp:38