ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
Dirichlet_single_precision_CUDA.h
Go to the documentation of this file.
1//======================================================================================================================
2//
3// This file is part of waLBerla. waLBerla is free software: you can
4// redistribute it and/or modify it under the terms of the GNU General Public
5// License as published by the Free Software Foundation, either version 3 of
6// the License, or (at your option) any later version.
7//
8// waLBerla is distributed in the hope that it will be useful, but WITHOUT
9// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11// for more details.
12//
13// You should have received a copy of the GNU General Public License along
14// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
15//
16//! \file Dirichlet_single_precision_CUDA.h
17//! \author pystencils
18//======================================================================================================================
19
20// kernel generated with pystencils v1.4+1.ge851f4e, lbmpy v1.4+1.ge9efe34,
21// sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit
22// 007e77e077ad9d22b5eed6f3d3118240993e553c
23
24#pragma once
25#include "core/DataTypes.h"
26#include "core/logging/Logging.h"
27
28#include "blockforest/StructuredBlockForest.h"
29#include "core/debug/Debug.h"
30#include "domain_decomposition/BlockDataID.h"
31#include "domain_decomposition/IBlock.h"
32#include "field/FlagField.h"
33#include "gpu/FieldCopy.h"
34#include "gpu/GPUField.h"
35#include "gpu/GPUWrapper.h"
36
37#include <functional>
38#include <memory>
39#include <vector>
40
41#ifdef __GNUC__
42#define RESTRICT __restrict__
43#else
44#define RESTRICT
45#endif
46
47#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
48using walberla::half;
49#endif
50
51namespace walberla {
52namespace pystencils {
53
55public:
56 struct IndexInfo {
57 int32_t x;
58 int32_t y;
59 int32_t z;
60 int32_t dir;
61 float value;
62 IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_)
63 : x(x_), y(y_), z(z_), dir(dir_), value() {}
64 bool operator==(const IndexInfo &o) const {
65 return x == o.x && y == o.y && z == o.z && dir == o.dir &&
66 floatIsEqual(value, o.value);
67 }
68 };
69
71 public:
72 using CpuIndexVector = std::vector<IndexInfo>;
73
74 enum Type { ALL = 0, INNER = 1, OUTER = 2, NUM_TYPES = 3 };
75
76 IndexVectors() = default;
77 bool operator==(IndexVectors const &other) const {
78 return other.cpuVectors_ == cpuVectors_;
79 }
80
82 for (auto &gpuVec : gpuVectors_) {
83 if (gpuVec) {
84 WALBERLA_GPU_CHECK(gpuFree(gpuVec));
85 }
86 }
87 }
88 auto &indexVector(Type t) { return cpuVectors_[t]; }
89 auto const &indexVector(Type t) const { return cpuVectors_[t]; }
91 return cpuVectors_[t].empty() ? nullptr : cpuVectors_[t].data();
92 }
93
94 IndexInfo *pointerGpu(Type t) { return gpuVectors_[t]; }
95 void syncGPU() {
96 for (auto &gpuVec : gpuVectors_) {
97 if (gpuVec) {
98 WALBERLA_GPU_CHECK(gpuFree(gpuVec));
99 gpuVec = nullptr;
100 }
101 }
102 gpuVectors_.resize(cpuVectors_.size());
103
104 WALBERLA_ASSERT_EQUAL(cpuVectors_.size(), NUM_TYPES);
105 for (size_t i = 0; i < cpuVectors_.size(); ++i) {
106 auto &gpuVec = gpuVectors_[i];
107 auto &cpuVec = cpuVectors_[i];
108 if (cpuVec.empty()) {
109 continue;
110 }
111 WALBERLA_GPU_CHECK(
112 gpuMalloc(&gpuVec, sizeof(IndexInfo) * cpuVec.size()));
113 WALBERLA_GPU_CHECK(gpuMemcpy(gpuVec, cpuVec.data(),
114 sizeof(IndexInfo) * cpuVec.size(),
115 gpuMemcpyHostToDevice));
116 }
117 }
118
119 private:
120 std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES};
121
122 using GpuIndexVector = IndexInfo *;
123 std::vector<GpuIndexVector> gpuVectors_;
124 };
125
127 const std::shared_ptr<StructuredBlockForest> &blocks,
128 BlockDataID fieldID_,
129 std::function<float(const Cell &,
130 const shared_ptr<StructuredBlockForest> &, IBlock &)>
131 &dirichletCallback)
132 : elementInitaliser(dirichletCallback), fieldID(fieldID_) {
133 auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
134 return new IndexVectors();
135 };
136 indexVectorID = blocks->addStructuredBlockData<IndexVectors>(
137 createIdxVector, "IndexField_Dirichlet_single_precision_CUDA");
138 }
139
140 void run(IBlock *block, gpuStream_t stream = nullptr);
141
142 void operator()(IBlock *block, gpuStream_t stream = nullptr) {
143 run(block, stream);
144 }
145
146 void inner(IBlock *block, gpuStream_t stream = nullptr);
147
148 void outer(IBlock *block, gpuStream_t stream = nullptr);
149
150 Vector3<float> getForce(IBlock * /*block*/) {
151
152 WALBERLA_ABORT(
153 "Boundary condition was not generated including force calculation.")
154 return Vector3<float>(float_c(0.0));
155 }
156
157 std::function<void(IBlock *)> getSweep(gpuStream_t stream = nullptr) {
158 return [this, stream](IBlock *b) { this->run(b, stream); };
159 }
160
161 std::function<void(IBlock *)> getInnerSweep(gpuStream_t stream = nullptr) {
162 return [this, stream](IBlock *b) { this->inner(b, stream); };
163 }
164
165 std::function<void(IBlock *)> getOuterSweep(gpuStream_t stream = nullptr) {
166 return [this, stream](IBlock *b) { this->outer(b, stream); };
167 }
168
169 template <typename FlagField_T>
170 void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
171 ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
172 FlagUID domainFlagUID) {
173 for (auto &block : *blocks)
174 fillFromFlagField<FlagField_T>(blocks, &block, flagFieldID,
175 boundaryFlagUID, domainFlagUID);
176 }
177
178 template <typename FlagField_T>
179 void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
180 IBlock *block, ConstBlockDataID flagFieldID,
181 FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
182 auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
183 auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
184 auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
185 auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
186
187 auto *flagField = block->getData<FlagField_T>(flagFieldID);
188
189 if (!(flagField->flagExists(boundaryFlagUID) &&
190 flagField->flagExists(domainFlagUID)))
191 return;
192
193 auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
194 auto domainFlag = flagField->getFlag(domainFlagUID);
195
196 auto inner = flagField->xyzSize();
197 inner.expand(cell_idx_t(-1));
198
199 indexVectorAll.clear();
200 indexVectorInner.clear();
201 indexVectorOuter.clear();
202
203 auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
204 for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
205 ++it) {
206
207 if (!isFlagSet(it, boundaryFlag))
208 continue;
209 if (flagWithGLayers.contains(it.x() + cell_idx_c(0),
210 it.y() + cell_idx_c(0),
211 it.z() + cell_idx_c(0)) &&
212 isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
213
214 auto element = IndexInfo(it.x(), it.y(), it.z(), 0);
215 float InitialisatonAdditionalData =
216 elementInitaliser(Cell(it.x(), it.y(), it.z()), blocks, *block);
217 element.value = InitialisatonAdditionalData;
218 indexVectorAll.emplace_back(element);
219 if (inner.contains(it.x(), it.y(), it.z()))
220 indexVectorInner.emplace_back(element);
221 else
222 indexVectorOuter.emplace_back(element);
223 }
224 }
225
226 indexVectors->syncGPU();
227 }
228
229private:
230 void run_impl(IBlock *block, IndexVectors::Type type,
231 gpuStream_t stream = nullptr);
232
233 BlockDataID indexVectorID;
234
235 std::function<float(const Cell &, const shared_ptr<StructuredBlockForest> &,
236 IBlock &)>
237 elementInitaliser;
238
239public:
240 BlockDataID fieldID;
241};
242
243} // namespace pystencils
244} // namespace walberla
Definition Cell.hpp:96
std::function< void(IBlock *)> getOuterSweep(gpuStream_t stream=nullptr)
Dirichlet_single_precision_CUDA(const std::shared_ptr< StructuredBlockForest > &blocks, BlockDataID fieldID_, std::function< float(const Cell &, const shared_ptr< StructuredBlockForest > &, IBlock &)> &dirichletCallback)
std::function< void(IBlock *)> getInnerSweep(gpuStream_t stream=nullptr)
void fillFromFlagField(const std::shared_ptr< StructuredBlockForest > &blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
void operator()(IBlock *block, gpuStream_t stream=nullptr)
void fillFromFlagField(const shared_ptr< StructuredBlockForest > &blocks, IBlock *block, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID)
std::function< void(IBlock *)> getSweep(gpuStream_t stream=nullptr)
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:176
\file PackInfoPdfDoublePrecision.cpp \author pystencils