dox/DynamicUBBSinglePrecision_8cpp_source.html

//======================================================================================================================

//

//  This file is part of waLBerla. waLBerla is free software: you can

//  redistribute it and/or modify it under the terms of the GNU General Public

//  License as published by the Free Software Foundation, either version 3 of

//  the License, or (at your option) any later version.

//

//  waLBerla is distributed in the hope that it will be useful, but WITHOUT

//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

//  for more details.

//

//  You should have received a copy of the GNU General Public License along

//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.

//

//! \file DynamicUBBSinglePrecision.cpp

//! \author pystencils

//======================================================================================================================


// kernel generated with pystencils v1.4+1.ge851f4e, lbmpy v1.4+1.ge9efe34, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit 007e77e077ad9d22b5eed6f3d3118240993e553c


#include "DynamicUBBSinglePrecision.h"

#include "core/DataTypes.h"

#include "core/Macros.h"


#define FUNC_PREFIX


using namespace std;


namespace walberla {

namespace lbm {


#ifdef __GNUC__

#pragma GCC diagnostic push

#pragma GCC diagnostic ignored "-Wstrict-aliasing"

#pragma GCC diagnostic ignored "-Wunused-variable"

#pragma GCC diagnostic ignored "-Wconversion"

#endif


#ifdef __CUDACC__

#pragma push

#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__

#pragma nv_diag_suppress 177

#else

#pragma diag_suppress 177

#endif

#endif

// NOLINTBEGIN(readability-non-const-parameter*)


namespace internal_6cb25260a6784120b7639a911a9d03fd {


// Boundary kernel: walks the index list once and, for every record, overwrites one PDF value and fills one
// three-component entry of the force list.
//
// Index record layout as read below (28 bytes per record):
//   byte  0 / 4 / 8 : int32 x, y, z   -- used with the x/y/z strides to address the PDF field
//   byte 12         : int32 dir       -- direction index into the 19-entry tables
//   byte 16 / 20 / 24 : float         -- three components projected onto the direction vector
//                                        (presumably the prescribed wall velocity; confirm against the header)
// Force record layout as written below (24 bytes per record): three doubles at byte 0 / 8 / 16.
//
// _stride_pdfs_0.._2 are the element strides for x, y, z; _stride_pdfs_3 is the stride between PDF directions.
static FUNC_PREFIX void dynamicubbsingleprecision_boundary_DynamicUBBSinglePrecision(
    uint8_t *RESTRICT _data_forceVector, uint8_t *RESTRICT const _data_indexVector, float *RESTRICT _data_pdfs,
    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
    int64_t const _stride_pdfs_3, int32_t indexVectorSize) {
  // Direction opposite to each of the 19 directions.
  const int32_t oppositeDir[] = {0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13};

  // Direction vectors. The same table serves both as the cell offset of the written PDF and as the vector the
  // record's float triple is projected onto (the two uses had identical tables).
  const int32_t dirX[] = {0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1};
  const int32_t dirY[] = {0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0};
  const int32_t dirZ[] = {0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1};

  // Per-direction weights: one value for direction 0, one for directions 1-6, one for directions 7-18.
  const float wRest = static_cast<float>(0.33333333333333333);
  const float wAxis = static_cast<float>(0.055555555555555556);
  const float wDiag = static_cast<float>(0.027777777777777778);
  const float weights[] = {wRest, wAxis, wAxis, wAxis, wAxis, wAxis, wAxis, wDiag, wDiag, wDiag,
                           wDiag, wDiag, wDiag, wDiag, wDiag, wDiag, wDiag, wDiag, wDiag};

  const int64_t fs = _stride_pdfs_3;

#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
  for (int64_t rec = 0; rec < indexVectorSize; ++rec) {
    // Decode the 28-byte index record.
    uint8_t *const entry = &_data_indexVector[28 * rec];
    const int32_t x = *reinterpret_cast<int32_t *>(entry);
    const int32_t y = *reinterpret_cast<int32_t *>(entry + 4);
    const int32_t z = *reinterpret_cast<int32_t *>(entry + 8);
    const int32_t dir = *reinterpret_cast<int32_t *>(entry + 12);
    const float ux = *reinterpret_cast<float *>(entry + 16);
    const float uy = *reinterpret_cast<float *>(entry + 20);
    const float uz = *reinterpret_cast<float *>(entry + 24);

    // Direction-0 PDF of the addressed cell; the other directions sit at multiples of fs from here.
    float *const cell = &_data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z];

    // Sum of all 19 PDFs of the cell. The grouping and order of the additions is kept exactly as generated
    // so that the single-precision rounding is unchanged.
    const float partialX = cell[10 * fs] + cell[14 * fs] + cell[18 * fs] + cell[4 * fs] + cell[8 * fs];
    const float partialY = cell[11 * fs] + cell[15 * fs] + cell[7 * fs] + cell[fs];
    const float partialZ = cell[12 * fs] + cell[13 * fs] + cell[5 * fs];
    const float deltaRho = partialX + partialY + partialZ + cell[16 * fs] + cell[17 * fs] + cell[2 * fs] +
                           cell[3 * fs] + cell[6 * fs] + cell[9 * fs] + cell[0];
    const float rho = deltaRho + 1.0f;

    // Direction vector as floats and its (6x scaled) projection of the record's float triple.
    const float cx = static_cast<float>(dirX[dir]);
    const float cy = static_cast<float>(dirY[dir]);
    const float cz = static_cast<float>(dirZ[dir]);
    const float projection = 6.0f * cx * ux + 6.0f * cy * uy + 6.0f * cz * uz;
    const float w = weights[dir];

    // The updated value goes into the opposite-direction slot of the cell shifted by the direction vector.
    const int64_t target =
        _stride_pdfs_0 * dirX[dir] + _stride_pdfs_1 * dirY[dir] + _stride_pdfs_2 * dirZ[dir] + fs * oppositeDir[dir];
    cell[target] = -rho * projection * w + cell[fs * dir];

    // Same correction term plus twice the PDF in direction dir, spread along the direction vector.
    const float f = -rho * projection * w + 2.0f * cell[fs * dir];
    double *const force = reinterpret_cast<double *>(&_data_forceVector[24 * rec]);
    force[0] = static_cast<double>(f * cx);
    force[1] = static_cast<double>(f * cy);
    force[2] = static_cast<double>(f * cz);
  }
}


} // namespace internal_6cb25260a6784120b7639a911a9d03fd


// NOLINTEND(readability-non-const-parameter*)

#ifdef __GNUC__

#pragma GCC diagnostic pop

#endif


#ifdef __CUDACC__

#pragma pop

#endif


// Collects the raw buffers stored on `block` and runs the boundary kernel over the index list selected by
// `type`. Returns early, before the force list or the PDF field is looked up, when that index list is empty.
void DynamicUBBSinglePrecision::run_impl(IBlock *block, IndexVectors::Type type) {
  auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
  const int32_t indexVectorSize = int32_c(indexVectors->indexVector(type).size());
  if (indexVectorSize == 0) {
    return;
  }

  // The kernel addresses both lists byte-wise, so hand them over as uint8_t pointers.
  uint8_t *indexBytes = reinterpret_cast<uint8_t *>(indexVectors->pointerCpu(type));

  auto *forceVector = block->getData<ForceVector>(forceVectorID);
  // One force entry is written per index entry, so both lists must have the same length.
  WALBERLA_ASSERT_EQUAL(indexVectorSize, int32_c(forceVector->forceVector().size()))
  uint8_t *forceBytes = reinterpret_cast<uint8_t *>(forceVector->pointerCpu());

  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()))

  // Base pointer at (0, 0, 0, 0) plus the four strides is all the kernel needs to address the field.
  float *RESTRICT pdfBase = pdfs->dataAt(0, 0, 0, 0);
  const int64_t strideX = static_cast<int64_t>(pdfs->xStride());
  const int64_t strideY = static_cast<int64_t>(pdfs->yStride());
  const int64_t strideZ = static_cast<int64_t>(pdfs->zStride());
  const int64_t strideF = static_cast<int64_t>(pdfs->fStride());

  internal_6cb25260a6784120b7639a911a9d03fd::dynamicubbsingleprecision_boundary_DynamicUBBSinglePrecision(
      forceBytes, indexBytes, pdfBase, strideX, strideY, strideZ, strideF, indexVectorSize);
}


// Runs the boundary treatment on `block` using the IndexVectors::ALL index list.
void DynamicUBBSinglePrecision::run(IBlock *block) {

  run_impl(block, IndexVectors::ALL);

}


// Runs the boundary treatment on `block` using only the IndexVectors::INNER index list.
// NOTE(review): how entries are split between INNER and OUTER is defined where the index lists are filled,
// not in this file -- confirm there before relying on a particular split.
void DynamicUBBSinglePrecision::inner(IBlock *block) {

  run_impl(block, IndexVectors::INNER);

}


// Runs the boundary treatment on `block` using only the IndexVectors::OUTER index list.
// NOTE(review): how entries are split between INNER and OUTER is defined where the index lists are filled,
// not in this file -- confirm there before relying on a particular split.
void DynamicUBBSinglePrecision::outer(IBlock *block) {

  run_impl(block, IndexVectors::OUTER);

}


} // namespace lbm

} // namespace walberla

FUNC_PREFIX
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
Definition AdvectiveFluxKernel_double_precision.cpp:28

RESTRICT
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
Definition AdvectiveFluxKernel_double_precision.h:40

DynamicUBBSinglePrecision.h

walberla::lbm::DynamicUBBSinglePrecision::IndexVectors::INNER
@ INNER
Definition DynamicUBBSinglePrecision.h:96

walberla::lbm::DynamicUBBSinglePrecision::IndexVectors::ALL
@ ALL
Definition DynamicUBBSinglePrecision.h:96

walberla::lbm::DynamicUBBSinglePrecision::IndexVectors::OUTER
@ OUTER
Definition DynamicUBBSinglePrecision.h:96

walberla::lbm::DynamicUBBSinglePrecision
Definition DynamicUBBSinglePrecision.h:73

walberla::lbm::DynamicUBBSinglePrecision::outer
void outer(IBlock *block)
Definition DynamicUBBSinglePrecision.cpp:135

walberla::lbm::DynamicUBBSinglePrecision::pdfsID
BlockDataID pdfsID
Definition DynamicUBBSinglePrecision.h:675

walberla::lbm::DynamicUBBSinglePrecision::run
void run(IBlock *block)
Definition DynamicUBBSinglePrecision.cpp:127

walberla::lbm::DynamicUBBSinglePrecision::inner
void inner(IBlock *block)
Definition DynamicUBBSinglePrecision.cpp:131

stream
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
Definition common_cuda.cu:34

block
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:177

std
STL namespace.

walberla::lbm::internal_6cb25260a6784120b7639a911a9d03fd::dynamicubbsingleprecision_boundary_DynamicUBBSinglePrecision
static FUNC_PREFIX void dynamicubbsingleprecision_boundary_DynamicUBBSinglePrecision(uint8_t *RESTRICT _data_forceVector, uint8_t *RESTRICT const _data_indexVector, float *RESTRICT _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int32_t indexVectorSize)
Definition DynamicUBBSinglePrecision.cpp:50

walberla
\file PackInfoPdfDoublePrecision.cpp \author pystencils
Definition EKWalberla.hpp:38