dox/ReactionKernelBulk__3__single__precision__CUDA_8cu_source.html

//======================================================================================================================

//

//  This file is part of waLBerla. waLBerla is free software: you can

//  redistribute it and/or modify it under the terms of the GNU General Public

//  License as published by the Free Software Foundation, either version 3 of

//  the License, or (at your option) any later version.

//

//  waLBerla is distributed in the hope that it will be useful, but WITHOUT

//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

//  for more details.

//

//  You should have received a copy of the GNU General Public License along

//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.

//

//! \file ReactionKernelBulk_3_single_precision_CUDA.cu

//! \author pystencils

//======================================================================================================================


// kernel generated with pystencils v1.4+1.ge851f4e, lbmpy v1.4+1.ge9efe34, sympy v1.12.1, lbmpy_walberla/pystencils_walberla from waLBerla commit 007e77e077ad9d22b5eed6f3d3118240993e553c


#include <cmath>


#include "ReactionKernelBulk_3_single_precision_CUDA.h"

#include "core/DataTypes.h"

#include "core/Macros.h"


#define FUNC_PREFIX __global__


#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)

#pragma GCC diagnostic push

#pragma GCC diagnostic ignored "-Wfloat-equal"

#pragma GCC diagnostic ignored "-Wshadow"

#pragma GCC diagnostic ignored "-Wconversion"

#pragma GCC diagnostic ignored "-Wunused-variable"

#endif


#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)

#pragma warning push

#pragma warning(disable : 1599)

#endif


using namespace std;


namespace walberla {

namespace pystencils {


namespace internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda {

//! Device kernel that updates one cell of three float fields in place.
//!
//! Every thread derives a cell coordinate (x, y, z) from its block and thread
//! indices. Threads whose coordinate lies outside the given extents return
//! without touching memory. For an in-range cell the kernel reads one value
//! from each field, evaluates
//!
//!     rate_factor = rate_coefficient * rho_0^order_0 * rho_1^order_1 * rho_2^order_2
//!
//! and stores rho_i + rate_factor * stoech_i back into field i (i = 0, 1, 2).
//!
//! The kernel is declared with __launch_bounds__(256).
//!
//! \param _data_rho_0,_data_rho_1,_data_rho_2  pointers to the cell addressed by
//!        coordinate (0, 0, 0); each field is both read and written
//! \param _size_rho_0_0,_size_rho_0_1,_size_rho_0_2  number of cells to process
//!        along x, y and z
//! \param _stride_rho_K_D  element stride of field K along dimension D
//! \param order_0,order_1,order_2  exponents applied to the three field values
//! \param rate_coefficient  scalar prefactor of the rate expression
//! \param stoech_0,stoech_1,stoech_2  per-field factors multiplied with the rate
static FUNC_PREFIX __launch_bounds__(256) void reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda(
    float *RESTRICT _data_rho_0, float *RESTRICT _data_rho_1, float *RESTRICT _data_rho_2,
    int64_t const _size_rho_0_0, int64_t const _size_rho_0_1, int64_t const _size_rho_0_2,
    int64_t const _stride_rho_0_0, int64_t const _stride_rho_0_1, int64_t const _stride_rho_0_2,
    int64_t const _stride_rho_1_0, int64_t const _stride_rho_1_1, int64_t const _stride_rho_1_2,
    int64_t const _stride_rho_2_0, int64_t const _stride_rho_2_1, int64_t const _stride_rho_2_2,
    float order_0, float order_1, float order_2,
    float rate_coefficient,
    float stoech_0, float stoech_1, float stoech_2) {
  // Cell coordinate handled by this thread.
  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x;
  const int64_t y = blockDim.y * blockIdx.y + threadIdx.y;
  const int64_t z = blockDim.z * blockIdx.z + threadIdx.z;

  // The grid may overshoot the extents; surplus threads have nothing to do.
  if (x >= _size_rho_0_0 || y >= _size_rho_0_1 || z >= _size_rho_0_2) {
    return;
  }

  // Each field carries its own strides, so the linear offsets are computed separately.
  const int64_t offset_0 = _stride_rho_0_0 * x + _stride_rho_0_1 * y + _stride_rho_0_2 * z;
  const int64_t offset_1 = _stride_rho_1_0 * x + _stride_rho_1_1 * y + _stride_rho_1_2 * z;
  const int64_t offset_2 = _stride_rho_2_0 * x + _stride_rho_2_1 * y + _stride_rho_2_2 * z;

  // Read all three inputs before any write so the rate uses the old values.
  const float value_0 = _data_rho_0[offset_0];
  const float value_1 = _data_rho_1[offset_1];
  const float value_2 = _data_rho_2[offset_2];

  const float rate_factor = rate_coefficient * powf(value_0, order_0) * powf(value_1, order_1) * powf(value_2, order_2);

  _data_rho_0[offset_0] = value_0 + rate_factor * stoech_0;
  _data_rho_1[offset_1] = value_1 + rate_factor * stoech_1;
  _data_rho_2[offset_2] = value_2 + rate_factor * stoech_2;
}

} // namespace internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda


//! Launches the kernel on the three fields of one block.
//!
//! Processing starts at cell (0, 0, 0) of each field and covers
//! rho_0->xSize() x rho_0->ySize() x rho_0->zSize() cells. Only rho_0 supplies
//! the extents; rho_1 and rho_2 contribute their data pointer and strides.
//! The kernel is enqueued on \p stream; this function neither synchronizes nor
//! queries the launch status.
//!
//! \param block   block whose fields (looked up via rho_0ID, rho_1ID, rho_2ID) are updated
//! \param stream  GPU stream the kernel is launched on
void ReactionKernelBulk_3_single_precision_CUDA::run(IBlock *block, gpuStream_t stream) {
  auto rho_0 = block->getData<gpu::GPUField<float>>(rho_0ID);
  auto rho_1 = block->getData<gpu::GPUField<float>>(rho_1ID);
  auto rho_2 = block->getData<gpu::GPUField<float>>(rho_2ID);

  // Base pointers: the cell (0, 0, 0), component 0 of every field.
  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_0->nrOfGhostLayers()))
  float *RESTRICT base_0 = rho_0->dataAt(0, 0, 0, 0);
  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_1->nrOfGhostLayers()))
  float *RESTRICT base_1 = rho_1->dataAt(0, 0, 0, 0);
  WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(rho_2->nrOfGhostLayers()))
  float *RESTRICT base_2 = rho_2->dataAt(0, 0, 0, 0);

  // Iteration extents, taken from rho_0 only.
  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(int64_c(rho_0->xSize()) + 0))
  const int64_t cells_x = int64_c(rho_0->xSize());
  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(int64_c(rho_0->ySize()) + 0))
  const int64_t cells_y = int64_c(rho_0->ySize());
  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(int64_c(rho_0->zSize()) + 0))
  const int64_t cells_z = int64_c(rho_0->zSize());

  // Thread-block shape. x takes up to 128 threads; y and z share what is left
  // of the budgets below (z is additionally capped at 64, y at 1024).
  // NOTE(review): a zero extent makes the divisions below divide by zero.
  const int64_t threads_x = (128 < cells_x) ? 128 : cells_x;
  const int64_t budget_y = 2 * ((int64_t)(128) / threads_x);
  const int64_t fit_y = (cells_y < budget_y) ? cells_y : budget_y;
  const int64_t threads_y = (1024 < fit_y) ? 1024 : fit_y;
  // The z budget is derived from the un-capped y value, not from threads_y.
  const int64_t budget_z = (int64_t)(256) / (threads_x * fit_y);
  const int64_t fit_z = (cells_z < budget_z) ? cells_z : budget_z;
  const int64_t threads_z = (64 < fit_z) ? 64 : fit_z;

  // Number of thread blocks per dimension: cells / threads, rounded up.
  const auto blocksFor = [](int64_t cells, int64_t threads) -> int64_t {
    return (cells % threads == 0) ? cells / threads : (cells / threads) + 1;
  };

  dim3 blockShape(uint32_c(threads_x), uint32_c(threads_y), uint32_c(threads_z));
  dim3 gridShape(uint32_c(blocksFor(cells_x, threads_x)),
                 uint32_c(blocksFor(cells_y, threads_y)),
                 uint32_c(blocksFor(cells_z, threads_z)));

  internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::
      reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda<<<gridShape, blockShape, 0, stream>>>(
          base_0, base_1, base_2,
          cells_x, cells_y, cells_z,
          int64_t(rho_0->xStride()), int64_t(rho_0->yStride()), int64_t(rho_0->zStride()),
          int64_t(rho_1->xStride()), int64_t(rho_1->yStride()), int64_t(rho_1->zStride()),
          int64_t(rho_2->xStride()), int64_t(rho_2->yStride()), int64_t(rho_2->zStride()),
          this->order_0_, this->order_1_, this->order_2_,
          this->rate_coefficient_,
          this->stoech_0_, this->stoech_1_, this->stoech_2_);
}


//! Launches the kernel on the part of a global cell interval that lies in one block.
//!
//! The given interval is intersected with the block's cell bounding box
//! (expanded by \p ghostLayers) and transformed to block-local coordinates.
//! If nothing remains, the function returns without launching. Otherwise the
//! kernel is enqueued on \p stream, starting at the interval's minimum corner
//! and covering ci.xSize() x ci.ySize() x ci.zSize() cells. No synchronization
//! or launch-status query is performed here.
//!
//! \param blocks              block storage used for the bounding box and the coordinate transform
//! \param globalCellInterval  cells to process, in global coordinates
//! \param ghostLayers         amount by which the block's bounding box is expanded before clipping
//! \param block               block whose fields (rho_0ID, rho_1ID, rho_2ID) are updated
//! \param stream              GPU stream the kernel is launched on
void ReactionKernelBulk_3_single_precision_CUDA::runOnCellInterval(const shared_ptr<StructuredBlockStorage> &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream) {
  // Clip the requested interval to this block and switch to block-local coordinates.
  CellInterval expandedBlockBox = blocks->getBlockCellBB(*block);
  expandedBlockBox.expand(ghostLayers);
  CellInterval ci = globalCellInterval;
  ci.intersect(expandedBlockBox);
  blocks->transformGlobalToBlockLocalCellInterval(ci, *block);
  if (ci.empty()) {
    return;
  }

  auto rho_0 = block->getData<gpu::GPUField<float>>(rho_0ID);
  auto rho_1 = block->getData<gpu::GPUField<float>>(rho_1ID);
  auto rho_2 = block->getData<gpu::GPUField<float>>(rho_2ID);

  // Base pointers: the interval's minimum corner, component 0 of every field.
  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_0->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_0->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_0->nrOfGhostLayers()))
  float *RESTRICT base_0 = rho_0->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_1->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_1->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_1->nrOfGhostLayers()))
  float *RESTRICT base_1 = rho_1->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(rho_2->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(rho_2->nrOfGhostLayers()))
  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(rho_2->nrOfGhostLayers()))
  float *RESTRICT base_2 = rho_2->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);

  // Iteration extents are those of the clipped interval.
  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
  const int64_t cells_x = int64_c(ci.xSize());
  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
  const int64_t cells_y = int64_c(ci.ySize());
  WALBERLA_ASSERT_GREATER_EQUAL(rho_0->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
  const int64_t cells_z = int64_c(ci.zSize());

  // Thread-block shape. x takes up to 128 threads; y and z share what is left
  // of the budgets below (z is additionally capped at 64, y at 1024).
  const int64_t threads_x = (128 < cells_x) ? 128 : cells_x;
  const int64_t budget_y = 2 * ((int64_t)(128) / threads_x);
  const int64_t fit_y = (cells_y < budget_y) ? cells_y : budget_y;
  const int64_t threads_y = (1024 < fit_y) ? 1024 : fit_y;
  // The z budget is derived from the un-capped y value, not from threads_y.
  const int64_t budget_z = (int64_t)(256) / (threads_x * fit_y);
  const int64_t fit_z = (cells_z < budget_z) ? cells_z : budget_z;
  const int64_t threads_z = (64 < fit_z) ? 64 : fit_z;

  // Number of thread blocks per dimension: cells / threads, rounded up.
  const auto blocksFor = [](int64_t cells, int64_t threads) -> int64_t {
    return (cells % threads == 0) ? cells / threads : (cells / threads) + 1;
  };

  dim3 blockShape(uint32_c(threads_x), uint32_c(threads_y), uint32_c(threads_z));
  dim3 gridShape(uint32_c(blocksFor(cells_x, threads_x)),
                 uint32_c(blocksFor(cells_y, threads_y)),
                 uint32_c(blocksFor(cells_z, threads_z)));

  internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::
      reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda<<<gridShape, blockShape, 0, stream>>>(
          base_0, base_1, base_2,
          cells_x, cells_y, cells_z,
          int64_t(rho_0->xStride()), int64_t(rho_0->yStride()), int64_t(rho_0->zStride()),
          int64_t(rho_1->xStride()), int64_t(rho_1->yStride()), int64_t(rho_1->zStride()),
          int64_t(rho_2->xStride()), int64_t(rho_2->yStride()), int64_t(rho_2->zStride()),
          this->order_0_, this->order_1_, this->order_2_,
          this->rate_coefficient_,
          this->stoech_0_, this->stoech_1_, this->stoech_2_);
}


} // namespace pystencils

} // namespace walberla


#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)

#pragma GCC diagnostic pop

#endif


#if (defined WALBERLA_CXX_COMPILER_IS_INTEL)

#pragma warning pop

#endif

FUNC_PREFIX
#define FUNC_PREFIX
\file AdvectiveFluxKernel_double_precision.cpp \author pystencils
Definition AdvectiveFluxKernel_double_precision.cpp:28

RESTRICT
#define RESTRICT
\file AdvectiveFluxKernel_double_precision.h \author pystencils
Definition AdvectiveFluxKernel_double_precision.h:40

ReactionKernelBulk_3_single_precision_CUDA.h

walberla::pystencils::ReactionKernelBulk_3_single_precision_CUDA::run
void run(IBlock *block, gpuStream_t stream=nullptr)
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:65

walberla::pystencils::ReactionKernelBulk_3_single_precision_CUDA::runOnCellInterval
void runOnCellInterval(const shared_ptr< StructuredBlockStorage > &blocks, const CellInterval &globalCellInterval, cell_idx_t ghostLayers, IBlock *block, gpuStream_t stream=nullptr)
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:104

stream
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
Definition common_cuda.cu:34

block
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:175

std
STL namespace.

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::order_1
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const float float order_1
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_data_rho_2
static FUNC_PREFIX float *RESTRICT float *RESTRICT _data_rho_2
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::stoech_1
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const float float float float float float stoech_1
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_data_rho_1
static FUNC_PREFIX float *RESTRICT _data_rho_1
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::rate_coefficient
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const float float float float rate_coefficient
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_stride_rho_2_1
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_1
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_stride_rho_2_0
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_0
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_stride_rho_1_1
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1_1
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::__launch_bounds__
static FUNC_PREFIX __launch_bounds__(256) void reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda(float *RESTRICT _data_rho_0

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_stride_rho_0_0
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const _stride_rho_0_0
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::order_0
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const float order_0
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_size_rho_0_0
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const _size_rho_0_0
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::stoech_0
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const float float float float float stoech_0
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_size_rho_0_1
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const _size_rho_0_1
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_stride_rho_2_2
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_2_2
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_stride_rho_0_1
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_0_1
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_stride_rho_1_0
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1_0
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_size_rho_0_2
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const _size_rho_0_2
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_stride_rho_0_2
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_0_2
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::order_2
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const float float float order_2
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla::pystencils::internal_reactionkernelbulk_3_single_precision_cuda_reactionkernelbulk_3_single_precision_cuda::_stride_rho_1_2
static FUNC_PREFIX float *RESTRICT float *RESTRICT int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const int64_t const _stride_rho_1_2
Definition ReactionKernelBulk_3_single_precision_CUDA.cu:49

walberla
\file PackInfoPdfDoublePrecision.cpp \author pystencils
Definition EKWalberla.hpp:38