30#if defined(__CUDACC__)
37#include <blockforest/communication/UniformBufferedScheme.h>
38#include <domain_decomposition/BlockDataID.h>
39#include <field/AddToStorage.h>
40#include <field/GhostLayerField.h>
41#include <field/communication/PackInfo.h>
42#include <field/vtk/VTKWriter.h>
43#include <stencil/D3Q27.h>
44#include <waLBerlaDefinitions.h>
45#if defined(__CUDACC__)
46#include <gpu/AddGPUFieldToStorage.h>
47#include <gpu/FieldAccessor.h>
48#include <gpu/FieldIndexing.h>
49#include <gpu/GPUField.h>
50#include <gpu/HostFieldAllocator.h>
51#include <gpu/Kernel.h>
52#include <gpu/communication/MemcpyPackInfo.h>
53#include <gpu/communication/UniformGPUScheme.h>
57#pragma clang diagnostic push
58#pragma clang diagnostic ignored "-Wfloat-conversion"
59#pragma clang diagnostic ignored "-Wimplicit-float-conversion"
60#elif defined(__GNUC__) or defined(__GNUG__)
61#pragma GCC diagnostic push
62#pragma GCC diagnostic ignored "-Wfloat-conversion"
66#include <heffte_backends.h>
67#include <heffte_geometry.h>
70#pragma clang diagnostic pop
71#elif defined(__GNUC__) or defined(__GNUG__)
72#pragma GCC diagnostic pop
89template <
typename T, std::
size_t N>
91 std::array<T, N> res{};
92 std::ranges::copy(vec, res.
begin());
97 return (z * dim[1] + y) * dim[0] + x;
100template <
typename FloatType, lbmpy::Arch Architecture>
103 template <
typename T> FloatType FloatType_c(T t) {
104 return numeric_cast<FloatType>(t);
108 template <
typename FT, lbmpy::Arch AT = lbmpy::Arch::CPU>
struct FieldTrait {
112 template <
class Field>
113 using PackInfo = field::communication::PackInfo<Field>;
114 template <
class Stencil>
116 blockforest::communication::UniformBufferedScheme<Stencil>;
119#if defined(__CUDACC__)
121 using ComplexType = std::conditional_t<std::is_same_v<FloatType, float>,
122 cufftComplex, cufftDoubleComplex>;
125 template <
class Field>
126 using PackInfo = gpu::communication::MemcpyPackInfo<Field>;
127 template <
class Stencil>
140 template <
class Field>
146#if defined(__CUDACC__)
147 using backend = heffte::backend::cufft;
153 std::unique_ptr<heffte::fft3d<backend>>
fft;
154 std::unique_ptr<heffte::fft3d<backend>::buffer_container<
ComplexType>>
158 auto const &grid_range) {
161 box_in = std::make_unique<heffte::box3d<>>(
164 box_out = std::make_unique<heffte::box3d<>>(
168 MPI_COMM_WORLD, options);
169 buffer = std::make_unique<
170 heffte::fft3d<backend>::buffer_container<
ComplexType>>(
171 fft->size_workspace());
176 BlockDataID m_potential_field_with_ghosts_id;
177#if defined(__CUDACC__)
178 BlockDataID m_potential_field_id;
179 BlockDataID m_greens_function_field_id;
180 BlockDataID m_potential_fourier_id;
183 std::unique_ptr<heffte_container<ComplexType>> heffte;
184 std::shared_ptr<FullCommunicator> m_full_communication;
186#if defined(__CUDACC__)
187 std::optional<BlockDataID> m_potential_cpu_field_id;
188 std::shared_ptr<gpu::HostFieldAllocator<FloatType>> m_host_field_allocator;
190 using GreenFunctionField = gpu::GPUField<FloatType>;
191 using PotentialFourier = gpu::GPUField<ComplexType>;
193 walberla::gpu::Kernel<void (*)(walberla::gpu::FieldAccessor<ComplexType>,
194 walberla::gpu::FieldAccessor<FloatType>)>
196 walberla::gpu::Kernel<void (*)(walberla::gpu::FieldAccessor<FloatType>,
197 walberla::gpu::FieldAccessor<FloatType>)>
200 std::vector<FloatType> m_greens;
201 std::vector<FloatType> m_potential;
202 std::vector<ComplexType> m_potential_fourier;
210#if defined(__CUDACC__)
212 kernel_greens(gpu::make_kernel(
213 multiply_by_greens_function<FloatType,
ComplexType>)),
214 kernel_move_fields(gpu::make_kernel(move_field<FloatType>))
218#if defined(__CUDACC__)
219 m_potential_field_id = gpu::addGPUFieldToStorage<PotentialField>(
220 blocks,
"potential field", 1u, field::fzyx, 0u,
false);
221 m_potential_field_with_ghosts_id =
222 gpu::addGPUFieldToStorage<PotentialField>(
223 blocks,
"potential field with ghosts", 1u, field::fzyx,
225 m_greens_function_field_id = gpu::addGPUFieldToStorage<GreenFunctionField>(
226 blocks,
"greens function", 1u, field::fzyx, 0u,
false);
227 m_potential_fourier_id = gpu::addGPUFieldToStorage<PotentialFourier>(
228 blocks,
"fourier field", 1u, field::fzyx, 0u,
false);
232 for (
auto &
block : *blocks) {
233 auto green_field =
block.template getData<GreenFunctionField>(
234 m_greens_function_field_id);
235 auto kernel = gpu::make_kernel(create_greens_function<FloatType>);
236 kernel.addFieldIndexingParam(
237 gpu::FieldIndexing<FloatType>::xyz(*green_field));
238 kernel.addParam(grid_range.first[0]);
239 kernel.addParam(grid_range.first[1]);
240 kernel.addParam(grid_range.first[2]);
241 kernel.addParam(grid_range.second[0]);
242 kernel.addParam(grid_range.second[1]);
243 kernel.addParam(grid_range.second[2]);
244 kernel.addParam(global_dim[0]);
245 kernel.addParam(global_dim[1]);
246 kernel.addParam(global_dim[2]);
250 block.template getData<PotentialField>(m_potential_field_id);
251 auto potential_ghosts =
block.template getData<PotentialField>(
252 m_potential_field_with_ghosts_id);
253 auto green =
block.template getData<GreenFunctionField>(
254 m_greens_function_field_id);
256 block.template getData<PotentialFourier>(m_potential_fourier_id);
259 gpu::make_kernel(multiply_by_greens_function<FloatType, ComplexType>);
260 kernel_greens.addFieldIndexingParam(
261 gpu::FieldIndexing<ComplexType>::allInner(*fourier));
262 kernel_greens.addFieldIndexingParam(
263 gpu::FieldIndexing<FloatType>::allInner(*green));
265 kernel_move_fields = gpu::make_kernel(move_field<FloatType>);
266 kernel_move_fields.addFieldIndexingParam(
267 gpu::FieldIndexing<FloatType>::xyz(*potential_ghosts));
268 kernel_move_fields.addFieldIndexingParam(
269 gpu::FieldIndexing<FloatType>::xyz(*
potential));
272 m_host_field_allocator =
273 std::make_shared<gpu::HostFieldAllocator<FloatType>>();
275 m_potential_field_with_ghosts_id = field::addToStorage<PotentialField>(
276 blocks,
"potential field with ghosts", 0., field::fzyx,
280 m_full_communication = std::make_shared<FullCommunicator>(blocks);
281 m_full_communication->addPackInfo(
283 m_potential_field_with_ghosts_id));
286 [[nodiscard]]
bool is_gpu() const noexcept
override {
291 return std::is_same_v<FloatType, double>;
295 return static_cast<std::size_t
>(m_potential_field_with_ghosts_id);
298 void setup_fft([[maybe_unused]]
bool use_gpu_aware)
override {
300 heffte::plan_options options = heffte::default_options<
302#if defined(__CUDACC__)
304 options.use_reorder =
false;
305 options.algorithm = heffte::reshape_algorithm::p2p_plined;
306 options.use_pencils =
true;
307 options.use_gpu_aware = use_gpu_aware;
311 std::make_unique<heffte_container<ComplexType>>(options, grid_range);
312#if not defined(__CUDACC__)
314 m_potential = std::vector<FloatType>(heffte->fft->size_inbox());
315 m_greens = std::vector<FloatType>(heffte->fft->size_outbox());
316 m_potential_fourier =
317 std::vector<ComplexType>(heffte->fft->size_outbox());
318 auto const dim = grid_range.second - grid_range.first;
320 for (
int x = 0; x < dim[0]; x++) {
321 for (
int y = 0; y < dim[1]; y++) {
322 for (
int z = 0; z < dim[2]; z++) {
324 greens_function<FloatType>(x + grid_range.first[0],
325 y + grid_range.first[1],
326 z + grid_range.first[2], global_dim);
335 [[nodiscard]] std::optional<double>
337 bool consider_ghosts =
false)
override {
343 auto const potential_field = bc->block->template getData<PotentialField>(
344 m_potential_field_with_ghosts_id);
349 [[nodiscard]] std::vector<double>
352 std::vector<double> out;
353 uint_t values_size{0u};
355 if (
auto const ci =
get_interval(lattice, lower_corner, upper_corner)) {
356 out = std::vector<double>(ci->numCells());
357 for (
auto &
block : *lattice.get_blocks()) {
358 auto const block_offset = lattice.get_block_corner(
block,
true);
360 lattice, lower_corner, upper_corner, block_offset,
block)) {
361 auto const potential_field =
block.template getData<PotentialField>(
362 m_potential_field_with_ghosts_id);
364 assert(values.size() == bci->numCells());
365 values_size += bci->numCells();
366 auto kernel = [&values, &out](
unsigned const block_index,
367 unsigned const local_index,
369 out[local_index] = double_c(values[block_index]);
375 assert(values_size == ci->numCells());
381#if not defined(__CUDACC__)
384 auto dim = grid_range.second - grid_range.first;
385 heffte->fft->forward(m_potential.data(), m_potential_fourier.data(),
386 heffte->buffer->data());
387 std::ranges::transform(m_potential_fourier, m_greens,
388 m_potential_fourier.begin(), std::multiplies<>{});
389 heffte->fft->backward(m_potential_fourier.data(), m_potential.data(),
390 heffte->buffer->data());
393 auto potential_with_ghosts =
block.template getData<PotentialField>(
394 m_potential_field_with_ghosts_id);
395 for (
int x = 0; x < dim[0]; x++) {
396 for (
int y = 0; y < dim[1]; y++) {
397 for (
int z = 0; z < dim[2]; z++) {
398 potential_with_ghosts->get(x, y, z) =
406#if defined(__CUDACC__)
410 block.template getData<PotentialField>(m_potential_field_id);
412 block.template getData<PotentialFourier>(m_potential_fourier_id);
413 FloatType *_data_potential =
potential->dataAt(0, 0, 0, 0);
414 ComplexType *_data_fourier = fourier->dataAt(0, 0, 0, 0);
415 heffte->fft->forward(_data_potential, _data_fourier,
416 heffte->buffer->data());
418 heffte->fft->backward(_data_fourier, _data_potential,
419 heffte->buffer->data());
420 kernel_move_fields();
430 auto const density_id = BlockDataID(
id);
431#if not defined(__CUDACC__)
434 auto dim = grid_range.second - grid_range.first;
436 auto density_field =
block.template getData<PotentialField>(density_id);
437 for (
int x = 0; x < dim[0]; x++) {
438 for (
int y = 0; y < dim[1]; y++) {
439 for (
int z = 0; z < dim[2]; z++) {
441 factor * density_field->get(x, y, z);
448#if defined(__CUDACC__)
452 block.template getData<PotentialField>(m_potential_field_id);
454 block.template getData<gpu::GPUField<FloatType>>(density_id);
455 add_fields(field, density_field, FloatType_c(factor));
462#if not defined(__CUDACC__)
465 auto const dim = grid_range.second - grid_range.first;
466 for (
int x = 0; x < dim[0]; x++) {
467 for (
int i = 0; i < m_potential_fourier.size(); i++) {
468 m_potential_fourier[i] *= m_greens[i];
470 for (
int y = 0; y < dim[1]; y++) {
471 for (
int z = 0; z < dim[2]; z++) {
478#if defined(__CUDACC__)
483 block.template getData<PotentialField>(m_potential_field_id);
495 auto &vtk_handle = it.second;
496 if (vtk_handle->enabled) {
497 vtk::writeFiles(vtk_handle->ptr)();
498 vtk_handle->execution_count++;
504 template <
typename Field_T, u
int_t F_SIZE_ARG,
typename OutputType>
505 class VTKWriter :
public vtk::BlockCellDataWriter<OutputType, F_SIZE_ARG> {
507 VTKWriter(ConstBlockDataID
const &block_id, std::string
const &
id,
508 FloatType unit_conversion)
509 : vtk::BlockCellDataWriter<OutputType, F_SIZE_ARG>(id),
515 WALBERLA_ASSERT_NOT_NULLPTR(this->block_);
524 template <
typename OutputType =
float>
526 :
public VTKWriter<PotentialFieldVTK, 1u, OutputType> {
530 using Base::evaluate;
533 OutputType
evaluate(cell_idx_t
const x, cell_idx_t
const y,
534 cell_idx_t
const z, cell_idx_t
const)
override {
535 WALBERLA_ASSERT_NOT_NULLPTR(this->
m_field);
545 int flag_observables)
override {
546#if defined(__CUDACC__)
547 auto const allocate_cpu_field_if_empty =
548 [&]<
typename Field>(
auto const &blocks, std::string name,
549 std::optional<BlockDataID> &cpu_field) {
551 cpu_field = field::addToStorage<Field>(
552 blocks, name, FloatType{0}, field::fzyx,
558 auto const unit_conversion = FloatType_c(units.at(
"potential"));
559#if defined(__CUDACC__)
563 blocks,
"potential_cpu", m_potential_cpu_field_id);
564 vtk_obj.addBeforeFunction(
565 gpu::fieldCpyFunctor<PotentialFieldVTK, PotentialField>(
566 blocks, *m_potential_cpu_field_id,
567 m_potential_field_with_ghosts_id));
569 *m_potential_cpu_field_id,
"potential", unit_conversion));
574 m_potential_field_with_ghosts_id,
"potential", unit_conversion));
580#if defined(__CUDACC__)
582 gpu::GPUField<FloatType> *field_add, FloatType factor) {
583 auto kernel = gpu::make_kernel(add_fields_with_factor<FloatType>);
584 kernel.addFieldIndexingParam(
585 gpu::FieldIndexing<FloatType>::xyz(*field_out));
586 kernel.addFieldIndexingParam(
587 gpu::FieldIndexing<FloatType>::xyz(*field_add));
588 kernel.addParam(factor);
Vector implementation and trait types for boost qvm interoperability.
std::map< std::string, std::shared_ptr< VTKHandle > > m_vtk_auto
VTK writers that are executed automatically.
std::unordered_map< std::string, double > units_map
auto const & get_grid_dimensions() const
std::pair< Utils::Vector3i, Utils::Vector3i > get_local_grid_range(bool with_halo=false) const
auto get_ghost_layers() const
DEVICE_QUALIFIER constexpr iterator begin() noexcept
OutputType evaluate(cell_idx_t const x, cell_idx_t const y, cell_idx_t const z, cell_idx_t const) override
void configure() override
VTKWriter(ConstBlockDataID const &block_id, std::string const &id, FloatType unit_conversion)
FloatType const m_conversion
ConstBlockDataID const m_block_id
FieldTrait< FloatType, Architecture >::template FullCommunicator< stencil::D3Q27 > FullCommunicator
bool is_gpu() const noexcept override
FieldTrait< FloatType, Architecture >::PotentialFieldVTK PotentialFieldVTK
void reset_charge_field() override
bool is_double_precision() const noexcept override
std::vector< double > get_slice_potential(Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner) const override
PoissonSolverFFT(std::shared_ptr< LatticeWalberla > lattice, double permittivity)
void setup_fft(bool use_gpu_aware) override
FieldTrait< FloatType, Architecture >::ComplexType ComplexType
std::optional< double > get_node_potential(Utils::Vector3i const &node, bool consider_ghosts=false) override
void ghost_communication()
void add_charge_to_field(std::size_t id, double valency) override
FieldTrait< FloatType, Architecture >::PotentialField PotentialField
void integrate_vtk_writers() override
void register_vtk_field_writers(walberla::vtk::VTKOutput &vtk_obj, LatticeModel::units_map const &units, int flag_observables) override
FieldTrait< FloatType, Architecture >::template PackInfo< Field > PackInfo
~PoissonSolverFFT() override=default
std::size_t get_potential_field_id() const noexcept override
LatticeWalberla const & get_lattice() const noexcept override
virtual double get_permittivity() const noexcept
static double * block(double *p, std::size_t index, std::size_t size)
void initialize(GhostLayerField< double, 1u > *scalar_field, double const &value)
auto get(GhostLayerField< double, 1u > const *scalar_field, Cell const &cell)
\file PackInfoPdfDoublePrecision.cpp \author pystencils
int pos_to_linear_index(int x, int y, int z, auto const &dim)
auto to_array(Utils::Vector< T, N > const &vec)
std::optional< BlockAndCell > get_block_and_cell(::LatticeWalberla const &lattice, signed_integral_vector auto const &node, bool consider_ghost_layers)
std::optional< walberla::cell::CellInterval > get_block_interval(::LatticeWalberla const &lattice, Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner, Utils::Vector3i const &block_offset, IBlock const &block)
void copy_block_buffer(CellInterval const &bci, CellInterval const &ci, Utils::Vector3i const &block_offset, Utils::Vector3i const &lower_corner, Kernel &&kernel)
Synchronize data between a sliced block and a container.
std::optional< walberla::cell::CellInterval > get_interval(::LatticeWalberla const &lattice, Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner)
blockforest::communication::UniformBufferedScheme< Stencil > FullCommunicator
field::GhostLayerField< FT, 1u > PotentialFieldVTK
std::complex< FloatType > ComplexType
field::GhostLayerField< FT, 1u > PotentialField
field::communication::PackInfo< Field > PackInfo
std::unique_ptr< heffte::fft3d< backend > > fft
std::unique_ptr< heffte::box3d<> > box_in
heffte::backend::fftw backend
std::unique_ptr< heffte::box3d<> > box_out
std::unique_ptr< heffte::fft3d< backend >::buffer_container< ComplexType > > buffer
heffte_container(heffte::plan_options const &options, auto const &grid_range)