30#if defined(__CUDACC__)
37#include <blockforest/communication/UniformBufferedScheme.h>
38#include <domain_decomposition/BlockDataID.h>
39#include <field/AddToStorage.h>
40#include <field/GhostLayerField.h>
41#include <field/communication/PackInfo.h>
42#include <field/vtk/VTKWriter.h>
43#include <stencil/D3Q27.h>
44#include <waLBerlaDefinitions.h>
45#if defined(__CUDACC__)
46#include <gpu/AddGPUFieldToStorage.h>
47#include <gpu/FieldAccessor.h>
48#include <gpu/FieldIndexing.h>
49#include <gpu/GPUField.h>
50#include <gpu/Kernel.h>
51#include <gpu/communication/MemcpyPackInfo.h>
52#include <gpu/communication/UniformGPUScheme.h>
56#pragma clang diagnostic push
57#pragma clang diagnostic ignored "-Wfloat-conversion"
58#pragma clang diagnostic ignored "-Wimplicit-float-conversion"
59#elif defined(__GNUC__) or defined(__GNUG__)
60#pragma GCC diagnostic push
61#pragma GCC diagnostic ignored "-Wfloat-conversion"
65#include <heffte_backends.h>
66#include <heffte_geometry.h>
69#pragma clang diagnostic pop
70#elif defined(__GNUC__) or defined(__GNUG__)
71#pragma GCC diagnostic pop
88template <
typename T, std::
size_t N>
90 std::array<T, N> res{};
91 std::ranges::copy(vec, res.
begin());
96 return (z * dim[1] + y) * dim[0] + x;
99template <
typename FloatType, lbmpy::Arch Architecture>
102 template <
typename T> FloatType FloatType_c(T t) {
103 return numeric_cast<FloatType>(t);
107 template <
typename FT, lbmpy::Arch AT = lbmpy::Arch::CPU>
struct FieldTrait {
110 template <
class Field>
111 using PackInfo = field::communication::PackInfo<Field>;
112 template <
class Stencil>
114 blockforest::communication::UniformBufferedScheme<Stencil>;
117#if defined(__CUDACC__)
119 using ComplexType = std::conditional_t<std::is_same_v<FloatType, float>,
120 cufftComplex, cufftDoubleComplex>;
122 template <
class Field>
123 using PackInfo = gpu::communication::MemcpyPackInfo<Field>;
124 template <
class Stencil>
135 template <
class Field>
141#if defined(__CUDACC__)
142 using backend = heffte::backend::cufft;
148 std::unique_ptr<heffte::fft3d<backend>>
fft;
149 std::unique_ptr<heffte::fft3d<backend>::buffer_container<
ComplexType>>
153 auto const &grid_range) {
156 box_in = std::make_unique<heffte::box3d<>>(
159 box_out = std::make_unique<heffte::box3d<>>(
163 MPI_COMM_WORLD, options);
164 buffer = std::make_unique<
165 heffte::fft3d<backend>::buffer_container<
ComplexType>>(
166 fft->size_workspace());
171 BlockDataID m_potential_field_with_ghosts_id;
172#if defined(__CUDACC__)
173 BlockDataID m_potential_field_id;
174 BlockDataID m_greens_function_field_id;
175 BlockDataID m_potential_fourier_id;
178 std::unique_ptr<heffte_container<ComplexType>> heffte;
179 std::shared_ptr<FullCommunicator> m_full_communication;
181#if defined(__CUDACC__)
182 using GreenFunctionField = gpu::GPUField<FloatType>;
183 using PotentialFourier = gpu::GPUField<ComplexType>;
185 walberla::gpu::Kernel<void (*)(walberla::gpu::FieldAccessor<ComplexType>,
186 walberla::gpu::FieldAccessor<FloatType>)>
188 walberla::gpu::Kernel<void (*)(walberla::gpu::FieldAccessor<FloatType>,
189 walberla::gpu::FieldAccessor<FloatType>)>
192 std::vector<FloatType> m_greens;
193 std::vector<FloatType> m_potential;
194 std::vector<ComplexType> m_potential_fourier;
202#if defined(__CUDACC__)
204 kernel_greens(gpu::make_kernel(
205 multiply_by_greens_function<FloatType,
ComplexType>)),
206 kernel_move_fields(gpu::make_kernel(move_field<FloatType>))
210#if defined(__CUDACC__)
211 m_potential_field_id = gpu::addGPUFieldToStorage<PotentialField>(
212 blocks,
"potential field", 1u, field::fzyx, 0u,
false);
213 m_potential_field_with_ghosts_id =
214 gpu::addGPUFieldToStorage<PotentialField>(
215 blocks,
"potential field with ghosts", 1u, field::fzyx,
217 m_greens_function_field_id = gpu::addGPUFieldToStorage<GreenFunctionField>(
218 blocks,
"greens function", 1u, field::fzyx, 0u,
false);
219 m_potential_fourier_id = gpu::addGPUFieldToStorage<PotentialFourier>(
220 blocks,
"fourier field", 1u, field::fzyx, 0u,
false);
224 for (
auto &
block : *blocks) {
225 auto green_field =
block.template getData<GreenFunctionField>(
226 m_greens_function_field_id);
227 auto kernel = gpu::make_kernel(create_greens_function<FloatType>);
228 kernel.addFieldIndexingParam(
229 gpu::FieldIndexing<FloatType>::xyz(*green_field));
230 kernel.addParam(grid_range.first[0]);
231 kernel.addParam(grid_range.first[1]);
232 kernel.addParam(grid_range.first[2]);
233 kernel.addParam(grid_range.second[0]);
234 kernel.addParam(grid_range.second[1]);
235 kernel.addParam(grid_range.second[2]);
236 kernel.addParam(global_dim[0]);
237 kernel.addParam(global_dim[1]);
238 kernel.addParam(global_dim[2]);
242 block.template getData<PotentialField>(m_potential_field_id);
243 auto potential_ghosts =
block.template getData<PotentialField>(
244 m_potential_field_with_ghosts_id);
245 auto green =
block.template getData<GreenFunctionField>(
246 m_greens_function_field_id);
248 block.template getData<PotentialFourier>(m_potential_fourier_id);
251 gpu::make_kernel(multiply_by_greens_function<FloatType, ComplexType>);
252 kernel_greens.addFieldIndexingParam(
253 gpu::FieldIndexing<ComplexType>::allInner(*fourier));
254 kernel_greens.addFieldIndexingParam(
255 gpu::FieldIndexing<FloatType>::allInner(*green));
257 kernel_move_fields = gpu::make_kernel(move_field<FloatType>);
258 kernel_move_fields.addFieldIndexingParam(
259 gpu::FieldIndexing<FloatType>::xyz(*potential_ghosts));
260 kernel_move_fields.addFieldIndexingParam(
261 gpu::FieldIndexing<FloatType>::xyz(*
potential));
265 m_potential_field_with_ghosts_id = field::addToStorage<PotentialField>(
266 blocks,
"potential field with ghosts", 0., field::fzyx,
270 m_full_communication = std::make_shared<FullCommunicator>(blocks);
271 m_full_communication->addPackInfo(
273 m_potential_field_with_ghosts_id));
276 [[nodiscard]]
bool is_gpu() const noexcept
override {
281 return std::is_same_v<FloatType, double>;
285 return static_cast<std::size_t
>(m_potential_field_with_ghosts_id);
288 void setup_fft([[maybe_unused]]
bool use_gpu_aware)
override {
290 heffte::plan_options options = heffte::default_options<
292#if defined(__CUDACC__)
294 options.use_reorder =
false;
295 options.algorithm = heffte::reshape_algorithm::p2p_plined;
296 options.use_pencils =
true;
297 options.use_gpu_aware = use_gpu_aware;
301 std::make_unique<heffte_container<ComplexType>>(options, grid_range);
302#if not defined(__CUDACC__)
304 m_potential = std::vector<FloatType>(heffte->fft->size_inbox());
305 m_greens = std::vector<FloatType>(heffte->fft->size_outbox());
306 m_potential_fourier =
307 std::vector<ComplexType>(heffte->fft->size_outbox());
308 auto const dim = grid_range.second - grid_range.first;
310 for (
int x = 0; x < dim[0]; x++) {
311 for (
int y = 0; y < dim[1]; y++) {
312 for (
int z = 0; z < dim[2]; z++) {
314 greens_function<FloatType>(x + grid_range.first[0],
315 y + grid_range.first[1],
316 z + grid_range.first[2], global_dim);
323#if not defined(__CUDACC__)
326 heffte->fft->forward(m_potential.data(), m_potential_fourier.data(),
327 heffte->buffer->data());
330#if defined(__CUDACC__)
334 block.template getData<PotentialField>(m_potential_field_id);
336 block.template getData<PotentialFourier>(m_potential_fourier_id);
337 FloatType *_data_potential =
potential->dataAt(0, 0, 0, 0);
338 ComplexType *_data_fourier = fourier->dataAt(0, 0, 0, 0);
340 heffte->fft->forward(_data_potential, _data_fourier,
341 heffte->buffer->data());
347 [[nodiscard]] std::optional<double>
349 bool consider_ghosts =
false)
override {
355 auto const potential_field = bc->block->template getData<PotentialField>(
356 m_potential_field_with_ghosts_id);
361 [[nodiscard]] std::vector<double>
364 std::vector<double> out;
366 uint_t values_size{0u};
369 if (
auto const ci =
get_interval(lattice, lower_corner, upper_corner)) {
370 out = std::vector<double>(ci->numCells());
371 for (
auto &
block : *lattice.get_blocks()) {
372 auto const block_offset = lattice.get_block_corner(
block,
true);
374 lattice, lower_corner, upper_corner, block_offset,
block)) {
375 auto const potential_field =
block.template getData<PotentialField>(
376 m_potential_field_with_ghosts_id);
378 assert(values.size() == bci->numCells());
380 values_size += bci->numCells();
382 auto kernel = [&values, &out](
unsigned const block_index,
383 unsigned const local_index,
385 out[local_index] = double_c(values[block_index]);
391 assert(values_size == ci->numCells());
397#if not defined(__CUDACC__)
400 auto dim = grid_range.second - grid_range.first;
401 heffte->fft->forward(m_potential.data(), m_potential_fourier.data(),
402 heffte->buffer->data());
403 std::ranges::transform(m_potential_fourier, m_greens,
404 m_potential_fourier.begin(), std::multiplies<>{});
405 heffte->fft->backward(m_potential_fourier.data(), m_potential.data(),
406 heffte->buffer->data());
409 auto potential_with_ghosts =
block.template getData<PotentialField>(
410 m_potential_field_with_ghosts_id);
411 for (
int x = 0; x < dim[0]; x++) {
412 for (
int y = 0; y < dim[1]; y++) {
413 for (
int z = 0; z < dim[2]; z++) {
414 potential_with_ghosts->get(x, y, z) =
422#if defined(__CUDACC__)
426 block.template getData<PotentialField>(m_potential_field_id);
428 block.template getData<PotentialFourier>(m_potential_fourier_id);
429 FloatType *_data_potential =
potential->dataAt(0, 0, 0, 0);
430 ComplexType *_data_fourier = fourier->dataAt(0, 0, 0, 0);
431 heffte->fft->forward(_data_potential, _data_fourier,
432 heffte->buffer->data());
434 heffte->fft->backward(_data_fourier, _data_potential,
435 heffte->buffer->data());
436 kernel_move_fields();
446 auto const density_id = BlockDataID(
id);
447#if not defined(__CUDACC__)
450 auto dim = grid_range.second - grid_range.first;
452 auto density_field =
block.template getData<PotentialField>(density_id);
453 for (
int x = 0; x < dim[0]; x++) {
454 for (
int y = 0; y < dim[1]; y++) {
455 for (
int z = 0; z < dim[2]; z++) {
457 factor * density_field->get(x, y, z);
464#if defined(__CUDACC__)
468 block.template getData<PotentialField>(m_potential_field_id);
470 block.template getData<gpu::GPUField<FloatType>>(density_id);
471 add_fields(field, density_field, FloatType_c(factor));
478#if not defined(__CUDACC__)
481 auto const dim = grid_range.second - grid_range.first;
482 for (
int x = 0; x < dim[0]; x++) {
483 for (
int i = 0; i < m_potential_fourier.size(); i++) {
484 m_potential_fourier[i] *= m_greens[i];
486 for (
int y = 0; y < dim[1]; y++) {
487 for (
int z = 0; z < dim[2]; z++) {
494#if defined(__CUDACC__)
499 block.template getData<PotentialField>(m_potential_field_id);
511 auto &vtk_handle = it.second;
512 if (vtk_handle->enabled) {
513 vtk::writeFiles(vtk_handle->ptr)();
514 vtk_handle->execution_count++;
520 template <
typename VecType, u
int_t F_SIZE_ARG,
typename OutputType>
521 class VTKWriter :
public vtk::BlockCellDataWriter<OutputType, F_SIZE_ARG> {
523 VTKWriter(ConstBlockDataID
const &block_id, std::string
const &
id,
524 FloatType unit_conversion)
525 : vtk::BlockCellDataWriter<OutputType, F_SIZE_ARG>(id),
529 void configure()
override { WALBERLA_ASSERT_NOT_NULLPTR(this->block_); }
532 cell_idx_t
const z) {
533 return (
static_cast<std::size_t
>(x) *
m_dims[2] *
m_dims[1] +
534 static_cast<std::size_t
>(y) *
m_dims[2] +
535 static_cast<std::size_t
>(z)) *
549 template <
typename OutputType =
float>
551 :
public VTKWriter<std::vector<FloatType>, 1u, OutputType> {
555 using Base::evaluate;
558 OutputType
evaluate(cell_idx_t
const x, cell_idx_t
const y,
559 cell_idx_t
const z, cell_idx_t
const)
override {
560 WALBERLA_ASSERT(!this->
m_content.empty());
569 int flag_observables)
override {
571 auto const unit_conversion = FloatType_c(units.at(
"potential"));
573 WALBERLA_ASSERT_NOT_NULLPTR(blocks);
574 auto potential_writer = make_shared<PotentialVTKWriter<float>>(
575 m_potential_field_with_ghosts_id,
"potential", unit_conversion);
576 auto before_function = [
this, blocks, potential_writer]() {
577 for (
auto &
block : *blocks) {
578 auto *potential_field =
block.template getData<PotentialField>(
579 m_potential_field_with_ghosts_id);
580 auto const bci = potential_field->xyzSize();
581 potential_writer->set_content(
583 potential_writer->set_dims(Vector3<uint_t>(
584 uint_c(bci.xSize()), uint_c(bci.ySize()), uint_c(bci.zSize())));
587 vtk_obj.addBeforeFunction(std::move(before_function));
588 vtk_obj.addCellDataWriter(potential_writer);
593#if defined(__CUDACC__)
595 gpu::GPUField<FloatType> *field_add, FloatType factor) {
596 auto kernel = gpu::make_kernel(add_fields_with_factor<FloatType>);
597 kernel.addFieldIndexingParam(
598 gpu::FieldIndexing<FloatType>::xyz(*field_out));
599 kernel.addFieldIndexingParam(
600 gpu::FieldIndexing<FloatType>::xyz(*field_add));
601 kernel.addParam(factor);
Vector implementation and trait types for boost qvm interoperability.
std::map< std::string, std::shared_ptr< VTKHandle > > m_vtk_auto
VTK writers that are executed automatically.
std::unordered_map< std::string, double > units_map
auto const & get_grid_dimensions() const
std::pair< Utils::Vector3i, Utils::Vector3i > get_local_grid_range(bool with_halo=false) const
DEVICE_QUALIFIER constexpr iterator begin() noexcept
OutputType evaluate(cell_idx_t const x, cell_idx_t const y, cell_idx_t const z, cell_idx_t const) override
void configure() override
std::size_t get_first_index(cell_idx_t const x, cell_idx_t const y, cell_idx_t const z)
void set_content(VecType content)
void set_dims(Vector3< uint_t > dims)
VTKWriter(ConstBlockDataID const &block_id, std::string const &id, FloatType unit_conversion)
FieldTrait< FloatType, Architecture >::template FullCommunicator< stencil::D3Q27 > FullCommunicator
bool is_gpu() const noexcept override
void reset_charge_field() override
bool is_double_precision() const noexcept override
std::vector< double > get_slice_potential(Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner) const override
PoissonSolverFFT(std::shared_ptr< LatticeWalberla > lattice, double permittivity)
void setup_fft(bool use_gpu_aware) override
FieldTrait< FloatType, Architecture >::ComplexType ComplexType
std::optional< double > get_node_potential(Utils::Vector3i const &node, bool consider_ghosts=false) override
void ghost_communication()
void add_charge_to_field(std::size_t id, double valency) override
FieldTrait< FloatType, Architecture >::PotentialField PotentialField
void integrate_vtk_writers() override
void register_vtk_field_writers(walberla::vtk::VTKOutput &vtk_obj, LatticeModel::units_map const &units, int flag_observables) override
FieldTrait< FloatType, Architecture >::template PackInfo< Field > PackInfo
~PoissonSolverFFT() override=default
std::size_t get_potential_field_id() const noexcept override
LatticeWalberla const & get_lattice() const noexcept override
virtual double get_permittivity() const noexcept
static double * block(double *p, std::size_t index, std::size_t size)
void initialize(GhostLayerField< double, 1u > *scalar_field, double const &value)
auto get(GhostLayerField< double, 1u > const *scalar_field, Cell const &cell)
\file PackInfoPdfDoublePrecision.cpp \author pystencils
int pos_to_linear_index(int x, int y, int z, auto const &dim)
auto to_array(Utils::Vector< T, N > const &vec)
void copy_block_buffer(CellInterval const &bci, CellInterval const &ci, Utils::Vector3i const &block_offset, Utils::Vector3i const &lower_corner, auto &&kernel)
Synchronize data between a sliced block and a container.
std::optional< BlockAndCell > get_block_and_cell(::LatticeWalberla const &lattice, signed_integral_vector auto const &node, bool consider_ghost_layers)
std::optional< walberla::cell::CellInterval > get_block_interval(::LatticeWalberla const &lattice, Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner, Utils::Vector3i const &block_offset, IBlock const &block)
std::optional< walberla::cell::CellInterval > get_interval(::LatticeWalberla const &lattice, Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner)
blockforest::communication::UniformBufferedScheme< Stencil > FullCommunicator
std::complex< FloatType > ComplexType
field::GhostLayerField< FT, 1u > PotentialField
field::communication::PackInfo< Field > PackInfo
std::unique_ptr< heffte::fft3d< backend > > fft
std::unique_ptr< heffte::box3d<> > box_in
heffte::backend::fftw backend
std::unique_ptr< heffte::box3d<> > box_out
std::unique_ptr< heffte::fft3d< backend >::buffer_container< ComplexType > > buffer
heffte_container(heffte::plan_options const &options, auto const &grid_range)