ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
PoissonSolverFFT.hpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2025-2026 The ESPResSo project
3 *
4 * This file is part of ESPResSo.
5 *
6 * ESPResSo is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * ESPResSo is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#pragma once
21
25
26#include "greens_function.hpp"
27
30#if defined(__CUDACC__)
33#endif
34
35#include <utils/Vector.hpp>
36
37#include <blockforest/communication/UniformBufferedScheme.h>
38#include <domain_decomposition/BlockDataID.h>
39#include <field/AddToStorage.h>
40#include <field/GhostLayerField.h>
41#include <field/communication/PackInfo.h>
42#include <field/vtk/VTKWriter.h>
43#include <stencil/D3Q27.h>
44#include <waLBerlaDefinitions.h>
45#if defined(__CUDACC__)
46#include <gpu/AddGPUFieldToStorage.h>
47#include <gpu/FieldAccessor.h>
48#include <gpu/FieldIndexing.h>
49#include <gpu/GPUField.h>
50#include <gpu/Kernel.h>
51#include <gpu/communication/MemcpyPackInfo.h>
52#include <gpu/communication/UniformGPUScheme.h>
53#endif
54
55#if defined(__clang__)
56#pragma clang diagnostic push
57#pragma clang diagnostic ignored "-Wfloat-conversion"
58#pragma clang diagnostic ignored "-Wimplicit-float-conversion"
59#elif defined(__GNUC__) or defined(__GNUG__)
60#pragma GCC diagnostic push
61#pragma GCC diagnostic ignored "-Wfloat-conversion"
62#endif
63
64#include <heffte.h>
65#include <heffte_backends.h>
66#include <heffte_geometry.h>
67
68#if defined(__clang__)
69#pragma clang diagnostic pop
70#elif defined(__GNUC__) or defined(__GNUG__)
71#pragma GCC diagnostic pop
72#endif
73
74#include <algorithm>
75#include <array>
76#include <complex>
77#include <cstddef>
78#include <functional>
79#include <memory>
80#include <optional>
81#include <ranges>
82#include <string>
83#include <type_traits>
84#include <utility>
85#include <vector>
86
87namespace walberla {
88
89template <typename T, std::size_t N>
91 std::array<T, N> res{};
92 std::ranges::copy(vec, res.begin());
93 return res;
94}
95
96inline int pos_to_linear_index(int x, int y, int z, auto const &dim) {
97 return (z * dim[1] + y) * dim[0] + x;
98}
99
100template <typename FloatType, lbmpy::Arch Architecture>
102private:
103 template <typename T> FloatType FloatType_c(T t) {
104 return numeric_cast<FloatType>(t);
105 }
106
107protected:
108 template <typename FT, lbmpy::Arch AT = lbmpy::Arch::CPU> struct FieldTrait {
109 using ComplexType = std::complex<FloatType>;
110 using PotentialField = field::GhostLayerField<FT, 1u>;
111 template <class Field>
112 using PackInfo = field::communication::PackInfo<Field>;
113 template <class Stencil>
115 blockforest::communication::UniformBufferedScheme<Stencil>;
116 };
117
118#if defined(__CUDACC__)
119 template <typename FT> struct FieldTrait<FT, lbmpy::Arch::GPU> {
120 using ComplexType = std::conditional_t<std::is_same_v<FloatType, float>,
122 using PotentialField = gpu::GPUField<FT>;
123 template <class Field>
124 using PackInfo = gpu::communication::MemcpyPackInfo<Field>;
125 template <class Stencil>
126 using FullCommunicator = gpu::communication::UniformGPUScheme<Stencil>;
127 };
128#endif
129
130public:
134 FieldTrait<FloatType,
136 template <class Field>
137 using PackInfo =
139
140protected:
141 template <typename ComplexType> struct heffte_container {
142#if defined(__CUDACC__)
143 using backend = heffte::backend::cufft;
144#else // __CUDACC__
145 using backend = heffte::backend::fftw;
146#endif // __CUDACC__
147 std::unique_ptr<heffte::box3d<>> box_in;
148 std::unique_ptr<heffte::box3d<>> box_out;
149 std::unique_ptr<heffte::fft3d<backend>> fft;
150 std::unique_ptr<heffte::fft3d<backend>::buffer_container<ComplexType>>
152
153 heffte_container(heffte::plan_options const &options,
154 auto const &grid_range) {
155 auto const offset_vec = Utils::Vector3i({1, 1, 1});
156 auto const order = Utils::Vector3i({0, 1, 2});
157 box_in = std::make_unique<heffte::box3d<>>(
160 box_out = std::make_unique<heffte::box3d<>>(
163 fft = std::make_unique<heffte::fft3d<backend>>(*box_in, *box_out,
165 buffer = std::make_unique<
166 heffte::fft3d<backend>::buffer_container<ComplexType>>(
167 fft->size_workspace());
168 }
169 };
170
171private:
172 BlockDataID m_potential_field_with_ghosts_id;
173#if defined(__CUDACC__)
174 BlockDataID m_potential_field_id;
177#endif // __CUDACC__
178
179 std::unique_ptr<heffte_container<ComplexType>> heffte;
180 std::shared_ptr<FullCommunicator> m_full_communication;
181
182#if defined(__CUDACC__)
183 using GreenFunctionField = gpu::GPUField<FloatType>;
184 using PotentialFourier = gpu::GPUField<ComplexType>;
185
186 walberla::gpu::Kernel<void (*)(walberla::gpu::FieldAccessor<ComplexType>,
187 walberla::gpu::FieldAccessor<FloatType>)>
189 walberla::gpu::Kernel<void (*)(walberla::gpu::FieldAccessor<FloatType>,
190 walberla::gpu::FieldAccessor<FloatType>)>
192#else // __CUDACC__
193 std::vector<FloatType> m_greens;
194 std::vector<FloatType> m_potential;
195 std::vector<ComplexType> m_potential_fourier;
196#endif // __CUDACC__
197
198public:
199 ~PoissonSolverFFT() override = default;
200 PoissonSolverFFT(std::shared_ptr<LatticeWalberla> lattice,
201 double permittivity)
202 : PoissonSolver(std::move(lattice), permittivity)
204 ,
208#endif
209 {
210 auto blocks = get_lattice().get_blocks();
211#if defined(__CUDACC__)
212 m_potential_field_id = gpu::addGPUFieldToStorage<PotentialField>(
213 blocks, "potential field", 1u, field::fzyx, 0u, false);
214 m_potential_field_with_ghosts_id =
215 gpu::addGPUFieldToStorage<PotentialField>(
216 blocks, "potential field with ghosts", 1u, field::fzyx,
217 get_lattice().get_ghost_layers());
218 m_greens_function_field_id = gpu::addGPUFieldToStorage<GreenFunctionField>(
219 blocks, "greens function", 1u, field::fzyx, 0u, false);
220 m_potential_fourier_id = gpu::addGPUFieldToStorage<PotentialFourier>(
221 blocks, "fourier field", 1u, field::fzyx, 0u, false);
222
225 for (auto &block : *blocks) {
228 auto kernel = gpu::make_kernel(create_greens_function<FloatType>);
229 kernel.addFieldIndexingParam(
230 gpu::FieldIndexing<FloatType>::xyz(*green_field));
231 kernel.addParam(grid_range.first[0]);
232 kernel.addParam(grid_range.first[1]);
233 kernel.addParam(grid_range.first[2]);
234 kernel.addParam(grid_range.second[0]);
235 kernel.addParam(grid_range.second[1]);
236 kernel.addParam(grid_range.second[2]);
237 kernel.addParam(global_dim[0]);
238 kernel.addParam(global_dim[1]);
239 kernel.addParam(global_dim[2]);
240 kernel();
241
242 auto potential =
243 block.template getData<PotentialField>(m_potential_field_id);
245 m_potential_field_with_ghosts_id);
248 auto fourier =
250
253
256 kernel_greens.addFieldIndexingParam(
257 gpu::FieldIndexing<ComplexType>::allInner(*fourier));
258 kernel_greens.addFieldIndexingParam(
259 gpu::FieldIndexing<FloatType>::allInner(*green));
260
261 kernel_move_fields = gpu::make_kernel(move_field<FloatType>);
262 kernel_move_fields.addFieldIndexingParam(
263 gpu::FieldIndexing<FloatType>::xyz(*potential_ghosts));
264 kernel_move_fields.addFieldIndexingParam(
265 gpu::FieldIndexing<FloatType>::xyz(*potential));
266 }
267
268#else // __CUDACC__
269 m_potential_field_with_ghosts_id = field::addToStorage<PotentialField>(
270 blocks, "potential field with ghosts", FloatType{0}, field::fzyx,
272#endif // __CUDACC__
273
274 m_full_communication = std::make_shared<FullCommunicator>(blocks);
275 m_full_communication->addPackInfo(
276 std::make_shared<PackInfo<PotentialField>>(
277 m_potential_field_with_ghosts_id));
278 }
279
280 [[nodiscard]] bool is_gpu() const noexcept override {
282 }
283
285 return std::is_same_v<FloatType, double>;
286 }
287
288 std::size_t get_potential_field_id() const noexcept override {
289 return static_cast<std::size_t>(m_potential_field_with_ghosts_id);
290 }
291
292 void setup_fft([[maybe_unused]] bool use_gpu_aware) override {
294 heffte::plan_options options = heffte::default_options<
296#if defined(__CUDACC__)
297 if constexpr (Architecture == lbmpy::Arch::GPU) {
298 options.use_reorder = false;
299 options.algorithm = heffte::reshape_algorithm::p2p_plined;
300 options.use_pencils = true;
301 options.use_gpu_aware = use_gpu_aware;
302 }
303#endif
304 heffte =
305 std::make_unique<heffte_container<ComplexType>>(options, grid_range);
306#if not defined(__CUDACC__)
307 if constexpr (Architecture == lbmpy::Arch::CPU) {
308 m_potential = std::vector<FloatType>(heffte->fft->size_inbox());
309 m_greens = std::vector<FloatType>(heffte->fft->size_outbox());
310 m_potential_fourier =
311 std::vector<ComplexType>(heffte->fft->size_outbox());
312 auto const dim = grid_range.second - grid_range.first;
314 for (int x = 0; x < dim[0]; x++) {
315 for (int y = 0; y < dim[1]; y++) {
316 for (int z = 0; z < dim[2]; z++) {
317 m_greens[pos_to_linear_index(x, y, z, dim)] =
319 y + grid_range.first[1],
320 z + grid_range.first[2], global_dim);
321 }
322 }
323 }
324 }
325#endif
327#if not defined(__CUDACC__)
328 if constexpr (Architecture == lbmpy::Arch::CPU) {
329 // make FFT plan
330 heffte->fft->forward(m_potential.data(), m_potential_fourier.data(),
331 heffte->buffer->data());
332 }
333#endif
334#if defined(__CUDACC__)
335 if constexpr (Architecture == lbmpy::Arch::GPU) {
336 for (auto &block : *get_lattice().get_blocks()) {
337 auto potential =
338 block.template getData<PotentialField>(m_potential_field_id);
339 auto fourier =
341 FloatType *_data_potential = potential->dataAt(0, 0, 0, 0);
342 ComplexType *_data_fourier = fourier->dataAt(0, 0, 0, 0);
343 // make FFT plan
344 heffte->fft->forward(_data_potential, _data_fourier,
345 heffte->buffer->data());
346 }
347 }
348#endif
349 }
350
351 [[nodiscard]] std::optional<double>
353 bool consider_ghosts = false) override {
355
356 if (not bc or get_potential_field_id() == 0u)
357 return std::nullopt;
358
359 auto const potential_field = bc->block->template getData<PotentialField>(
360 m_potential_field_with_ghosts_id);
361 return {double_c(
363 }
364
365 bool set_node_potential(Utils::Vector3i const &, double) override {
366 throw std::runtime_error("Setting potential is not supported by EKFFT");
367 }
368
369 [[nodiscard]] std::vector<double>
371 Utils::Vector3i const &upper_corner) const override {
372 std::vector<double> out;
373#ifndef NDEBUG
375#endif
376 auto const &lattice = get_lattice();
377 if (auto const ci = get_interval(lattice, lower_corner, upper_corner)) {
378 out = std::vector<double>(ci->numCells());
379 for (auto &block : *lattice.get_blocks()) {
380 auto const block_offset = lattice.get_block_corner(block, true);
381 if (auto const bci = get_block_interval(
383 auto const potential_field = block.template getData<PotentialField>(
384 m_potential_field_with_ghosts_id);
386 assert(values.size() == bci->numCells());
387#ifndef NDEBUG
388 values_size += bci->numCells();
389#endif
390 auto kernel = [&values, &out](unsigned const block_index,
391 unsigned const local_index,
392 Utils::Vector3i const &) {
394 };
395
397 }
398 }
399 assert(values_size == ci->numCells());
400 }
401 return out;
402 }
403
405 std::vector<double> const &) override {
406 throw std::runtime_error("Setting potential is not supported by EKFFT");
407 }
408
409 void solve() override {
410#if not defined(__CUDACC__)
411 if constexpr (Architecture == lbmpy::Arch::CPU) {
413 auto dim = grid_range.second - grid_range.first;
414 heffte->fft->forward(m_potential.data(), m_potential_fourier.data(),
415 heffte->buffer->data());
416 std::ranges::transform(m_potential_fourier, m_greens,
417 m_potential_fourier.begin(), std::multiplies<>{});
418 heffte->fft->backward(m_potential_fourier.data(), m_potential.data(),
419 heffte->buffer->data());
420
421 for (auto &block : *get_lattice().get_blocks()) {
423 m_potential_field_with_ghosts_id);
424 for (int x = 0; x < dim[0]; x++) {
425 for (int y = 0; y < dim[1]; y++) {
426 for (int z = 0; z < dim[2]; z++) {
427 potential_with_ghosts->get(x, y, z) =
428 m_potential[pos_to_linear_index(x, y, z, dim)];
429 }
430 }
431 }
432 }
433 }
434#endif
435#if defined(__CUDACC__)
436 if constexpr (Architecture == lbmpy::Arch::GPU) {
437 for (auto &block : *get_lattice().get_blocks()) {
438 auto potential =
439 block.template getData<PotentialField>(m_potential_field_id);
440 auto fourier =
442 FloatType *_data_potential = potential->dataAt(0, 0, 0, 0);
443 ComplexType *_data_fourier = fourier->dataAt(0, 0, 0, 0);
444 heffte->fft->forward(_data_potential, _data_fourier,
445 heffte->buffer->data());
447 heffte->fft->backward(_data_fourier, _data_potential,
448 heffte->buffer->data());
450 }
451 }
452#endif
455 }
456
457 void add_charge_to_field(std::size_t id, double valency) override {
458 auto const factor = FloatType_c(valency / get_permittivity());
459 auto const density_id = BlockDataID(id);
460#if not defined(__CUDACC__)
461 if constexpr (Architecture == lbmpy::Arch::CPU) {
463 auto dim = grid_range.second - grid_range.first;
464 for (auto &block : *get_lattice().get_blocks()) {
466 for (int x = 0; x < dim[0]; x++) {
467 for (int y = 0; y < dim[1]; y++) {
468 for (int z = 0; z < dim[2]; z++) {
469 m_potential[pos_to_linear_index(x, y, z, dim)] +=
470 factor * density_field->get(x, y, z);
471 }
472 }
473 }
474 }
475 }
476#endif
477#if defined(__CUDACC__)
478 if constexpr (Architecture == lbmpy::Arch::GPU) {
479 for (auto &block : *get_lattice().get_blocks()) {
480 auto field =
481 block.template getData<PotentialField>(m_potential_field_id);
482 auto density_field =
484 add_fields(field, density_field, FloatType_c(factor));
485 }
486 }
487#endif
488 }
489
490 void reset_charge_field() override {
491#if not defined(__CUDACC__)
492 if constexpr (Architecture == lbmpy::Arch::CPU) {
494 auto const dim = grid_range.second - grid_range.first;
495 for (int x = 0; x < dim[0]; x++) {
496 for (int i = 0; i < m_potential_fourier.size(); i++) {
497 m_potential_fourier[i] *= m_greens[i];
498 }
499 for (int y = 0; y < dim[1]; y++) {
500 for (int z = 0; z < dim[2]; z++) {
501 m_potential[pos_to_linear_index(x, y, z, dim)] = FloatType{0};
502 }
503 }
504 }
505 }
506#endif
507#if defined(__CUDACC__)
508 if constexpr (Architecture == lbmpy::Arch::GPU) {
509 // the FFT-solver re-uses the potential field for the charge
510 for (auto &block : *get_lattice().get_blocks()) {
511 auto field =
512 block.template getData<PotentialField>(m_potential_field_id);
513 ek::accessor::Scalar::initialize(field, FloatType{0});
514 }
515 }
516#endif
517 }
518
519 void ghost_communication() override { (*m_full_communication)(); }
520
521protected:
522 void integrate_vtk_writers() override {
523 for (auto const &vtk_handle : m_vtk_auto | std::views::values) {
524 if (vtk_handle->enabled) {
525 vtk::writeFiles(vtk_handle->ptr)();
526 vtk_handle->execution_count++;
527 }
528 }
529 }
530
531protected:
532 template <typename VecType, uint_t F_SIZE_ARG, typename OutputType>
533 class VTKWriter : public vtk::BlockCellDataWriter<OutputType, F_SIZE_ARG> {
534 public:
535 VTKWriter(ConstBlockDataID const &block_id, std::string const &id,
536 FloatType unit_conversion)
537 : vtk::BlockCellDataWriter<OutputType, F_SIZE_ARG>(id),
539
540 protected:
541 void configure() override { WALBERLA_ASSERT_NOT_NULLPTR(this->block_); }
542
543 std::size_t get_first_index(cell_idx_t const x, cell_idx_t const y,
544 cell_idx_t const z) {
545 return (static_cast<std::size_t>(x) * m_dims[2] * m_dims[1] +
546 static_cast<std::size_t>(y) * m_dims[2] +
547 static_cast<std::size_t>(z)) *
549 }
550
551 FloatType m_conversion;
554
555 public:
557
558 void set_dims(Vector3<uint_t> dims) { m_dims = dims; }
559 };
560
561 template <typename OutputType = float>
563 : public VTKWriter<std::vector<FloatType>, 1u, OutputType> {
564 public:
566 using Base::Base;
567 using Base::evaluate;
568
569 protected:
571 cell_idx_t const z, cell_idx_t const) override {
572 WALBERLA_ASSERT(!this->m_content.empty());
573 auto const potential = this->m_content[this->get_first_index(x, y, z)];
575 }
576 };
577
578public:
579 void register_vtk_field_writers(walberla::vtk::VTKOutput &vtk_obj,
581 int flag_observables) override {
582 if (flag_observables & static_cast<int>(EKPoissonOutputVTK::potential)) {
583 auto const unit_conversion = FloatType_c(units.at("potential"));
584 auto const blocks = get_lattice().get_blocks();
587 m_potential_field_with_ghosts_id, "potential", unit_conversion);
588 auto before_function = [this, blocks, potential_writer]() {
589 for (auto &block : *blocks) {
591 m_potential_field_with_ghosts_id);
592 auto const bci = potential_field->xyzSize();
593 potential_writer->set_content(
596 uint_c(bci.xSize()), uint_c(bci.ySize()), uint_c(bci.zSize())));
597 }
598 };
599 vtk_obj.addBeforeFunction(std::move(before_function));
600 vtk_obj.addCellDataWriter(potential_writer);
601 }
602 }
603
604private:
605#if defined(__CUDACC__)
607 gpu::GPUField<FloatType> *field_add, FloatType factor) {
608 auto kernel = gpu::make_kernel(add_fields_with_factor<FloatType>);
609 kernel.addFieldIndexingParam(
610 gpu::FieldIndexing<FloatType>::xyz(*field_out));
611 kernel.addFieldIndexingParam(
612 gpu::FieldIndexing<FloatType>::xyz(*field_add));
613 kernel.addParam(factor);
614 kernel();
615 }
616#endif
617};
618
619} // namespace walberla
Vector implementation and trait types for boost qvm interoperability.
std::map< std::string, std::shared_ptr< VTKHandle > > m_vtk_auto
VTK writers that are executed automatically.
std::unordered_map< std::string, double > units_map
std::pair< Utils::Vector3i, Utils::Vector3i > get_local_grid_range(bool with_halo=false) const
OutputType evaluate(cell_idx_t const x, cell_idx_t const y, cell_idx_t const z, cell_idx_t const) override
std::size_t get_first_index(cell_idx_t const x, cell_idx_t const y, cell_idx_t const z)
void set_dims(Vector3< uint_t > dims)
VTKWriter(ConstBlockDataID const &block_id, std::string const &id, FloatType unit_conversion)
FieldTrait< FloatType, Architecture >::template FullCommunicator< stencil::D3Q27 > FullCommunicator
bool is_gpu() const noexcept override
bool set_node_potential(Utils::Vector3i const &, double) override
bool is_double_precision() const noexcept override
std::vector< double > get_slice_potential(Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner) const override
PoissonSolverFFT(std::shared_ptr< LatticeWalberla > lattice, double permittivity)
void setup_fft(bool use_gpu_aware) override
FieldTrait< FloatType, Architecture >::ComplexType ComplexType
std::optional< double > get_node_potential(Utils::Vector3i const &node, bool consider_ghosts=false) override
void add_charge_to_field(std::size_t id, double valency) override
FieldTrait< FloatType, Architecture >::PotentialField PotentialField
void register_vtk_field_writers(walberla::vtk::VTKOutput &vtk_obj, LatticeModel::units_map const &units, int flag_observables) override
void set_slice_potential(Utils::Vector3i const &, Utils::Vector3i const &, std::vector< double > const &) override
FieldTrait< FloatType, Architecture >::template PackInfo< Field > PackInfo
~PoissonSolverFFT() override=default
std::size_t get_potential_field_id() const noexcept override
LatticeWalberla const & get_lattice() const noexcept override
virtual double get_permittivity() const noexcept
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:175
VectorXi< 3 > Vector3i
Definition Vector.hpp:193
Definition fft.cpp:76
STL namespace.
void initialize(GhostLayerField< double, 1u > *scalar_field, double const &value)
auto get(GhostLayerField< double, 1u > const *scalar_field, Cell const &cell)
\file PackInfoPdfDoublePrecision.cpp \author pystencils
int pos_to_linear_index(int x, int y, int z, auto const &dim)
auto to_array(Utils::Vector< T, N > const &vec)
void copy_block_buffer(CellInterval const &bci, CellInterval const &ci, Utils::Vector3i const &block_offset, Utils::Vector3i const &lower_corner, auto &&kernel)
Synchronize data between a sliced block and a container.
std::optional< BlockAndCell > get_block_and_cell(::LatticeWalberla const &lattice, signed_integral_vector auto const &node, bool consider_ghost_layers)
std::optional< walberla::cell::CellInterval > get_block_interval(::LatticeWalberla const &lattice, Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner, Utils::Vector3i const &block_offset, IBlock const &block)
std::optional< walberla::cell::CellInterval > get_interval(::LatticeWalberla const &lattice, Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner)
blockforest::communication::UniformBufferedScheme< Stencil > FullCommunicator
field::GhostLayerField< FT, 1u > PotentialField
field::communication::PackInfo< Field > PackInfo
std::unique_ptr< heffte::fft3d< backend > > fft
std::unique_ptr< heffte::box3d<> > box_in
std::unique_ptr< heffte::box3d<> > box_out
std::unique_ptr< heffte::fft3d< backend >::buffer_container< ComplexType > > buffer
heffte_container(heffte::plan_options const &options, auto const &grid_range)