ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
P3MFFT.hpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2024-2025 The ESPResSo project
3 *
4 * This file is part of ESPResSo.
5 *
6 * ESPResSo is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * ESPResSo is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#pragma once
21
22#include <utils/Vector.hpp>
23#include <utils/index.hpp>
24
25#include <boost/mpi/communicator.hpp>
26
27#include <heffte.h>
28#include <heffte_backends.h>
29
30#include <algorithm>
31#include <array>
32#include <initializer_list>
33#include <memory>
34
35/**
36 * @brief FFT manager.
37 */
38template <typename FloatType, class FFTConfig> class P3MFFT {
39private:
40 using backend_tag = heffte::backend::default_backend<heffte::tag::cpu>::type;
41 using FFT3D =
42 std::conditional_t<FFTConfig::use_r2c, heffte::fft3d_r2c<backend_tag>,
43 heffte::fft3d<backend_tag>>;
44 using Box = heffte::box3d<>;
45
46 /* input box */
47 std::unique_ptr<Box> in_box;
48 /* output box */
49 std::unique_ptr<Box> out_box;
50 /* workspace for the FFT */
51 std::vector<std::complex<FloatType>> m_workspace;
52 /* FFT backend */
53 std::unique_ptr<FFT3D> fft3d;
54
55 template <typename T, std::size_t N>
56 static auto to_array(Utils::Vector<T, N> const &vec) {
57 std::array<T, N> res{};
58 std::ranges::copy(vec, res.begin());
59 return res;
60 }
61
62public:
63 P3MFFT(boost::mpi::communicator comm, Utils::Vector3i const &global_mesh,
66 Utils::Vector3i const &node_grid) {
67 auto constexpr row_major_order = std::array<int, 3>{2, 1, 0};
68 auto constexpr col_major_order = std::array<int, 3>{0, 1, 2};
69 auto constexpr in_box_order =
70 (FFTConfig::r_space_order == Utils::MemoryOrder::ROW_MAJOR)
73 auto constexpr out_box_order =
74 (FFTConfig::k_space_order == Utils::MemoryOrder::ROW_MAJOR)
77 auto const n_procs = Utils::product(node_grid);
78 auto const high = to_array(global_mesh - Utils::Vector3i::broadcast(1));
79 auto const global_out_box_full = Box({0, 0, 0}, high, out_box_order);
80 auto const global_out_box =
81 FFTConfig::use_r2c ? global_out_box_full.r2c(FFTConfig::r2c_dir)
83 auto best_grid = node_grid;
84 for (auto i : {0u, 1u, 2u}) {
85 if (global_mesh[i] % (2 * n_procs) == 0) {
86 best_grid = {n_procs, 1, 1};
87 break;
88 }
89 }
90 // use optimal output box decomposition based on prime factors
91 auto out_boxes = heffte::split_world(global_out_box, to_array(best_grid));
92 out_box = std::make_unique<Box>(out_boxes[comm.rank()]);
93
94 in_box = std::make_unique<Box>(
95 to_array(rs_local_ld_index),
98
99 // at this stage we can manually adjust some HeFFTe options
100 heffte::plan_options options = heffte::default_options<backend_tag>();
101
102 // use strided 1-D FFT operations
103 // some backends work just as well when the entries of the data are not
104 // contiguous then there is no need to reorder the data in the intermediate
105 // stages which saves time
106 options.use_reorder = true;
107
108 // use point-to-point communications
109 // collaborative all-to-all and individual point-to-point communications are
110 // two alternatives one may be better than the other depending on the
111 // version of MPI, the hardware interconnect, and the problem size
112 options.algorithm = heffte::reshape_algorithm::p2p_plined;
113
114 // in the intermediate steps, the data can be shapes as either 2-D slabs or
115 // 1-D pencils for sufficiently large problem, it is expected that the
116 // pencil decomposition is better but for smaller problems, the slabs may
117 // perform better (depending on hardware and backend)
118 options.use_pencils = true;
119 if constexpr (FFTConfig::use_r2c) {
120 fft3d = std::make_unique<FFT3D>(*in_box, *out_box, FFTConfig::r2c_dir,
121 comm, options);
122 } else {
123 fft3d = std::make_unique<FFT3D>(*in_box, *out_box, comm, options);
124 }
125 m_workspace.resize(fft3d->size_workspace());
126 }
127
129 return Utils::Vector3i(out_box->low);
130 }
132 return Utils::Vector3i(out_box->high) + Utils::Vector3i::broadcast(1);
133 }
138 return Utils::Vector3i(in_box->high) + Utils::Vector3i::broadcast(1) -
139 Utils::Vector3i(in_box->low);
140 }
141 void forward(auto &&in, auto &&out) {
142 fft3d->forward(in, out, m_workspace.data());
143 }
144 void backward(auto &&in, auto &&out) {
145 fft3d->backward(in, out, m_workspace.data());
146 }
147};
Vector implementation and trait types for boost qvm interoperability.
FFT manager.
Definition P3MFFT.hpp:38
Utils::Vector3i rs_local_size() const
Definition P3MFFT.hpp:137
void forward(auto &&in, auto &&out)
Definition P3MFFT.hpp:141
Utils::Vector3i ks_local_ld_index() const
Definition P3MFFT.hpp:128
void backward(auto &&in, auto &&out)
Definition P3MFFT.hpp:144
P3MFFT(boost::mpi::communicator comm, Utils::Vector3i const &global_mesh, Utils::Vector3i const &rs_local_ld_index, Utils::Vector3i const &rs_local_ur_index, Utils::Vector3i const &node_grid)
Definition P3MFFT.hpp:63
Utils::Vector3i ks_local_size() const
Definition P3MFFT.hpp:134
Utils::Vector3i ks_local_ur_index() const
Definition P3MFFT.hpp:131
static DEVICE_QUALIFIER constexpr Vector< T, N > broadcast(typename Base::value_type const &value) noexcept
Create a vector that has all entries set to the same value.
Definition Vector.hpp:132
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
T product(Vector< T, N > const &v)
Definition Vector.hpp:373
VectorXi< 3 > Vector3i
Definition Vector.hpp:194