ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
fft.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2024 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21/** \file
22 *
23 * Routines, row decomposition, data structures and communication for the
24 * 3D-FFT.
25 *
26 */
27
28#include "fft.hpp"
29#include "vector.hpp"
30
31#include "p3m/packing.hpp"
32
33#include <utils/Vector.hpp>
34#include <utils/index.hpp>
37
38#include <boost/mpi/communicator.hpp>
39#include <boost/serialization/vector.hpp>
40
41#include <fftw3.h>
42
43#ifdef _OPENMP
44#include <omp.h>
45#endif
46
47#include <algorithm>
48#include <cmath>
49#include <cstddef>
50#include <cstdio>
51#include <cstring>
52#include <optional>
53#include <span>
54#include <stdexcept>
55#include <utility>
56#include <vector>
57
59
60/** @name MPI tags for FFT communication */
61/**@{*/
62/** Tag for communication in forw_grid_comm() */
63#define REQ_FFT_FORW 301
64/** Tag for communication in back_grid_comm() */
65#define REQ_FFT_BACK 302
66/**@}*/
67
68template <typename T>
69static void fft_sendrecv(T const *const sendbuf, int scount, int dest,
70 T *const recvbuf, int rcount, int source,
71 boost::mpi::communicator const &comm, int tag) {
72 auto const type = boost::mpi::get_mpi_datatype<T>(*sendbuf);
73 MPI_Sendrecv(reinterpret_cast<void const *>(sendbuf), scount, type, dest, tag,
74 reinterpret_cast<void *>(recvbuf), rcount, type, source, tag,
75 comm, MPI_STATUS_IGNORE);
76}
77
78namespace fft {
79
80template <typename FloatType = double> struct fftw {
81 using complex = fftw_complex;
82 static auto constexpr plan_many_dft = fftw_plan_many_dft;
83 static auto constexpr destroy_plan = fftw_destroy_plan;
84 static auto constexpr execute_dft = fftw_execute_dft;
85 static auto constexpr malloc = fftw_malloc;
86 static auto constexpr free = fftw_free;
87};
88template <> struct fftw<float> {
89 using complex = fftwf_complex;
90 static auto constexpr plan_many_dft = fftwf_plan_many_dft;
91 static auto constexpr destroy_plan = fftwf_destroy_plan;
92 static auto constexpr execute_dft = fftwf_execute_dft;
93 static auto constexpr malloc = fftwf_malloc;
94 static auto constexpr free = fftwf_free;
95};
96
97/** This ugly function does the bookkeeping: which nodes have to
98 * communicate to each other, when you change the node grid.
99 * Changing the regular decomposition requires communication. This
100 * function finds (hopefully) the best way to do this. As input it
101 * needs the two grids (@p grid1, @p grid2) and a linear list (@p node_list1)
102 * with the node identities for @p grid1. The linear list (@p node_list2)
103 * for the second grid is calculated. For the communication group of
104 * the calling node it calculates a list (@c group) with the node
105 * identities and the positions (@p my_pos, @p pos) of that nodes in @p grid1
106 * and @p grid2. The return value is the size of the communication
107 * group. It gives -1 if the two grids do not fit to each other
108 * (@p grid1 and @p grid2 have to be component-wise multiples of each
109 * other, see e.g. \ref calc_2d_grid for how to do this).
110 *
111 * \param[in] grid1 The node grid you start with.
112 * \param[in] grid2 The node grid you want to have.
113 * \param[in] node_list1 Linear node index list for grid1.
114 * \param[out] node_list2 Linear node index list for grid2.
115 * \param[out] pos Positions of the nodes in grid2
116 * \param[out] my_pos Position of comm.rank() in grid2.
117 * \param[in] rank MPI rank.
118 * \return Size of the communication group.
119 */
120std::optional<std::vector<int>>
122 std::span<int const> node_list1, std::span<int> node_list2,
123 std::span<int> pos, std::span<int> my_pos, int rank) {
124 int i;
125 /* communication group cell size on grid1 and grid2 */
126 int s1[3], s2[3];
127 /* The communication group cells build the same super grid on grid1 and grid2
128 */
129 int ds[3];
130 /* communication group size */
131 int g_size = 1;
132 /* comm. group cell index */
133 int gi[3];
134 /* position of a node in a grid */
135 Utils::Vector3i p1, p2;
136 /* node identity */
137 int n;
138 /* comm.rank() position in the communication group. */
139 int c_pos = -1;
140 /* flag for group identification */
141 int my_group = 0;
142
143 /* calculate dimension of comm. group cells for both grids */
144 if (Utils::product(grid1) != Utils::product(grid2))
145 return std::nullopt; /* unlike number of nodes */
146 for (i = 0; i < 3; i++) {
147 s1[i] = grid1[i] / grid2[i];
148 if (s1[i] == 0)
149 s1[i] = 1;
150 else if (grid1[i] != grid2[i] * s1[i])
151 return std::nullopt; /* grids do not match!!! */
152
153 s2[i] = grid2[i] / grid1[i];
154 if (s2[i] == 0)
155 s2[i] = 1;
156 else if (grid2[i] != grid1[i] * s2[i])
157 return std::nullopt; /* grids do not match!!! */
158
159 ds[i] = grid2[i] / s2[i];
160 g_size *= s2[i];
161 }
162
163 std::vector<int> group(g_size);
164
165 /* calc node_list2 */
166 /* loop through all comm. group cells */
167 for (gi[2] = 0; gi[2] < ds[2]; gi[2]++)
168 for (gi[1] = 0; gi[1] < ds[1]; gi[1]++)
169 for (gi[0] = 0; gi[0] < ds[0]; gi[0]++) {
170 /* loop through all nodes in that comm. group cell */
171 for (i = 0; i < g_size; i++) {
172 p1[0] = (gi[0] * s1[0]) + (i % s1[0]);
173 p1[1] = (gi[1] * s1[1]) + ((i / s1[0]) % s1[1]);
174 p1[2] = (gi[2] * s1[2]) + (i / (s1[0] * s1[1]));
175
176 p2[0] = (gi[0] * s2[0]) + (i % s2[0]);
177 p2[1] = (gi[1] * s2[1]) + ((i / s2[0]) % s2[1]);
178 p2[2] = (gi[2] * s2[2]) + (i / (s2[0] * s2[1]));
179
180 n = node_list1[Utils::get_linear_index(p1, grid1)];
181 node_list2[Utils::get_linear_index(p2, grid2)] = n;
182
183 pos[3 * n + 0] = p2[0];
184 pos[3 * n + 1] = p2[1];
185 pos[3 * n + 2] = p2[2];
186 if (my_group == 1)
187 group[i] = n;
188 if (n == rank && my_group == 0) {
189 my_group = 1;
190 c_pos = i;
191 my_pos[0] = p2[0];
192 my_pos[1] = p2[1];
193 my_pos[2] = p2[2];
194 i = -1; /* restart the loop */
195 }
196 }
197 my_group = 0;
198 }
199
200 /* permute comm. group according to the nodes position in the group */
201 /* This is necessary to have matching node pairs during communication! */
202 while (c_pos > 0) {
203 n = group[g_size - 1];
204 for (i = g_size - 1; i > 0; i--)
205 group[i] = group[i - 1];
206 group[0] = n;
207 c_pos--;
208 }
209 return {group};
210}
211
212namespace {
213/** Calculate the local fft mesh. Calculate the local mesh (@p loc_mesh)
214 * of a node at position (@p n_pos) in a node grid (@p n_grid) for a global
215 * mesh of size (@p mesh) and a mesh offset (@p mesh_off (in mesh units))
216 * and store also the first point (@p start) of the local mesh.
217 *
218 * \param[in] n_pos Position of the node in @p n_grid.
219 * \param[in] n_grid node grid.
220 * \param[in] mesh global mesh dimensions.
221 * \param[in] mesh_off global mesh offset.
222 * \param[out] loc_mesh local mesh dimension.
223 * \param[out] start first point of local mesh in global mesh.
224 * \return Number of mesh points in local mesh.
225 */
226int calc_local_mesh(const int *n_pos, const int *n_grid, const int *mesh,
227 const double *mesh_off, int *loc_mesh, int *start) {
228 int last[3], size = 1;
229
230 for (int i = 0; i < 3; i++) {
231 auto const ai = mesh[i] / static_cast<double>(n_grid[i]);
232 start[i] = static_cast<int>(ceil(ai * n_pos[i] - mesh_off[i]));
233 last[i] = static_cast<int>(floor(ai * (n_pos[i] + 1) - mesh_off[i]));
234 /* correct round off errors */
235 if (ai * (n_pos[i] + 1) - mesh_off[i] - last[i] < 1.0e-15)
236 last[i]--;
237 if (1.0 + ai * n_pos[i] - mesh_off[i] - start[i] < 1.0e-15)
238 start[i]--;
239 loc_mesh[i] = last[i] - start[i] + 1;
240 size *= loc_mesh[i];
241 }
242 return size;
243}
244
245/** Calculate a send (or recv.) block for grid communication during a
246 * decomposition change. Calculate the send block specification
247 * (block = lower left corner and upper right corner) which a node at
248 * position (@p pos1) in the actual node grid (@p grid1) has to send to
249 * another node at position (@p pos2) in the desired node grid (@p grid2).
250 * The global mesh, subject to communication, is specified via its size
251 * (@p mesh) and its mesh offset (@p mesh_off (in mesh units)).
252 *
253 * For the calculation of a receive block you have to change the arguments in
254 * the following way:
255 * - @p pos1: position of receiving node in the desired node grid.
256 * - @p grid1: desired node grid.
257 * - @p pos2: position of the node you intend to receive the data from in the
258 * actual node grid.
259 * - @p grid2: actual node grid.
260 *
261 * \param[in] pos1 Position of send node in @p grid1.
262 * \param[in] grid1 node grid 1.
263 * \param[in] pos2 Position of recv node in @p grid2.
264 * \param[in] grid2 node grid 2.
265 * \param[in] mesh global mesh dimensions.
266 * \param[in] mesh_off global mesh offset.
267 * \param[out] block send block specification.
268 * \return Size of the send block.
269 */
270int calc_send_block(const int *pos1, const int *grid1, const int *pos2,
271 const int *grid2, const int *mesh, const double *mesh_off,
272 int *block) {
273 int size = 1;
274 int mesh1[3], first1[3], last1[3];
275 int mesh2[3], first2[3], last2[3];
276
277 calc_local_mesh(pos1, grid1, mesh, mesh_off, mesh1, first1);
278 calc_local_mesh(pos2, grid2, mesh, mesh_off, mesh2, first2);
279
280 for (int i = 0; i < 3; i++) {
281 last1[i] = first1[i] + mesh1[i] - 1;
282 last2[i] = first2[i] + mesh2[i] - 1;
283 block[i] = std::max(first1[i], first2[i]) - first1[i];
284 block[i + 3] = (std::min(last1[i], last2[i]) - first1[i]) - block[i] + 1;
285 size *= block[i + 3];
286 }
287 return size;
288}
289
290/** Pack a block with dimensions <tt>size[0] * size[1] * size[2]</tt> starting
291 * at @p start of an input 3D-grid with dimension @p dim into an output
292 * 3D-grid with dimensions <tt>size[2] * size[0] * size[1]</tt> with
293 * a simultaneous one-fold permutation of the indices. The permutation is
294 * defined as: slow_in -> fast_out, mid_in ->slow_out, fast_in -> mid_out.
295 *
296 * An element <tt>(i0_in, i1_in, i2_in)</tt> is then
297 * <tt>(i0_out = i1_in-start[1], i1_out = i2_in-start[2],
298 * i2_out = i0_in-start[0])</tt> and for the linear indices we have:
299 * - <tt>li_in = i2_in + size[2] * (i1_in + (size[1]*i0_in))</tt>
300 * - <tt>li_out = i2_out + size[0] * (i1_out + (size[2]*i0_out))</tt>
301 *
302 * For index definition see \ref fft_pack_block.
303 *
304 * \param[in] in input 3D-grid.
305 * \param[out] out output 3D-grid (block).
306 * \param[in] start start index of the block in the in-grid.
307 * \param[in] size size of the block (=dimension of the out-grid).
308 * \param[in] dim size of the in-grid.
309 * \param[in] element size of a grid element (e.g. 1 for Real, 2 for Complex).
310 */
311template <typename FloatType>
312void pack_block_permute1(FloatType const *const in, FloatType *const out,
313 const int *start, const int *size, const int *dim,
314 int element) {
315
316 /* offsets for indices in input grid */
317 auto const m_in_offset = element * (dim[2] - size[2]);
318 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
319 /* offset for mid changing indices of output grid */
320 auto const m_out_offset = (element * size[0]) - element;
321 /* linear index of in grid */
322 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
323
324 for (int s = 0; s < size[0]; s++) { /* fast changing out */
325 /* linear index of out grid */
326 int li_out = element * s;
327 for (int m = 0; m < size[1]; m++) { /* slow changing out */
328 for (int f = 0; f < size[2]; f++) { /* mid changing out */
329 for (int e = 0; e < element; e++)
330 out[li_out++] = in[li_in++];
331 li_out += m_out_offset;
332 }
333 li_in += m_in_offset;
334 }
335 li_in += s_in_offset;
336 }
337}
338
339/** Pack a block with dimensions <tt>size[0] * size[1] * size[2]</tt> starting
340 * at @p start of an input 3D-grid with dimension @p dim into an output
341 * 3D-grid with dimensions <tt>size[2] * size[0] * size[1]</tt> with
342 * a simultaneous two-fold permutation of the indices. The permutation is
343 * defined as: slow_in -> mid_out, mid_in ->fast_out, fast_in -> slow_out.
344 *
345 * An element <tt>(i0_in, i1_in, i2_in)</tt> is then
346 * <tt>(i0_out = i2_in-start[2], i1_out = i0_in-start[0],
347 * i2_out = i1_in-start[1])</tt> and for the linear indices we have:
348 * - <tt>li_in = i2_in + size[2] * (i1_in + (size[1]*i0_in))</tt>
349 * - <tt>li_out = i2_out + size[0] * (i1_out + (size[2]*i0_out))</tt>
350 *
351 * For index definition see \ref fft_pack_block.
352 *
353 * \param[in] in input 3D-grid.
354 * \param[out] out output 3D-grid (block).
355 * \param[in] start start index of the block in the in-grid.
356 * \param[in] size size of the block (=dimension of the out-grid).
357 * \param[in] dim size of the in-grid.
358 * \param[in] element size of a grid element (e.g. 1 for Real, 2 for Complex).
359 */
360template <typename FloatType>
361void pack_block_permute2(FloatType const *const in, FloatType *const out,
362 const int *start, const int *size, const int *dim,
363 int element) {
364
365 /* offsets for indices in input grid */
366 auto const m_in_offset = element * (dim[2] - size[2]);
367 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
368 /* offset for slow changing index of output grid */
369 auto const s_out_offset = (element * size[0] * size[1]) - element;
370 /* linear index of in grid */
371 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
372
373 for (int s = 0; s < size[0]; s++) { /* mid changing out */
374 auto const m_out_start = element * (s * size[1]);
375 for (int m = 0; m < size[1]; m++) { /* fast changing out */
376 /* linear index of out grid */
377 int li_out = m_out_start + element * m;
378 for (int f = 0; f < size[2]; f++) { /* slow changing out */
379 for (int e = 0; e < element; e++)
380 out[li_out++] = in[li_in++];
381 li_out += s_out_offset;
382 }
383 li_in += m_in_offset;
384 }
385 li_in += s_in_offset;
386 }
387}
388
389} // namespace
390
391/** Communicate the grid data according to the given forward FFT plan.
392 * \param comm MPI communicator.
393 * \param plan FFT communication plan.
394 * \param in input mesh.
395 * \param out output mesh.
396 */
397template <typename FloatType>
398void fft_data_struct<FloatType>::forw_grid_comm(
399 boost::mpi::communicator const &comm, fft_forw_plan<FloatType> const &plan,
400 FloatType const *in, FloatType *out) {
401 for (std::size_t i = 0ul; i < plan.group.size(); i++) {
402 plan.pack_function(in, send_buf.data(), &(plan.send_block[6ul * i]),
403 &(plan.send_block[6ul * i + 3ul]), plan.old_mesh.data(),
404 plan.element);
405
406 if (plan.group[i] != comm.rank()) {
407 fft_sendrecv(send_buf.data(), plan.send_size[i], plan.group[i],
408 recv_buf.data(), plan.recv_size[i], plan.group[i], comm,
410 } else { /* Self communication... */
411 std::swap(send_buf, recv_buf);
412 }
413 fft_unpack_block(recv_buf.data(), out, &(plan.recv_block[6ul * i]),
414 &(plan.recv_block[6ul * i + 3ul]), plan.new_mesh.data(),
415 plan.element);
416 }
417}
418
419/** Communicate the grid data according to the given backward FFT plan.
420 * \param comm MPI communicator.
421 * \param plan_f Forward FFT plan.
422 * \param plan_b Backward FFT plan.
423 * \param in input mesh.
424 * \param out output mesh.
425 */
426template <typename FloatType>
427void fft_data_struct<FloatType>::back_grid_comm(
428 boost::mpi::communicator const &comm,
429 fft_forw_plan<FloatType> const &plan_f,
430 fft_back_plan<FloatType> const &plan_b, FloatType const *in,
431 FloatType *out) {
432 /* Back means: Use the send/receive stuff from the forward plan but
433 replace the receive blocks by the send blocks and vice
434 versa. Attention then also new_mesh and old_mesh are exchanged */
435
436 for (std::size_t i = 0ul; i < plan_f.group.size(); i++) {
437 plan_b.pack_function(in, send_buf.data(), &(plan_f.recv_block[6ul * i]),
438 &(plan_f.recv_block[6ul * i + 3ul]),
439 plan_f.new_mesh.data(), plan_f.element);
440
441 if (plan_f.group[i] != comm.rank()) { /* send first, receive second */
442 fft_sendrecv(send_buf.data(), plan_f.recv_size[i], plan_f.group[i],
443 recv_buf.data(), plan_f.send_size[i], plan_f.group[i], comm,
445 } else { /* Self communication... */
446 std::swap(send_buf, recv_buf);
447 }
448 fft_unpack_block(recv_buf.data(), out, &(plan_f.send_block[6ul * i]),
449 &(plan_f.send_block[6ul * i + 3ul]),
450 plan_f.old_mesh.data(), plan_f.element);
451 }
452}
453
454/** Calculate 'best' mapping between a 2D and 3D grid.
455 * Required for the communication from 3D regular domain
456 * decomposition to 2D regular row decomposition.
457 * The dimensions of the 2D grid are resorted, if necessary, in a way
458 * that they are multiples of the 3D grid dimensions.
459 * \param g3d 3D grid.
460 * \param g2d 2D grid.
461 * \return index of the row direction [0,1,2].
462 */
463int map_3don2d_grid(int const g3d[3], int g2d[3]) {
464 int row_dir = -1;
465 /* trivial case */
466 if (g3d[2] == 1) {
467 return 2;
468 }
469 if (g2d[0] % g3d[0] == 0) {
470 if (g2d[1] % g3d[1] == 0) {
471 row_dir = 2;
472 } else if (g2d[1] % g3d[2] == 0) {
473 row_dir = 1;
474 g2d[2] = g2d[1];
475 g2d[1] = 1;
476 }
477 } else if (g2d[0] % g3d[1] == 0) {
478 if (g2d[1] % g3d[0] == 0) {
479 row_dir = 2;
480 int const tmp = g2d[0];
481 g2d[0] = g2d[1];
482 g2d[1] = tmp;
483 } else if (g2d[1] % g3d[2] == 0) {
484 row_dir = 0;
485 g2d[2] = g2d[1];
486 g2d[1] = g2d[0];
487 g2d[0] = 1;
488 }
489 } else if (g2d[0] % g3d[2] == 0) {
490 if (g2d[1] % g3d[0] == 0) {
491 row_dir = 1;
492 g2d[2] = g2d[0];
493 g2d[0] = g2d[1];
494 g2d[1] = 1;
495 } else if (g2d[1] % g3d[1] == 0) {
496 row_dir = 0;
497 g2d[2] = g2d[0];
498 g2d[0] = 1;
499 }
500 }
501 return row_dir;
502}
503
504/** Calculate most square 2D grid. */
505inline void calc_2d_grid(int n, int grid[3]) {
506 grid[0] = n;
507 grid[1] = 1;
508 grid[2] = 1;
509 for (auto i = static_cast<int>(std::sqrt(n)); i >= 1; i--) {
510 if (n % i == 0) {
511 grid[0] = n / i;
512 grid[1] = i;
513 grid[2] = 1;
514 return;
515 }
516 }
517}
518
519template <typename FloatType>
521 boost::mpi::communicator const &comm, Utils::Vector3i const &ca_mesh_dim,
522 int const *ca_mesh_margin, Utils::Vector3i const &global_mesh_dim,
523 Utils::Vector3d const &global_mesh_off, int &ks_pnum,
524 Utils::Vector3i const &grid) {
525
526 int n_grid[4][3]; /* The four node grids. */
527 int my_pos[4][3]; /* The position of comm.rank() in the node grids. */
528 std::vector<int> n_id[4]; /* linear node identity lists for the node grids. */
529 std::vector<int> n_pos[4]; /* positions of nodes in the node grids. */
530
531 auto const rank = comm.rank();
532 auto const node_pos = Utils::Mpi::cart_coords<3>(comm, rank);
533
534 max_comm_size = 0;
535 max_mesh_size = 0;
536 for (int i = 0; i < 4; i++) {
537 n_id[i].resize(1 * comm.size());
538 n_pos[i].resize(3 * comm.size());
539 }
540
541 /* === node grids === */
542 /* real space node grid (n_grid[0]) */
543 for (int i = 0; i < 3; i++) {
544 n_grid[0][i] = grid[i];
545 my_pos[0][i] = node_pos[i];
546 }
547 for (int i = 0; i < comm.size(); i++) {
548 auto const n_pos_i = Utils::Mpi::cart_coords<3>(comm, i);
549 for (int j = 0; j < 3; ++j) {
550 n_pos[0][3 * i + j] = n_pos_i[j];
551 }
552 auto const lin_ind = Utils::get_linear_index(
553 n_pos_i, {n_grid[0][0], n_grid[0][1], n_grid[0][2]});
554 n_id[0][lin_ind] = i;
555 }
556
557 /* FFT node grids (n_grid[1 - 3]) */
558 calc_2d_grid(comm.size(), n_grid[1]);
559 /* resort n_grid[1] dimensions if necessary */
560 forw[1].row_dir = map_3don2d_grid(n_grid[0], n_grid[1]);
561 forw[0].n_permute = 0;
562 for (int i = 1; i < 4; i++)
563 forw[i].n_permute = (forw[1].row_dir + i) % 3;
564 for (int i = 0; i < 3; i++) {
565 n_grid[2][i] = n_grid[1][(i + 1) % 3];
566 n_grid[3][i] = n_grid[1][(i + 2) % 3];
567 }
568 forw[2].row_dir = (forw[1].row_dir - 1) % 3;
569 forw[3].row_dir = (forw[1].row_dir - 2) % 3;
570
571 /* === communication groups === */
572 /* copy local mesh off real space charge assignment grid */
573 for (int i = 0; i < 3; i++)
574 forw[0].new_mesh[i] = ca_mesh_dim[i];
575
576 for (int i = 1; i < 4; i++) {
577 auto group = find_comm_groups(
578 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
579 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1],
580 std::span(n_id[i]), std::span(n_pos[i]), my_pos[i], rank);
581 if (not group) {
582 /* try permutation */
583 std::swap(n_grid[i][(forw[i].row_dir + 1) % 3],
584 n_grid[i][(forw[i].row_dir + 2) % 3]);
585
586 group = find_comm_groups(
587 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
588 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, std::span(n_id[i - 1]),
589 std::span(n_id[i]), std::span(n_pos[i]), my_pos[i], rank);
590
591 if (not group) {
592 throw std::runtime_error("INTERNAL ERROR: fft_find_comm_groups error");
593 }
594 }
595
596 forw[i].group = group.value();
597
598 forw[i].send_block.resize(6 * forw[i].group.size());
599 forw[i].send_size.resize(forw[i].group.size());
600 forw[i].recv_block.resize(6 * forw[i].group.size());
601 forw[i].recv_size.resize(forw[i].group.size());
602
603 forw[i].new_size = calc_local_mesh(
604 my_pos[i], n_grid[i], global_mesh_dim.data(), global_mesh_off.data(),
605 forw[i].new_mesh.data(), forw[i].start.data());
606 permute_ifield(forw[i].new_mesh.data(), 3, -(forw[i].n_permute));
607 permute_ifield(forw[i].start.data(), 3, -(forw[i].n_permute));
608 forw[i].n_ffts = forw[i].new_mesh[0] * forw[i].new_mesh[1];
609
610 /* === send/recv block specifications === */
611 for (std::size_t j = 0ul; j < forw[i].group.size(); j++) {
612 /* send block: comm.rank() to comm-group-node i (identity: node) */
613 int node = forw[i].group[j];
614 forw[i].send_size[j] = calc_send_block(
615 my_pos[i - 1], n_grid[i - 1], &(n_pos[i][3 * node]), n_grid[i],
616 global_mesh_dim.data(), global_mesh_off.data(),
617 &(forw[i].send_block[6ul * j]));
618 permute_ifield(&(forw[i].send_block[6ul * j]), 3,
619 -(forw[i - 1].n_permute));
620 permute_ifield(&(forw[i].send_block[6ul * j + 3ul]), 3,
621 -(forw[i - 1].n_permute));
622 if (forw[i].send_size[j] > max_comm_size)
623 max_comm_size = forw[i].send_size[j];
624 /* First plan send blocks have to be adjusted, since the CA grid
625 may have an additional margin outside the actual domain of the
626 node */
627 if (i == 1) {
628 for (std::size_t k = 0ul; k < 3ul; k++)
629 forw[1].send_block[6ul * j + k] += ca_mesh_margin[2ul * k];
630 }
631 /* recv block: comm.rank() from comm-group-node i (identity: node) */
632 forw[i].recv_size[j] = calc_send_block(
633 my_pos[i], n_grid[i], &(n_pos[i - 1][3 * node]), n_grid[i - 1],
634 global_mesh_dim.data(), global_mesh_off.data(),
635 &(forw[i].recv_block[6ul * j]));
636 permute_ifield(&(forw[i].recv_block[6ul * j]), 3, -(forw[i].n_permute));
637 permute_ifield(&(forw[i].recv_block[6ul * j + 3ul]), 3,
638 -(forw[i].n_permute));
639 if (forw[i].recv_size[j] > max_comm_size)
640 max_comm_size = forw[i].recv_size[j];
641 }
642
643 for (std::size_t j = 0ul; j < 3ul; j++)
644 forw[i].old_mesh[j] = forw[i - 1].new_mesh[j];
645 if (i == 1) {
646 forw[i].element = 1;
647 } else {
648 forw[i].element = 2;
649 for (std::size_t j = 0ul; j < forw[i].group.size(); j++) {
650 forw[i].send_size[j] *= 2;
651 forw[i].recv_size[j] *= 2;
652 }
653 }
654 }
655
656 /* Factor 2 for complex fields */
657 max_comm_size *= 2;
658 max_mesh_size = Utils::product(ca_mesh_dim);
659 for (int i = 1; i < 4; i++)
660 if (2 * forw[i].new_size > max_mesh_size)
661 max_mesh_size = 2 * forw[i].new_size;
662
663 /* === pack function === */
664 for (int i = 1; i < 4; i++) {
665 forw[i].pack_function = pack_block_permute2;
666 }
667 ks_pnum = 6;
668 if (forw[1].row_dir == 2) {
669 forw[1].pack_function = fft_pack_block;
670 ks_pnum = 4;
671 } else if (forw[1].row_dir == 1) {
672 forw[1].pack_function = pack_block_permute1;
673 ks_pnum = 5;
674 }
675
676 send_buf.resize(max_comm_size);
677 recv_buf.resize(max_comm_size);
678 data_buf.resize(max_mesh_size);
679 auto *c_data = (typename fftw<FloatType>::complex *)(data_buf.data());
680
681 /* === FFT Routines (Using FFTW / RFFTW package)=== */
682 for (int i = 1; i < 4; i++) {
683 if (init_tag) {
684#ifdef _OPENMP
685#pragma omp critical(fftw_destroy_plan_forward)
686 forw[i].destroy_plan();
687#endif
688 }
689 forw[i].dir = FFTW_FORWARD;
690#ifdef _OPENMP
691#pragma omp critical(fftw_create_plan_forward)
692 forw[i].plan_handle = fftw<FloatType>::plan_many_dft(
693 1, &forw[i].new_mesh[2], forw[i].n_ffts, c_data, nullptr, 1,
694 forw[i].new_mesh[2], c_data, nullptr, 1, forw[i].new_mesh[2],
695 forw[i].dir, FFTW_PATIENT);
696#endif
697 assert(forw[i].plan_handle);
698 }
699
700 /* === The BACK Direction === */
701 /* this is needed because slightly different functions are used */
702 for (int i = 1; i < 4; i++) {
703 if (init_tag) {
704#ifdef _OPENMP
705#pragma omp critical(fftw_destroy_plan_backward)
706 back[i].destroy_plan();
707#endif
708 }
709 back[i].dir = FFTW_BACKWARD;
710#ifdef _OPENMP
711#pragma omp critical(fftw_create_plan_backward)
712 back[i].plan_handle = fftw<FloatType>::plan_many_dft(
713 1, &forw[i].new_mesh[2], forw[i].n_ffts, c_data, nullptr, 1,
714 forw[i].new_mesh[2], c_data, nullptr, 1, forw[i].new_mesh[2],
715 back[i].dir, FFTW_PATIENT);
716#endif
717 back[i].pack_function = pack_block_permute1;
718 assert(back[i].plan_handle);
719 }
720 if (forw[1].row_dir == 2) {
721 back[1].pack_function = fft_pack_block;
722 } else if (forw[1].row_dir == 1) {
723 back[1].pack_function = pack_block_permute2;
724 }
725
726 init_tag = true;
727
728 return max_mesh_size;
729}
730
731template <typename FloatType>
733 boost::mpi::communicator const &comm, FloatType *data) {
734 /* ===== first direction ===== */
735
736 auto *c_data = (typename fftw<FloatType>::complex *)data;
737 auto *c_data_buf = (typename fftw<FloatType>::complex *)data_buf.data();
738
739 /* communication to current dir row format (in is data) */
740 forw_grid_comm(comm, forw[1], data, data_buf.data());
741
742 /* complexify the real data array (in is data_buf) */
743 for (int i = 0; i < forw[1].new_size; i++) {
744 data[2 * i + 0] = data_buf[i]; /* real value */
745 data[2 * i + 1] = FloatType(0); /* complex value */
746 }
747 /* perform FFT (in/out is data)*/
748 fftw<FloatType>::execute_dft(forw[1].plan_handle, c_data, c_data);
749 /* ===== second direction ===== */
750 /* communication to current dir row format (in is data) */
751 forw_grid_comm(comm, forw[2], data, data_buf.data());
752 /* perform FFT (in/out is data_buf) */
753 fftw<FloatType>::execute_dft(forw[2].plan_handle, c_data_buf, c_data_buf);
754 /* ===== third direction ===== */
755 /* communication to current dir row format (in is data_buf) */
756 forw_grid_comm(comm, forw[3], data_buf.data(), data);
757 /* perform FFT (in/out is data)*/
758 fftw<FloatType>::execute_dft(forw[3].plan_handle, c_data, c_data);
759
760 /* REMARK: Result has to be in data. */
761}
762
763template <typename FloatType>
765 boost::mpi::communicator const &comm, FloatType *data) {
766
767 auto *c_data = (typename fftw<FloatType>::complex *)data;
768 auto *c_data_buf = (typename fftw<FloatType>::complex *)data_buf.data();
769
770 /* ===== third direction ===== */
771
772 /* perform FFT (in is data) */
773 fftw<FloatType>::execute_dft(back[3].plan_handle, c_data, c_data);
774 /* communicate (in is data)*/
775 back_grid_comm(comm, forw[3], back[3], data, data_buf.data());
776
777 /* ===== second direction ===== */
778 /* perform FFT (in is data_buf) */
779 fftw<FloatType>::execute_dft(back[2].plan_handle, c_data_buf, c_data_buf);
780 /* communicate (in is data_buf) */
781 back_grid_comm(comm, forw[2], back[2], data_buf.data(), data);
782
783 /* ===== first direction ===== */
784 /* perform FFT (in is data) */
785 fftw<FloatType>::execute_dft(back[1].plan_handle, c_data, c_data);
786 /* throw away the (hopefully) empty complex component (in is data) */
787 for (int i = 0; i < forw[1].new_size; i++) {
788 data_buf[i] = data[2 * i]; /* real value */
789 }
790 /* communicate (in is data_buf) */
791 back_grid_comm(comm, forw[1], back[1], data_buf.data(), data);
792
793 /* REMARK: Result has to be in data. */
794}
795
796template <typename FloatType> void fft_plan<FloatType>::destroy_plan() {
797 if (plan_handle) {
799 plan_handle = nullptr;
800 }
801}
802
803template <class FloatType>
804FloatType *allocator<FloatType>::allocate(const std::size_t n) const {
805 if (n == 0) {
806 return nullptr;
807 }
808 if (n > std::numeric_limits<std::size_t>::max() / sizeof(FloatType)) {
809 throw std::bad_array_new_length();
810 }
811 void *const pv = fftw<FloatType>::malloc(n * sizeof(FloatType));
812 if (!pv) {
813 throw std::bad_alloc();
814 }
815 return static_cast<FloatType *>(pv);
816}
817
818template <class FloatType>
819void allocator<FloatType>::deallocate(FloatType *const p,
820 std::size_t) const noexcept {
821 fftw<FloatType>::free(static_cast<void *>(p));
822}
823
824template struct allocator<float>;
825template struct allocator<double>;
826
827template struct fft_plan<float>;
828template struct fft_plan<double>;
829
830template struct fft_forw_plan<float>;
831template struct fft_forw_plan<double>;
832
833template struct fft_back_plan<float>;
834template struct fft_back_plan<double>;
835
836template struct fft_data_struct<float>;
837template struct fft_data_struct<double>;
838
839} // namespace fft
Vector implementation and trait types for boost qvm interoperability.
DEVICE_QUALIFIER constexpr pointer data() noexcept
Definition Array.hpp:132
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:176
static void fft_sendrecv(T const *const sendbuf, int scount, int dest, T *const recvbuf, int rcount, int source, boost::mpi::communicator const &comm, int tag)
Definition fft.cpp:69
#define REQ_FFT_BACK
Tag for communication in back_grid_comm()
Definition fft.cpp:65
#define REQ_FFT_FORW
Tag for communication in forw_grid_comm()
Definition fft.cpp:63
Routines, row decomposition, data structures and communication for the 3D-FFT.
T product(Vector< T, N > const &v)
Definition Vector.hpp:373
void permute_ifield(int *field, int size, int permute)
permute an integer array field of size size about permute positions.
int get_linear_index(int a, int b, int c, const Vector3i &adim)
Definition index.hpp:35
void pack_block_permute2(FloatType const *const in, FloatType *const out, const int *start, const int *size, const int *dim, int element)
Pack a block with dimensions size[0] * size[1] * size[2] starting at start of an input 3D-grid with d...
Definition fft.cpp:361
void pack_block_permute1(FloatType const *const in, FloatType *const out, const int *start, const int *size, const int *dim, int element)
Pack a block with dimensions size[0] * size[1] * size[2] starting at start of an input 3D-grid with d...
Definition fft.cpp:312
int calc_send_block(const int *pos1, const int *grid1, const int *pos2, const int *grid2, const int *mesh, const double *mesh_off, int *block)
Calculate a send (or recv.) block for grid communication during a decomposition change.
Definition fft.cpp:270
int calc_local_mesh(const int *n_pos, const int *n_grid, const int *mesh, const double *mesh_off, int *loc_mesh, int *start)
Calculate the local fft mesh.
Definition fft.cpp:226
Definition fft.cpp:78
std::optional< std::vector< int > > find_comm_groups(Utils::Vector3i const &grid1, Utils::Vector3i const &grid2, std::span< int const > node_list1, std::span< int > node_list2, std::span< int > pos, std::span< int > my_pos, int rank)
This ugly function does the bookkeeping: which nodes have to communicate to each other,...
Definition fft.cpp:121
void calc_2d_grid(int n, int grid[3])
Calculate most square 2D grid.
Definition fft.cpp:505
int map_3don2d_grid(int const g3d[3], int g2d[3])
Calculate 'best' mapping between a 2D and 3D grid.
Definition fft.cpp:463
void fft_pack_block(FloatType const *const in, FloatType *const out, int const *start, int const *size, int const *dim, int element)
Pack a 3D-block of size size starting at start of an input 3D-grid in with dimension dim into an outp...
Definition packing.hpp:44
void fft_unpack_block(FloatType const *const in, FloatType *const out, int const *start, int const *size, int const *dim, int element)
Unpack a 3D-block in of size size into an output 3D-grid out of size dim starting at position start.
Definition packing.hpp:83
Aligned allocator for FFT data.
Definition vector.hpp:33
FloatType * allocate(std::size_t n) const
Definition fft.cpp:804
void deallocate(FloatType *p, std::size_t) const noexcept
Definition fft.cpp:819
Plan for a backward 1D FFT of a flattened 3D array.
Definition fft.hpp:116
Information about the three one dimensional FFTs and how the nodes have to communicate inbetween.
Definition fft.hpp:125
void forward_fft(boost::mpi::communicator const &comm, FloatType *data)
Perform an in-place forward 3D FFT.
Definition fft.cpp:732
void backward_fft(boost::mpi::communicator const &comm, FloatType *data)
Perform an in-place backward 3D FFT.
Definition fft.cpp:764
int initialize_fft(boost::mpi::communicator const &comm, Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, Utils::Vector3i const &global_mesh_dim, Utils::Vector3d const &global_mesh_off, int &ks_pnum, Utils::Vector3i const &grid)
Initialize everything connected to the 3D-FFT.
Definition fft.cpp:520
Plan for a forward 1D FFT of a flattened 3D array.
Definition fft.hpp:82
void destroy_plan()
Definition fft.cpp:796
fftwf_complex complex
Definition fft.cpp:89
static auto constexpr malloc
Definition fft.cpp:85
static auto constexpr free
Definition fft.cpp:86
fftw_complex complex
Definition fft.cpp:81
static auto constexpr execute_dft
Definition fft.cpp:84
static auto constexpr destroy_plan
Definition fft.cpp:83
static auto constexpr plan_many_dft
Definition fft.cpp:82