ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
fft.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2022 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21/** \file
22 *
23 * Routines, row decomposition, data structures and communication for the
24 * 3D-FFT.
25 *
26 */
27
28#include "config/config.hpp"
29
30#if defined(P3M) || defined(DP3M)
31
32#include "p3m/fft.hpp"
33
34#include <utils/Span.hpp>
35#include <utils/Vector.hpp>
36#include <utils/index.hpp>
38
39#include <boost/none.hpp>
40#include <boost/optional.hpp>
41
42#include <fftw3.h>
43#include <mpi.h>
44
45#include <cmath>
46#include <cstddef>
47#include <cstdio>
48#include <cstring>
49#include <stdexcept>
50#include <utility>
51#include <vector>
52
55
56/** @name MPI tags for FFT communication */
57/**@{*/
58/** Tag for communication in forw_grid_comm() */
59#define REQ_FFT_FORW 301
60/** Tag for communication in back_grid_comm() */
61#define REQ_FFT_BACK 302
62/**@}*/
63
64namespace {
65/** This ugly function does the bookkeeping: which nodes have to
66 * communicate to each other, when you change the node grid.
67 * Changing the regular decomposition requires communication. This
68 * function finds (hopefully) the best way to do this. As input it
69 * needs the two grids (@p grid1, @p grid2) and a linear list (@p node_list1)
70 * with the node identities for @p grid1. The linear list (@p node_list2)
71 * for the second grid is calculated. For the communication group of
72 * the calling node it calculates a list (@c group) with the node
73 * identities and the positions (@p my_pos, @p pos) of that nodes in @p grid1
74 * and @p grid2. The return value is the size of the communication
75 * group. It gives -1 if the two grids do not fit to each other
76 * (@p grid1 and @p grid2 have to be component-wise multiples of each
77 * other, see e.g. \ref calc_2d_grid for how to do this).
78 *
79 * \param[in] grid1 The node grid you start with.
80 * \param[in] grid2 The node grid you want to have.
81 * \param[in] node_list1 Linear node index list for grid1.
82 * \param[out] node_list2 Linear node index list for grid2.
83 * \param[out] pos Positions of the nodes in grid2
84 * \param[out] my_pos Position of comm.rank() in grid2.
85 * \param[in] comm MPI communicator.
86 * \return Size of the communication group.
87 */
88boost::optional<std::vector<int>>
90 Utils::Span<const int> node_list1, Utils::Span<int> node_list2,
92 boost::mpi::communicator const &comm) {
93 int i;
94 /* communication group cell size on grid1 and grid2 */
95 int s1[3], s2[3];
96 /* The communication group cells build the same super grid on grid1 and grid2
97 */
98 int ds[3];
99 /* communication group size */
100 int g_size = 1;
101 /* comm. group cell index */
102 int gi[3];
103 /* position of a node in a grid */
104 int p1[3], p2[3];
105 /* node identity */
106 int n;
107 /* comm.rank() position in the communication group. */
108 int c_pos = -1;
109 /* flag for group identification */
110 int my_group = 0;
111
112 /* calculate dimension of comm. group cells for both grids */
113 if ((grid1[0] * grid1[1] * grid1[2]) != (grid2[0] * grid2[1] * grid2[2]))
114 return boost::none; /* unlike number of nodes */
115 for (i = 0; i < 3; i++) {
116 s1[i] = grid1[i] / grid2[i];
117 if (s1[i] == 0)
118 s1[i] = 1;
119 else if (grid1[i] != grid2[i] * s1[i])
120 return boost::none; /* grids do not match!!! */
121
122 s2[i] = grid2[i] / grid1[i];
123 if (s2[i] == 0)
124 s2[i] = 1;
125 else if (grid2[i] != grid1[i] * s2[i])
126 return boost::none; /* grids do not match!!! */
127
128 ds[i] = grid2[i] / s2[i];
129 g_size *= s2[i];
130 }
131
132 std::vector<int> group(g_size);
133
134 /* calc node_list2 */
135 /* loop through all comm. group cells */
136 for (gi[2] = 0; gi[2] < ds[2]; gi[2]++)
137 for (gi[1] = 0; gi[1] < ds[1]; gi[1]++)
138 for (gi[0] = 0; gi[0] < ds[0]; gi[0]++) {
139 /* loop through all nodes in that comm. group cell */
140 for (i = 0; i < g_size; i++) {
141 p1[0] = (gi[0] * s1[0]) + (i % s1[0]);
142 p1[1] = (gi[1] * s1[1]) + ((i / s1[0]) % s1[1]);
143 p1[2] = (gi[2] * s1[2]) + (i / (s1[0] * s1[1]));
144
145 p2[0] = (gi[0] * s2[0]) + (i % s2[0]);
146 p2[1] = (gi[1] * s2[1]) + ((i / s2[0]) % s2[1]);
147 p2[2] = (gi[2] * s2[2]) + (i / (s2[0] * s2[1]));
148
149 n = node_list1[get_linear_index(p1[0], p1[1], p1[2], grid1)];
150 node_list2[get_linear_index(p2[0], p2[1], p2[2], grid2)] = n;
151
152 pos[3 * n + 0] = p2[0];
153 pos[3 * n + 1] = p2[1];
154 pos[3 * n + 2] = p2[2];
155 if (my_group == 1)
156 group[i] = n;
157 if (n == comm.rank() && my_group == 0) {
158 my_group = 1;
159 c_pos = i;
160 my_pos[0] = p2[0];
161 my_pos[1] = p2[1];
162 my_pos[2] = p2[2];
163 i = -1; /* restart the loop */
164 }
165 }
166 my_group = 0;
167 }
168
169 /* permute comm. group according to the nodes position in the group */
170 /* This is necessary to have matching node pairs during communication! */
171 while (c_pos > 0) {
172 n = group[g_size - 1];
173 for (i = g_size - 1; i > 0; i--)
174 group[i] = group[i - 1];
175 group[0] = n;
176 c_pos--;
177 }
178 return {group};
179}
180
181/** Calculate the local fft mesh. Calculate the local mesh (@p loc_mesh)
182 * of a node at position (@p n_pos) in a node grid (@p n_grid) for a global
183 * mesh of size (@p mesh) and a mesh offset (@p mesh_off (in mesh units))
184 * and store also the first point (@p start) of the local mesh.
185 *
186 * \param[in] n_pos Position of the node in @p n_grid.
187 * \param[in] n_grid node grid.
188 * \param[in] mesh global mesh dimensions.
189 * \param[in] mesh_off global mesh offset (see \ref p3m_data_struct).
190 * \param[out] loc_mesh local mesh dimension.
191 * \param[out] start first point of local mesh in global mesh.
192 * \return Number of mesh points in local mesh.
193 */
194int calc_local_mesh(const int *n_pos, const int *n_grid, const int *mesh,
195 const double *mesh_off, int *loc_mesh, int *start) {
196 int last[3], size = 1;
197
198 for (int i = 0; i < 3; i++) {
199 auto const ai = mesh[i] / static_cast<double>(n_grid[i]);
200 start[i] = static_cast<int>(ceil(ai * n_pos[i] - mesh_off[i]));
201 last[i] = static_cast<int>(floor(ai * (n_pos[i] + 1) - mesh_off[i]));
202 /* correct round off errors */
203 if (ai * (n_pos[i] + 1) - mesh_off[i] - last[i] < 1.0e-15)
204 last[i]--;
205 if (1.0 + ai * n_pos[i] - mesh_off[i] - start[i] < 1.0e-15)
206 start[i]--;
207 loc_mesh[i] = last[i] - start[i] + 1;
208 size *= loc_mesh[i];
209 }
210 return size;
211}
212
213/** Calculate a send (or recv.) block for grid communication during a
214 * decomposition change. Calculate the send block specification
215 * (block = lower left corner and upper right corner) which a node at
216 * position (@p pos1) in the actual node grid (@p grid1) has to send to
217 * another node at position (@p pos2) in the desired node grid (@p grid2).
218 * The global mesh, subject to communication, is specified via its size
219 * (@p mesh) and its mesh offset (@p mesh_off (in mesh units)).
220 *
221 * For the calculation of a receive block you have to change the arguments in
222 * the following way:
223 * - @p pos1: position of receiving node in the desired node grid.
224 * - @p grid1: desired node grid.
225 * - @p pos2: position of the node you intend to receive the data from in the
226 * actual node grid.
227 * - @p grid2: actual node grid.
228 *
229 * \param[in] pos1 Position of send node in @p grid1.
230 * \param[in] grid1 node grid 1.
231 * \param[in] pos2 Position of recv node in @p grid2.
232 * \param[in] grid2 node grid 2.
233 * \param[in] mesh global mesh dimensions.
234 * \param[in] mesh_off global mesh offset (see \ref p3m_data_struct).
235 * \param[out] block send block specification.
236 * \return Size of the send block.
237 */
238int calc_send_block(const int *pos1, const int *grid1, const int *pos2,
239 const int *grid2, const int *mesh, const double *mesh_off,
240 int *block) {
241 int size = 1;
242 int mesh1[3], first1[3], last1[3];
243 int mesh2[3], first2[3], last2[3];
244
245 calc_local_mesh(pos1, grid1, mesh, mesh_off, mesh1, first1);
246 calc_local_mesh(pos2, grid2, mesh, mesh_off, mesh2, first2);
247
248 for (int i = 0; i < 3; i++) {
249 last1[i] = first1[i] + mesh1[i] - 1;
250 last2[i] = first2[i] + mesh2[i] - 1;
251 block[i] = std::max(first1[i], first2[i]) - first1[i];
252 block[i + 3] = (std::min(last1[i], last2[i]) - first1[i]) - block[i] + 1;
253 size *= block[i + 3];
254 }
255 return size;
256}
257
258/** Pack a block with dimensions <tt>size[0] * size[1] * size[2]</tt> starting
259 * at @p start of an input 3D-grid with dimension @p dim into an output
260 * 3D-grid with dimensions <tt>size[2] * size[0] * size[1]</tt> with
261 * a simultaneous one-fold permutation of the indices. The permutation is
262 * defined as: slow_in -> fast_out, mid_in ->slow_out, fast_in -> mid_out.
263 *
264 * An element <tt>(i0_in, i1_in, i2_in)</tt> is then
265 * <tt>(i0_out = i1_in-start[1], i1_out = i2_in-start[2],
266 * i2_out = i0_in-start[0])</tt> and for the linear indices we have:
267 * - <tt>li_in = i2_in + size[2] * (i1_in + (size[1]*i0_in))</tt>
268 * - <tt>li_out = i2_out + size[0] * (i1_out + (size[2]*i0_out))</tt>
269 *
270 * For index definition see \ref fft_pack_block.
271 *
272 * \param[in] in input 3D-grid.
273 * \param[out] out output 3D-grid (block).
274 * \param[in] start start index of the block in the in-grid.
275 * \param[in] size size of the block (=dimension of the out-grid).
276 * \param[in] dim size of the in-grid.
277 * \param[in] element size of a grid element (e.g. 1 for Real, 2 for Complex).
278 */
279void pack_block_permute1(double const *const in, double *const out,
280 const int *start, const int *size, const int *dim,
281 int element) {
282
283 /* offsets for indices in input grid */
284 auto const m_in_offset = element * (dim[2] - size[2]);
285 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
286 /* offset for mid changing indices of output grid */
287 auto const m_out_offset = (element * size[0]) - element;
288 /* linear index of in grid */
289 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
290
291 for (int s = 0; s < size[0]; s++) { /* fast changing out */
292 /* linear index of out grid */
293 int li_out = element * s;
294 for (int m = 0; m < size[1]; m++) { /* slow changing out */
295 for (int f = 0; f < size[2]; f++) { /* mid changing out */
296 for (int e = 0; e < element; e++)
297 out[li_out++] = in[li_in++];
298 li_out += m_out_offset;
299 }
300 li_in += m_in_offset;
301 }
302 li_in += s_in_offset;
303 }
304}
305
306/** Pack a block with dimensions <tt>size[0] * size[1] * size[2]</tt> starting
307 * at @p start of an input 3D-grid with dimension @p dim into an output
308 * 3D-grid with dimensions <tt>size[2] * size[0] * size[1]</tt> with
309 * a simultaneous two-fold permutation of the indices. The permutation is
310 * defined as: slow_in -> mid_out, mid_in ->fast_out, fast_in -> slow_out.
311 *
312 * An element <tt>(i0_in, i1_in, i2_in)</tt> is then
313 * <tt>(i0_out = i2_in-start[2], i1_out = i0_in-start[0],
314 * i2_out = i1_in-start[1])</tt> and for the linear indices we have:
315 * - <tt>li_in = i2_in + size[2] * (i1_in + (size[1]*i0_in))</tt>
316 * - <tt>li_out = i2_out + size[0] * (i1_out + (size[2]*i0_out))</tt>
317 *
318 * For index definition see \ref fft_pack_block.
319 *
320 * \param[in] in input 3D-grid.
321 * \param[out] out output 3D-grid (block).
322 * \param[in] start start index of the block in the in-grid.
323 * \param[in] size size of the block (=dimension of the out-grid).
324 * \param[in] dim size of the in-grid.
325 * \param[in] element size of a grid element (e.g. 1 for Real, 2 for Complex).
326 */
327void pack_block_permute2(double const *const in, double *const out,
328 const int *start, const int *size, const int *dim,
329 int element) {
330
331 /* offsets for indices in input grid */
332 auto const m_in_offset = element * (dim[2] - size[2]);
333 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
334 /* offset for slow changing index of output grid */
335 auto const s_out_offset = (element * size[0] * size[1]) - element;
336 /* linear index of in grid */
337 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
338
339 for (int s = 0; s < size[0]; s++) { /* mid changing out */
340 auto const m_out_start = element * (s * size[1]);
341 for (int m = 0; m < size[1]; m++) { /* fast changing out */
342 /* linear index of out grid */
343 int li_out = m_out_start + element * m;
344 for (int f = 0; f < size[2]; f++) { /* slow changing out */
345 for (int e = 0; e < element; e++)
346 out[li_out++] = in[li_in++];
347 li_out += s_out_offset;
348 }
349 li_in += m_in_offset;
350 }
351 li_in += s_in_offset;
352 }
353}
354
355/** Communicate the grid data according to the given forward FFT plan.
356 * \param plan FFT communication plan.
357 * \param in input mesh.
358 * \param out output mesh.
359 * \param fft FFT communication plan.
360 * \param comm MPI communicator.
361 */
362void forw_grid_comm(fft_forw_plan plan, const double *in, double *out,
363 fft_data_struct &fft,
364 const boost::mpi::communicator &comm) {
365 for (std::size_t i = 0ul; i < plan.group.size(); i++) {
366 plan.pack_function(in, fft.send_buf.data(), &(plan.send_block[6ul * i]),
367 &(plan.send_block[6ul * i + 3ul]), plan.old_mesh,
368 plan.element);
369
370 if (plan.group[i] != comm.rank()) {
371 MPI_Sendrecv(fft.send_buf.data(), plan.send_size[i], MPI_DOUBLE,
372 plan.group[i], REQ_FFT_FORW, fft.recv_buf.data(),
373 plan.recv_size[i], MPI_DOUBLE, plan.group[i], REQ_FFT_FORW,
374 comm, MPI_STATUS_IGNORE);
375 } else { /* Self communication... */
376 std::swap(fft.send_buf, fft.recv_buf);
377 }
378 fft_unpack_block(fft.recv_buf.data(), out, &(plan.recv_block[6ul * i]),
379 &(plan.recv_block[6ul * i + 3ul]), plan.new_mesh,
380 plan.element);
381 }
382}
383
384/** Communicate the grid data according to the given backward FFT plan.
385 * \param plan_f Forward FFT plan.
386 * \param plan_b Backward FFT plan.
387 * \param in input mesh.
388 * \param out output mesh.
389 * \param fft FFT communication plan.
390 * \param comm MPI communicator.
391 */
393 const double *in, double *out, fft_data_struct &fft,
394 const boost::mpi::communicator &comm) {
395 /* Back means: Use the send/receive stuff from the forward plan but
396 replace the receive blocks by the send blocks and vice
397 versa. Attention then also new_mesh and old_mesh are exchanged */
398
399 for (std::size_t i = 0ul; i < plan_f.group.size(); i++) {
400 plan_b.pack_function(in, fft.send_buf.data(), &(plan_f.recv_block[6ul * i]),
401 &(plan_f.recv_block[6ul * i + 3ul]), plan_f.new_mesh,
402 plan_f.element);
403
404 if (plan_f.group[i] != comm.rank()) { /* send first, receive second */
405 MPI_Sendrecv(fft.send_buf.data(), plan_f.recv_size[i], MPI_DOUBLE,
406 plan_f.group[i], REQ_FFT_BACK, fft.recv_buf.data(),
407 plan_f.send_size[i], MPI_DOUBLE, plan_f.group[i],
408 REQ_FFT_BACK, comm, MPI_STATUS_IGNORE);
409 } else { /* Self communication... */
410 std::swap(fft.send_buf, fft.recv_buf);
411 }
412 fft_unpack_block(fft.recv_buf.data(), out, &(plan_f.send_block[6ul * i]),
413 &(plan_f.send_block[6ul * i + 3ul]), plan_f.old_mesh,
414 plan_f.element);
415 }
416}
417
418/** Calculate 'best' mapping between a 2D and 3D grid.
419 * Required for the communication from 3D regular domain
420 * decomposition to 2D regular row decomposition.
421 * The dimensions of the 2D grid are resorted, if necessary, in a way
422 * that they are multiples of the 3D grid dimensions.
423 * \param g3d 3D grid.
424 * \param g2d 2D grid.
425 * \return index of the row direction [0,1,2].
426 */
427int map_3don2d_grid(int const g3d[3], int g2d[3]) {
428 int row_dir = -1;
429 /* trivial case */
430 if (g3d[2] == 1) {
431 return 2;
432 }
433 if (g2d[0] % g3d[0] == 0) {
434 if (g2d[1] % g3d[1] == 0) {
435 row_dir = 2;
436 } else if (g2d[1] % g3d[2] == 0) {
437 row_dir = 1;
438 g2d[2] = g2d[1];
439 g2d[1] = 1;
440 }
441 } else if (g2d[0] % g3d[1] == 0) {
442 if (g2d[1] % g3d[0] == 0) {
443 row_dir = 2;
444 int const tmp = g2d[0];
445 g2d[0] = g2d[1];
446 g2d[1] = tmp;
447 } else if (g2d[1] % g3d[2] == 0) {
448 row_dir = 0;
449 g2d[2] = g2d[1];
450 g2d[1] = g2d[0];
451 g2d[0] = 1;
452 }
453 } else if (g2d[0] % g3d[2] == 0) {
454 if (g2d[1] % g3d[0] == 0) {
455 row_dir = 1;
456 g2d[2] = g2d[0];
457 g2d[0] = g2d[1];
458 g2d[1] = 1;
459 } else if (g2d[1] % g3d[1] == 0) {
460 row_dir = 0;
461 g2d[2] = g2d[0];
462 g2d[0] = 1;
463 }
464 }
465 return row_dir;
466}
467
468/** Calculate most square 2D grid. */
469void calc_2d_grid(int n, int grid[3]) {
470 grid[0] = n;
471 grid[1] = 1;
472 grid[2] = 1;
473 for (auto i = static_cast<int>(std::sqrt(n)); i >= 1; i--) {
474 if (n % i == 0) {
475 grid[0] = n / i;
476 grid[1] = i;
477 grid[2] = 1;
478 return;
479 }
480 }
481}
482} // namespace
483
484int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin,
485 Utils::Vector3i const &global_mesh_dim,
486 Utils::Vector3d const &global_mesh_off, int &ks_pnum,
487 fft_data_struct &fft, Utils::Vector3i const &grid,
488 boost::mpi::communicator const &comm) {
489
490 int n_grid[4][3]; /* The four node grids. */
491 int my_pos[4][3]; /* The position of comm.rank() in the node grids. */
492 std::vector<int> n_id[4]; /* linear node identity lists for the node grids. */
493 std::vector<int> n_pos[4]; /* positions of nodes in the node grids. */
494
495 int node_pos[3];
496 MPI_Cart_coords(comm, comm.rank(), 3, node_pos);
497
498 fft.max_comm_size = 0;
499 fft.max_mesh_size = 0;
500 for (int i = 0; i < 4; i++) {
501 n_id[i].resize(1 * comm.size());
502 n_pos[i].resize(3 * comm.size());
503 }
504
505 /* === node grids === */
506 /* real space node grid (n_grid[0]) */
507 for (int i = 0; i < 3; i++) {
508 n_grid[0][i] = grid[i];
509 my_pos[0][i] = node_pos[i];
510 }
511 for (int i = 0; i < comm.size(); i++) {
512 MPI_Cart_coords(comm, i, 3, &(n_pos[0][3 * i + 0]));
513 auto const lin_ind = get_linear_index(
514 n_pos[0][3 * i + 0], n_pos[0][3 * i + 1], n_pos[0][3 * i + 2],
515 {n_grid[0][0], n_grid[0][1], n_grid[0][2]});
516 n_id[0][lin_ind] = i;
517 }
518
519 /* FFT node grids (n_grid[1 - 3]) */
520 calc_2d_grid(comm.size(), n_grid[1]);
521 /* resort n_grid[1] dimensions if necessary */
522 fft.plan[1].row_dir = map_3don2d_grid(n_grid[0], n_grid[1]);
523 fft.plan[0].n_permute = 0;
524 for (int i = 1; i < 4; i++)
525 fft.plan[i].n_permute = (fft.plan[1].row_dir + i) % 3;
526 for (int i = 0; i < 3; i++) {
527 n_grid[2][i] = n_grid[1][(i + 1) % 3];
528 n_grid[3][i] = n_grid[1][(i + 2) % 3];
529 }
530 fft.plan[2].row_dir = (fft.plan[1].row_dir - 1) % 3;
531 fft.plan[3].row_dir = (fft.plan[1].row_dir - 2) % 3;
532
533 /* === communication groups === */
534 /* copy local mesh off real space charge assignment grid */
535 for (int i = 0; i < 3; i++)
536 fft.plan[0].new_mesh[i] = ca_mesh_dim[i];
537
538 for (int i = 1; i < 4; i++) {
539 using Utils::make_span;
540 auto group = find_comm_groups(
541 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
542 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1],
543 make_span(n_id[i]), make_span(n_pos[i]), my_pos[i], comm);
544 if (not group) {
545 /* try permutation */
546 std::swap(n_grid[i][(fft.plan[i].row_dir + 1) % 3],
547 n_grid[i][(fft.plan[i].row_dir + 2) % 3]);
548
549 group = find_comm_groups(
550 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
551 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, make_span(n_id[i - 1]),
552 make_span(n_id[i]), make_span(n_pos[i]), my_pos[i], comm);
553
554 if (not group) {
555 throw std::runtime_error("INTERNAL ERROR: fft_find_comm_groups error");
556 }
557 }
558
559 fft.plan[i].group = *group;
560
561 fft.plan[i].send_block.resize(6 * fft.plan[i].group.size());
562 fft.plan[i].send_size.resize(fft.plan[i].group.size());
563 fft.plan[i].recv_block.resize(6 * fft.plan[i].group.size());
564 fft.plan[i].recv_size.resize(fft.plan[i].group.size());
565
567 my_pos[i], n_grid[i], global_mesh_dim.data(), global_mesh_off.data(),
568 fft.plan[i].new_mesh, fft.plan[i].start);
569 permute_ifield(fft.plan[i].new_mesh, 3, -(fft.plan[i].n_permute));
570 permute_ifield(fft.plan[i].start, 3, -(fft.plan[i].n_permute));
571 fft.plan[i].n_ffts = fft.plan[i].new_mesh[0] * fft.plan[i].new_mesh[1];
572
573 /* === send/recv block specifications === */
574 for (std::size_t j = 0ul; j < fft.plan[i].group.size(); j++) {
575 /* send block: comm.rank() to comm-group-node i (identity: node) */
576 int node = fft.plan[i].group[j];
577 fft.plan[i].send_size[j] = calc_send_block(
578 my_pos[i - 1], n_grid[i - 1], &(n_pos[i][3 * node]), n_grid[i],
579 global_mesh_dim.data(), global_mesh_off.data(),
580 &(fft.plan[i].send_block[6ul * j]));
581 permute_ifield(&(fft.plan[i].send_block[6ul * j]), 3,
582 -(fft.plan[i - 1].n_permute));
583 permute_ifield(&(fft.plan[i].send_block[6ul * j + 3ul]), 3,
584 -(fft.plan[i - 1].n_permute));
585 if (fft.plan[i].send_size[j] > fft.max_comm_size)
586 fft.max_comm_size = fft.plan[i].send_size[j];
587 /* First plan send blocks have to be adjusted, since the CA grid
588 may have an additional margin outside the actual domain of the
589 node */
590 if (i == 1) {
591 for (std::size_t k = 0ul; k < 3ul; k++)
592 fft.plan[1].send_block[6ul * j + k] += ca_mesh_margin[2ul * k];
593 }
594 /* recv block: comm.rank() from comm-group-node i (identity: node) */
595 fft.plan[i].recv_size[j] = calc_send_block(
596 my_pos[i], n_grid[i], &(n_pos[i - 1][3 * node]), n_grid[i - 1],
597 global_mesh_dim.data(), global_mesh_off.data(),
598 &(fft.plan[i].recv_block[6ul * j]));
599 permute_ifield(&(fft.plan[i].recv_block[6ul * j]), 3,
600 -(fft.plan[i].n_permute));
601 permute_ifield(&(fft.plan[i].recv_block[6ul * j + 3ul]), 3,
602 -(fft.plan[i].n_permute));
603 if (fft.plan[i].recv_size[j] > fft.max_comm_size)
604 fft.max_comm_size = fft.plan[i].recv_size[j];
605 }
606
607 for (std::size_t j = 0ul; j < 3ul; j++)
608 fft.plan[i].old_mesh[j] = fft.plan[i - 1].new_mesh[j];
609 if (i == 1) {
610 fft.plan[i].element = 1;
611 } else {
612 fft.plan[i].element = 2;
613 for (std::size_t j = 0ul; j < fft.plan[i].group.size(); j++) {
614 fft.plan[i].send_size[j] *= 2;
615 fft.plan[i].recv_size[j] *= 2;
616 }
617 }
618 }
619
620 /* Factor 2 for complex fields */
621 fft.max_comm_size *= 2;
622 fft.max_mesh_size = Utils::product(ca_mesh_dim);
623 for (int i = 1; i < 4; i++)
624 if (2 * fft.plan[i].new_size > fft.max_mesh_size)
625 fft.max_mesh_size = 2 * fft.plan[i].new_size;
626
627 /* === pack function === */
628 for (int i = 1; i < 4; i++) {
630 }
631 ks_pnum = 6;
632 if (fft.plan[1].row_dir == 2) {
634 ks_pnum = 4;
635 } else if (fft.plan[1].row_dir == 1) {
637 ks_pnum = 5;
638 }
639
640 fft.send_buf.resize(fft.max_comm_size);
641 fft.recv_buf.resize(fft.max_comm_size);
642 fft.data_buf.resize(fft.max_mesh_size);
643 auto *c_data = (fftw_complex *)(fft.data_buf.data());
644
645 /* === FFT Routines (Using FFTW / RFFTW package)=== */
646 for (int i = 1; i < 4; i++) {
647 fft.plan[i].dir = FFTW_FORWARD;
648 /* FFT plan creation.*/
649
650 if (fft.init_tag)
651 fftw_destroy_plan(fft.plan[i].our_fftw_plan);
652 fft.plan[i].our_fftw_plan = fftw_plan_many_dft(
653 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1,
654 fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2],
655 fft.plan[i].dir, FFTW_PATIENT);
656 }
657
658 /* === The BACK Direction === */
659 /* this is needed because slightly different functions are used */
660 for (int i = 1; i < 4; i++) {
661 fft.back[i].dir = FFTW_BACKWARD;
662
663 if (fft.init_tag)
664 fftw_destroy_plan(fft.back[i].our_fftw_plan);
665 fft.back[i].our_fftw_plan = fftw_plan_many_dft(
666 1, &fft.plan[i].new_mesh[2], fft.plan[i].n_ffts, c_data, nullptr, 1,
667 fft.plan[i].new_mesh[2], c_data, nullptr, 1, fft.plan[i].new_mesh[2],
668 fft.back[i].dir, FFTW_PATIENT);
669
671 }
672 if (fft.plan[1].row_dir == 2) {
674 } else if (fft.plan[1].row_dir == 1) {
676 }
677
678 fft.init_tag = true;
679
680 return fft.max_mesh_size;
681}
682
683void fft_perform_forw(double *data, fft_data_struct &fft,
684 const boost::mpi::communicator &comm) {
685 /* ===== first direction ===== */
686
687 auto *c_data = (fftw_complex *)data;
688 auto *c_data_buf = (fftw_complex *)fft.data_buf.data();
689
690 /* communication to current dir row format (in is data) */
691 forw_grid_comm(fft.plan[1], data, fft.data_buf.data(), fft, comm);
692
693 /* complexify the real data array (in is fft.data_buf) */
694 for (int i = 0; i < fft.plan[1].new_size; i++) {
695 data[2 * i + 0] = fft.data_buf[i]; /* real value */
696 data[2 * i + 1] = 0; /* complex value */
697 }
698 /* perform FFT (in/out is data)*/
699 fftw_execute_dft(fft.plan[1].our_fftw_plan, c_data, c_data);
700 /* ===== second direction ===== */
701 /* communication to current dir row format (in is data) */
702 forw_grid_comm(fft.plan[2], data, fft.data_buf.data(), fft, comm);
703 /* perform FFT (in/out is fft.data_buf) */
704 fftw_execute_dft(fft.plan[2].our_fftw_plan, c_data_buf, c_data_buf);
705 /* ===== third direction ===== */
706 /* communication to current dir row format (in is fft.data_buf) */
707 forw_grid_comm(fft.plan[3], fft.data_buf.data(), data, fft, comm);
708 /* perform FFT (in/out is data)*/
709 fftw_execute_dft(fft.plan[3].our_fftw_plan, c_data, c_data);
710
711 /* REMARK: Result has to be in data. */
712}
713
714void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft,
715 const boost::mpi::communicator &comm) {
716
717 auto *c_data = (fftw_complex *)data;
718 auto *c_data_buf = (fftw_complex *)fft.data_buf.data();
719
720 /* ===== third direction ===== */
721
722 /* perform FFT (in is data) */
723 fftw_execute_dft(fft.back[3].our_fftw_plan, c_data, c_data);
724 /* communicate (in is data)*/
725 back_grid_comm(fft.plan[3], fft.back[3], data, fft.data_buf.data(), fft,
726 comm);
727
728 /* ===== second direction ===== */
729 /* perform FFT (in is fft.data_buf) */
730 fftw_execute_dft(fft.back[2].our_fftw_plan, c_data_buf, c_data_buf);
731 /* communicate (in is fft.data_buf) */
732 back_grid_comm(fft.plan[2], fft.back[2], fft.data_buf.data(), data, fft,
733 comm);
734
735 /* ===== first direction ===== */
736 /* perform FFT (in is data) */
737 fftw_execute_dft(fft.back[1].our_fftw_plan, c_data, c_data);
738 /* throw away the (hopefully) empty complex component (in is data) */
739 for (int i = 0; i < fft.plan[1].new_size; i++) {
740 fft.data_buf[i] = data[2 * i]; /* real value */
741 // Vincent:
742 if (check_complex and std::abs(data[2 * i + 1]) > 1e-5) {
743 printf("Complex value is not zero (i=%d,data=%g)!!!\n", i,
744 data[2 * i + 1]);
745 if (i > 100)
746 throw std::runtime_error("Complex value is not zero");
747 }
748 }
749 /* communicate (in is fft.data_buf) */
750 back_grid_comm(fft.plan[1], fft.back[1], fft.data_buf.data(), data, fft,
751 comm);
752
753 /* REMARK: Result has to be in data. */
754}
755
756void fft_pack_block(double const *const in, double *const out,
757 int const start[3], int const size[3], int const dim[3],
758 int element) {
759
760 auto const copy_size = element * size[2] * static_cast<int>(sizeof(double));
761 /* offsets for indices in input grid */
762 auto const m_in_offset = element * dim[2];
763 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
764 /* offsets for indices in output grid */
765 auto const m_out_offset = element * size[2];
766 /* linear index of in grid, linear index of out grid */
767 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
768 int li_out = 0;
769
770 for (int s = 0; s < size[0]; s++) {
771 for (int m = 0; m < size[1]; m++) {
772 memmove(&(out[li_out]), &(in[li_in]), copy_size);
773 li_in += m_in_offset;
774 li_out += m_out_offset;
775 }
776 li_in += s_in_offset;
777 }
778}
779
780void fft_unpack_block(double const *const in, double *const out,
781 int const start[3], int const size[3], int const dim[3],
782 int element) {
783
784 auto const copy_size = element * size[2] * static_cast<int>(sizeof(double));
785 /* offsets for indices in output grid */
786 auto const m_out_offset = element * dim[2];
787 auto const s_out_offset = element * (dim[2] * (dim[1] - size[1]));
788 /* offset for indices in input grid */
789 auto const m_in_offset = element * size[2];
790 /* linear index of in grid, linear index of out grid */
791 int li_in = 0;
792 int li_out = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
793
794 for (int s = 0; s < size[0]; s++) {
795 for (int m = 0; m < size[1]; m++) {
796 memmove(&(out[li_out]), &(in[li_in]), copy_size);
797 li_in += m_in_offset;
798 li_out += m_out_offset;
799 }
800 li_out += s_out_offset;
801 }
802}
803#endif
Vector implementation and trait types for boost qvm interoperability.
float f[3]
__shared__ int pos[MAXDEPTH *THREADS5/WARPSIZE]
__shared__ int node[MAXDEPTH *THREADS5/WARPSIZE]
A stripped-down version of std::span from C++17.
Definition Span.hpp:38
This file contains the defaults for ESPResSo.
static double * block(double *p, std::size_t index, std::size_t size)
Definition elc.cpp:174
void fft_perform_forw(double *data, fft_data_struct &fft, const boost::mpi::communicator &comm)
Perform an in-place forward 3D FFT.
Definition fft.cpp:683
void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft, const boost::mpi::communicator &comm)
Perform an in-place backward 3D FFT.
Definition fft.cpp:714
int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, Utils::Vector3i const &global_mesh_dim, Utils::Vector3d const &global_mesh_off, int &ks_pnum, fft_data_struct &fft, Utils::Vector3i const &grid, boost::mpi::communicator const &comm)
Initialize everything connected to the 3D-FFT.
Definition fft.cpp:484
#define REQ_FFT_BACK
Tag for communication in back_grid_comm()
Definition fft.cpp:61
void fft_unpack_block(double const *const in, double *const out, int const start[3], int const size[3], int const dim[3], int element)
Unpack a 3d-grid input block (size[3]) into an output 3d-grid with dimension dim[3] at start position...
Definition fft.cpp:780
#define REQ_FFT_FORW
Tag for communication in forw_grid_comm()
Definition fft.cpp:59
void fft_pack_block(double const *const in, double *const out, int const start[3], int const size[3], int const dim[3], int element)
Pack a block (size[3] starting at start[3]) of an input 3d-grid with dimension dim[3] into an output ...
Definition fft.cpp:756
Routines, row decomposition, data structures and communication for the 3D-FFT.
T product(Vector< T, N > const &v)
Definition Vector.hpp:359
void permute_ifield(int *field, int size, int permute)
permute an integer array field of size size about permute positions.
int get_linear_index(int a, int b, int c, const Vector3i &adim, MemoryOrder memory_order=MemoryOrder::COLUMN_MAJOR)
get the linear index from the position (a,b,c) in a 3D grid of dimensions adim.
Definition index.hpp:43
DEVICE_QUALIFIER constexpr Span< T > make_span(T *p, std::size_t N)
Definition Span.hpp:112
void back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, const double *in, double *out, fft_data_struct &fft, const boost::mpi::communicator &comm)
Communicate the grid data according to the given backward FFT plan.
Definition fft.cpp:392
int map_3don2d_grid(int const g3d[3], int g2d[3])
Calculate 'best' mapping between a 2D and 3D grid.
Definition fft.cpp:427
void calc_2d_grid(int n, int grid[3])
Calculate most square 2D grid.
Definition fft.cpp:469
void pack_block_permute2(double const *const in, double *const out, const int *start, const int *size, const int *dim, int element)
Pack a block with dimensions size[0] * size[1] * size[2] starting at start of an input 3D-grid with d...
Definition fft.cpp:327
int calc_local_mesh(const int *n_pos, const int *n_grid, const int *mesh, const double *mesh_off, int *loc_mesh, int *start)
Calculate the local fft mesh.
Definition fft.cpp:194
int calc_send_block(const int *pos1, const int *grid1, const int *pos2, const int *grid2, const int *mesh, const double *mesh_off, int *block)
Calculate a send (or recv.) block for grid communication during a decomposition change.
Definition fft.cpp:238
boost::optional< std::vector< int > > find_comm_groups(Utils::Vector3i const &grid1, Utils::Vector3i const &grid2, Utils::Span< const int > node_list1, Utils::Span< int > node_list2, Utils::Span< int > pos, Utils::Span< int > my_pos, boost::mpi::communicator const &comm)
This ugly function does the bookkeeping: which nodes have to communicate to each other,...
Definition fft.cpp:89
void forw_grid_comm(fft_forw_plan plan, const double *in, double *out, fft_data_struct &fft, const boost::mpi::communicator &comm)
Communicate the grid data according to the given forward FFT plan.
Definition fft.cpp:362
void pack_block_permute1(double const *const in, double *const out, const int *start, const int *size, const int *dim, int element)
Pack a block with dimensions size[0] * size[1] * size[2] starting at start of an input 3D-grid with d...
Definition fft.cpp:279
DEVICE_QUALIFIER constexpr pointer data() noexcept
Definition Array.hpp:120
Additional information for backwards FFT.
Definition fft.hpp:134
fftw_plan our_fftw_plan
plan for fft.
Definition fft.hpp:138
void(* pack_function)(double const *const, double *const, int const *, int const *, int const *, int)
packing function for send blocks.
Definition fft.hpp:141
int dir
plan direction.
Definition fft.hpp:136
Information about the three one dimensional FFTs and how the nodes have to communicate inbetween.
Definition fft.hpp:152
int max_comm_size
Maximal size of the communication buffers.
Definition fft.hpp:162
std::vector< double > recv_buf
receive buffer.
Definition fft.hpp:170
fft_vector< double > data_buf
Buffer for receive data.
Definition fft.hpp:172
int max_mesh_size
Maximal local mesh size.
Definition fft.hpp:165
fft_back_plan back[4]
Information for backward FFTs.
Definition fft.hpp:156
std::vector< double > send_buf
send buffer.
Definition fft.hpp:168
fft_forw_plan plan[4]
Information for forward FFTs.
Definition fft.hpp:154
bool init_tag
Whether FFT is initialized or not.
Definition fft.hpp:159
Structure for performing a 1D FFT.
Definition fft.hpp:94
std::vector< int > send_size
Send block communication sizes.
Definition fft.hpp:124
void(* pack_function)(double const *const, double *const, int const *, int const *, int const *, int)
packing function for send blocks.
Definition fft.hpp:119
int row_dir
row direction of that FFT.
Definition fft.hpp:98
int element
size of send block elements.
Definition fft.hpp:130
int start[3]
lower left point of local FFT mesh in global FFT mesh coordinates.
Definition fft.hpp:111
int n_ffts
number of 1D FFTs.
Definition fft.hpp:102
fftw_plan our_fftw_plan
plan for fft.
Definition fft.hpp:104
int old_mesh[3]
size of local mesh before communication.
Definition fft.hpp:107
int new_mesh[3]
size of local mesh after communication, also used for actual FFT.
Definition fft.hpp:109
int dir
plan direction: 0 = Forward FFT, 1 = Backward FFT.
Definition fft.hpp:96
int n_permute
permutations from normal coordinate system.
Definition fft.hpp:100
int new_size
size of new mesh (number of mesh points).
Definition fft.hpp:113
std::vector< int > recv_size
Recv block communication sizes.
Definition fft.hpp:128
std::vector< int > send_block
Send block specification.
Definition fft.hpp:122
std::vector< int > recv_block
Recv block specification.
Definition fft.hpp:126
std::vector< int > group
group of nodes which have to communicate with each other.
Definition fft.hpp:116