38#include <boost/mpi/communicator.hpp>
39#include <boost/serialization/vector.hpp>
63#define REQ_FFT_FORW 301
65#define REQ_FFT_BACK 302
71 boost::mpi::communicator
const &comm,
int tag) {
72 auto const type = boost::mpi::get_mpi_datatype<T>(*
sendbuf);
80template <
typename FloatType =
double>
struct fftw {
120std::optional<std::vector<int>>
123 std::span<int> pos, std::span<int>
my_pos,
int rank) {
146 for (i = 0; i < 3; i++) {
163 std::vector<int> group(
g_size);
167 for (
gi[2] = 0;
gi[2] <
ds[2];
gi[2]++)
168 for (
gi[1] = 0;
gi[1] <
ds[1];
gi[1]++)
169 for (
gi[0] = 0;
gi[0] <
ds[0];
gi[0]++) {
171 for (i = 0; i <
g_size; i++) {
172 p1[0] = (
gi[0] *
s1[0]) + (i %
s1[0]);
173 p1[1] = (
gi[1] *
s1[1]) + ((i /
s1[0]) %
s1[1]);
174 p1[2] = (
gi[2] *
s1[2]) + (i / (
s1[0] *
s1[1]));
176 p2[0] = (
gi[0] *
s2[0]) + (i %
s2[0]);
177 p2[1] = (
gi[1] *
s2[1]) + ((i /
s2[0]) %
s2[1]);
178 p2[2] = (
gi[2] *
s2[2]) + (i / (
s2[0] *
s2[1]));
183 pos[3 * n + 0] =
p2[0];
184 pos[3 * n + 1] =
p2[1];
185 pos[3 * n + 2] =
p2[2];
204 for (i =
g_size - 1; i > 0; i--)
205 group[i] = group[i - 1];
227 const double *mesh_off,
int *
loc_mesh,
int *start) {
228 int last[3], size = 1;
230 for (
int i = 0; i < 3; i++) {
231 auto const ai = mesh[i] /
static_cast<double>(
n_grid[i]);
232 start[i] =
static_cast<int>(
ceil(ai *
n_pos[i] - mesh_off[i]));
233 last[i] =
static_cast<int>(
floor(ai * (
n_pos[i] + 1) - mesh_off[i]));
235 if (ai * (
n_pos[i] + 1) - mesh_off[i] -
last[i] < 1.0e-15)
237 if (1.0 + ai *
n_pos[i] - mesh_off[i] - start[i] < 1.0e-15)
271 const int *
grid2,
const int *mesh,
const double *mesh_off,
280 for (
int i = 0; i < 3; i++) {
285 size *=
block[i + 3];
311template <
typename FloatType>
313 const int *start,
const int *size,
const int *dim,
317 auto const m_in_offset = element * (dim[2] - size[2]);
318 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
320 auto const m_out_offset = (element * size[0]) - element;
322 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
324 for (
int s = 0; s < size[0]; s++) {
327 for (
int m = 0; m < size[1]; m++) {
328 for (
int f = 0; f < size[2]; f++) {
329 for (
int e = 0;
e < element;
e++)
360template <
typename FloatType>
362 const int *start,
const int *size,
const int *dim,
366 auto const m_in_offset = element * (dim[2] - size[2]);
367 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
369 auto const s_out_offset = (element * size[0] * size[1]) - element;
371 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
373 for (
int s = 0; s < size[0]; s++) {
375 for (
int m = 0; m < size[1]; m++) {
378 for (
int f = 0; f < size[2]; f++) {
379 for (
int e = 0;
e < element;
e++)
397template <
typename FloatType>
398void fft_data_struct<FloatType>::forw_grid_comm(
400 FloatType
const *
in, FloatType *
out) {
401 for (std::size_t i = 0
ul; i <
plan.group.size(); i++) {
402 plan.pack_function(
in, send_buf.data(), &(
plan.send_block[6ul * i]),
403 &(
plan.send_block[6ul * i + 3ul]),
plan.old_mesh.data(),
406 if (
plan.group[i] != comm.rank()) {
408 recv_buf.data(),
plan.recv_size[i],
plan.group[i], comm,
411 std::swap(send_buf, recv_buf);
414 &(
plan.recv_block[6ul * i + 3ul]),
plan.new_mesh.data(),
426template <
typename FloatType>
427void fft_data_struct<FloatType>::back_grid_comm(
428 boost::mpi::communicator
const &comm,
436 for (std::size_t i = 0
ul; i <
plan_f.group.size(); i++) {
437 plan_b.pack_function(
in, send_buf.data(), &(
plan_f.recv_block[6ul * i]),
438 &(
plan_f.recv_block[6ul * i + 3ul]),
441 if (
plan_f.group[i] != comm.rank()) {
443 recv_buf.data(),
plan_f.send_size[i],
plan_f.group[i], comm,
446 std::swap(send_buf, recv_buf);
449 &(
plan_f.send_block[6ul * i + 3ul]),
469 if (
g2d[0] %
g3d[0] == 0) {
470 if (
g2d[1] %
g3d[1] == 0) {
472 }
else if (
g2d[1] %
g3d[2] == 0) {
477 }
else if (
g2d[0] %
g3d[1] == 0) {
478 if (
g2d[1] %
g3d[0] == 0) {
483 }
else if (
g2d[1] %
g3d[2] == 0) {
489 }
else if (
g2d[0] %
g3d[2] == 0) {
490 if (
g2d[1] %
g3d[0] == 0) {
495 }
else if (
g2d[1] %
g3d[1] == 0) {
509 for (
auto i =
static_cast<int>(std::sqrt(n)); i >= 1; i--) {
519template <
typename FloatType>
528 std::vector<int>
n_id[4];
529 std::vector<int>
n_pos[4];
531 auto const rank = comm.rank();
532 auto const node_pos = Utils::Mpi::cart_coords<3>(comm, rank);
536 for (
int i = 0; i < 4; i++) {
537 n_id[i].resize(1 * comm.size());
538 n_pos[i].resize(3 * comm.size());
543 for (
int i = 0; i < 3; i++) {
547 for (
int i = 0; i < comm.size(); i++) {
548 auto const n_pos_i = Utils::Mpi::cart_coords<3>(comm, i);
549 for (
int j = 0;
j < 3; ++
j) {
561 forw[0].n_permute = 0;
562 for (
int i = 1; i < 4; i++)
563 forw[i].n_permute = (forw[1].row_dir + i) % 3;
564 for (
int i = 0; i < 3; i++) {
568 forw[2].row_dir = (forw[1].row_dir - 1) % 3;
569 forw[3].row_dir = (forw[1].row_dir - 2) % 3;
573 for (
int i = 0; i < 3; i++)
576 for (
int i = 1; i < 4; i++) {
583 std::swap(
n_grid[i][(forw[i].row_dir + 1) % 3],
584 n_grid[i][(forw[i].row_dir + 2) % 3]);
592 throw std::runtime_error(
"INTERNAL ERROR: fft_find_comm_groups error");
596 forw[i].group = group.value();
598 forw[i].send_block.resize(6 * forw[i].group.size());
599 forw[i].send_size.resize(forw[i].group.size());
600 forw[i].recv_block.resize(6 * forw[i].group.size());
601 forw[i].recv_size.resize(forw[i].group.size());
603 forw[i].new_size = calc_local_mesh(
605 forw[i].new_mesh.data(), forw[i].start.data());
606 permute_ifield(forw[i].new_mesh.data(), 3, -(forw[i].n_permute));
607 permute_ifield(forw[i].start.data(), 3, -(forw[i].n_permute));
608 forw[i].n_ffts = forw[i].new_mesh[0] * forw[i].new_mesh[1];
611 for (std::size_t
j = 0
ul;
j < forw[i].group.size();
j++) {
613 int node = forw[i].group[
j];
614 forw[i].send_size[
j] = calc_send_block(
617 &(forw[i].send_block[6ul *
j]));
618 permute_ifield(&(forw[i].send_block[6ul *
j]), 3,
619 -(forw[i - 1].n_permute));
620 permute_ifield(&(forw[i].send_block[6ul *
j + 3ul]), 3,
621 -(forw[i - 1].n_permute));
622 if (forw[i].send_size[
j] > max_comm_size)
623 max_comm_size = forw[i].send_size[
j];
628 for (std::size_t k = 0
ul; k < 3ul; k++)
632 forw[i].recv_size[
j] = calc_send_block(
635 &(forw[i].recv_block[6ul *
j]));
636 permute_ifield(&(forw[i].recv_block[6ul *
j]), 3, -(forw[i].n_permute));
637 permute_ifield(&(forw[i].recv_block[6ul *
j + 3ul]), 3,
638 -(forw[i].n_permute));
639 if (forw[i].recv_size[
j] > max_comm_size)
640 max_comm_size = forw[i].recv_size[
j];
643 for (std::size_t
j = 0
ul;
j < 3ul;
j++)
644 forw[i].old_mesh[
j] = forw[i - 1].new_mesh[
j];
649 for (std::size_t
j = 0
ul;
j < forw[i].group.size();
j++) {
650 forw[i].send_size[
j] *= 2;
651 forw[i].recv_size[
j] *= 2;
659 for (
int i = 1; i < 4; i++)
660 if (2 * forw[i].new_size > max_mesh_size)
661 max_mesh_size = 2 * forw[i].new_size;
664 for (
int i = 1; i < 4; i++) {
665 forw[i].pack_function = pack_block_permute2;
668 if (forw[1].row_dir == 2) {
671 }
else if (forw[1].row_dir == 1) {
672 forw[1].pack_function = pack_block_permute1;
676 send_buf.resize(max_comm_size);
677 recv_buf.resize(max_comm_size);
678 data_buf.resize(max_mesh_size);
682 for (
int i = 1; i < 4; i++) {
685#pragma omp critical(fftw_destroy_plan_forward)
686 forw[i].destroy_plan();
691#pragma omp critical(fftw_create_plan_forward)
693 1, &forw[i].new_mesh[2], forw[i].n_ffts,
c_data,
nullptr, 1,
694 forw[i].new_mesh[2],
c_data,
nullptr, 1, forw[i].new_mesh[2],
697 assert(forw[i].plan_handle);
702 for (
int i = 1; i < 4; i++) {
705#pragma omp critical(fftw_destroy_plan_backward)
706 back[i].destroy_plan();
711#pragma omp critical(fftw_create_plan_backward)
713 1, &forw[i].new_mesh[2], forw[i].n_ffts,
c_data,
nullptr, 1,
714 forw[i].new_mesh[2],
c_data,
nullptr, 1, forw[i].new_mesh[2],
717 back[i].pack_function = pack_block_permute1;
718 assert(back[i].plan_handle);
720 if (forw[1].row_dir == 2) {
722 }
else if (forw[1].row_dir == 1) {
723 back[1].pack_function = pack_block_permute2;
728 return max_mesh_size;
731template <
typename FloatType>
733 boost::mpi::communicator
const &comm, FloatType *data) {
740 forw_grid_comm(comm, forw[1], data, data_buf.data());
743 for (
int i = 0; i < forw[1].new_size; i++) {
744 data[2 * i + 0] = data_buf[i];
745 data[2 * i + 1] = FloatType(0);
751 forw_grid_comm(comm, forw[2], data, data_buf.data());
756 forw_grid_comm(comm, forw[3], data_buf.data(), data);
763template <
typename FloatType>
765 boost::mpi::communicator
const &comm, FloatType *data) {
775 back_grid_comm(comm, forw[3], back[3], data, data_buf.data());
781 back_grid_comm(comm, forw[2], back[2], data_buf.data(), data);
787 for (
int i = 0; i < forw[1].new_size; i++) {
788 data_buf[i] = data[2 * i];
791 back_grid_comm(comm, forw[1], back[1], data_buf.data(), data);
799 plan_handle =
nullptr;
803template <
class FloatType>
808 if (n > std::numeric_limits<std::size_t>::max() /
sizeof(FloatType)) {
809 throw std::bad_array_new_length();
813 throw std::bad_alloc();
815 return static_cast<FloatType *
>(
pv);
818template <
class FloatType>
820 std::size_t)
const noexcept {
Vector implementation and trait types for boost qvm interoperability.
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
static void fft_sendrecv(T const *const sendbuf, int scount, int dest, T *const recvbuf, int rcount, int source, boost::mpi::communicator const &comm, int tag)
#define REQ_FFT_BACK
Tag for communication in back_grid_comm()
#define REQ_FFT_FORW
Tag for communication in forw_grid_comm()
Routines, row decomposition, data structures and communication for the 3D-FFT.
T product(Vector< T, N > const &v)
void permute_ifield(int *field, int size, int permute)
permute an integer array field of size size about permute positions.
int get_linear_index(int a, int b, int c, const Vector3i &adim)
void pack_block_permute2(FloatType const *const in, FloatType *const out, const int *start, const int *size, const int *dim, int element)
Pack a block with dimensions size[0] * size[1] * size[2] starting at start of an input 3D-grid with d...
void pack_block_permute1(FloatType const *const in, FloatType *const out, const int *start, const int *size, const int *dim, int element)
Pack a block with dimensions size[0] * size[1] * size[2] starting at start of an input 3D-grid with d...
int calc_send_block(const int *pos1, const int *grid1, const int *pos2, const int *grid2, const int *mesh, const double *mesh_off, int *block)
Calculate a send (or recv.) block for grid communication during a decomposition change.
int calc_local_mesh(const int *n_pos, const int *n_grid, const int *mesh, const double *mesh_off, int *loc_mesh, int *start)
Calculate the local fft mesh.
std::optional< std::vector< int > > find_comm_groups(Utils::Vector3i const &grid1, Utils::Vector3i const &grid2, std::span< int const > node_list1, std::span< int > node_list2, std::span< int > pos, std::span< int > my_pos, int rank)
This ugly function does the bookkeeping: which nodes have to communicate to each other,...
void calc_2d_grid(int n, int grid[3])
Calculate most square 2D grid.
int map_3don2d_grid(int const g3d[3], int g2d[3])
Calculate 'best' mapping between a 2D and 3D grid.
void fft_pack_block(FloatType const *const in, FloatType *const out, int const *start, int const *size, int const *dim, int element)
Pack a 3D-block of size size starting at start of an input 3D-grid in with dimension dim into an outp...
void fft_unpack_block(FloatType const *const in, FloatType *const out, int const *start, int const *size, int const *dim, int element)
Unpack a 3D-block in of size size into an output 3D-grid out of size dim starting at position start.
Aligned allocator for FFT data.
FloatType * allocate(std::size_t n) const
void deallocate(FloatType *p, std::size_t) const noexcept
Plan for a backward 1D FFT of a flattened 3D array.
Information about the three one dimensional FFTs and how the nodes have to communicate inbetween.
void forward_fft(boost::mpi::communicator const &comm, FloatType *data)
Perform an in-place forward 3D FFT.
void backward_fft(boost::mpi::communicator const &comm, FloatType *data)
Perform an in-place backward 3D FFT.
int initialize_fft(boost::mpi::communicator const &comm, Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, Utils::Vector3i const &global_mesh_dim, Utils::Vector3d const &global_mesh_off, int &ks_pnum, Utils::Vector3i const &grid)
Initialize everything connected to the 3D-FFT.
Plan for a forward 1D FFT of a flattened 3D array.
static auto constexpr malloc
static auto constexpr free
static auto constexpr execute_dft
static auto constexpr destroy_plan
static auto constexpr plan_many_dft