38#include <boost/mpi/communicator.hpp>
39#include <boost/serialization/vector.hpp>
59#define REQ_FFT_FORW 301
61#define REQ_FFT_BACK 302
67 boost::mpi::communicator
const &comm,
int tag) {
68 auto const type = boost::mpi::get_mpi_datatype<T>(*
sendbuf);
76template <
typename FloatType =
double>
struct fftw {
116std::optional<std::vector<int>>
119 std::span<int> pos, std::span<int>
my_pos,
int rank) {
142 for (i = 0; i < 3; i++) {
159 std::vector<int> group(
g_size);
163 for (
gi[2] = 0;
gi[2] <
ds[2];
gi[2]++)
164 for (
gi[1] = 0;
gi[1] <
ds[1];
gi[1]++)
165 for (
gi[0] = 0;
gi[0] <
ds[0];
gi[0]++) {
167 for (i = 0; i <
g_size; i++) {
168 p1[0] = (
gi[0] *
s1[0]) + (i %
s1[0]);
169 p1[1] = (
gi[1] *
s1[1]) + ((i /
s1[0]) %
s1[1]);
170 p1[2] = (
gi[2] *
s1[2]) + (i / (
s1[0] *
s1[1]));
172 p2[0] = (
gi[0] *
s2[0]) + (i %
s2[0]);
173 p2[1] = (
gi[1] *
s2[1]) + ((i /
s2[0]) %
s2[1]);
174 p2[2] = (
gi[2] *
s2[2]) + (i / (
s2[0] *
s2[1]));
179 pos[3 * n + 0] =
p2[0];
180 pos[3 * n + 1] =
p2[1];
181 pos[3 * n + 2] =
p2[2];
200 for (i =
g_size - 1; i > 0; i--)
201 group[i] = group[i - 1];
223 const double *mesh_off,
int *
loc_mesh,
int *start) {
224 int last[3], size = 1;
226 for (
int i = 0; i < 3; i++) {
227 auto const ai = mesh[i] /
static_cast<double>(
n_grid[i]);
228 start[i] =
static_cast<int>(
ceil(ai *
n_pos[i] - mesh_off[i]));
229 last[i] =
static_cast<int>(
floor(ai * (
n_pos[i] + 1) - mesh_off[i]));
231 if (ai * (
n_pos[i] + 1) - mesh_off[i] -
last[i] < 1.0e-15)
233 if (1.0 + ai *
n_pos[i] - mesh_off[i] - start[i] < 1.0e-15)
267 const int *
grid2,
const int *mesh,
const double *mesh_off,
276 for (
int i = 0; i < 3; i++) {
281 size *=
block[i + 3];
307template <
typename FloatType>
309 const int *start,
const int *size,
const int *dim,
313 auto const m_in_offset = element * (dim[2] - size[2]);
314 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
316 auto const m_out_offset = (element * size[0]) - element;
318 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
320 for (
int s = 0; s < size[0]; s++) {
323 for (
int m = 0; m < size[1]; m++) {
324 for (
int f = 0; f < size[2]; f++) {
325 for (
int e = 0;
e < element;
e++)
356template <
typename FloatType>
358 const int *start,
const int *size,
const int *dim,
362 auto const m_in_offset = element * (dim[2] - size[2]);
363 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
365 auto const s_out_offset = (element * size[0] * size[1]) - element;
367 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
369 for (
int s = 0; s < size[0]; s++) {
371 for (
int m = 0; m < size[1]; m++) {
374 for (
int f = 0; f < size[2]; f++) {
375 for (
int e = 0;
e < element;
e++)
393template <
typename FloatType>
394void fft_data_struct<FloatType>::forw_grid_comm(
396 FloatType
const *
in, FloatType *
out) {
397 for (std::size_t i = 0
ul; i <
plan.group.size(); i++) {
398 plan.pack_function(
in, send_buf.data(), &(
plan.send_block[6ul * i]),
399 &(
plan.send_block[6ul * i + 3ul]),
plan.old_mesh.data(),
402 if (
plan.group[i] != comm.rank()) {
404 recv_buf.data(),
plan.recv_size[i],
plan.group[i], comm,
407 std::swap(send_buf, recv_buf);
410 &(
plan.recv_block[6ul * i + 3ul]),
plan.new_mesh.data(),
422template <
typename FloatType>
423void fft_data_struct<FloatType>::back_grid_comm(
424 boost::mpi::communicator
const &comm,
432 for (std::size_t i = 0
ul; i <
plan_f.group.size(); i++) {
433 plan_b.pack_function(
in, send_buf.data(), &(
plan_f.recv_block[6ul * i]),
434 &(
plan_f.recv_block[6ul * i + 3ul]),
437 if (
plan_f.group[i] != comm.rank()) {
439 recv_buf.data(),
plan_f.send_size[i],
plan_f.group[i], comm,
442 std::swap(send_buf, recv_buf);
445 &(
plan_f.send_block[6ul * i + 3ul]),
465 if (
g2d[0] %
g3d[0] == 0) {
466 if (
g2d[1] %
g3d[1] == 0) {
468 }
else if (
g2d[1] %
g3d[2] == 0) {
473 }
else if (
g2d[0] %
g3d[1] == 0) {
474 if (
g2d[1] %
g3d[0] == 0) {
479 }
else if (
g2d[1] %
g3d[2] == 0) {
485 }
else if (
g2d[0] %
g3d[2] == 0) {
486 if (
g2d[1] %
g3d[0] == 0) {
491 }
else if (
g2d[1] %
g3d[1] == 0) {
505 for (
auto i =
static_cast<int>(std::sqrt(n)); i >= 1; i--) {
515template <
typename FloatType>
524 std::vector<int>
n_id[4];
525 std::vector<int>
n_pos[4];
527 auto const rank = comm.rank();
528 auto const node_pos = Utils::Mpi::cart_coords<3>(comm, rank);
532 for (
int i = 0; i < 4; i++) {
533 n_id[i].resize(1 * comm.size());
534 n_pos[i].resize(3 * comm.size());
539 for (
int i = 0; i < 3; i++) {
543 for (
int i = 0; i < comm.size(); i++) {
544 auto const n_pos_i = Utils::Mpi::cart_coords<3>(comm, i);
545 for (
int j = 0;
j < 3; ++
j) {
557 forw[0].n_permute = 0;
558 for (
int i = 1; i < 4; i++)
559 forw[i].n_permute = (forw[1].row_dir + i) % 3;
560 for (
int i = 0; i < 3; i++) {
564 forw[2].row_dir = (forw[1].row_dir - 1) % 3;
565 forw[3].row_dir = (forw[1].row_dir - 2) % 3;
569 for (
int i = 0; i < 3; i++)
572 for (
int i = 1; i < 4; i++) {
579 std::swap(
n_grid[i][(forw[i].row_dir + 1) % 3],
580 n_grid[i][(forw[i].row_dir + 2) % 3]);
588 throw std::runtime_error(
"INTERNAL ERROR: fft_find_comm_groups error");
592 forw[i].group = group.value();
594 forw[i].send_block.resize(6 * forw[i].group.size());
595 forw[i].send_size.resize(forw[i].group.size());
596 forw[i].recv_block.resize(6 * forw[i].group.size());
597 forw[i].recv_size.resize(forw[i].group.size());
599 forw[i].new_size = calc_local_mesh(
601 forw[i].new_mesh.data(), forw[i].start.data());
602 permute_ifield(forw[i].new_mesh.data(), 3, -(forw[i].n_permute));
603 permute_ifield(forw[i].start.data(), 3, -(forw[i].n_permute));
604 forw[i].n_ffts = forw[i].new_mesh[0] * forw[i].new_mesh[1];
607 for (std::size_t
j = 0
ul;
j < forw[i].group.size();
j++) {
609 int node = forw[i].group[
j];
610 forw[i].send_size[
j] = calc_send_block(
613 &(forw[i].send_block[6ul *
j]));
614 permute_ifield(&(forw[i].send_block[6ul *
j]), 3,
615 -(forw[i - 1].n_permute));
616 permute_ifield(&(forw[i].send_block[6ul *
j + 3ul]), 3,
617 -(forw[i - 1].n_permute));
618 if (forw[i].send_size[
j] > max_comm_size)
619 max_comm_size = forw[i].send_size[
j];
624 for (std::size_t k = 0
ul; k < 3ul; k++)
628 forw[i].recv_size[
j] = calc_send_block(
631 &(forw[i].recv_block[6ul *
j]));
632 permute_ifield(&(forw[i].recv_block[6ul *
j]), 3, -(forw[i].n_permute));
633 permute_ifield(&(forw[i].recv_block[6ul *
j + 3ul]), 3,
634 -(forw[i].n_permute));
635 if (forw[i].recv_size[
j] > max_comm_size)
636 max_comm_size = forw[i].recv_size[
j];
639 for (std::size_t
j = 0
ul;
j < 3ul;
j++)
640 forw[i].old_mesh[
j] = forw[i - 1].new_mesh[
j];
645 for (std::size_t
j = 0
ul;
j < forw[i].group.size();
j++) {
646 forw[i].send_size[
j] *= 2;
647 forw[i].recv_size[
j] *= 2;
655 for (
int i = 1; i < 4; i++)
656 if (2 * forw[i].new_size > max_mesh_size)
657 max_mesh_size = 2 * forw[i].new_size;
660 for (
int i = 1; i < 4; i++) {
661 forw[i].pack_function = pack_block_permute2;
664 if (forw[1].row_dir == 2) {
667 }
else if (forw[1].row_dir == 1) {
668 forw[1].pack_function = pack_block_permute1;
672 send_buf.resize(max_comm_size);
673 recv_buf.resize(max_comm_size);
674 data_buf.resize(max_mesh_size);
678 for (
int i = 1; i < 4; i++) {
680 forw[i].destroy_plan();
684 1, &forw[i].new_mesh[2], forw[i].n_ffts,
c_data,
nullptr, 1,
685 forw[i].new_mesh[2],
c_data,
nullptr, 1, forw[i].new_mesh[2],
687 assert(forw[i].plan_handle);
692 for (
int i = 1; i < 4; i++) {
694 back[i].destroy_plan();
698 1, &forw[i].new_mesh[2], forw[i].n_ffts,
c_data,
nullptr, 1,
699 forw[i].new_mesh[2],
c_data,
nullptr, 1, forw[i].new_mesh[2],
701 back[i].pack_function = pack_block_permute1;
702 assert(back[i].plan_handle);
704 if (forw[1].row_dir == 2) {
706 }
else if (forw[1].row_dir == 1) {
707 back[1].pack_function = pack_block_permute2;
712 return max_mesh_size;
715template <
typename FloatType>
717 boost::mpi::communicator
const &comm, FloatType *data) {
724 forw_grid_comm(comm, forw[1], data, data_buf.data());
727 for (
int i = 0; i < forw[1].new_size; i++) {
728 data[2 * i + 0] = data_buf[i];
729 data[2 * i + 1] = FloatType(0);
735 forw_grid_comm(comm, forw[2], data, data_buf.data());
740 forw_grid_comm(comm, forw[3], data_buf.data(), data);
747template <
typename FloatType>
749 boost::mpi::communicator
const &comm, FloatType *data,
bool check_complex) {
759 back_grid_comm(comm, forw[3], back[3], data, data_buf.data());
765 back_grid_comm(comm, forw[2], back[2], data_buf.data(), data);
771 for (
int i = 0; i < forw[1].new_size; i++) {
772 data_buf[i] = data[2 * i];
775 printf(
"Complex value is not zero (i=%d,data=%g)!!!\n", i,
778 throw std::runtime_error(
"Complex value is not zero");
782 back_grid_comm(comm, forw[1], back[1], data_buf.data(), data);
790 plan_handle =
nullptr;
794template <
class FloatType>
799 if (n > std::numeric_limits<std::size_t>::max() /
sizeof(FloatType)) {
800 throw std::bad_array_new_length();
804 throw std::bad_alloc();
806 return static_cast<FloatType *
>(
pv);
809template <
class FloatType>
811 std::size_t)
const noexcept {
Vector implementation and trait types for boost qvm interoperability.
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
static double * block(double *p, std::size_t index, std::size_t size)
static void fft_sendrecv(T const *const sendbuf, int scount, int dest, T *const recvbuf, int rcount, int source, boost::mpi::communicator const &comm, int tag)
#define REQ_FFT_BACK
Tag for communication in back_grid_comm()
#define REQ_FFT_FORW
Tag for communication in forw_grid_comm()
Routines, row decomposition, data structures and communication for the 3D-FFT.
T product(Vector< T, N > const &v)
void permute_ifield(int *field, int size, int permute)
permute an integer array field of size size about permute positions.
int get_linear_index(int a, int b, int c, const Vector3i &adim, MemoryOrder memory_order=MemoryOrder::COLUMN_MAJOR)
get the linear index from the position (a,b,c) in a 3D grid of dimensions adim.
void pack_block_permute2(FloatType const *const in, FloatType *const out, const int *start, const int *size, const int *dim, int element)
Pack a block with dimensions size[0] * size[1] * size[2] starting at start of an input 3D-grid with d...
void pack_block_permute1(FloatType const *const in, FloatType *const out, const int *start, const int *size, const int *dim, int element)
Pack a block with dimensions size[0] * size[1] * size[2] starting at start of an input 3D-grid with d...
int calc_send_block(const int *pos1, const int *grid1, const int *pos2, const int *grid2, const int *mesh, const double *mesh_off, int *block)
Calculate a send (or recv.) block for grid communication during a decomposition change.
int calc_local_mesh(const int *n_pos, const int *n_grid, const int *mesh, const double *mesh_off, int *loc_mesh, int *start)
Calculate the local fft mesh.
std::optional< std::vector< int > > find_comm_groups(Utils::Vector3i const &grid1, Utils::Vector3i const &grid2, std::span< int const > node_list1, std::span< int > node_list2, std::span< int > pos, std::span< int > my_pos, int rank)
This ugly function does the bookkeeping: which nodes have to communicate to each other,...
void calc_2d_grid(int n, int grid[3])
Calculate most square 2D grid.
int map_3don2d_grid(int const g3d[3], int g2d[3])
Calculate 'best' mapping between a 2D and 3D grid.
void fft_pack_block(FloatType const *const in, FloatType *const out, int const *start, int const *size, int const *dim, int element)
Pack a 3D-block of size size starting at start of an input 3D-grid in with dimension dim into an outp...
void fft_unpack_block(FloatType const *const in, FloatType *const out, int const *start, int const *size, int const *dim, int element)
Unpack a 3D-block in of size size into an output 3D-grid out of size dim starting at position start.
Aligned allocator for FFT data.
FloatType * allocate(std::size_t n) const
void deallocate(FloatType *p, std::size_t) const noexcept
Plan for a backward 1D FFT of a flattened 3D array.
Information about the three one dimensional FFTs and how the nodes have to communicate inbetween.
void forward_fft(boost::mpi::communicator const &comm, FloatType *data)
Perform an in-place forward 3D FFT.
void backward_fft(boost::mpi::communicator const &comm, FloatType *data, bool check_complex)
Perform an in-place backward 3D FFT.
int initialize_fft(boost::mpi::communicator const &comm, Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, Utils::Vector3i const &global_mesh_dim, Utils::Vector3d const &global_mesh_off, int &ks_pnum, Utils::Vector3i const &grid)
Initialize everything connected to the 3D-FFT.
Plan for a forward 1D FFT of a flattened 3D array.
static auto constexpr malloc
static auto constexpr free
static auto constexpr execute_dft
static auto constexpr destroy_plan
static auto constexpr plan_many_dft