30#if defined(P3M) || defined(DP3M)
39#include <boost/none.hpp>
40#include <boost/optional.hpp>
59#define REQ_FFT_FORW 301
61#define REQ_FFT_BACK 302
88boost::optional<std::vector<int>>
92 boost::mpi::communicator
const &comm) {
113 if ((grid1[0] * grid1[1] * grid1[2]) != (grid2[0] * grid2[1] * grid2[2]))
115 for (i = 0; i < 3; i++) {
116 s1[i] = grid1[i] / grid2[i];
119 else if (grid1[i] != grid2[i] * s1[i])
122 s2[i] = grid2[i] / grid1[i];
125 else if (grid2[i] != grid1[i] * s2[i])
128 ds[i] = grid2[i] / s2[i];
132 std::vector<int> group(g_size);
136 for (gi[2] = 0; gi[2] < ds[2]; gi[2]++)
137 for (gi[1] = 0; gi[1] < ds[1]; gi[1]++)
138 for (gi[0] = 0; gi[0] < ds[0]; gi[0]++) {
140 for (i = 0; i < g_size; i++) {
141 p1[0] = (gi[0] * s1[0]) + (i % s1[0]);
142 p1[1] = (gi[1] * s1[1]) + ((i / s1[0]) % s1[1]);
143 p1[2] = (gi[2] * s1[2]) + (i / (s1[0] * s1[1]));
145 p2[0] = (gi[0] * s2[0]) + (i % s2[0]);
146 p2[1] = (gi[1] * s2[1]) + ((i / s2[0]) % s2[1]);
147 p2[2] = (gi[2] * s2[2]) + (i / (s2[0] * s2[1]));
149 n = node_list1[get_linear_index(p1[0], p1[1], p1[2], grid1)];
150 node_list2[get_linear_index(p2[0], p2[1], p2[2], grid2)] = n;
152 pos[3 * n + 0] = p2[0];
153 pos[3 * n + 1] = p2[1];
154 pos[3 * n + 2] = p2[2];
157 if (n == comm.rank() && my_group == 0) {
172 n = group[g_size - 1];
173 for (i = g_size - 1; i > 0; i--)
174 group[i] = group[i - 1];
195 const double *mesh_off,
int *loc_mesh,
int *start) {
196 int last[3], size = 1;
198 for (
int i = 0; i < 3; i++) {
199 auto const ai = mesh[i] /
static_cast<double>(n_grid[i]);
200 start[i] =
static_cast<int>(ceil(ai * n_pos[i] - mesh_off[i]));
201 last[i] =
static_cast<int>(floor(ai * (n_pos[i] + 1) - mesh_off[i]));
203 if (ai * (n_pos[i] + 1) - mesh_off[i] - last[i] < 1.0e-15)
205 if (1.0 + ai * n_pos[i] - mesh_off[i] - start[i] < 1.0e-15)
207 loc_mesh[i] = last[i] - start[i] + 1;
239 const int *grid2,
const int *mesh,
const double *mesh_off,
242 int mesh1[3], first1[3], last1[3];
243 int mesh2[3], first2[3], last2[3];
248 for (
int i = 0; i < 3; i++) {
249 last1[i] = first1[i] + mesh1[i] - 1;
250 last2[i] = first2[i] + mesh2[i] - 1;
251 block[i] = std::max(first1[i], first2[i]) - first1[i];
252 block[i + 3] = (std::min(last1[i], last2[i]) - first1[i]) -
block[i] + 1;
253 size *=
block[i + 3];
280 const int *start,
const int *size,
const int *dim,
284 auto const m_in_offset = element * (dim[2] - size[2]);
285 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
287 auto const m_out_offset = (element * size[0]) - element;
289 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
291 for (
int s = 0; s < size[0]; s++) {
293 int li_out = element * s;
294 for (
int m = 0; m < size[1]; m++) {
295 for (
int f = 0;
f < size[2];
f++) {
296 for (
int e = 0; e < element; e++)
297 out[li_out++] = in[li_in++];
298 li_out += m_out_offset;
300 li_in += m_in_offset;
302 li_in += s_in_offset;
328 const int *start,
const int *size,
const int *dim,
332 auto const m_in_offset = element * (dim[2] - size[2]);
333 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
335 auto const s_out_offset = (element * size[0] * size[1]) - element;
337 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
339 for (
int s = 0; s < size[0]; s++) {
340 auto const m_out_start = element * (s * size[1]);
341 for (
int m = 0; m < size[1]; m++) {
343 int li_out = m_out_start + element * m;
344 for (
int f = 0;
f < size[2];
f++) {
345 for (
int e = 0; e < element; e++)
346 out[li_out++] = in[li_in++];
347 li_out += s_out_offset;
349 li_in += m_in_offset;
351 li_in += s_in_offset;
364 const boost::mpi::communicator &comm) {
365 for (std::size_t i = 0ul; i < plan.
group.size(); i++) {
370 if (plan.
group[i] != comm.rank()) {
374 comm, MPI_STATUS_IGNORE);
394 const boost::mpi::communicator &comm) {
399 for (std::size_t i = 0ul; i < plan_f.
group.size(); i++) {
404 if (plan_f.
group[i] != comm.rank()) {
433 if (g2d[0] % g3d[0] == 0) {
434 if (g2d[1] % g3d[1] == 0) {
436 }
else if (g2d[1] % g3d[2] == 0) {
441 }
else if (g2d[0] % g3d[1] == 0) {
442 if (g2d[1] % g3d[0] == 0) {
444 int const tmp = g2d[0];
447 }
else if (g2d[1] % g3d[2] == 0) {
453 }
else if (g2d[0] % g3d[2] == 0) {
454 if (g2d[1] % g3d[0] == 0) {
459 }
else if (g2d[1] % g3d[1] == 0) {
473 for (
auto i =
static_cast<int>(std::sqrt(n)); i >= 1; i--) {
488 boost::mpi::communicator
const &comm) {
492 std::vector<int> n_id[4];
493 std::vector<int> n_pos[4];
496 MPI_Cart_coords(comm, comm.rank(), 3, node_pos);
500 for (
int i = 0; i < 4; i++) {
501 n_id[i].resize(1 * comm.size());
502 n_pos[i].resize(3 * comm.size());
507 for (
int i = 0; i < 3; i++) {
508 n_grid[0][i] = grid[i];
509 my_pos[0][i] = node_pos[i];
511 for (
int i = 0; i < comm.size(); i++) {
512 MPI_Cart_coords(comm, i, 3, &(n_pos[0][3 * i + 0]));
513 auto const lin_ind = get_linear_index(
514 n_pos[0][3 * i + 0], n_pos[0][3 * i + 1], n_pos[0][3 * i + 2],
515 {n_grid[0][0], n_grid[0][1], n_grid[0][2]});
516 n_id[0][lin_ind] = i;
524 for (
int i = 1; i < 4; i++)
526 for (
int i = 0; i < 3; i++) {
527 n_grid[2][i] = n_grid[1][(i + 1) % 3];
528 n_grid[3][i] = n_grid[1][(i + 2) % 3];
535 for (
int i = 0; i < 3; i++)
538 for (
int i = 1; i < 4; i++) {
541 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
542 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1],
543 make_span(n_id[i]), make_span(n_pos[i]), my_pos[i], comm);
546 std::swap(n_grid[i][(fft.
plan[i].
row_dir + 1) % 3],
550 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
551 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, make_span(n_id[i - 1]),
552 make_span(n_id[i]), make_span(n_pos[i]), my_pos[i], comm);
555 throw std::runtime_error(
"INTERNAL ERROR: fft_find_comm_groups error");
567 my_pos[i], n_grid[i], global_mesh_dim.
data(), global_mesh_off.
data(),
574 for (std::size_t j = 0ul; j < fft.
plan[i].
group.size(); j++) {
578 my_pos[i - 1], n_grid[i - 1], &(n_pos[i][3 *
node]), n_grid[i],
579 global_mesh_dim.
data(), global_mesh_off.
data(),
591 for (std::size_t k = 0ul; k < 3ul; k++)
596 my_pos[i], n_grid[i], &(n_pos[i - 1][3 *
node]), n_grid[i - 1],
597 global_mesh_dim.
data(), global_mesh_off.
data(),
607 for (std::size_t j = 0ul; j < 3ul; j++)
613 for (std::size_t j = 0ul; j < fft.
plan[i].
group.size(); j++) {
623 for (
int i = 1; i < 4; i++)
628 for (
int i = 1; i < 4; i++) {
643 auto *c_data = (fftw_complex *)(fft.
data_buf.data());
646 for (
int i = 1; i < 4; i++) {
647 fft.
plan[i].
dir = FFTW_FORWARD;
655 fft.
plan[i].
dir, FFTW_PATIENT);
660 for (
int i = 1; i < 4; i++) {
661 fft.
back[i].
dir = FFTW_BACKWARD;
668 fft.
back[i].
dir, FFTW_PATIENT);
684 const boost::mpi::communicator &comm) {
687 auto *c_data = (fftw_complex *)data;
688 auto *c_data_buf = (fftw_complex *)fft.
data_buf.data();
715 const boost::mpi::communicator &comm) {
717 auto *c_data = (fftw_complex *)data;
718 auto *c_data_buf = (fftw_complex *)fft.
data_buf.data();
742 if (check_complex and std::abs(data[2 * i + 1]) > 1e-5) {
743 printf(
"Complex value is not zero (i=%d,data=%g)!!!\n", i,
746 throw std::runtime_error(
"Complex value is not zero");
757 int const start[3],
int const size[3],
int const dim[3],
760 auto const copy_size = element * size[2] *
static_cast<int>(
sizeof(double));
762 auto const m_in_offset = element * dim[2];
763 auto const s_in_offset = element * (dim[2] * (dim[1] - size[1]));
765 auto const m_out_offset = element * size[2];
767 int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
770 for (
int s = 0; s < size[0]; s++) {
771 for (
int m = 0; m < size[1]; m++) {
772 memmove(&(out[li_out]), &(in[li_in]), copy_size);
773 li_in += m_in_offset;
774 li_out += m_out_offset;
776 li_in += s_in_offset;
781 int const start[3],
int const size[3],
int const dim[3],
784 auto const copy_size = element * size[2] *
static_cast<int>(
sizeof(double));
786 auto const m_out_offset = element * dim[2];
787 auto const s_out_offset = element * (dim[2] * (dim[1] - size[1]));
789 auto const m_in_offset = element * size[2];
792 int li_out = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0]));
794 for (
int s = 0; s < size[0]; s++) {
795 for (
int m = 0; m < size[1]; m++) {
796 memmove(&(out[li_out]), &(in[li_in]), copy_size);
797 li_in += m_in_offset;
798 li_out += m_out_offset;
800 li_out += s_out_offset;
Vector implementation and trait types for boost qvm interoperability.
__shared__ int pos[MAXDEPTH *THREADS5/WARPSIZE]
__shared__ int node[MAXDEPTH *THREADS5/WARPSIZE]
A stripped-down version of std::span from C++17.
This file contains the defaults for ESPResSo.
static double * block(double *p, std::size_t index, std::size_t size)
void fft_perform_forw(double *data, fft_data_struct &fft, const boost::mpi::communicator &comm)
Perform an in-place forward 3D FFT.
void fft_perform_back(double *data, bool check_complex, fft_data_struct &fft, const boost::mpi::communicator &comm)
Perform an in-place backward 3D FFT.
int fft_init(Utils::Vector3i const &ca_mesh_dim, int const *ca_mesh_margin, Utils::Vector3i const &global_mesh_dim, Utils::Vector3d const &global_mesh_off, int &ks_pnum, fft_data_struct &fft, Utils::Vector3i const &grid, boost::mpi::communicator const &comm)
Initialize everything connected to the 3D-FFT.
#define REQ_FFT_BACK
Tag for communication in back_grid_comm()
void fft_unpack_block(double const *const in, double *const out, int const start[3], int const size[3], int const dim[3], int element)
Unpack a 3d-grid input block (size[3]) into an output 3d-grid with dimension dim[3] at start position...
#define REQ_FFT_FORW
Tag for communication in forw_grid_comm()
void fft_pack_block(double const *const in, double *const out, int const start[3], int const size[3], int const dim[3], int element)
Pack a block (size[3] starting at start[3]) of an input 3d-grid with dimension dim[3] into an output ...
Routines, row decomposition, data structures and communication for the 3D-FFT.
T product(Vector< T, N > const &v)
void permute_ifield(int *field, int size, int permute)
permute an integer array field of size size about permute positions.
int get_linear_index(int a, int b, int c, const Vector3i &adim, MemoryOrder memory_order=MemoryOrder::COLUMN_MAJOR)
get the linear index from the position (a,b,c) in a 3D grid of dimensions adim.
DEVICE_QUALIFIER constexpr Span< T > make_span(T *p, std::size_t N)
void back_grid_comm(fft_forw_plan plan_f, fft_back_plan plan_b, const double *in, double *out, fft_data_struct &fft, const boost::mpi::communicator &comm)
Communicate the grid data according to the given backward FFT plan.
int map_3don2d_grid(int const g3d[3], int g2d[3])
Calculate 'best' mapping between a 2D and 3D grid.
void calc_2d_grid(int n, int grid[3])
Calculate most square 2D grid.
void pack_block_permute2(double const *const in, double *const out, const int *start, const int *size, const int *dim, int element)
Pack a block with dimensions size[0] * size[1] * size[2] starting at start of an input 3D-grid with d...
int calc_local_mesh(const int *n_pos, const int *n_grid, const int *mesh, const double *mesh_off, int *loc_mesh, int *start)
Calculate the local fft mesh.
int calc_send_block(const int *pos1, const int *grid1, const int *pos2, const int *grid2, const int *mesh, const double *mesh_off, int *block)
Calculate a send (or recv.) block for grid communication during a decomposition change.
boost::optional< std::vector< int > > find_comm_groups(Utils::Vector3i const &grid1, Utils::Vector3i const &grid2, Utils::Span< const int > node_list1, Utils::Span< int > node_list2, Utils::Span< int > pos, Utils::Span< int > my_pos, boost::mpi::communicator const &comm)
This ugly function does the bookkeeping: which nodes have to communicate to each other,...
void forw_grid_comm(fft_forw_plan plan, const double *in, double *out, fft_data_struct &fft, const boost::mpi::communicator &comm)
Communicate the grid data according to the given forward FFT plan.
void pack_block_permute1(double const *const in, double *const out, const int *start, const int *size, const int *dim, int element)
Pack a block with dimensions size[0] * size[1] * size[2] starting at start of an input 3D-grid with d...
DEVICE_QUALIFIER constexpr pointer data() noexcept
Additional information for backwards FFT.
fftw_plan our_fftw_plan
plan for fft.
void(* pack_function)(double const *const, double *const, int const *, int const *, int const *, int)
packing function for send blocks.
Information about the three one dimensional FFTs and how the nodes have to communicate inbetween.
int max_comm_size
Maximal size of the communication buffers.
std::vector< double > recv_buf
receive buffer.
fft_vector< double > data_buf
Buffer for receive data.
int max_mesh_size
Maximal local mesh size.
fft_back_plan back[4]
Information for backward FFTs.
std::vector< double > send_buf
send buffer.
fft_forw_plan plan[4]
Information for forward FFTs.
bool init_tag
Whether FFT is initialized or not.
Structure for performing a 1D FFT.
std::vector< int > send_size
Send block communication sizes.
void(* pack_function)(double const *const, double *const, int const *, int const *, int const *, int)
packing function for send blocks.
int row_dir
row direction of that FFT.
int element
size of send block elements.
int start[3]
lower left point of local FFT mesh in global FFT mesh coordinates.
int n_ffts
number of 1D FFTs.
fftw_plan our_fftw_plan
plan for fft.
int old_mesh[3]
size of local mesh before communication.
int new_mesh[3]
size of local mesh after communication, also used for actual FFT.
int dir
plan direction: 0 = Forward FFT, 1 = Backward FFT.
int n_permute
permutations from normal coordinate system.
int new_size
size of new mesh (number of mesh points).
std::vector< int > recv_size
Recv block communication sizes.
std::vector< int > send_block
Send block specification.
std::vector< int > recv_block
Recv block specification.
std::vector< int > group
group of nodes which have to communicate with each other.