118 std::span<int const> node_list1, std::span<int> node_list2,
119 std::span<int> pos, std::span<int> my_pos,
int rank) {
142 for (i = 0; i < 3; i++) {
143 s1[i] = grid1[i] / grid2[i];
146 else if (grid1[i] != grid2[i] * s1[i])
149 s2[i] = grid2[i] / grid1[i];
152 else if (grid2[i] != grid1[i] * s2[i])
155 ds[i] = grid2[i] / s2[i];
159 std::vector<int> group(g_size);
163 for (gi[2] = 0; gi[2] < ds[2]; gi[2]++)
164 for (gi[1] = 0; gi[1] < ds[1]; gi[1]++)
165 for (gi[0] = 0; gi[0] < ds[0]; gi[0]++) {
167 for (i = 0; i < g_size; i++) {
168 p1[0] = (gi[0] * s1[0]) + (i % s1[0]);
169 p1[1] = (gi[1] * s1[1]) + ((i / s1[0]) % s1[1]);
170 p1[2] = (gi[2] * s1[2]) + (i / (s1[0] * s1[1]));
172 p2[0] = (gi[0] * s2[0]) + (i % s2[0]);
173 p2[1] = (gi[1] * s2[1]) + ((i / s2[0]) % s2[1]);
174 p2[2] = (gi[2] * s2[2]) + (i / (s2[0] * s2[1]));
179 pos[3 * n + 0] = p2[0];
180 pos[3 * n + 1] = p2[1];
181 pos[3 * n + 2] = p2[2];
184 if (n == rank && my_group == 0) {
199 n = group[g_size - 1];
200 for (i = g_size - 1; i > 0; i--)
201 group[i] = group[i - 1];
517 boost::mpi::communicator
const &comm,
Utils::Vector3i const &ca_mesh_dim,
524 std::vector<int> n_id[4];
525 std::vector<int> n_pos[4];
527 auto const rank = comm.rank();
528 auto const node_pos = Utils::Mpi::cart_coords<3>(comm, rank);
532 for (
int i = 0; i < 4; i++) {
533 n_id[i].resize(1 * comm.size());
534 n_pos[i].resize(3 * comm.size());
539 for (
int i = 0; i < 3; i++) {
540 n_grid[0][i] = grid[i];
541 my_pos[0][i] = node_pos[i];
543 for (
int i = 0; i < comm.size(); i++) {
544 auto const n_pos_i = Utils::Mpi::cart_coords<3>(comm, i);
545 for (
int j = 0; j < 3; ++j) {
546 n_pos[0][3 * i + j] = n_pos_i[j];
549 n_pos_i, {n_grid[0][0], n_grid[0][1], n_grid[0][2]});
550 n_id[0][lin_ind] = i;
557 forw[0].n_permute = 0;
558 for (
int i = 1; i < 4; i++)
559 forw[i].n_permute = (forw[1].row_dir + i) % 3;
560 for (
int i = 0; i < 3; i++) {
561 n_grid[2][i] = n_grid[1][(i + 1) % 3];
562 n_grid[3][i] = n_grid[1][(i + 2) % 3];
564 forw[2].row_dir = (forw[1].row_dir - 1) % 3;
565 forw[3].row_dir = (forw[1].row_dir - 2) % 3;
569 for (
int i = 0; i < 3; i++)
570 forw[0].new_mesh[i] = ca_mesh_dim[i];
572 for (
int i = 1; i < 4; i++) {
574 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
575 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1],
576 std::span(n_id[i]), std::span(n_pos[i]), my_pos[i], rank);
579 std::swap(n_grid[i][(forw[i].row_dir + 1) % 3],
580 n_grid[i][(forw[i].row_dir + 2) % 3]);
583 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
584 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, std::span(n_id[i - 1]),
585 std::span(n_id[i]), std::span(n_pos[i]), my_pos[i], rank);
588 throw std::runtime_error(
"INTERNAL ERROR: fft_find_comm_groups error");
592 forw[i].group = group.value();
594 forw[i].send_block.resize(6 * forw[i].group.size());
595 forw[i].send_size.resize(forw[i].group.size());
596 forw[i].recv_block.resize(6 * forw[i].group.size());
597 forw[i].recv_size.resize(forw[i].group.size());
599 forw[i].new_size = calc_local_mesh(
600 my_pos[i], n_grid[i], global_mesh_dim.
data(), global_mesh_off.
data(),
601 forw[i].new_mesh.data(), forw[i].start.data());
602 permute_ifield(forw[i].new_mesh.data(), 3, -(forw[i].n_permute));
603 permute_ifield(forw[i].start.data(), 3, -(forw[i].n_permute));
604 forw[i].n_ffts = forw[i].new_mesh[0] * forw[i].new_mesh[1];
607 for (std::size_t j = 0ul; j < forw[i].group.size(); j++) {
609 int node = forw[i].group[j];
610 forw[i].send_size[j] = calc_send_block(
611 my_pos[i - 1], n_grid[i - 1], &(n_pos[i][3 * node]), n_grid[i],
612 global_mesh_dim.
data(), global_mesh_off.
data(),
613 &(forw[i].send_block[6ul * j]));
614 permute_ifield(&(forw[i].send_block[6ul * j]), 3,
615 -(forw[i - 1].n_permute));
616 permute_ifield(&(forw[i].send_block[6ul * j + 3ul]), 3,
617 -(forw[i - 1].n_permute));
618 if (forw[i].send_size[j] > max_comm_size)
619 max_comm_size = forw[i].send_size[j];
624 for (std::size_t k = 0ul; k < 3ul; k++)
625 forw[1].send_block[6ul * j + k] += ca_mesh_margin[2ul * k];
628 forw[i].recv_size[j] = calc_send_block(
629 my_pos[i], n_grid[i], &(n_pos[i - 1][3 * node]), n_grid[i - 1],
630 global_mesh_dim.
data(), global_mesh_off.
data(),
631 &(forw[i].recv_block[6ul * j]));
632 permute_ifield(&(forw[i].recv_block[6ul * j]), 3, -(forw[i].n_permute));
633 permute_ifield(&(forw[i].recv_block[6ul * j + 3ul]), 3,
634 -(forw[i].n_permute));
635 if (forw[i].recv_size[j] > max_comm_size)
636 max_comm_size = forw[i].recv_size[j];
639 for (std::size_t j = 0ul; j < 3ul; j++)
640 forw[i].old_mesh[j] = forw[i - 1].new_mesh[j];
645 for (std::size_t j = 0ul; j < forw[i].group.size(); j++) {
646 forw[i].send_size[j] *= 2;
647 forw[i].recv_size[j] *= 2;
655 for (
int i = 1; i < 4; i++)
656 if (2 * forw[i].new_size > max_mesh_size)
657 max_mesh_size = 2 * forw[i].new_size;
660 for (
int i = 1; i < 4; i++) {
661 forw[i].pack_function = pack_block_permute2;
664 if (forw[1].row_dir == 2) {
667 }
else if (forw[1].row_dir == 1) {
668 forw[1].pack_function = pack_block_permute1;
672 send_buf.resize(max_comm_size);
673 recv_buf.resize(max_comm_size);
674 data_buf.resize(max_mesh_size);
678 for (
int i = 1; i < 4; i++) {
680 forw[i].destroy_plan();
682 forw[i].dir = FFTW_FORWARD;
684 1, &forw[i].new_mesh[2], forw[i].n_ffts, c_data,
nullptr, 1,
685 forw[i].new_mesh[2], c_data,
nullptr, 1, forw[i].new_mesh[2],
686 forw[i].dir, FFTW_PATIENT);
687 assert(forw[i].plan_handle);
692 for (
int i = 1; i < 4; i++) {
694 back[i].destroy_plan();
696 back[i].dir = FFTW_BACKWARD;
698 1, &forw[i].new_mesh[2], forw[i].n_ffts, c_data,
nullptr, 1,
699 forw[i].new_mesh[2], c_data,
nullptr, 1, forw[i].new_mesh[2],
700 back[i].dir, FFTW_PATIENT);
701 back[i].pack_function = pack_block_permute1;
702 assert(back[i].plan_handle);
704 if (forw[1].row_dir == 2) {
706 }
else if (forw[1].row_dir == 1) {
707 back[1].pack_function = pack_block_permute2;
712 return max_mesh_size;