122 std::span<int const> node_list1, std::span<int> node_list2,
123 std::span<int> pos, std::span<int> my_pos,
int rank) {
146 for (i = 0; i < 3; i++) {
147 s1[i] = grid1[i] / grid2[i];
150 else if (grid1[i] != grid2[i] * s1[i])
153 s2[i] = grid2[i] / grid1[i];
156 else if (grid2[i] != grid1[i] * s2[i])
159 ds[i] = grid2[i] / s2[i];
163 std::vector<int> group(g_size);
167 for (gi[2] = 0; gi[2] < ds[2]; gi[2]++)
168 for (gi[1] = 0; gi[1] < ds[1]; gi[1]++)
169 for (gi[0] = 0; gi[0] < ds[0]; gi[0]++) {
171 for (i = 0; i < g_size; i++) {
172 p1[0] = (gi[0] * s1[0]) + (i % s1[0]);
173 p1[1] = (gi[1] * s1[1]) + ((i / s1[0]) % s1[1]);
174 p1[2] = (gi[2] * s1[2]) + (i / (s1[0] * s1[1]));
176 p2[0] = (gi[0] * s2[0]) + (i % s2[0]);
177 p2[1] = (gi[1] * s2[1]) + ((i / s2[0]) % s2[1]);
178 p2[2] = (gi[2] * s2[2]) + (i / (s2[0] * s2[1]));
183 pos[3 * n + 0] = p2[0];
184 pos[3 * n + 1] = p2[1];
185 pos[3 * n + 2] = p2[2];
188 if (n == rank && my_group == 0) {
203 n = group[g_size - 1];
204 for (i = g_size - 1; i > 0; i--)
205 group[i] = group[i - 1];
521 boost::mpi::communicator
const &comm,
Utils::Vector3i const &ca_mesh_dim,
528 std::vector<int> n_id[4];
529 std::vector<int> n_pos[4];
531 auto const rank = comm.rank();
532 auto const node_pos = Utils::Mpi::cart_coords<3>(comm, rank);
536 for (
int i = 0; i < 4; i++) {
537 n_id[i].resize(1 * comm.size());
538 n_pos[i].resize(3 * comm.size());
543 for (
int i = 0; i < 3; i++) {
544 n_grid[0][i] = grid[i];
545 my_pos[0][i] = node_pos[i];
547 for (
int i = 0; i < comm.size(); i++) {
548 auto const n_pos_i = Utils::Mpi::cart_coords<3>(comm, i);
549 for (
int j = 0; j < 3; ++j) {
550 n_pos[0][3 * i + j] = n_pos_i[j];
553 n_pos_i, {n_grid[0][0], n_grid[0][1], n_grid[0][2]});
554 n_id[0][lin_ind] = i;
561 forw[0].n_permute = 0;
562 for (
int i = 1; i < 4; i++)
563 forw[i].n_permute = (forw[1].row_dir + i) % 3;
564 for (
int i = 0; i < 3; i++) {
565 n_grid[2][i] = n_grid[1][(i + 1) % 3];
566 n_grid[3][i] = n_grid[1][(i + 2) % 3];
568 forw[2].row_dir = (forw[1].row_dir - 1) % 3;
569 forw[3].row_dir = (forw[1].row_dir - 2) % 3;
573 for (
int i = 0; i < 3; i++)
574 forw[0].new_mesh[i] = ca_mesh_dim[i];
576 for (
int i = 1; i < 4; i++) {
578 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
579 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1],
580 std::span(n_id[i]), std::span(n_pos[i]), my_pos[i], rank);
583 std::swap(n_grid[i][(forw[i].row_dir + 1) % 3],
584 n_grid[i][(forw[i].row_dir + 2) % 3]);
587 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
588 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, std::span(n_id[i - 1]),
589 std::span(n_id[i]), std::span(n_pos[i]), my_pos[i], rank);
592 throw std::runtime_error(
"INTERNAL ERROR: fft_find_comm_groups error");
596 forw[i].group = group.value();
598 forw[i].send_block.resize(6 * forw[i].group.size());
599 forw[i].send_size.resize(forw[i].group.size());
600 forw[i].recv_block.resize(6 * forw[i].group.size());
601 forw[i].recv_size.resize(forw[i].group.size());
603 forw[i].new_size = calc_local_mesh(
604 my_pos[i], n_grid[i], global_mesh_dim.
data(), global_mesh_off.
data(),
605 forw[i].new_mesh.data(), forw[i].start.data());
606 permute_ifield(forw[i].new_mesh.data(), 3, -(forw[i].n_permute));
607 permute_ifield(forw[i].start.data(), 3, -(forw[i].n_permute));
608 forw[i].n_ffts = forw[i].new_mesh[0] * forw[i].new_mesh[1];
611 for (std::size_t j = 0ul; j < forw[i].group.size(); j++) {
613 int node = forw[i].group[j];
614 forw[i].send_size[j] = calc_send_block(
615 my_pos[i - 1], n_grid[i - 1], &(n_pos[i][3 * node]), n_grid[i],
616 global_mesh_dim.
data(), global_mesh_off.
data(),
617 &(forw[i].send_block[6ul * j]));
618 permute_ifield(&(forw[i].send_block[6ul * j]), 3,
619 -(forw[i - 1].n_permute));
620 permute_ifield(&(forw[i].send_block[6ul * j + 3ul]), 3,
621 -(forw[i - 1].n_permute));
622 if (forw[i].send_size[j] > max_comm_size)
623 max_comm_size = forw[i].send_size[j];
628 for (std::size_t k = 0ul; k < 3ul; k++)
629 forw[1].send_block[6ul * j + k] += ca_mesh_margin[2ul * k];
632 forw[i].recv_size[j] = calc_send_block(
633 my_pos[i], n_grid[i], &(n_pos[i - 1][3 * node]), n_grid[i - 1],
634 global_mesh_dim.
data(), global_mesh_off.
data(),
635 &(forw[i].recv_block[6ul * j]));
636 permute_ifield(&(forw[i].recv_block[6ul * j]), 3, -(forw[i].n_permute));
637 permute_ifield(&(forw[i].recv_block[6ul * j + 3ul]), 3,
638 -(forw[i].n_permute));
639 if (forw[i].recv_size[j] > max_comm_size)
640 max_comm_size = forw[i].recv_size[j];
643 for (std::size_t j = 0ul; j < 3ul; j++)
644 forw[i].old_mesh[j] = forw[i - 1].new_mesh[j];
649 for (std::size_t j = 0ul; j < forw[i].group.size(); j++) {
650 forw[i].send_size[j] *= 2;
651 forw[i].recv_size[j] *= 2;
659 for (
int i = 1; i < 4; i++)
660 if (2 * forw[i].new_size > max_mesh_size)
661 max_mesh_size = 2 * forw[i].new_size;
664 for (
int i = 1; i < 4; i++) {
665 forw[i].pack_function = pack_block_permute2;
668 if (forw[1].row_dir == 2) {
671 }
else if (forw[1].row_dir == 1) {
672 forw[1].pack_function = pack_block_permute1;
676 send_buf.resize(max_comm_size);
677 recv_buf.resize(max_comm_size);
678 data_buf.resize(max_mesh_size);
682 for (
int i = 1; i < 4; i++) {
685#pragma omp critical(fftw_destroy_plan_forward)
686 forw[i].destroy_plan();
689 forw[i].dir = FFTW_FORWARD;
691#pragma omp critical(fftw_create_plan_forward)
693 1, &forw[i].new_mesh[2], forw[i].n_ffts, c_data,
nullptr, 1,
694 forw[i].new_mesh[2], c_data,
nullptr, 1, forw[i].new_mesh[2],
695 forw[i].dir, FFTW_PATIENT);
697 assert(forw[i].plan_handle);
702 for (
int i = 1; i < 4; i++) {
705#pragma omp critical(fftw_destroy_plan_backward)
706 back[i].destroy_plan();
709 back[i].dir = FFTW_BACKWARD;
711#pragma omp critical(fftw_create_plan_backward)
713 1, &forw[i].new_mesh[2], forw[i].n_ffts, c_data,
nullptr, 1,
714 forw[i].new_mesh[2], c_data,
nullptr, 1, forw[i].new_mesh[2],
715 back[i].dir, FFTW_PATIENT);
717 back[i].pack_function = pack_block_permute1;
718 assert(back[i].plan_handle);
720 if (forw[1].row_dir == 2) {
722 }
else if (forw[1].row_dir == 1) {
723 back[1].pack_function = pack_block_permute2;
728 return max_mesh_size;