120 std::span<int const> node_list1, std::span<int> node_list2,
121 std::span<int> pos, std::span<int> my_pos,
int rank) {
144 for (i = 0; i < 3; i++) {
145 s1[i] = grid1[i] / grid2[i];
148 else if (grid1[i] != grid2[i] * s1[i])
151 s2[i] = grid2[i] / grid1[i];
154 else if (grid2[i] != grid1[i] * s2[i])
157 ds[i] = grid2[i] / s2[i];
161 std::vector<int> group(g_size);
165 for (gi[2] = 0; gi[2] < ds[2]; gi[2]++)
166 for (gi[1] = 0; gi[1] < ds[1]; gi[1]++)
167 for (gi[0] = 0; gi[0] < ds[0]; gi[0]++) {
169 for (i = 0; i < g_size; i++) {
170 p1[0] = (gi[0] * s1[0]) + (i % s1[0]);
171 p1[1] = (gi[1] * s1[1]) + ((i / s1[0]) % s1[1]);
172 p1[2] = (gi[2] * s1[2]) + (i / (s1[0] * s1[1]));
174 p2[0] = (gi[0] * s2[0]) + (i % s2[0]);
175 p2[1] = (gi[1] * s2[1]) + ((i / s2[0]) % s2[1]);
176 p2[2] = (gi[2] * s2[2]) + (i / (s2[0] * s2[1]));
181 pos[3 * n + 0] = p2[0];
182 pos[3 * n + 1] = p2[1];
183 pos[3 * n + 2] = p2[2];
186 if (n == rank && my_group == 0) {
201 n = group[g_size - 1];
202 for (i = g_size - 1; i > 0; i--)
203 group[i] = group[i - 1];
519 boost::mpi::communicator
const &comm,
Utils::Vector3i const &ca_mesh_dim,
526 std::vector<int> n_id[4];
527 std::vector<int> n_pos[4];
529 auto const rank = comm.rank();
530 auto const node_pos = Utils::Mpi::cart_coords<3>(comm, rank);
534 for (
int i = 0; i < 4; i++) {
535 n_id[i].resize(1 * comm.size());
536 n_pos[i].resize(3 * comm.size());
541 for (
int i = 0; i < 3; i++) {
542 n_grid[0][i] = grid[i];
543 my_pos[0][i] = node_pos[i];
545 for (
int i = 0; i < comm.size(); i++) {
546 auto const n_pos_i = Utils::Mpi::cart_coords<3>(comm, i);
547 for (
int j = 0; j < 3; ++j) {
548 n_pos[0][3 * i + j] = n_pos_i[j];
551 n_pos_i, {n_grid[0][0], n_grid[0][1], n_grid[0][2]});
552 n_id[0][lin_ind] = i;
559 forw[0].n_permute = 0;
560 for (
int i = 1; i < 4; i++)
561 forw[i].n_permute = (forw[1].row_dir + i) % 3;
562 for (
int i = 0; i < 3; i++) {
563 n_grid[2][i] = n_grid[1][(i + 1) % 3];
564 n_grid[3][i] = n_grid[1][(i + 2) % 3];
566 forw[2].row_dir = (forw[1].row_dir - 1) % 3;
567 forw[3].row_dir = (forw[1].row_dir - 2) % 3;
571 for (
int i = 0; i < 3; i++)
572 forw[0].new_mesh[i] = ca_mesh_dim[i];
574 for (
int i = 1; i < 4; i++) {
576 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
577 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, n_id[i - 1],
578 std::span(n_id[i]), std::span(n_pos[i]), my_pos[i], rank);
581 std::swap(n_grid[i][(forw[i].row_dir + 1) % 3],
582 n_grid[i][(forw[i].row_dir + 2) % 3]);
585 {n_grid[i - 1][0], n_grid[i - 1][1], n_grid[i - 1][2]},
586 {n_grid[i][0], n_grid[i][1], n_grid[i][2]}, std::span(n_id[i - 1]),
587 std::span(n_id[i]), std::span(n_pos[i]), my_pos[i], rank);
590 throw std::runtime_error(
"INTERNAL ERROR: fft_find_comm_groups error");
594 forw[i].group = group.value();
596 forw[i].send_block.resize(6 * forw[i].group.size());
597 forw[i].send_size.resize(forw[i].group.size());
598 forw[i].recv_block.resize(6 * forw[i].group.size());
599 forw[i].recv_size.resize(forw[i].group.size());
601 forw[i].new_size = calc_local_mesh(
602 my_pos[i], n_grid[i], global_mesh_dim.
data(), global_mesh_off.
data(),
603 forw[i].new_mesh.data(), forw[i].start.data());
604 permute_ifield(forw[i].new_mesh.data(), 3, -(forw[i].n_permute));
605 permute_ifield(forw[i].start.data(), 3, -(forw[i].n_permute));
606 forw[i].n_ffts = forw[i].new_mesh[0] * forw[i].new_mesh[1];
609 for (std::size_t j = 0ul; j < forw[i].group.size(); j++) {
611 int node = forw[i].group[j];
612 forw[i].send_size[j] = calc_send_block(
613 my_pos[i - 1], n_grid[i - 1], &(n_pos[i][3 * node]), n_grid[i],
614 global_mesh_dim.
data(), global_mesh_off.
data(),
615 &(forw[i].send_block[6ul * j]));
616 permute_ifield(&(forw[i].send_block[6ul * j]), 3,
617 -(forw[i - 1].n_permute));
618 permute_ifield(&(forw[i].send_block[6ul * j + 3ul]), 3,
619 -(forw[i - 1].n_permute));
620 if (forw[i].send_size[j] > max_comm_size)
621 max_comm_size = forw[i].send_size[j];
626 for (std::size_t k = 0ul; k < 3ul; k++)
627 forw[1].send_block[6ul * j + k] += ca_mesh_margin[2ul * k];
630 forw[i].recv_size[j] = calc_send_block(
631 my_pos[i], n_grid[i], &(n_pos[i - 1][3 * node]), n_grid[i - 1],
632 global_mesh_dim.
data(), global_mesh_off.
data(),
633 &(forw[i].recv_block[6ul * j]));
634 permute_ifield(&(forw[i].recv_block[6ul * j]), 3, -(forw[i].n_permute));
635 permute_ifield(&(forw[i].recv_block[6ul * j + 3ul]), 3,
636 -(forw[i].n_permute));
637 if (forw[i].recv_size[j] > max_comm_size)
638 max_comm_size = forw[i].recv_size[j];
641 for (std::size_t j = 0ul; j < 3ul; j++)
642 forw[i].old_mesh[j] = forw[i - 1].new_mesh[j];
647 for (std::size_t j = 0ul; j < forw[i].group.size(); j++) {
648 forw[i].send_size[j] *= 2;
649 forw[i].recv_size[j] *= 2;
657 for (
int i = 1; i < 4; i++)
658 if (2 * forw[i].new_size > max_mesh_size)
659 max_mesh_size = 2 * forw[i].new_size;
662 for (
int i = 1; i < 4; i++) {
663 forw[i].pack_function = pack_block_permute2;
666 if (forw[1].row_dir == 2) {
669 }
else if (forw[1].row_dir == 1) {
670 forw[1].pack_function = pack_block_permute1;
674 send_buf.resize(max_comm_size);
675 recv_buf.resize(max_comm_size);
676 data_buf.resize(max_mesh_size);
680 for (
int i = 1; i < 4; i++) {
682#pragma omp critical(fftw_destroy_plan_forward)
683 forw[i].destroy_plan();
685 forw[i].dir = FFTW_FORWARD;
686#pragma omp critical(fftw_create_plan_forward)
688 1, &forw[i].new_mesh[2], forw[i].n_ffts, c_data,
nullptr, 1,
689 forw[i].new_mesh[2], c_data,
nullptr, 1, forw[i].new_mesh[2],
690 forw[i].dir, FFTW_PATIENT);
691 assert(forw[i].plan_handle);
696 for (
int i = 1; i < 4; i++) {
698#pragma omp critical(fftw_destroy_plan_backward)
699 back[i].destroy_plan();
701 back[i].dir = FFTW_BACKWARD;
702#pragma omp critical(fftw_create_plan_backward)
704 1, &forw[i].new_mesh[2], forw[i].n_ffts, c_data,
nullptr, 1,
705 forw[i].new_mesh[2], c_data,
nullptr, 1, forw[i].new_mesh[2],
706 back[i].dir, FFTW_PATIENT);
707 back[i].pack_function = pack_block_permute1;
708 assert(back[i].plan_handle);
710 if (forw[1].row_dir == 2) {
712 }
else if (forw[1].row_dir == 1) {
713 back[1].pack_function = pack_block_permute2;
718 return max_mesh_size;