399 bool force_flag,
bool energy_flag) {
402 auto const &system = get_system();
403 auto const &box_geo = *system.box_geo;
404 auto const dipole_prefac = prefactor /
Utils::product(dp3m.params.mesh);
406 auto const npt_flag = force_flag and system.has_npt_enabled();
408 auto constexpr npt_flag =
false;
413#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
414 auto constexpr r2c_dir = FFTConfig::r2c_dir;
415 auto const rs_local_size = dp3m.heffte.fft->rs_local_size();
416 auto const local_size = dp3m.heffte.fft->ks_local_size();
417 auto local_size_full = local_size;
418 if constexpr (FFTConfig::use_r2c) {
419 local_size_full[r2c_dir] -= 1;
420 local_size_full[r2c_dir] *= 2;
422 auto const local_origin = dp3m.heffte.fft->ks_local_ld_index();
424 auto const line_stride = local_size_full[0];
425 auto const plane_stride = local_size_full[0] * local_size_full[0];
427 auto const &global_size = dp3m.params.mesh;
428 auto const cutoff_left = 1 - local_origin[r2c_dir];
429 auto const cutoff_right = global_size[r2c_dir] / 2 - local_origin[r2c_dir];
430 auto &short_dim = local_index[r2c_dir];
431 dp3m.resize_heffte_buffers();
434 if (dp3m.sum_mu2 > 0.) {
436 dp3m.fft_buffers->perform_vector_halo_gather();
437 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
438 dp3m.fft->forward_fft(rs_mesh);
440 dp3m.update_mesh_views();
442#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
443 if (dp3m.heffte.world_size == 1) {
445 std::array<FloatType *, 3u> rs_fields = {
446 {dp3m.heffte.rs_dipole_density[0u].data(),
447 dp3m.heffte.rs_dipole_density[1u].data(),
448 dp3m.heffte.rs_dipole_density[2u].data()}};
449 dp3m.heffte.halo_comm.gather_grid(
::comm_cart, rs_fields,
450 dp3m.local_mesh.dim);
452 for (
auto dir : {0u, 1u, 2u}) {
455 FFTConfig::r_space_order>(
456 dp3m.heffte.rs_dipole_density[dir], dp3m.local_mesh.dim,
457 dp3m.local_mesh.n_halo_ld,
458 dp3m.local_mesh.dim - dp3m.local_mesh.n_halo_ur);
460 std::vector<FloatType> rs_field_no_halo_reorder;
461 rs_field_no_halo_reorder.resize(rs_field_no_halo.size());
462 std::size_t index_row_major = 0u;
463 for_each_3d_order<FFTConfig::k_space_order>(
464 mesh_start, rs_local_size, local_index, [&]() {
465 auto constexpr KX = 1, KY = 2, KZ = 0;
466 auto const index = local_index[KZ] +
467 rs_local_size[0] * local_index[KY] +
468 Utils::sqr(rs_local_size[0]) * local_index[KX];
469 rs_field_no_halo_reorder[index_row_major] =
470 rs_field_no_halo[index];
473 dp3m.heffte.fft->forward(rs_field_no_halo_reorder.data(),
474 dp3m.heffte.ks_dipole_density[dir].data());
476 if (not dp3m.params.tuning) {
477 std::size_t index_row_major_r2c = 0u;
478 for_each_3d_order<FFTConfig::k_space_order>(
479 mesh_start, local_size, local_index, [&]() {
480 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
481 auto constexpr KX = 2, KY = 0, KZ = 1;
482 auto const index_fft_legacy = local_index[KZ] +
483 line_stride * local_index[KY] +
484 plane_stride * local_index[KX];
485 auto const old_value = std::complex<FloatType>{
486 dp3m.mesh.rs_fields[dir][2 * index_fft_legacy],
487 dp3m.mesh.rs_fields[dir][2 * index_fft_legacy + 1]};
488 auto const &new_value =
489 dp3m.heffte.ks_dipole_density[dir][index_row_major_r2c];
490 assert(heffte_almost_equal(new_value, old_value));
491 ++index_row_major_r2c;
502 if (energy_flag or npt_flag) {
506 if (dp3m.sum_mu2 > 0.) {
510 auto index = std::size_t(0u);
511 auto it_energy = dp3m.g_energy.begin();
512 auto node_energy = 0.;
513 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
514 auto constexpr KX = 2, KY = 0, KZ = 1;
515 auto const shift = local_index + dp3m.mesh.start;
516 auto const &d_op = dp3m.d_op[0u];
517 auto const &mesh_dip = dp3m.mesh.rs_fields;
519 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
520 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
521 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
524 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
525 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
526 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
528 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
529 std::advance(it_energy, 1);
531#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
532 if (dp3m.heffte.world_size == 1) {
533 [[maybe_unused]]
auto node_energy_heffte = 0.;
534 std::size_t index_row_major_r2c = 0u;
535 for_each_3d_order<FFTConfig::k_space_order>(
536 mesh_start, local_size, local_index, [&]() {
537 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
538 auto const global_index = local_origin + local_index;
539 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
540 auto const cell_field =
541 mesh_dip[0u][index_row_major_r2c] *
542 FloatType(dp3m.d_op[0u][global_index[1u]]) +
543 mesh_dip[1u][index_row_major_r2c] *
544 FloatType(dp3m.d_op[1u][global_index[2u]]) +
545 mesh_dip[2u][index_row_major_r2c] *
546 FloatType(dp3m.d_op[2u][global_index[0u]]);
547 auto cell_energy =
static_cast<double>(
548 dp3m.heffte.g_energy[index_row_major_r2c] *
549 std::norm(cell_field));
550 if (FFTConfig::use_r2c and (short_dim >= cutoff_left and
551 short_dim <= cutoff_right - 1)) {
559 node_energy_heffte += cell_energy;
561 ++index_row_major_r2c;
563 assert(heffte_almost_equal(
static_cast<FloatType
>(node_energy_heffte),
564 static_cast<FloatType
>(node_energy)));
567 node_energy *= dipole_prefac * std::numbers::pi * box_geo.length_inv()[0];
568 boost::mpi::reduce(
comm_cart, node_energy, energy, std::plus<>(), 0);
570 if (dp3m.energy_correction == 0.)
571 calc_energy_correction();
575 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
576 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
579 energy += prefactor * dp3m.energy_correction / box_geo.volume();
589 if (dp3m.sum_mu2 > 0.) {
590 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u];
591 dp3m.ks_scalar.resize(dp3m.local_mesh.size);
594 auto index{std::size_t(0u)};
595 auto it_energy = dp3m.g_energy.begin();
596 auto it_ks_scalar = dp3m.ks_scalar.begin();
597 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]()
mutable {
598 auto constexpr KX = 2, KY = 0, KZ = 1;
599 auto const shift = local_index + dp3m.mesh.start;
600 auto const &d_op = dp3m.d_op[0u];
601 auto const &mesh_dip = dp3m.mesh.rs_fields;
603 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
604 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
605 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
608 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
609 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
610 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
612 *it_ks_scalar = *it_energy * std::complex<FloatType>{re, im};
613 std::advance(it_energy, 1);
614 std::advance(it_ks_scalar, 1);
617#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
618 if (dp3m.heffte.world_size == 1) {
619 std::size_t index_row_major_r2c = 0u;
620 for_each_3d_order<FFTConfig::k_space_order>(
621 mesh_start, local_size, local_index, [&]() {
622 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
623 auto const global_index = local_origin + local_index;
624 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
625 dp3m.heffte.ks_scalar[index_row_major_r2c] =
626 dp3m.heffte.g_energy[index_row_major_r2c] *
627 (mesh_dip[0u][index_row_major_r2c] *
628 FloatType(dp3m.d_op[0u][global_index[1u]]) +
629 mesh_dip[1u][index_row_major_r2c] *
630 FloatType(dp3m.d_op[1u][global_index[2u]]) +
631 mesh_dip[2u][index_row_major_r2c] *
632 FloatType(dp3m.d_op[2u][global_index[0u]]));
634 if (not dp3m.params.tuning) {
635 auto constexpr KX = 2, KY = 0, KZ = 1;
636 auto const index_fft_legacy = local_index[KZ] +
637 line_stride * local_index[KY] +
638 plane_stride * local_index[KX];
639 assert(heffte_almost_equal(
640 dp3m.heffte.ks_scalar[index_row_major_r2c],
641 dp3m.ks_scalar[index_fft_legacy]));
644 ++index_row_major_r2c;
651 for (
int d = 0; d < 3; d++) {
652 auto it_ks_scalar = dp3m.ks_scalar.begin();
654 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
655 auto const &offset = dp3m.mesh.start;
656 auto const &d_op = dp3m.d_op[0u];
657 auto const d_op_val = FloatType(d_op[local_index[d] + offset[d]]);
658 auto const &value = *it_ks_scalar;
659 dp3m.mesh.rs_scalar[index] = d_op_val * value.real();
661 dp3m.mesh.rs_scalar[index] = d_op_val * value.imag();
663 std::advance(it_ks_scalar, 1);
665#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
666 if (dp3m.heffte.world_size == 1) {
667 unsigned int constexpr d_ks[3] = {2u, 0u, 1u};
668 std::size_t index_row_major_r2c = 0u;
669 for_each_3d_order<FFTConfig::k_space_order>(
670 mesh_start, local_size, local_index, [&]() {
671 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
672 auto const global_index = local_origin + local_index;
673 auto const d_op_val =
674 FloatType(dp3m.d_op[d][global_index[d_ks[d]]]);
675 dp3m.heffte.ks_B_field_storage[index_row_major_r2c] =
676 d_op_val * dp3m.heffte.ks_scalar[index_row_major_r2c];
678 if (not dp3m.params.tuning) {
679 auto constexpr KX = 2, KY = 0, KZ = 1;
680 auto const index_fft_legacy =
681 local_index[KZ] + line_stride * local_index[KY] +
682 plane_stride * local_index[KX];
683 auto const old_value = std::complex<FloatType>{
684 dp3m.mesh.rs_scalar[2 * index_fft_legacy],
685 dp3m.mesh.rs_scalar[2 * index_fft_legacy + 1]};
686 auto const &new_value =
687 dp3m.heffte.ks_B_field_storage[index_row_major_r2c];
688 assert(heffte_almost_equal(new_value, old_value));
691 ++index_row_major_r2c;
694 dp3m.heffte.fft->backward(dp3m.heffte.ks_B_field_storage.data(),
695 dp3m.heffte.rs_B_fields_no_halo[d].data());
697 dp3m.heffte.rs_B_fields[d] =
700 std::span(dp3m.heffte.rs_B_fields_no_halo[d]),
701 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
702 dp3m.local_mesh.n_halo_ur);
705 dp3m.heffte.rs_B_fields[d].data(),
706 dp3m.local_mesh.dim);
709 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
711 dp3m.fft_buffers->perform_scalar_halo_spread();
713 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
714 Utils::integral_parameter<int, AssignTorques, p3m_min_cao, p3m_max_cao>(
715 dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs,
716 *system.cell_structure);
728 auto it_force = dp3m.g_force.begin();
729 auto it_ks_scalar = dp3m.ks_scalar.begin();
730 std::size_t index = 0u;
731 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
732 auto constexpr KX = 2, KY = 0, KZ = 1;
733 auto const shift = local_index + dp3m.mesh.start;
734 auto const &d_op = dp3m.d_op[0u];
735 auto const &mesh_dip = dp3m.mesh.rs_fields;
737 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
738 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
739 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
742 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
743 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
744 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
746 *it_ks_scalar = {*it_force * im, *it_force * (-re)};
747 std::advance(it_force, 1);
748 std::advance(it_ks_scalar, 1);
752#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
753 if (dp3m.heffte.world_size == 1) {
754 std::size_t index_row_major_r2c = 0u;
755 for_each_3d_order<FFTConfig::k_space_order>(
756 mesh_start, local_size, local_index, [&]() {
757 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
758 auto const global_index = local_origin + local_index;
759 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
761 dp3m.heffte.g_force[index_row_major_r2c] *
762 (mesh_dip[0u][index_row_major_r2c] *
763 FloatType(dp3m.d_op[0u][global_index[1u]]) +
764 mesh_dip[1u][index_row_major_r2c] *
765 FloatType(dp3m.d_op[1u][global_index[2u]]) +
766 mesh_dip[2u][index_row_major_r2c] *
767 FloatType(dp3m.d_op[2u][global_index[0u]]));
768 dp3m.heffte.ks_scalar[index_row_major_r2c] = {value.imag(),
771 if (not dp3m.params.tuning) {
772 auto constexpr KX = 2, KY = 0, KZ = 1;
773 auto const index_fft_legacy = local_index[KZ] +
774 line_stride * local_index[KY] +
775 plane_stride * local_index[KX];
776 assert(heffte_almost_equal(
777 dp3m.heffte.ks_scalar[index_row_major_r2c],
778 dp3m.ks_scalar[index_fft_legacy]));
781 ++index_row_major_r2c;
788 for (
int d = 0; d < 3; d++) {
789 std::size_t index = 0u;
790 auto it_ks_scalar = dp3m.ks_scalar.begin();
791 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
792 auto constexpr KX = 2, KY = 0, KZ = 1;
793 auto const shift = local_index + dp3m.mesh.start;
794 auto const &d_op = dp3m.d_op[0u];
795 auto const &mesh_dip = dp3m.mesh.rs_fields;
796 auto const d_op_val = FloatType(d_op[shift[d]]);
797 auto const f = *it_ks_scalar * d_op_val;
798 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.real();
799 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.real();
800 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.real();
802 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.imag();
803 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.imag();
804 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.imag();
806 std::advance(it_ks_scalar, 1);
809#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
810 if (dp3m.heffte.world_size == 1) {
811 std::size_t index_row_major_r2c = 0u;
812 for_each_3d_order<FFTConfig::k_space_order>(
813 mesh_start, local_size, local_index, [&]() {
814 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
815 auto constexpr KX = 1, KY = 2, KZ = 0;
816 auto const global_index = local_origin + local_index;
817 auto const remapped_index =
818 local_index[KZ] + local_index[KY] * local_size[KZ] +
819 local_index[KX] * local_size[KZ] * local_size[KY];
820 auto const d_op_val =
821 FloatType(dp3m.d_op[d][global_index[d]]);
822 auto &mesh_dip = dp3m.heffte.ks_dipole_density;
823 mesh_dip[0u][index_row_major_r2c] =
824 FloatType(dp3m.d_op[d][global_index[2u]]) * d_op_val *
825 dp3m.heffte.ks_scalar[remapped_index];
826 mesh_dip[1u][index_row_major_r2c] =
827 FloatType(dp3m.d_op[d][global_index[0u]]) * d_op_val *
828 dp3m.heffte.ks_scalar[remapped_index];
829 mesh_dip[2u][index_row_major_r2c] =
830 FloatType(dp3m.d_op[d][global_index[1u]]) * d_op_val *
831 dp3m.heffte.ks_scalar[remapped_index];
833 if (not FFTConfig::use_r2c and not dp3m.params.tuning) {
834 auto const index_fft_legacy = local_index[2] +
835 line_stride * local_index[1] +
836 plane_stride * local_index[0];
837 for (
int j = 0; j < 3; ++j) {
838 auto const old_value = std::complex<FloatType>{
839 dp3m.mesh.rs_fields[j][2 * index_fft_legacy],
840 dp3m.mesh.rs_fields[j][2 * index_fft_legacy + 1]};
841 auto const &new_value = mesh_dip[j][index_row_major_r2c];
842 assert(heffte_almost_equal(new_value, old_value));
846 ++index_row_major_r2c;
849 for (
int dir = 0u; dir < 3u; ++dir) {
850 dp3m.heffte.fft->backward(
851 dp3m.heffte.ks_dipole_density[dir].data(),
852 dp3m.heffte.rs_B_fields_no_halo[dir].data());
854 dp3m.heffte.rs_B_fields[d] =
857 std::span(dp3m.heffte.rs_B_fields_no_halo[dir]),
858 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
859 dp3m.local_mesh.n_halo_ur);
863 std::array<FloatType *, 3u>{{dp3m.heffte.rs_B_fields[0u].data(),
864 dp3m.heffte.rs_B_fields[1u].data(),
865 dp3m.heffte.rs_B_fields[2u].data()}};
866 dp3m.heffte.halo_comm.spread_grid(
::comm_cart, rs_fields,
867 dp3m.local_mesh.dim);
870 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
871 dp3m.fft->backward_fft(rs_mesh);
874 dp3m.fft_buffers->perform_vector_halo_spread();
876 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
879 dp3m.params.cao, dp3m, dipole_prefac *
Utils::sqr(wavenumber), d_rs,
880 *system.cell_structure);
886 auto const surface_term =
887 calc_surface_term(force_flag, energy_flag or npt_flag);
889 energy += surface_term;
897 if (not energy_flag) {