397 bool force_flag,
bool energy_flag) {
400 auto const &system = get_system();
401 auto const &box_geo = *system.box_geo;
402 auto const dipole_prefac = prefactor /
Utils::product(dp3m.params.mesh);
404 auto const npt_flag = force_flag and system.has_npt_enabled();
406 auto constexpr npt_flag =
false;
411#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
412 auto constexpr r2c_dir = FFTConfig::r2c_dir;
413 auto const rs_local_size = dp3m.heffte.fft->rs_local_size();
414 auto const local_size = dp3m.heffte.fft->ks_local_size();
415 auto local_size_full = local_size;
416 if constexpr (FFTConfig::use_r2c) {
417 local_size_full[r2c_dir] -= 1;
418 local_size_full[r2c_dir] *= 2;
420 auto const local_origin = dp3m.heffte.fft->ks_local_ld_index();
422 auto const line_stride = local_size_full[0];
423 auto const plane_stride = local_size_full[0] * local_size_full[0];
425 auto const &global_size = dp3m.params.mesh;
426 auto const cutoff_left = 1 - local_origin[r2c_dir];
427 auto const cutoff_right = global_size[r2c_dir] / 2 - local_origin[r2c_dir];
428 auto &short_dim = local_index[r2c_dir];
429 dp3m.resize_heffte_buffers();
432 if (dp3m.sum_mu2 > 0.) {
434 dp3m.fft_buffers->perform_vector_halo_gather();
435 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
436 dp3m.fft->forward_fft(rs_mesh);
438 dp3m.update_mesh_views();
440#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
441 if (dp3m.heffte.world_size == 1) {
443 std::array<FloatType *, 3u> rs_fields = {
444 {dp3m.heffte.rs_dipole_density[0u].data(),
445 dp3m.heffte.rs_dipole_density[1u].data(),
446 dp3m.heffte.rs_dipole_density[2u].data()}};
447 dp3m.heffte.halo_comm.gather_grid(
::comm_cart, rs_fields,
448 dp3m.local_mesh.dim);
450 for (
auto dir : {0u, 1u, 2u}) {
453 FFTConfig::r_space_order>(
454 dp3m.heffte.rs_dipole_density[dir], dp3m.local_mesh.dim,
455 dp3m.local_mesh.n_halo_ld,
456 dp3m.local_mesh.dim - dp3m.local_mesh.n_halo_ur);
458 std::vector<FloatType> rs_field_no_halo_reorder;
459 rs_field_no_halo_reorder.resize(rs_field_no_halo.size());
460 std::size_t index_row_major = 0u;
461 for_each_3d_order<FFTConfig::k_space_order>(
462 mesh_start, rs_local_size, local_index, [&]() {
463 auto constexpr KX = 1, KY = 2, KZ = 0;
464 auto const index = local_index[KZ] +
465 rs_local_size[0] * local_index[KY] +
466 Utils::sqr(rs_local_size[0]) * local_index[KX];
467 rs_field_no_halo_reorder[index_row_major] =
468 rs_field_no_halo[index];
471 dp3m.heffte.fft->forward(rs_field_no_halo_reorder.data(),
472 dp3m.heffte.ks_dipole_density[dir].data());
474 if (not dp3m.params.tuning) {
475 std::size_t index_row_major_r2c = 0u;
476 for_each_3d_order<FFTConfig::k_space_order>(
477 mesh_start, local_size, local_index, [&]() {
478 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
479 auto constexpr KX = 2, KY = 0, KZ = 1;
480 auto const index_fft_legacy = local_index[KZ] +
481 line_stride * local_index[KY] +
482 plane_stride * local_index[KX];
483 auto const old_value = std::complex<FloatType>{
484 dp3m.mesh.rs_fields[dir][2 * index_fft_legacy],
485 dp3m.mesh.rs_fields[dir][2 * index_fft_legacy + 1]};
486 auto const &new_value =
487 dp3m.heffte.ks_dipole_density[dir][index_row_major_r2c];
488 assert(heffte_almost_equal(new_value, old_value));
489 ++index_row_major_r2c;
500 if (energy_flag or npt_flag) {
504 if (dp3m.sum_mu2 > 0.) {
508 auto index = std::size_t(0u);
509 auto it_energy = dp3m.g_energy.begin();
510 auto node_energy = 0.;
511 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
512 auto constexpr KX = 2, KY = 0, KZ = 1;
513 auto const shift = local_index + dp3m.mesh.start;
514 auto const &d_op = dp3m.d_op[0u];
515 auto const &mesh_dip = dp3m.mesh.rs_fields;
517 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
518 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
519 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
522 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
523 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
524 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
526 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
527 std::advance(it_energy, 1);
529#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
530 if (dp3m.heffte.world_size == 1) {
531 [[maybe_unused]]
auto node_energy_heffte = 0.;
532 std::size_t index_row_major_r2c = 0u;
533 for_each_3d_order<FFTConfig::k_space_order>(
534 mesh_start, local_size, local_index, [&]() {
535 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
536 auto const global_index = local_origin + local_index;
537 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
538 auto const cell_field =
539 mesh_dip[0u][index_row_major_r2c] *
540 FloatType(dp3m.d_op[0u][global_index[1u]]) +
541 mesh_dip[1u][index_row_major_r2c] *
542 FloatType(dp3m.d_op[1u][global_index[2u]]) +
543 mesh_dip[2u][index_row_major_r2c] *
544 FloatType(dp3m.d_op[2u][global_index[0u]]);
545 auto cell_energy =
static_cast<double>(
546 dp3m.heffte.g_energy[index_row_major_r2c] *
547 std::norm(cell_field));
548 if (FFTConfig::use_r2c and (short_dim >= cutoff_left and
549 short_dim <= cutoff_right - 1)) {
557 node_energy_heffte += cell_energy;
559 ++index_row_major_r2c;
561 assert(heffte_almost_equal(
static_cast<FloatType
>(node_energy_heffte),
562 static_cast<FloatType
>(node_energy)));
565 node_energy *= dipole_prefac * std::numbers::pi * box_geo.length_inv()[0];
566 boost::mpi::reduce(
comm_cart, node_energy, energy, std::plus<>(), 0);
568 if (dp3m.energy_correction == 0.)
569 calc_energy_correction();
573 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
574 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
577 energy += prefactor * dp3m.energy_correction / box_geo.volume();
587 if (dp3m.sum_mu2 > 0.) {
588 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u];
589 dp3m.ks_scalar.resize(dp3m.local_mesh.size);
592 auto index{std::size_t(0u)};
593 auto it_energy = dp3m.g_energy.begin();
594 auto it_ks_scalar = dp3m.ks_scalar.begin();
595 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]()
mutable {
596 auto constexpr KX = 2, KY = 0, KZ = 1;
597 auto const shift = local_index + dp3m.mesh.start;
598 auto const &d_op = dp3m.d_op[0u];
599 auto const &mesh_dip = dp3m.mesh.rs_fields;
601 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
602 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
603 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
606 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
607 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
608 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
610 *it_ks_scalar = *it_energy * std::complex<FloatType>{re, im};
611 std::advance(it_energy, 1);
612 std::advance(it_ks_scalar, 1);
615#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
616 if (dp3m.heffte.world_size == 1) {
617 std::size_t index_row_major_r2c = 0u;
618 for_each_3d_order<FFTConfig::k_space_order>(
619 mesh_start, local_size, local_index, [&]() {
620 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
621 auto const global_index = local_origin + local_index;
622 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
623 dp3m.heffte.ks_scalar[index_row_major_r2c] =
624 dp3m.heffte.g_energy[index_row_major_r2c] *
625 (mesh_dip[0u][index_row_major_r2c] *
626 FloatType(dp3m.d_op[0u][global_index[1u]]) +
627 mesh_dip[1u][index_row_major_r2c] *
628 FloatType(dp3m.d_op[1u][global_index[2u]]) +
629 mesh_dip[2u][index_row_major_r2c] *
630 FloatType(dp3m.d_op[2u][global_index[0u]]));
632 if (not dp3m.params.tuning) {
633 auto constexpr KX = 2, KY = 0, KZ = 1;
634 auto const index_fft_legacy = local_index[KZ] +
635 line_stride * local_index[KY] +
636 plane_stride * local_index[KX];
637 assert(heffte_almost_equal(
638 dp3m.heffte.ks_scalar[index_row_major_r2c],
639 dp3m.ks_scalar[index_fft_legacy]));
642 ++index_row_major_r2c;
649 for (
int d = 0; d < 3; d++) {
650 auto it_ks_scalar = dp3m.ks_scalar.begin();
652 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
653 auto const &offset = dp3m.mesh.start;
654 auto const &d_op = dp3m.d_op[0u];
655 auto const d_op_val = FloatType(d_op[local_index[d] + offset[d]]);
656 auto const &value = *it_ks_scalar;
657 dp3m.mesh.rs_scalar[index] = d_op_val * value.real();
659 dp3m.mesh.rs_scalar[index] = d_op_val * value.imag();
661 std::advance(it_ks_scalar, 1);
663#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
664 if (dp3m.heffte.world_size == 1) {
665 unsigned int constexpr d_ks[3] = {2u, 0u, 1u};
666 std::size_t index_row_major_r2c = 0u;
667 for_each_3d_order<FFTConfig::k_space_order>(
668 mesh_start, local_size, local_index, [&]() {
669 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
670 auto const global_index = local_origin + local_index;
671 auto const d_op_val =
672 FloatType(dp3m.d_op[d][global_index[d_ks[d]]]);
673 dp3m.heffte.ks_B_field_storage[index_row_major_r2c] =
674 d_op_val * dp3m.heffte.ks_scalar[index_row_major_r2c];
676 if (not dp3m.params.tuning) {
677 auto constexpr KX = 2, KY = 0, KZ = 1;
678 auto const index_fft_legacy =
679 local_index[KZ] + line_stride * local_index[KY] +
680 plane_stride * local_index[KX];
681 auto const old_value = std::complex<FloatType>{
682 dp3m.mesh.rs_scalar[2 * index_fft_legacy],
683 dp3m.mesh.rs_scalar[2 * index_fft_legacy + 1]};
684 auto const &new_value =
685 dp3m.heffte.ks_B_field_storage[index_row_major_r2c];
686 assert(heffte_almost_equal(new_value, old_value));
689 ++index_row_major_r2c;
692 dp3m.heffte.fft->backward(dp3m.heffte.ks_B_field_storage.data(),
693 dp3m.heffte.rs_B_fields_no_halo[d].data());
695 dp3m.heffte.rs_B_fields[d] =
698 std::span(dp3m.heffte.rs_B_fields_no_halo[d]),
699 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
700 dp3m.local_mesh.n_halo_ur);
703 dp3m.heffte.rs_B_fields[d].data(),
704 dp3m.local_mesh.dim);
707 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
709 dp3m.fft_buffers->perform_scalar_halo_spread();
711 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
712 Utils::integral_parameter<int, AssignTorques, p3m_min_cao, p3m_max_cao>(
713 dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs,
714 *system.cell_structure);
726 auto it_force = dp3m.g_force.begin();
727 auto it_ks_scalar = dp3m.ks_scalar.begin();
728 std::size_t index = 0u;
729 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
730 auto constexpr KX = 2, KY = 0, KZ = 1;
731 auto const shift = local_index + dp3m.mesh.start;
732 auto const &d_op = dp3m.d_op[0u];
733 auto const &mesh_dip = dp3m.mesh.rs_fields;
735 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
736 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
737 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
740 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
741 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
742 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
744 *it_ks_scalar = {*it_force * im, *it_force * (-re)};
745 std::advance(it_force, 1);
746 std::advance(it_ks_scalar, 1);
750#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
751 if (dp3m.heffte.world_size == 1) {
752 std::size_t index_row_major_r2c = 0u;
753 for_each_3d_order<FFTConfig::k_space_order>(
754 mesh_start, local_size, local_index, [&]() {
755 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
756 auto const global_index = local_origin + local_index;
757 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
759 dp3m.heffte.g_force[index_row_major_r2c] *
760 (mesh_dip[0u][index_row_major_r2c] *
761 FloatType(dp3m.d_op[0u][global_index[1u]]) +
762 mesh_dip[1u][index_row_major_r2c] *
763 FloatType(dp3m.d_op[1u][global_index[2u]]) +
764 mesh_dip[2u][index_row_major_r2c] *
765 FloatType(dp3m.d_op[2u][global_index[0u]]));
766 dp3m.heffte.ks_scalar[index_row_major_r2c] = {value.imag(),
769 if (not dp3m.params.tuning) {
770 auto constexpr KX = 2, KY = 0, KZ = 1;
771 auto const index_fft_legacy = local_index[KZ] +
772 line_stride * local_index[KY] +
773 plane_stride * local_index[KX];
774 assert(heffte_almost_equal(
775 dp3m.heffte.ks_scalar[index_row_major_r2c],
776 dp3m.ks_scalar[index_fft_legacy]));
779 ++index_row_major_r2c;
786 for (
int d = 0; d < 3; d++) {
787 std::size_t index = 0u;
788 auto it_ks_scalar = dp3m.ks_scalar.begin();
789 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
790 auto constexpr KX = 2, KY = 0, KZ = 1;
791 auto const shift = local_index + dp3m.mesh.start;
792 auto const &d_op = dp3m.d_op[0u];
793 auto const &mesh_dip = dp3m.mesh.rs_fields;
794 auto const d_op_val = FloatType(d_op[shift[d]]);
795 auto const f = *it_ks_scalar * d_op_val;
796 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.real();
797 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.real();
798 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.real();
800 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.imag();
801 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.imag();
802 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.imag();
804 std::advance(it_ks_scalar, 1);
807#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
808 if (dp3m.heffte.world_size == 1) {
809 std::size_t index_row_major_r2c = 0u;
810 for_each_3d_order<FFTConfig::k_space_order>(
811 mesh_start, local_size, local_index, [&]() {
812 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
813 auto constexpr KX = 1, KY = 2, KZ = 0;
814 auto const global_index = local_origin + local_index;
815 auto const remapped_index =
816 local_index[KZ] + local_index[KY] * local_size[KZ] +
817 local_index[KX] * local_size[KZ] * local_size[KY];
818 auto const d_op_val =
819 FloatType(dp3m.d_op[d][global_index[d]]);
820 auto &mesh_dip = dp3m.heffte.ks_dipole_density;
821 mesh_dip[0u][index_row_major_r2c] =
822 FloatType(dp3m.d_op[d][global_index[2u]]) * d_op_val *
823 dp3m.heffte.ks_scalar[remapped_index];
824 mesh_dip[1u][index_row_major_r2c] =
825 FloatType(dp3m.d_op[d][global_index[0u]]) * d_op_val *
826 dp3m.heffte.ks_scalar[remapped_index];
827 mesh_dip[2u][index_row_major_r2c] =
828 FloatType(dp3m.d_op[d][global_index[1u]]) * d_op_val *
829 dp3m.heffte.ks_scalar[remapped_index];
831 if (not FFTConfig::use_r2c and not dp3m.params.tuning) {
832 auto const index_fft_legacy = local_index[2] +
833 line_stride * local_index[1] +
834 plane_stride * local_index[0];
835 for (
int j = 0; j < 3; ++j) {
836 auto const old_value = std::complex<FloatType>{
837 dp3m.mesh.rs_fields[j][2 * index_fft_legacy],
838 dp3m.mesh.rs_fields[j][2 * index_fft_legacy + 1]};
839 auto const &new_value = mesh_dip[j][index_row_major_r2c];
840 assert(heffte_almost_equal(new_value, old_value));
844 ++index_row_major_r2c;
847 for (
int dir = 0u; dir < 3u; ++dir) {
848 dp3m.heffte.fft->backward(
849 dp3m.heffte.ks_dipole_density[dir].data(),
850 dp3m.heffte.rs_B_fields_no_halo[dir].data());
852 dp3m.heffte.rs_B_fields[d] =
855 std::span(dp3m.heffte.rs_B_fields_no_halo[dir]),
856 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
857 dp3m.local_mesh.n_halo_ur);
861 std::array<FloatType *, 3u>{{dp3m.heffte.rs_B_fields[0u].data(),
862 dp3m.heffte.rs_B_fields[1u].data(),
863 dp3m.heffte.rs_B_fields[2u].data()}};
864 dp3m.heffte.halo_comm.spread_grid(
::comm_cart, rs_fields,
865 dp3m.local_mesh.dim);
868 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
869 dp3m.fft->backward_fft(rs_mesh);
872 dp3m.fft_buffers->perform_vector_halo_spread();
874 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
877 dp3m.params.cao, dp3m, dipole_prefac *
Utils::sqr(wavenumber), d_rs,
878 *system.cell_structure);
884 auto const surface_term =
885 calc_surface_term(force_flag, energy_flag or npt_flag);
887 energy += surface_term;
895 if (not energy_flag) {