332 bool force_flag,
bool energy_flag) {
335 auto const &system = get_system();
336 auto const &box_geo = *system.box_geo;
337 auto const dipole_prefac = prefactor /
Utils::product(dp3m.params.mesh);
339 auto const npt_flag = force_flag and system.has_npt_enabled();
341 auto constexpr npt_flag =
false;
346#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
347 auto constexpr r2c_dir = FFTConfig::r2c_dir;
348 auto const rs_local_size = dp3m.heffte.fft->rs_local_size();
349 auto const local_size = dp3m.heffte.fft->ks_local_size();
350 auto local_size_full = local_size;
351 if constexpr (FFTConfig::use_r2c) {
352 local_size_full[r2c_dir] -= 1;
353 local_size_full[r2c_dir] *= 2;
355 auto const local_origin = dp3m.heffte.fft->ks_local_ld_index();
357 auto const line_stride = local_size_full[0];
358 auto const plane_stride = local_size_full[0] * local_size_full[0];
360 auto const &global_size = dp3m.params.mesh;
361 auto const cutoff_left = 1 - local_origin[r2c_dir];
362 auto const cutoff_right = global_size[r2c_dir] / 2 - local_origin[r2c_dir];
363 auto &short_dim = local_index[r2c_dir];
364 dp3m.resize_heffte_buffers();
367 if (dp3m.sum_mu2 > 0.) {
369 dp3m.fft_buffers->perform_vector_halo_gather();
370 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
371 dp3m.fft->forward_fft(rs_mesh);
373 dp3m.update_mesh_views();
375#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
376 if (dp3m.heffte.world_size == 1) {
378 std::array<FloatType *, 3u> rs_fields = {
379 {dp3m.heffte.rs_dipole_density[0u].data(),
380 dp3m.heffte.rs_dipole_density[1u].data(),
381 dp3m.heffte.rs_dipole_density[2u].data()}};
382 dp3m.heffte.halo_comm.gather_grid(
::comm_cart, rs_fields,
383 dp3m.local_mesh.dim);
385 for (
auto dir : {0u, 1u, 2u}) {
388 FFTConfig::r_space_order>(
389 dp3m.heffte.rs_dipole_density[dir], dp3m.local_mesh.dim,
390 dp3m.local_mesh.n_halo_ld,
391 dp3m.local_mesh.dim - dp3m.local_mesh.n_halo_ur);
393 std::vector<FloatType> rs_field_no_halo_reorder;
394 rs_field_no_halo_reorder.resize(rs_field_no_halo.size());
395 std::size_t index_row_major = 0u;
396 for_each_3d_order<FFTConfig::k_space_order>(
397 mesh_start, rs_local_size, local_index, [&]() {
398 auto constexpr KX = 1, KY = 2, KZ = 0;
399 auto const index = local_index[KZ] +
400 rs_local_size[0] * local_index[KY] +
401 Utils::sqr(rs_local_size[0]) * local_index[KX];
402 rs_field_no_halo_reorder[index_row_major] =
403 rs_field_no_halo[index];
406 dp3m.heffte.fft->forward(rs_field_no_halo_reorder.data(),
407 dp3m.heffte.ks_dipole_density[dir].data());
409 if (not dp3m.params.tuning) {
410 std::size_t index_row_major_r2c = 0u;
411 for_each_3d_order<FFTConfig::k_space_order>(
412 mesh_start, local_size, local_index, [&]() {
413 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
414 auto constexpr KX = 2, KY = 0, KZ = 1;
415 auto const index_fft_legacy = local_index[KZ] +
416 line_stride * local_index[KY] +
417 plane_stride * local_index[KX];
418 auto const old_value = std::complex<FloatType>{
419 dp3m.mesh.rs_fields[dir][2 * index_fft_legacy],
420 dp3m.mesh.rs_fields[dir][2 * index_fft_legacy + 1]};
421 auto const &new_value =
422 dp3m.heffte.ks_dipole_density[dir][index_row_major_r2c];
423 assert(heffte_almost_equal(new_value, old_value));
424 ++index_row_major_r2c;
435 if (energy_flag or npt_flag) {
439 if (dp3m.sum_mu2 > 0.) {
443 auto index = std::size_t(0u);
444 auto it_energy = dp3m.g_energy.begin();
445 auto node_energy = 0.;
446 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
447 auto constexpr KX = 2, KY = 0, KZ = 1;
448 auto const shift = local_index + dp3m.mesh.start;
449 auto const &d_op = dp3m.d_op[0u];
450 auto const &mesh_dip = dp3m.mesh.rs_fields;
452 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
453 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
454 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
457 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
458 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
459 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
461 node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im));
462 std::advance(it_energy, 1);
464#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
465 if (dp3m.heffte.world_size == 1) {
466 [[maybe_unused]]
auto node_energy_heffte = 0.;
467 std::size_t index_row_major_r2c = 0u;
468 for_each_3d_order<FFTConfig::k_space_order>(
469 mesh_start, local_size, local_index, [&]() {
470 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
471 auto const global_index = local_origin + local_index;
472 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
473 auto const cell_field =
474 mesh_dip[0u][index_row_major_r2c] *
475 FloatType(dp3m.d_op[0u][global_index[1u]]) +
476 mesh_dip[1u][index_row_major_r2c] *
477 FloatType(dp3m.d_op[1u][global_index[2u]]) +
478 mesh_dip[2u][index_row_major_r2c] *
479 FloatType(dp3m.d_op[2u][global_index[0u]]);
480 auto cell_energy =
static_cast<double>(
481 dp3m.heffte.g_energy[index_row_major_r2c] *
482 std::norm(cell_field));
483 if (FFTConfig::use_r2c and (short_dim >= cutoff_left and
484 short_dim <= cutoff_right - 1)) {
492 node_energy_heffte += cell_energy;
494 ++index_row_major_r2c;
496 assert(heffte_almost_equal(
static_cast<FloatType
>(node_energy_heffte),
497 static_cast<FloatType
>(node_energy)));
500 node_energy *= dipole_prefac * std::numbers::pi * box_geo.length_inv()[0];
501 boost::mpi::reduce(
comm_cart, node_energy, energy, std::plus<>(), 0);
503 if (dp3m.energy_correction == 0.)
504 calc_energy_correction();
508 energy -= prefactor * dp3m.sum_mu2 * std::numbers::inv_sqrtpi *
509 (2. / 3.) * Utils::int_pow<3>(dp3m.params.alpha);
512 energy += prefactor * dp3m.energy_correction / box_geo.volume();
522 if (dp3m.sum_mu2 > 0.) {
523 auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u];
524 dp3m.ks_scalar.resize(dp3m.local_mesh.size);
527 auto index{std::size_t(0u)};
528 auto it_energy = dp3m.g_energy.begin();
529 auto it_ks_scalar = dp3m.ks_scalar.begin();
530 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]()
mutable {
531 auto constexpr KX = 2, KY = 0, KZ = 1;
532 auto const shift = local_index + dp3m.mesh.start;
533 auto const &d_op = dp3m.d_op[0u];
534 auto const &mesh_dip = dp3m.mesh.rs_fields;
536 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
537 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
538 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
541 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
542 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
543 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
545 *it_ks_scalar = *it_energy * std::complex<FloatType>{re, im};
546 std::advance(it_energy, 1);
547 std::advance(it_ks_scalar, 1);
550#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
551 if (dp3m.heffte.world_size == 1) {
552 std::size_t index_row_major_r2c = 0u;
553 for_each_3d_order<FFTConfig::k_space_order>(
554 mesh_start, local_size, local_index, [&]() {
555 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
556 auto const global_index = local_origin + local_index;
557 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
558 dp3m.heffte.ks_scalar[index_row_major_r2c] =
559 dp3m.heffte.g_energy[index_row_major_r2c] *
560 (mesh_dip[0u][index_row_major_r2c] *
561 FloatType(dp3m.d_op[0u][global_index[1u]]) +
562 mesh_dip[1u][index_row_major_r2c] *
563 FloatType(dp3m.d_op[1u][global_index[2u]]) +
564 mesh_dip[2u][index_row_major_r2c] *
565 FloatType(dp3m.d_op[2u][global_index[0u]]));
567 if (not dp3m.params.tuning) {
568 auto constexpr KX = 2, KY = 0, KZ = 1;
569 auto const index_fft_legacy = local_index[KZ] +
570 line_stride * local_index[KY] +
571 plane_stride * local_index[KX];
572 assert(heffte_almost_equal(
573 dp3m.heffte.ks_scalar[index_row_major_r2c],
574 dp3m.ks_scalar[index_fft_legacy]));
577 ++index_row_major_r2c;
584 for (
int d = 0; d < 3; d++) {
585 auto it_ks_scalar = dp3m.ks_scalar.begin();
587 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
588 auto const &offset = dp3m.mesh.start;
589 auto const &d_op = dp3m.d_op[0u];
590 auto const d_op_val = FloatType(d_op[local_index[d] + offset[d]]);
591 auto const &value = *it_ks_scalar;
592 dp3m.mesh.rs_scalar[index] = d_op_val * value.real();
594 dp3m.mesh.rs_scalar[index] = d_op_val * value.imag();
596 std::advance(it_ks_scalar, 1);
598#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
599 if (dp3m.heffte.world_size == 1) {
600 unsigned int constexpr d_ks[3] = {2u, 0u, 1u};
601 std::size_t index_row_major_r2c = 0u;
602 for_each_3d_order<FFTConfig::k_space_order>(
603 mesh_start, local_size, local_index, [&]() {
604 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
605 auto const global_index = local_origin + local_index;
606 auto const d_op_val =
607 FloatType(dp3m.d_op[d][global_index[d_ks[d]]]);
608 dp3m.heffte.ks_B_field_storage[index_row_major_r2c] =
609 d_op_val * dp3m.heffte.ks_scalar[index_row_major_r2c];
611 if (not dp3m.params.tuning) {
612 auto constexpr KX = 2, KY = 0, KZ = 1;
613 auto const index_fft_legacy =
614 local_index[KZ] + line_stride * local_index[KY] +
615 plane_stride * local_index[KX];
616 auto const old_value = std::complex<FloatType>{
617 dp3m.mesh.rs_scalar[2 * index_fft_legacy],
618 dp3m.mesh.rs_scalar[2 * index_fft_legacy + 1]};
619 auto const &new_value =
620 dp3m.heffte.ks_B_field_storage[index_row_major_r2c];
621 assert(heffte_almost_equal(new_value, old_value));
624 ++index_row_major_r2c;
627 dp3m.heffte.fft->backward(dp3m.heffte.ks_B_field_storage.data(),
628 dp3m.heffte.rs_B_fields_no_halo[d].data());
630 dp3m.heffte.rs_B_fields[d] =
633 std::span(dp3m.heffte.rs_B_fields_no_halo[d]),
634 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
635 dp3m.local_mesh.n_halo_ur);
638 dp3m.heffte.rs_B_fields[d].data(),
639 dp3m.local_mesh.dim);
642 dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh());
644 dp3m.fft_buffers->perform_scalar_halo_spread();
646 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
647 Utils::integral_parameter<int, AssignTorques, p3m_min_cao, p3m_max_cao>(
648 dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs,
649 *system.cell_structure);
661 auto it_force = dp3m.g_force.begin();
662 auto it_ks_scalar = dp3m.ks_scalar.begin();
663 std::size_t index = 0u;
664 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
665 auto constexpr KX = 2, KY = 0, KZ = 1;
666 auto const shift = local_index + dp3m.mesh.start;
667 auto const &d_op = dp3m.d_op[0u];
668 auto const &mesh_dip = dp3m.mesh.rs_fields;
670 auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
671 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
672 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
675 auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) +
676 mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) +
677 mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]);
679 *it_ks_scalar = {*it_force * im, *it_force * (-re)};
680 std::advance(it_force, 1);
681 std::advance(it_ks_scalar, 1);
685#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
686 if (dp3m.heffte.world_size == 1) {
687 std::size_t index_row_major_r2c = 0u;
688 for_each_3d_order<FFTConfig::k_space_order>(
689 mesh_start, local_size, local_index, [&]() {
690 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
691 auto const global_index = local_origin + local_index;
692 auto const &mesh_dip = dp3m.heffte.ks_dipole_density;
694 dp3m.heffte.g_force[index_row_major_r2c] *
695 (mesh_dip[0u][index_row_major_r2c] *
696 FloatType(dp3m.d_op[0u][global_index[1u]]) +
697 mesh_dip[1u][index_row_major_r2c] *
698 FloatType(dp3m.d_op[1u][global_index[2u]]) +
699 mesh_dip[2u][index_row_major_r2c] *
700 FloatType(dp3m.d_op[2u][global_index[0u]]));
701 dp3m.heffte.ks_scalar[index_row_major_r2c] = {value.imag(),
704 if (not dp3m.params.tuning) {
705 auto constexpr KX = 2, KY = 0, KZ = 1;
706 auto const index_fft_legacy = local_index[KZ] +
707 line_stride * local_index[KY] +
708 plane_stride * local_index[KX];
709 assert(heffte_almost_equal(
710 dp3m.heffte.ks_scalar[index_row_major_r2c],
711 dp3m.ks_scalar[index_fft_legacy]));
714 ++index_row_major_r2c;
721 for (
int d = 0; d < 3; d++) {
722 std::size_t index = 0u;
723 auto it_ks_scalar = dp3m.ks_scalar.begin();
724 for_each_3d(mesh_start, dp3m.mesh.size, local_index, [&]() {
725 auto constexpr KX = 2, KY = 0, KZ = 1;
726 auto const shift = local_index + dp3m.mesh.start;
727 auto const &d_op = dp3m.d_op[0u];
728 auto const &mesh_dip = dp3m.mesh.rs_fields;
729 auto const d_op_val = FloatType(d_op[shift[d]]);
730 auto const f = *it_ks_scalar * d_op_val;
731 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.real();
732 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.real();
733 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.real();
735 mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f.imag();
736 mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f.imag();
737 mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f.imag();
739 std::advance(it_ks_scalar, 1);
742#ifdef ESPRESSO_DP3M_HEFFTE_CROSS_CHECKS
743 if (dp3m.heffte.world_size == 1) {
744 std::size_t index_row_major_r2c = 0u;
745 for_each_3d_order<FFTConfig::k_space_order>(
746 mesh_start, local_size, local_index, [&]() {
747 if (not FFTConfig::use_r2c or (short_dim <= cutoff_right)) {
748 auto constexpr KX = 1, KY = 2, KZ = 0;
749 auto const global_index = local_origin + local_index;
750 auto const remapped_index =
751 local_index[KZ] + local_index[KY] * local_size[KZ] +
752 local_index[KX] * local_size[KZ] * local_size[KY];
753 auto const d_op_val =
754 FloatType(dp3m.d_op[d][global_index[d]]);
755 auto &mesh_dip = dp3m.heffte.ks_dipole_density;
756 mesh_dip[0u][index_row_major_r2c] =
757 FloatType(dp3m.d_op[d][global_index[2u]]) * d_op_val *
758 dp3m.heffte.ks_scalar[remapped_index];
759 mesh_dip[1u][index_row_major_r2c] =
760 FloatType(dp3m.d_op[d][global_index[0u]]) * d_op_val *
761 dp3m.heffte.ks_scalar[remapped_index];
762 mesh_dip[2u][index_row_major_r2c] =
763 FloatType(dp3m.d_op[d][global_index[1u]]) * d_op_val *
764 dp3m.heffte.ks_scalar[remapped_index];
766 if (not FFTConfig::use_r2c and not dp3m.params.tuning) {
767 auto const index_fft_legacy = local_index[2] +
768 line_stride * local_index[1] +
769 plane_stride * local_index[0];
770 for (
int j = 0; j < 3; ++j) {
771 auto const old_value = std::complex<FloatType>{
772 dp3m.mesh.rs_fields[j][2 * index_fft_legacy],
773 dp3m.mesh.rs_fields[j][2 * index_fft_legacy + 1]};
774 auto const &new_value = mesh_dip[j][index_row_major_r2c];
775 assert(heffte_almost_equal(new_value, old_value));
779 ++index_row_major_r2c;
782 for (
int dir = 0u; dir < 3u; ++dir) {
783 dp3m.heffte.fft->backward(
784 dp3m.heffte.ks_dipole_density[dir].data(),
785 dp3m.heffte.rs_B_fields_no_halo[dir].data());
787 dp3m.heffte.rs_B_fields[d] =
790 std::span(dp3m.heffte.rs_B_fields_no_halo[dir]),
791 dp3m.local_mesh.dim_no_halo, dp3m.local_mesh.n_halo_ld,
792 dp3m.local_mesh.n_halo_ur);
796 std::array<FloatType *, 3u>{{dp3m.heffte.rs_B_fields[0u].data(),
797 dp3m.heffte.rs_B_fields[1u].data(),
798 dp3m.heffte.rs_B_fields[2u].data()}};
799 dp3m.heffte.halo_comm.spread_grid(
::comm_cart, rs_fields,
800 dp3m.local_mesh.dim);
803 for (
auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) {
804 dp3m.fft->backward_fft(rs_mesh);
807 dp3m.fft_buffers->perform_vector_halo_spread();
809 auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3;
812 dp3m.params.cao, dp3m, dipole_prefac *
Utils::sqr(wavenumber), d_rs,
813 *system.cell_structure);
819 auto const surface_term =
820 calc_surface_term(force_flag, energy_flag or npt_flag);
822 energy += surface_term;
830 if (not energy_flag) {