26#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
27#include <Kokkos_Core.hpp>
/**
 * @brief Projector that ignores its arguments and does nothing.
 *
 * Serves as the default value of the @c projector parameter of the 3D loop
 * helpers in this file, which require projectors to be invocable as
 * `projector(unsigned, int)`.
 *
 * The first argument is the dimension index of the loop being entered and
 * the second one is the current value of that loop's counter (this is how
 * the ordered 3D loop helper invokes its projector). Both are discarded.
 */
constexpr inline void noop_projector(unsigned, int) {}
// Concept for indexable, vector-like arguments: an object `vector` of type T
// must support the expression `vector[0]`, and the result of that expression
// must be convertible to std::size_t.
// NOTE(review): the template header that introduces T and the closing `};` are
// not visible in this listing, and the leading "39"/"40" look like listing line
// numbers fused into the code rather than part of it -- confirm against the
// original header.
39concept IndexVectorConcept =
requires(T
vector) {
40 {
vector[0] } -> std::convertible_to<std::size_t>;
// Repeat an operation on every element of a 3D grid.
//
// Template parameters:
//   Kernel    - callable; constrained to be invocable with no arguments.
//   Projector - callable; constrained to be invocable as (unsigned, int).
//               Defaults to the type of detail::noop_projector.
// Parameters visible in this listing:
//   stop      - indexable vector; stop[i] is the exclusive upper bound of loop i.
//   counters  - indexable vector; its first three elements are used as the
//               loop variables themselves (see below).
//   kernel    - the operation to repeat.
//   projector - defaults to detail::noop_projector.
// NOTE(review): the line carrying the function name and the `start` parameter
// (listing line 63) is missing here, as are the statements inside the loops
// (listing lines 71, 73, 75 and following) and the closing braces. The leading
// numbers on each line look like listing line numbers fused into the code.
61template <
class Kernel,
class Projector = decltype(detail::noop_projector)>
62 requires std::invocable<Kernel> and std::invocable<Projector, unsigned, int>
64 detail::IndexVectorConcept
auto &&stop,
65 detail::IndexVectorConcept
auto &&counters, Kernel &&kernel,
66 Projector &&projector = detail::noop_projector) {
// The loop variables are references into `counters`, so the current 3D
// position is written into the caller's `counters` object as the loops advance.
67 auto &nx = counters[0u];
68 auto &ny = counters[1u];
69 auto &nz = counters[2u];
// Three nested loops over the half-open ranges [start[i], stop[i]):
// index 0 is the outermost loop and index 2 the innermost one.
// NOTE(review): presumably each level calls `projector` and the innermost
// level calls `kernel`, by analogy with the ordered variant -- those lines
// are not visible here; confirm.
70 for (nx = start[0u]; nx < stop[0u]; ++nx) {
72 for (ny = start[1u]; ny < stop[1u]; ++ny) {
74 for (nz = start[2u]; nz < stop[2u]; ++nz) {
// Repeat an operation on every element of a 3D grid, choosing the loop
// nesting according to a row-major / column-major flag.
//
// Kernel must be invocable with no arguments; Projector must be invocable as
// (unsigned, int) and defaults to the type of detail::noop_projector.
// NOTE(review): the beginning of the template header, the line carrying the
// function name and the `start` parameter, the `kernel` parameter line, the
// definition of `is_row_major` (listing line 107), the innermost statements
// and the closing braces are not visible in this listing. The leading numbers
// on each line look like listing line numbers fused into the code.
100 class Projector =
decltype(detail::noop_projector)>
101 requires std::invocable<Kernel> and std::invocable<Projector, unsigned, int>
103 detail::IndexVectorConcept
auto &&stop,
104 detail::IndexVectorConcept
auto &&counters,
106 Projector &&projector = detail::noop_projector) {
// Map loop nesting depth to vector component: in row-major order component 2
// is iterated in the innermost ("fast") loop and component 0 in the outermost
// ("slow") loop; otherwise the roles of 0 and 2 are swapped. Component 1 is
// always the middle loop.
108 auto constexpr index_fast = is_row_major ? 2u : 0u;
109 auto constexpr index_slow = is_row_major ? 0u : 2u;
110 auto constexpr index_medium = 1u;
// Despite their names, nx/ny/nz are the slow/medium/fast loop variables, not
// necessarily the x/y/z components. They are references into `counters`, so
// the caller's `counters` object is updated in place while looping.
111 auto &nx = counters[index_slow];
112 auto &ny = counters[index_medium];
113 auto &nz = counters[index_fast];
// Nested loops over the half-open ranges [start[i], stop[i]), slow to fast.
// At every level the projector is called with the component index and the
// current counter value before the next inner level runs.
114 for (nx = start[index_slow]; nx < stop[index_slow]; ++nx) {
115 projector(index_slow, nx);
116 for (ny = start[index_medium]; ny < stop[index_medium]; ++ny) {
117 projector(index_medium, ny);
118 for (nz = start[index_fast]; nz < stop[index_fast]; ++nz) {
119 projector(index_fast, nz);
126#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
// Mapping between ESPResSo and Kokkos tags for memory order: an alias template
// that selects an integral_constant wrapping Kokkos::Iterate::Left when Order
// is Utils::MemoryOrder::COLUMN_MAJOR and Kokkos::Iterate::Right otherwise.
// NOTE(review): the `using LayoutIterate = std::conditional_t<...` lines
// (listing lines 129-130) are not visible in this listing; the leading numbers
// look like listing line numbers fused into the code.
128template <Utils::MemoryOrder Order>
131 std::integral_constant<Kokkos::Iterate, Kokkos::Iterate::Left>,
132 std::integral_constant<Kokkos::Iterate, Kokkos::Iterate::Right>>;
// Run a kernel(index_3d, linear_index) over the given 3d range with the given
// memory order.
//
// Template parameters:
//   memory_order - Utils::MemoryOrder value, forwarded to
//                  Utils::get_linear_index in the parallel path.
//   Kernel       - type of the callable to run.
// Parameters visible in this listing:
//   stop   - indexable vector; stop[i] is the exclusive upper bound in dimension i.
//   kernel - the callable to run.
// NOTE(review): the line carrying the function name and the `start` parameter
// (listing line 140), the definition of `iter` (listing line 145), the kernel
// invocations, the conditions selecting between the two serial loop nests and
// all closing braces / #endif are not visible in this listing. The leading
// numbers on each line look like listing line numbers fused into the code.
139template <Utils::MemoryOrder memory_order,
class Kernel>
141 detail::IndexVectorConcept
auto &&stop, Kernel &&kernel) {
// Parallel path: only compiled with shared-memory parallelism enabled, and
// only taken when Kokkos reports more than one thread.
142#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
143 if (Kokkos::num_threads() > 1) {
// Extent of the range in each dimension; the Kokkos policy below iterates
// over [0, size[d]) in each of the three dimensions rather than
// [start[d], stop[d]).
144 auto const size = stop - start;
146 using Range3d = Kokkos::MDRangePolicy<Kokkos::Rank<3, iter, iter>>;
147 Range3d policy({0, 0, 0}, {size[0], size[1], size[2]});
148 Kokkos::parallel_for(
149 "for_each_3d", policy, KOKKOS_LAMBDA(
int i,
int j,
int k) {
150 auto const linear_idx =
151 Utils::get_linear_index<memory_order>(i, j, k, size);
// Serial path: a running counter is declared here, starting at zero.
// NOTE(review): presumably it is incremented once per visited cell and passed
// to the kernel as the linear index -- the lines doing so are not visible.
// NOTE(review): an `int` is initialised from the unsigned literal `0u`.
158 int linear_loop_index = 0u;
// First serial loop nest: dimension 0 outermost, dimension 2 innermost.
160 for (
int nx = start[0u]; nx < stop[0u]; ++nx) {
161 for (
int ny = start[1u]; ny < stop[1u]; ++ny) {
162 for (
int nz = start[2u]; nz < stop[2u]; ++nz) {
// Second serial loop nest: dimension 2 outermost, dimension 0 innermost.
// NOTE(review): presumably the two nests are selected by `memory_order` so
// that the running counter matches the requested layout -- confirm.
169 for (
int nz = start[2u]; nz < stop[2u]; ++nz) {
170 for (
int ny = start[1u]; ny < stop[1u]; ++ny) {
171 for (
int nx = start[0u]; nx < stop[0u]; ++nx) {
requires std::invocable< Kernel > and std::invocable< Projector, unsigned, int > void for_each_3d_order(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void for_each_3d_lin(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, Kernel &&kernel)
Run a kernel(index_3d, linear_index) over the given 3d range with given memory order.
std::conditional_t< Order==Utils::MemoryOrder::COLUMN_MAJOR, std::integral_constant< Kokkos::Iterate, Kokkos::Iterate::Left >, std::integral_constant< Kokkos::Iterate, Kokkos::Iterate::Right > > LayoutIterate
Mapping between ESPResSo and Kokkos tags for memory order.
requires std::invocable< Kernel > and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
std::vector< T, allocator< T > > vector