26#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
27#include <Kokkos_Core.hpp>
/**
 * @brief Projector that does nothing.
 *
 * Accepts an (unsigned, int) argument pair and ignores both values.
 * It is the default value of the @c projector parameter of the 3D loop
 * helpers in this file, so that callers who do not need a per-index
 * callback can omit it.
 */
constexpr inline void noop_projector(unsigned, int) {}
// Concept for "index vector" arguments: satisfied by a type T whose objects
// can be subscripted with `[0]`, where the result of that subscript is
// convertible to std::size_t. Only subscripting with 0 is checked; no size
// or element-count requirement is expressed here.
// NOTE(review): the `template` header that introduces T and the closing
// `};` of the requires-expression are not visible in this excerpt.
39concept IndexVectorConcept =
requires(T
vector) {
40 {
vector[0] } -> std::convertible_to<std::size_t>;
// Triple nested loop over a 3D index range.
//
// Template parameters:
//   Kernel    - must be invocable with no arguments.
//   Projector - must be invocable with (unsigned, int); defaults to the type
//               of detail::noop_projector.
// Parameters visible in this excerpt:
//   stop      - exclusive upper bound per axis (loops test `counter < stop[d]`).
//   counters  - index vector whose elements 0, 1, 2 are used as the loop
//               variables themselves.
//   kernel    - the callable constrained by Kernel.
//   projector - defaults to detail::noop_projector.
// NOTE(review): the line carrying the function name and the `start`
// parameter, as well as the loop bodies and closing braces, are not visible
// in this excerpt; `start` is used below as the inclusive lower bound.
61template <
class Kernel,
class Projector = decltype(detail::noop_projector)>
62 requires std::invocable<Kernel> and std::invocable<Projector, unsigned, int>
64 detail::IndexVectorConcept
auto &&stop,
65 detail::IndexVectorConcept
auto &&counters, Kernel &&kernel,
66 Projector &&projector = detail::noop_projector) {
// The loop variables are references into `counters`, so every increment
// below is written straight into the caller-supplied object.
67 auto &nx = counters[0u];
68 auto &ny = counters[1u];
69 auto &nz = counters[2u];
// Axis 0 is the outermost loop and axis 2 the innermost one; each axis
// covers the half-open range [start[d], stop[d]).
70 for (nx = start[0u]; nx < stop[0u]; ++nx) {
72 for (ny = start[1u]; ny < stop[1u]; ++ny) {
74 for (nz = start[2u]; nz < stop[2u]; ++nz) {
// Triple nested loop over a 3D index range whose nesting order depends on
// the memory order, calling `projector(axis, value)` each time a loop
// variable takes a new value.
//
// Projector must be invocable with (unsigned, int) and defaults to the type
// of detail::noop_projector; Kernel must be invocable with no arguments.
// NOTE(review): the beginning of the template header, the line carrying the
// function name and the `start`/`kernel` parameters, the definition of
// `is_row_major`, and the innermost body with the closing braces are not
// visible in this excerpt.
100 class Projector =
decltype(detail::noop_projector)>
101 requires std::invocable<Kernel> and std::invocable<Projector, unsigned, int>
103 detail::IndexVectorConcept
auto &&stop,
104 detail::IndexVectorConcept
auto &&counters,
106 Projector &&projector = detail::noop_projector) {
// Pick which axis is iterated by the innermost ("fast") and outermost
// ("slow") loop: axis 2 is fast and axis 0 slow when is_row_major is true,
// the reverse otherwise. Axis 1 is always the middle loop.
108 auto constexpr index_fast = is_row_major ? 2u : 0u;
109 auto constexpr index_slow = is_row_major ? 0u : 2u;
110 auto constexpr index_medium = 1u;
// The names nx/ny/nz denote loop depth (outer/middle/inner), not a fixed
// axis: nx refers to the slow axis, nz to the fast axis. They are references
// into `counters`, so increments are written into the caller's object.
111 auto &nx = counters[index_slow];
112 auto &ny = counters[index_medium];
113 auto &nz = counters[index_fast];
// Each loop covers the half-open range [start[d], stop[d]) of its axis and
// reports (axis index, current value) to the projector before descending.
114 for (nx = start[index_slow]; nx < stop[index_slow]; ++nx) {
115 projector(index_slow, nx);
116 for (ny = start[index_medium]; ny < stop[index_medium]; ++ny) {
117 projector(index_medium, ny);
118 for (nz = start[index_fast]; nz < stop[index_fast]; ++nz) {
119 projector(index_fast, nz);
// Opened only when ESPRESSO_SHARED_MEMORY_PARALLELISM is defined (the
// matching #endif is not visible in this excerpt).
126#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
// Template over a Utils::MemoryOrder value. The two lines below are the
// alternatives of a type selection between the Kokkos::Iterate::Left and
// Kokkos::Iterate::Right tags, each wrapped in std::integral_constant.
// NOTE(review): the `using ... = std::conditional_t<...` part and its
// condition are not visible here — confirm which memory order maps to
// Left and which to Right.
128template <Utils::MemoryOrder Order>
131 std::integral_constant<Kokkos::Iterate, Kokkos::Iterate::Left>,
132 std::integral_constant<Kokkos::Iterate, Kokkos::Iterate::Right>>;
// Visits every index of a 3D range [start, stop). In the Kokkos branch
// below the kernel is called as `kernel(idx, linear_idx)`.
//
// Template parameters:
//   memory_order - Utils::MemoryOrder value forwarded to
//                  Utils::get_linear_index in the Kokkos branch.
//   Kernel       - type of the `kernel` callable.
// NOTE(review): the line carrying the function name and the `start`
// parameter is not visible in this excerpt.
139template <Utils::MemoryOrder memory_order,
class Kernel>
141 detail::IndexVectorConcept
auto &&stop, Kernel &&kernel) {
142#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
// Kokkos path: taken only when Kokkos reports more than one thread.
143 if (Kokkos::num_threads() > 1) {
// Extent of the range along each axis.
144 int nx = stop[0] - start[0];
145 int ny = stop[1] - start[1];
146 int nz = stop[2] - start[2];
// NOTE(review): `iter` is defined on a line not visible here — presumably
// the Kokkos::Iterate tag selected for memory_order; confirm.
148 using Range3d = Kokkos::MDRangePolicy<Kokkos::Rank<3, iter, iter>>;
// The policy iterates zero-based offsets (0..nx, 0..ny, 0..nz), not the
// absolute indices; `start` is added back inside the lambda.
149 Range3d policy({0, 0, 0}, {nx, ny, nz});
150 Kokkos::parallel_for(
151 "for_each_3d", policy, KOKKOS_LAMBDA(
int i,
int j,
int k) {
// Absolute 3D index handed to the kernel. With `auto` and a braced list
// this deduces a std::initializer_list of the element type.
152 auto const idx = {start[0] + i, start[1] + j, start[2] + k};
// Linear index is computed from the zero-based offsets and the extents,
// so it starts at the first cell of the range rather than at `start`.
153 auto const linear_idx =
154 Utils::get_linear_index<memory_order>({i, j, k}, {nx, ny, nz});
155 kernel(idx, linear_idx);
// Serial path: a running counter that starts at zero.
// NOTE(review): the counter is an int initialised from the unsigned literal
// 0u; its use inside the loop bodies is not visible in this excerpt.
161 int linear_loop_index = 0u;
// First loop nest: axis 0 outermost, axis 2 innermost.
// NOTE(review): the condition selecting between this nest and the next one
// is not visible — presumably a compile-time test on memory_order; confirm.
163 for (
int nx = start[0u]; nx < stop[0u]; ++nx) {
164 for (
int ny = start[1u]; ny < stop[1u]; ++ny) {
165 for (
int nz = start[2u]; nz < stop[2u]; ++nz) {
// Second loop nest: axis 2 outermost, axis 0 innermost.
172 for (
int nz = start[2u]; nz < stop[2u]; ++nz) {
173 for (
int ny = start[1u]; ny < stop[1u]; ++ny) {
174 for (
int nx = start[0u]; nx < stop[0u]; ++nx) {
and std::invocable< Projector, unsigned, int > void for_each_3d_order(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void for_each_3d_lin(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, Kernel &&kernel)
Run a kernel(index_3d, linear_index) over the given 3D range with the given memory order.
std::conditional_t< Order==Utils::MemoryOrder::COLUMN_MAJOR, std::integral_constant< Kokkos::Iterate, Kokkos::Iterate::Left >, std::integral_constant< Kokkos::Iterate, Kokkos::Iterate::Right > > LayoutIterate
Mapping between ESPResSo and Kokkos tags for memory order.
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
std::vector< T, allocator< T > > vector