26#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
27#include <Kokkos_Core.hpp>
/**
 * @brief Projector that does nothing.
 *
 * Accepts one @c unsigned and one @c int argument and ignores both, so it
 * satisfies the <tt>std::invocable<Projector, unsigned, int></tt> constraint
 * placed on the @c Projector template parameter of the 3D loop helpers in
 * this file, where it is used as the default projector.
 *
 * NOTE(review): the meaning of the two arguments is defined by the callers
 * (presumably a dimension index and the current loop counter) -- confirm
 * against the loop helpers that invoke the projector.
 */
constexpr inline void noop_projector(unsigned, int) {}
39concept IndexVectorConcept =
requires(T
vector) {
40 {
vector[0] } -> std::convertible_to<std::size_t>;
61template <
class Kernel,
class Projector = decltype(detail::noop_projector)>
62 requires std::invocable<Kernel>
and std::invocable<Projector, unsigned, int>
64 detail::IndexVectorConcept
auto &&stop,
70 for (
nx = start[0
u];
nx < stop[0
u]; ++
nx) {
72 for (
ny = start[1u];
ny < stop[1u]; ++
ny) {
74 for (
nz = start[2u];
nz < stop[2u]; ++
nz) {
100 class Projector =
decltype(detail::noop_projector)>
101 requires std::invocable<Kernel>
and std::invocable<Projector, unsigned, int>
103 detail::IndexVectorConcept
auto &&stop,
104 detail::IndexVectorConcept
auto &&
counters,
126#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
128template <Utils::MemoryOrder Order>
131 std::integral_constant<Kokkos::Iterate, Kokkos::Iterate::Left>,
132 std::integral_constant<Kokkos::Iterate, Kokkos::Iterate::Right>>;
139template <Utils::MemoryOrder memory_order,
class Kernel>
141 detail::IndexVectorConcept
auto &&stop,
Kernel &&kernel) {
142#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
143 if (Kokkos::num_threads() > 1) {
144 int nx = stop[0] - start[0];
145 int ny = stop[1] - start[1];
146 int nz = stop[2] - start[2];
148 using Range3d = Kokkos::MDRangePolicy<Kokkos::Rank<3, iter, iter>>;
150 Kokkos::parallel_for(
152 auto const idx = {start[0] + i, start[1] +
j, start[2] + k};
154 Utils::get_linear_index<memory_order>({i,
j, k}, {
nx,
ny,
nz});
163 for (
int nx = start[0
u];
nx < stop[0
u]; ++
nx) {
164 for (
int ny = start[1u];
ny < stop[1u]; ++
ny) {
165 for (
int nz = start[2u];
nz < stop[2u]; ++
nz) {
172 for (
int nz = start[2u];
nz < stop[2u]; ++
nz) {
173 for (
int ny = start[1u];
ny < stop[1u]; ++
ny) {
174 for (
int nx = start[0
u];
nx < stop[0
u]; ++
nx) {
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
and std::invocable< Projector, unsigned, int > void for_each_3d_order(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void for_each_3d_lin(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, Kernel &&kernel)
Run a kernel(index_3d, linear_index) over the given 3d range with given memory order.
std::conditional_t< Order==Utils::MemoryOrder::COLUMN_MAJOR, std::integral_constant< Kokkos::Iterate, Kokkos::Iterate::Left >, std::integral_constant< Kokkos::Iterate, Kokkos::Iterate::Right > > LayoutIterate
Mapping between ESPResSo and Kokkos tags for memory order.
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
std::vector< T, allocator< T > > vector