26#include <Kokkos_Core.hpp>
/**
 * @brief Projector that does nothing.
 *
 * Accepts an @c unsigned and an @c int and ignores both. It serves as the
 * default value of the @c projector argument of the 3D loop helpers in this
 * file, so callers that don't need a projector can omit it.
 */
constexpr inline void noop_projector(unsigned, int) {}
37concept IndexVectorConcept =
requires(T
vector) {
38 {
vector[0] } -> std::convertible_to<std::size_t>;
59template <
class Kernel,
class Projector = decltype(detail::noop_projector)>
60 requires std::invocable<Kernel>
and std::invocable<Projector, unsigned, int>
62 detail::IndexVectorConcept
auto &&stop,
68 for (
nx = start[0
u];
nx < stop[0
u]; ++
nx) {
70 for (
ny = start[1u];
ny < stop[1u]; ++
ny) {
72 for (
nz = start[2u];
nz < stop[2u]; ++
nz) {
98 class Projector =
decltype(detail::noop_projector)>
99 requires std::invocable<Kernel>
and std::invocable<Projector, unsigned, int>
101 detail::IndexVectorConcept
auto &&stop,
102 detail::IndexVectorConcept
auto &&
counters,
125template <Utils::MemoryOrder Order>
128 std::integral_constant<Kokkos::Iterate, Kokkos::Iterate::Left>,
129 std::integral_constant<Kokkos::Iterate, Kokkos::Iterate::Right>>;
135template <Utils::MemoryOrder memory_order,
class Kernel>
137 detail::IndexVectorConcept
auto &&stop,
Kernel &&kernel) {
138 if (Kokkos::num_threads() > 1) {
139 auto const size = stop - start;
141 using Range3d = Kokkos::MDRangePolicy<Kokkos::Rank<3, iter, iter>>;
142 Range3d policy({0, 0, 0}, {size[0], size[1], size[2]});
143 Kokkos::parallel_for(
146 Utils::get_linear_index<memory_order>(i,
j, k, size);
154 for (
int nx = start[0
u];
nx < stop[0
u]; ++
nx) {
155 for (
int ny = start[1u];
ny < stop[1u]; ++
ny) {
156 for (
int nz = start[2u];
nz < stop[2u]; ++
nz) {
163 for (
int nz = start[2u];
nz < stop[2u]; ++
nz) {
164 for (
int ny = start[1u];
ny < stop[1u]; ++
ny) {
165 for (
int nx = start[0
u];
nx < stop[0
u]; ++
nx) {
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
and std::invocable< Projector, unsigned, int > void for_each_3d_order(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void for_each_3d_lin(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, Kernel &&kernel)
Run kernel(index_3d, linear_index) over the given 3D range with the given memory order.
std::conditional_t< Order==Utils::MemoryOrder::COLUMN_MAJOR, std::integral_constant< Kokkos::Iterate, Kokkos::Iterate::Left >, std::integral_constant< Kokkos::Iterate, Kokkos::Iterate::Right > > LayoutIterate
Mapping between ESPResSo and Kokkos tags for memory order.
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
std::vector< T, allocator< T > > vector