ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
Loading...
Searching...
No Matches
for_each_3d.hpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2024 The ESPResSo project
3 *
4 * This file is part of ESPResSo.
5 *
6 * ESPResSo is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * ESPResSo is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#pragma once
21
22#include <config/config.hpp>
23
24#include <utils/index.hpp>
25
26#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
27#include <Kokkos_Core.hpp>
28#endif
29
30#include <concepts>
31#include <cstddef>
32#include <type_traits>
33
34namespace detail {
35
36constexpr inline void noop_projector(unsigned, int) {}
37
38template <typename T>
39concept IndexVectorConcept = requires(T vector) {
40 { vector[0] } -> std::convertible_to<std::size_t>;
41};
42
43} // namespace detail
44
45/**
46 * @brief Repeat an operation on every element of a 3D grid.
47 *
48 * Intermediate values that depend on the iterated coordinates
49 * are calculated and stored once per iteration. This is useful
50 * when the operation is costly.
51 *
52 * @param start Initial values for the loop counters.
53 * @param stop Final values (one-past-the-end) for the loop counters.
54 * @param counters Loop counters.
55 * @param kernel Functor to execute.
56 * @param projector Projection of the current loop counter.
57 * @tparam Kernel Nullary function.
58 * @tparam Projector Binary function that takes a nesting depth and a loop
59 * counter as arguments and projects a value.
60 */
61template <class Kernel, class Projector = decltype(detail::noop_projector)>
62 requires std::invocable<Kernel> and std::invocable<Projector, unsigned, int>
63void for_each_3d(detail::IndexVectorConcept auto &&start,
64 detail::IndexVectorConcept auto &&stop,
65 detail::IndexVectorConcept auto &&counters, Kernel &&kernel,
66 Projector &&projector = detail::noop_projector) {
67 auto &nx = counters[0u];
68 auto &ny = counters[1u];
69 auto &nz = counters[2u];
70 for (nx = start[0u]; nx < stop[0u]; ++nx) {
71 projector(0u, nx);
72 for (ny = start[1u]; ny < stop[1u]; ++ny) {
73 projector(1u, ny);
74 for (nz = start[2u]; nz < stop[2u]; ++nz) {
75 projector(2u, nz);
76 kernel();
77 }
78 }
79 }
80}
81
82/**
83 * @brief Repeat an operation on every element of a 3D grid.
84 *
85 * Intermediate values that depend on the iterated coordinates
86 * are calculated and stored once per iteration. This is useful
87 * when the operation is costly.
88 *
89 * @param start Initial values for the loop counters.
90 * @param stop Final values (one-past-the-end) for the loop counters.
91 * @param counters Loop counters.
92 * @param kernel Functor to execute.
93 * @param projector Projection of the current loop counter.
94 * @tparam Order Data layout.
95 * @tparam Kernel Nullary function.
96 * @tparam Projector Binary function that takes a nesting depth and a loop
97 * counter as arguments and projects a value.
98 */
99template <Utils::MemoryOrder Order, class Kernel,
100 class Projector = decltype(detail::noop_projector)>
101 requires std::invocable<Kernel> and std::invocable<Projector, unsigned, int>
102void for_each_3d_order(detail::IndexVectorConcept auto &&start,
103 detail::IndexVectorConcept auto &&stop,
104 detail::IndexVectorConcept auto &&counters,
105 Kernel &&kernel,
106 Projector &&projector = detail::noop_projector) {
108 auto constexpr index_fast = is_row_major ? 2u : 0u;
109 auto constexpr index_slow = is_row_major ? 0u : 2u;
110 auto constexpr index_medium = 1u;
111 auto &nx = counters[index_slow];
112 auto &ny = counters[index_medium];
113 auto &nz = counters[index_fast];
114 for (nx = start[index_slow]; nx < stop[index_slow]; ++nx) {
116 for (ny = start[index_medium]; ny < stop[index_medium]; ++ny) {
118 for (nz = start[index_fast]; nz < stop[index_fast]; ++nz) {
120 kernel();
121 }
122 }
123 }
124}
125
126#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
127/** @brief Mapping between ESPResSo and Kokkos tags for memory order */
128template <Utils::MemoryOrder Order>
129using LayoutIterate = std::conditional_t<
131 std::integral_constant<Kokkos::Iterate, Kokkos::Iterate::Left>,
132 std::integral_constant<Kokkos::Iterate, Kokkos::Iterate::Right>>;
133#endif
134
135/**
136 * @brief Run a kernel(index_3d, linear_index) over the given 3d range with
137 * given memory order
138 */
139template <Utils::MemoryOrder memory_order, class Kernel>
140void for_each_3d_lin(detail::IndexVectorConcept auto &&start,
141 detail::IndexVectorConcept auto &&stop, Kernel &&kernel) {
142#ifdef ESPRESSO_SHARED_MEMORY_PARALLELISM
143 if (Kokkos::num_threads() > 1) {
144 int nx = stop[0] - start[0];
145 int ny = stop[1] - start[1];
146 int nz = stop[2] - start[2];
147 constexpr Kokkos::Iterate iter = LayoutIterate<memory_order>::value;
148 using Range3d = Kokkos::MDRangePolicy<Kokkos::Rank<3, iter, iter>>;
149 Range3d policy({0, 0, 0}, {nx, ny, nz});
150 Kokkos::parallel_for(
151 "for_each_3d", policy, KOKKOS_LAMBDA(int i, int j, int k) {
152 auto const idx = {start[0] + i, start[1] + j, start[2] + k};
153 auto const linear_idx =
154 Utils::get_linear_index<memory_order>({i, j, k}, {nx, ny, nz});
155 kernel(idx, linear_idx);
156 });
157 return;
158 }
159#endif
160
161 int linear_loop_index = 0u;
163 for (int nx = start[0u]; nx < stop[0u]; ++nx) {
164 for (int ny = start[1u]; ny < stop[1u]; ++ny) {
165 for (int nz = start[2u]; nz < stop[2u]; ++nz) {
168 }
169 }
170 }
171 } else {
172 for (int nz = start[2u]; nz < stop[2u]; ++nz) {
173 for (int ny = start[1u]; ny < stop[1u]; ++ny) {
174 for (int nx = start[0u]; nx < stop[0u]; ++nx) {
177 }
178 }
179 }
180 }
181}
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
and std::invocable< Projector, unsigned, int > void for_each_3d_order(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
void for_each_3d_lin(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, Kernel &&kernel)
Run a kernel(index_3d, linear_index) over the given 3d range with given memory order.
std::conditional_t< Order==Utils::MemoryOrder::COLUMN_MAJOR, std::integral_constant< Kokkos::Iterate, Kokkos::Iterate::Left >, std::integral_constant< Kokkos::Iterate, Kokkos::Iterate::Right > > LayoutIterate
Mapping between ESPResSo and Kokkos tags for memory order.
and std::invocable< Projector, unsigned, int > void for_each_3d(detail::IndexVectorConcept auto &&start, detail::IndexVectorConcept auto &&stop, detail::IndexVectorConcept auto &&counters, Kernel &&kernel, Projector &&projector=detail::noop_projector)
Repeat an operation on every element of a 3D grid.
MemoryOrder
Definition index.hpp:32
std::vector< T, allocator< T > > vector
Definition vector.hpp:52