Loading [MathJax]/extensions/TeX/AMSmath.js
ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages Concepts
TuningAlgorithm.cpp
Go to the documentation of this file.
1/*
2 * Copyright (C) 2010-2022 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include "config/config.hpp"
23
24#if defined(P3M) || defined(DP3M)
25
27#include "p3m/common.hpp"
28
29#include "tuning.hpp"
30
31#include "BoxGeometry.hpp"
32#include "LocalBox.hpp"
34#include "communication.hpp"
35#include "system/System.hpp"
36
37#include <algorithm>
38#include <cassert>
39#include <cmath>
40#include <string>
41#include <tuple>
42#include <utility>
43
44/** @name Error codes for tuning. */
45/**@{*/
46/** charge assignment order too large for mesh size */
47static auto constexpr P3M_TUNE_CAO_TOO_LARGE = 1.;
48/** conflict with ELC gap size */
49static auto constexpr P3M_TUNE_ELC_GAP_SIZE = 2.;
50/** could not achieve target accuracy */
51static auto constexpr P3M_TUNE_ACCURACY_TOO_LARGE = 3.;
52/** conflict with FFT domain decomposition */
53static auto constexpr P3M_TUNE_FFT_MESH_SIZE = 4.;
54/**@}*/
55
56/** @brief Precision threshold for a non-zero real-space cutoff. */
57static auto constexpr P3M_RCUT_PREC = 1e-3;
58
60 auto const &box_geo = *m_system.box_geo;
61 auto const &local_geo = *m_system.local_geo;
62 auto const verlet_skin = m_system.cell_structure->get_verlet_skin();
63 auto const r_cut_iL = get_params().r_cut_iL;
64 if (r_cut_iL == 0.) {
65 auto const min_box_l = std::ranges::min(box_geo.length());
66 auto const min_local_box_l = std::ranges::min(local_geo.length());
67 m_r_cut_iL_min = 0.;
69 m_r_cut_iL_min *= box_geo.length_inv()[0];
70 m_r_cut_iL_max *= box_geo.length_inv()[0];
71 } else {
72 m_r_cut_iL_min = m_r_cut_iL_max = r_cut_iL;
73 m_logger->report_fixed_r_cut_iL(r_cut_iL);
74 }
75}
76
79 auto const cao = get_params().cao;
80 if (cao == -1) {
81 cao_min = 1;
82 cao_max = 7;
84 } else {
85 cao_min = cao_max = cao_best = cao;
86 m_logger->report_fixed_cao(cao);
87 }
88}
89
90void TuningAlgorithm::commit(Utils::Vector3i const &mesh, int cao,
91 double r_cut_iL, double alpha_L) {
92 auto const &box_geo = *m_system.box_geo;
93 auto &p3m_params = get_params();
94 p3m_params.r_cut = r_cut_iL * box_geo.length()[0];
95 p3m_params.r_cut_iL = r_cut_iL;
96 p3m_params.cao = cao;
97 p3m_params.alpha_L = alpha_L;
98 p3m_params.alpha = alpha_L * box_geo.length_inv()[0];
99 p3m_params.mesh = mesh;
100}
101
102/**
103 * @brief Get the optimal alpha and the corresponding computation time
104 * for a fixed @p mesh and @p cao.
105 *
106 * The @p tuned_r_cut_iL is determined via a simple bisection.
107 *
108 * @param[in] mesh @copybrief P3MParameters::mesh
109 * @param[in] cao @copybrief P3MParameters::cao
110 * @param[in,out] tuned_r_cut_iL @copybrief P3MParameters::r_cut_iL
111 * @param[in,out] tuned_alpha_L @copybrief P3MParameters::alpha_L
112 * @param[in,out] tuned_accuracy @copybrief P3MParameters::accuracy
113 *
114 * @returns The integration time in case of success, otherwise
115 * -@ref P3M_TUNE_ACCURACY_TOO_LARGE, -@ref P3M_TUNE_FFT_MESH_SIZE,
116 * -@ref P3M_TUNE_CAO_TOO_LARGE, or -@ref P3M_TUNE_ELC_GAP_SIZE
117 */
119 double &tuned_r_cut_iL,
120 double &tuned_alpha_L,
121 double &tuned_accuracy) {
122 auto const &box_geo = *m_system.box_geo;
123 auto const &local_geo = *m_system.local_geo;
124 auto const verlet_skin = m_system.cell_structure->get_verlet_skin();
125 auto const target_accuracy = get_params().accuracy;
126 double rs_err, ks_err;
129
130 /* initial checks. */
131 auto const k_cut_per_dir = (static_cast<double>(cao) / 2.) *
132 Utils::hadamard_division(box_geo.length(), mesh);
133 auto const k_cut = std::ranges::min(k_cut_per_dir);
134 auto const min_box_l = std::ranges::min(box_geo.length());
135 auto const min_local_box_l = std::ranges::min(local_geo.length());
136 auto const k_cut_max = std::min(min_box_l, min_local_box_l) - verlet_skin;
137
138 if (cao >= std::ranges::min(mesh) or k_cut >= k_cut_max) {
139 m_logger->log_cao_too_large(mesh[0], cao);
141 }
142
145
146 /* Either low and high boundary are equal (for fixed cut), or the low border
147 is initially 0 and therefore
148 has infinite error estimate, as required. Therefore if the high boundary
149 fails, there is no possible r_cut */
151 m_logger->log_skip("accuracy not achieved", mesh[0], cao, r_cut_iL_max,
154 }
155
156 double r_cut_iL, accuracy;
157 for (;;) {
158 r_cut_iL = 0.5 * (r_cut_iL_min + r_cut_iL_max);
159
161 break;
162
163 /* bisection */
164 std::tie(accuracy, rs_err, ks_err, tuned_alpha_L) =
165 calculate_accuracy(mesh, cao, r_cut_iL);
166 if (accuracy > target_accuracy)
167 r_cut_iL_min = r_cut_iL;
168 else
169 r_cut_iL_max = r_cut_iL;
170 }
171
172 /* final result is always the upper interval boundary, since only there
173 * we know that the desired minimal accuracy is obtained */
174 tuned_r_cut_iL = r_cut_iL = r_cut_iL_max;
175
176 auto const report_veto = [&](auto const &veto) {
177 if (veto) {
178 m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L,
180 }
181 return static_cast<bool>(veto);
182 };
183
184 /* if we are running P3M+ELC, check that r_cut is compatible */
185 auto const r_cut = r_cut_iL * box_geo.length()[0];
187 return -P3M_TUNE_ELC_GAP_SIZE;
188 }
189
190 commit(mesh, cao, r_cut_iL, tuned_alpha_L);
194 }
195 auto const int_time = benchmark_integration_step(m_system, m_timings);
196
198 calculate_accuracy(mesh, cao, r_cut_iL);
199
200 m_logger->log_success(int_time, mesh[0], cao, r_cut_iL, tuned_alpha_L,
203 return int_time;
204}
205
206/**
207 * @brief Get the optimal alpha and the corresponding computation time
208 * for a fixed @p mesh.
209 *
210 * @p _cao should contain an initial guess, which is then adapted by stepping
211 * up and down.
212 *
213 * @param[in] mesh @copybrief P3MParameters::mesh
214 * @param[in,out] tuned_cao initial guess for the
215 * @copybrief P3MParameters::cao
216 * @param[out] tuned_r_cut_iL @copybrief P3MParameters::r_cut_iL
217 * @param[out] tuned_alpha_L @copybrief P3MParameters::alpha_L
218 * @param[out] tuned_accuracy @copybrief P3MParameters::accuracy
219 *
220 * @returns The integration time in case of success, otherwise
221 * -@ref P3M_TUNE_CAO_TOO_LARGE
222 */
224 double &tuned_r_cut_iL,
225 double &tuned_alpha_L,
226 double &tuned_accuracy) {
227 double best_time = -1., tmp_r_cut_iL = 0., tmp_alpha_L = 0.,
228 tmp_accuracy = 0.;
229 /* in which direction improvement is possible. Initially, we don't know it
230 * yet. */
231 int final_dir = 0;
232 int cao = tuned_cao;
233
234 /* the initial step sets a timing mark. If there is no valid r_cut, we can
235 * only try to increase cao to increase the obtainable precision of the far
236 * formula. */
237 double tmp_time;
238 do {
240 /* cao is too large for this grid, but still the accuracy cannot be
241 * achieved, give up */
243 return tmp_time;
244 }
245 /* we have a valid time, start optimising from there */
246 if (tmp_time >= 0.) {
251 tuned_cao = cao;
252 break;
253 }
254 /* the required accuracy could not be obtained, try higher caos */
255 cao++;
256 final_dir = 1;
257 } while (cao <= cao_max);
258 /* with this mesh, the required accuracy cannot be obtained. */
259 if (cao > cao_max)
261
262 /* at the boundaries, only the opposite direction can be used for
263 * optimisation
264 */
265 if (cao == cao_min)
266 final_dir = 1;
267 else if (cao == cao_max)
268 final_dir = -1;
269
270 if (final_dir == 0) {
271 /* check in which direction we can optimise. Both directions are possible */
272 double dir_times[3];
273 for (final_dir = -1; final_dir <= 1; final_dir += 2) {
276 /* in this direction, we cannot optimise, since we get into precision
277 * trouble */
278 if (tmp_time < 0.)
279 continue;
280
281 if (tmp_time < best_time) {
286 tuned_cao = cao + final_dir;
287 }
288 }
289 /* choose the direction which was optimal, if any of the two */
290 if (dir_times[0] == best_time) {
291 final_dir = -1;
292 } else if (dir_times[2] == best_time) {
293 final_dir = 1;
294 } else {
295 /* no improvement in either direction, however if one is only marginally
296 * worse, we can still try; down is possible and not much worse, while
297 * up is either illegal or even worse */
298 if ((dir_times[0] >= 0 && dir_times[0] < best_time + time_granularity) &&
299 (dir_times[2] < 0 || dir_times[2] > dir_times[0]))
300 final_dir = -1;
301 /* same for up */
302 else if ((dir_times[2] >= 0 &&
304 (dir_times[0] < 0 || dir_times[0] > dir_times[2]))
305 final_dir = 1;
306 else {
307 /* really no chance for optimisation */
308 return best_time;
309 }
310 }
311 /* we already checked the initial cao and its neighbor */
312 cao += 2 * final_dir;
313 } else {
314 /* here some constraint is active, and we only checked the initial cao
315 * itself */
316 cao += final_dir;
317 }
318
319 /* move cao into the optimisation direction until we do not gain anymore. */
320 for (; cao >= cao_min && cao <= cao_max; cao += final_dir) {
322 /* if we cannot meet the precision anymore, give up */
323 if (tmp_time < 0.)
324 break;
325
326 if (tmp_time < best_time) {
331 tuned_cao = cao;
332 } else if (tmp_time > best_time + time_granularity) {
333 /* no hope of further optimisation */
334 break;
335 }
336 }
337 return best_time;
338}
339
340#endif // P3M or DP3M
static auto constexpr P3M_TUNE_ACCURACY_TOO_LARGE
could not achieve target accuracy
static auto constexpr P3M_RCUT_PREC
Precision threshold for a non-zero real-space cutoff.
static auto constexpr P3M_TUNE_ELC_GAP_SIZE
conflict with ELC gap size
static auto constexpr P3M_TUNE_FFT_MESH_SIZE
conflict with FFT domain decomposition
static auto constexpr P3M_TUNE_CAO_TOO_LARGE
charge assignment order too large for mesh size
std::shared_ptr< LocalBox > local_geo
std::shared_ptr< CellStructure > cell_structure
std::shared_ptr< BoxGeometry > box_geo
double get_m_time(Utils::Vector3i const &mesh, int &tuned_cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh.
virtual std::optional< std::string > fft_decomposition_veto(Utils::Vector3i const &) const
Veto FFT decomposition in non-cubic boxes.
System::System & m_system
virtual void on_solver_change() const =0
Re-initialize the currently active solver.
double get_mc_time(Utils::Vector3i const &mesh, int cao, double &tuned_r_cut_iL, double &tuned_alpha_L, double &tuned_accuracy)
Get the optimal alpha and the corresponding computation time for a fixed mesh and cao.
virtual std::tuple< double, double, double, double > calculate_accuracy(Utils::Vector3i const &mesh, int cao, double r_cut_iL) const =0
Get the minimal error for this combination of parameters.
virtual std::optional< std::string > layer_correction_veto_r_cut(double r_cut) const =0
Veto real-space cutoffs larger than the layer correction gap.
void commit(Utils::Vector3i const &mesh, int cao, double r_cut_iL, double alpha_L)
Write tuned parameters to the P3M parameter struct.
void determine_cao_limits(int initial_cao)
Determine a sensible range for the charge assignment order.
void determine_r_cut_limits()
Determine a sensible range for the real-space cutoff.
virtual P3MParameters & get_params()=0
Get the P3M parameters.
std::unique_ptr< TuningLogger > m_logger
static auto constexpr time_granularity
Granularity of the time measurement (milliseconds).
cudaStream_t stream[1]
CUDA streams for parallel computing on CPU and GPU.
This file contains the defaults for ESPResSo.
auto hadamard_division(Vector< T, N > const &a, Vector< U, N > const &b)
Definition Vector.hpp:423
Common functions for dipolar and charge P3M.
double r_cut_iL
cutoff radius for real space electrostatics (>0), rescaled to r_cut_iL = r_cut * box_l_i.
int cao
charge assignment order ([0,7]).
double accuracy
accuracy of the actual parameter set.
double benchmark_integration_step(System::System &system, int int_steps)
Benchmark the integration loop.
Definition tuning.cpp:73