ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
mpiio.cpp
1/*
2 * Copyright (C) 2010-2022 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21/** @file
22 *
23 * Notes on the file layouts:
24 * - Scalar arrays are written like this:
25 * <tt>rank0 --- rank1 --- rank2 ...</tt>
26 * where each rank dumps its scalars in the ordering of the particles.
27 * - Vector arrays are written in the rank ordering like scalar arrays.
28 * The ordering of the vector data is: <tt>v[0] v[1] v[2]</tt>, so the data
29 * looks like this:
30 * <tt>v1[0] v1[1] v1[2] v2[0] v2[1] v2[2] v3[0] ...</tt>
31 *
32 * To be able to determine the rank boundaries (a multiple of
33 * @c nlocalparts), the file 1.pref is written, which dumps the partial
34 * sums of @c nlocalparts, i.e. the prefixes in scalar arrays:
35 * - 1.pref looks like this:
36 * <tt>0 nlocalparts_rank0 nlocalparts_rank0+nlocalparts_rank1 ...</tt>
37 *
38 * Bonds are dumped as two files, namely 1.bond which stores the
39 * Boost-serialized bond lists of the particles and 1.boff which
40 * stores the size of each rank's share of that serialization.
41 * - 1.boff is a scalar array with one <tt>unsigned long</tt> per rank:
42 * entry @c rank holds the number of bytes rank @c rank contributed
43 * to 1.bond.
44 * - 1.bond is the concatenation, in rank order, of the per-rank
45 * buffers obtained by serializing <tt>p.bonds()</tt> of every local
46 * particle with a Boost binary archive.
47 * - On reading, each rank's slice of 1.bond is recovered from the
48 * prefix sum of 1.boff and deserialized particle by particle.
49 */
50
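The layout above can be checked with a small serial post-processing tool. The following sketch is not part of mpiio.cpp: it assumes the dump prefix "1" used in the examples above and the writer's native byte order and type sizes, and it omits error handling. It recovers the per-rank boundaries of the scalar array 1.id from 1.pref.

// Illustrative sketch only: recover rank boundaries from "1.pref" / "1.id".
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <vector>

int main() {
  // 1.pref: one unsigned long per rank, the exclusive prefix sum of nlocalpart
  std::ifstream prefs("1.pref", std::ios::binary | std::ios::ate);
  auto const nranks = static_cast<std::size_t>(prefs.tellg()) / sizeof(unsigned long);
  std::vector<unsigned long> pref(nranks);
  prefs.seekg(0);
  prefs.read(reinterpret_cast<char *>(pref.data()),
             static_cast<std::streamsize>(nranks * sizeof(unsigned long)));

  // 1.id: the particle ids of all ranks, concatenated in rank order
  std::ifstream ids("1.id", std::ios::binary | std::ios::ate);
  auto const nglobalpart = static_cast<unsigned long>(ids.tellg()) / sizeof(int);

  for (std::size_t r = 0u; r < nranks; ++r) {
    auto const begin = pref[r];
    auto const end = (r + 1u < nranks) ? pref[r + 1u] : nglobalpart;
    std::printf("rank %zu wrote particle ids [%lu, %lu)\n", r, begin, end);
  }
}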
51#include "mpiio.hpp"
52
53#include "Particle.hpp"
54#include "bonded_interactions/bonded_interaction_data.hpp"
55#include "cell_system/CellStructure.hpp"
56#include "errorhandling.hpp"
57#include "system/System.hpp"
58
59#include <utils/Vector.hpp>
60
61#include <boost/archive/binary_iarchive.hpp>
62#include <boost/archive/binary_oarchive.hpp>
63#include <boost/iostreams/device/array.hpp>
64#include <boost/iostreams/device/back_inserter.hpp>
65#include <boost/iostreams/stream.hpp>
66
67#include <mpi.h>
68
69#include <algorithm>
70#include <cassert>
71#include <cerrno>
72#include <cstddef>
73#include <cstdio>
74#include <cstring>
75#include <sstream>
76#include <string>
77#include <sys/stat.h>
78#include <tuple>
79#include <utility>
80#include <vector>
81
82namespace Mpiio {
83
84/**
85 * @brief Fatal error handler.
86 * On 1 MPI rank the error is recoverable and an exception is thrown.
87 * On more than 1 MPI rank the error is not recoverable.
88 * @param msg Custom error message
89 * @param fn File path
90 * @param extra Extra context
91 */
92static bool fatal_error(char const *msg, std::string const &fn = "",
93 std::string const &extra = "") {
94 std::stringstream what;
95 what << "MPI-IO Error: " << msg;
96 if (not fn.empty()) {
97 what << " \"" << fn << "\"";
98 }
99 if (not extra.empty()) {
100 what << " :" << extra;
101 }
102 int size;
103 MPI_Comm_size(MPI_COMM_WORLD, &size);
104 if (size == 1) {
105 throw std::runtime_error(what.str());
106 }
107 fprintf(stderr, "%s\n", what.str().c_str());
108 errexit();
109 return false;
110}
111
112/**
113 * @brief Fatal error handler that closes an open file and queries the
114 * message associated with an MPI error code.
115 * On 1 MPI rank the error is recoverable and an exception is thrown.
116 * On more than 1 MPI rank the error is not recoverable.
117 * @param msg Custom error message
118 * @param fn File path
119 * @param fp File handle
120 * @param errnum MPI error code
121 */
122static bool fatal_error(char const *msg, std::string const &fn, MPI_File *fp,
123 int errnum) {
124 // get MPI error message
125 char buf[MPI_MAX_ERROR_STRING];
126 int buf_len;
127 MPI_Error_string(errnum, buf, &buf_len);
128 buf[buf_len] = '\0';
129 // close file handle
130 if (fp) {
131 MPI_File_close(fp);
132 }
133 return fatal_error(msg, fn, buf);
134}
135
136/**
137 * @brief Dump data @p arr of size @p len starting from prefix @p pref
138 * of type @p T using @p MPI_T as MPI datatype. Beware that @p T and
139 * @p MPI_T have to match!
140 *
141 * @param fn The file name to write to (must not already exist!)
142 * @param arr The array to dump
143 * @param len The number of elements to dump
144 * @param pref The prefix for this process
145 * @param MPI_T The MPI datatype corresponding to the template parameter @p T
146 */
147template <typename T>
148static void mpiio_dump_array(const std::string &fn, T const *arr,
149 std::size_t len, std::size_t pref,
150 MPI_Datatype MPI_T) {
151 MPI_File f;
152 int ret;
153 ret = MPI_File_open(MPI_COMM_WORLD, const_cast<char *>(fn.c_str()),
154 // MPI_MODE_EXCL: Prohibit overwriting
155 MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_EXCL,
156 MPI_INFO_NULL, &f);
157 if (ret) {
158 fatal_error("Could not open file", fn, &f, ret);
159 }
160 auto const offset =
161 static_cast<MPI_Offset>(pref) * static_cast<MPI_Offset>(sizeof(T));
162 ret = MPI_File_set_view(f, offset, MPI_T, MPI_T, const_cast<char *>("native"),
163 MPI_INFO_NULL);
164 ret |= MPI_File_write_all(f, arr, static_cast<int>(len), MPI_T,
165 MPI_STATUS_IGNORE);
166 static_cast<void>(ret and fatal_error("Could not write file", fn, &f, ret));
167 MPI_File_close(&f);
168}
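A worked example of the offset arithmetic above (the numbers are illustrative, not taken from the source): three ranks dumping an int array, assuming sizeof(int) == 4.

  len  = {2, 4, 3}               // elements written by ranks 0, 1, 2
  pref = {0, 2, 6}               // exclusive prefix sums, see mpi_calculate_file_offset below
  byte offsets = pref * 4        // {0, 8, 24}, passed to MPI_File_set_view
  file size    = (2 + 4 + 3) * 4 // 36 bytes, with rank boundaries exactly at the prefixes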
169
170/**
171 * @brief Calculate the file offset on the local node.
172 * @param n_items Number of items on the local node.
173 * @return The number of items on all nodes with lower rank.
174 */
175static unsigned long mpi_calculate_file_offset(unsigned long n_items) {
176 unsigned long offset = 0ul;
177 MPI_Exscan(&n_items, &offset, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD);
178 return offset;
179}
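For example (illustrative numbers): MPI_Exscan computes an exclusive prefix sum over the ranks. The pre-initialization of offset to 0 matters because MPI_Exscan does not define the receive buffer on rank 0, whose correct offset is 0 anyway.

  n_items per rank : {3, 5, 2}
  returned offset  : {0, 3, 8}   // rank 0 keeps its initial 0, rank 2 gets 3 + 5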
180
181/**
182 * @brief Dump the fields and bond information.
183 * To be called by the head node only.
184 *
185 * @param fn The filename to write to
186 * @param fields The dumped fields
187 * @param bonded_ias The list of bonds
188 */
189static void dump_info(std::string const &fn, unsigned fields,
190 BondedInteractionsMap const &bonded_ias) {
191 // MPI-IO requires consecutive bond ids
192 auto const nbonds = bonded_ias.size();
193 assert(static_cast<std::size_t>(bonded_ias.get_next_key()) == nbonds);
194
195 FILE *f = fopen(fn.c_str(), "wb");
196 if (!f) {
197 fatal_error("Could not open file", fn);
198 }
199 std::vector<int> npartners;
200 bool success = (fwrite(&fields, sizeof(fields), 1u, f) == 1);
201 // Pack the number of partners. This is needed to interpret the bond IntList.
202 npartners.reserve(nbonds);
203 for (int bond_id = 0; bond_id < bonded_ias.get_next_key(); ++bond_id) {
204 if (bonded_ias.contains(bond_id)) {
205 npartners.emplace_back(number_of_partners(*bonded_ias.at(bond_id)));
206 }
207 }
208 success &= fwrite(&nbonds, sizeof(std::size_t), 1u, f) == 1;
209 success &= fwrite(npartners.data(), sizeof(int), nbonds, f) == nbonds;
210 fclose(f);
211 static_cast<void>(success or fatal_error("Could not write file", fn));
212}
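The .head file therefore contains, in order: the fields bitmask (unsigned), the number of bond types (std::size_t) and one int per bond type holding its number of partners. A minimal serial reader for that layout could look like the sketch below (not part of mpiio.cpp; it assumes the reader runs with the writer's endianness and type sizes and skips error handling).

#include <cstddef>
#include <cstdio>
#include <vector>

void print_head_file(char const *path) {
  FILE *f = std::fopen(path, "rb");
  if (!f)
    return;
  unsigned fields = 0u;
  std::size_t nbonds = 0u;
  std::fread(&fields, sizeof(fields), 1, f);
  std::fread(&nbonds, sizeof(nbonds), 1, f);
  std::vector<int> npartners(nbonds);
  std::fread(npartners.data(), sizeof(int), nbonds, f);
  std::fclose(f);
  std::printf("fields bitmask %#x, %zu bond types\n", fields, nbonds);
}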
213
214void mpi_mpiio_common_write(std::string const &prefix, unsigned fields,
215 BondedInteractionsMap const &bonded_ias,
216 ParticleRange const &particles,
217 write_buffers &buffers) {
218 auto const nlocalpart = static_cast<unsigned long>(particles.size());
219 auto const offset = mpi_calculate_file_offset(nlocalpart);
220 // keep buffers in order to avoid allocating them on every function call
221 auto &pos = buffers.pos;
222 auto &vel = buffers.vel;
223 auto &id = buffers.id;
224 auto &type = buffers.type;
225
226 // Realloc static buffers if necessary
227 if (nlocalpart > id.size())
228 id.resize(nlocalpart);
229 if (fields & MPIIO_OUT_POS && 3ul * nlocalpart > pos.size())
230 pos.resize(3ul * nlocalpart);
231 if (fields & MPIIO_OUT_VEL && 3ul * nlocalpart > vel.size())
232 vel.resize(3ul * nlocalpart);
233 if (fields & MPIIO_OUT_TYP && nlocalpart > type.size())
234 type.resize(nlocalpart);
235
236 // Pack the necessary information
237 auto id_it = id.begin();
238 auto type_it = type.begin();
239 auto pos_it = pos.begin();
240 auto vel_it = vel.begin();
241 for (auto const &p : particles) {
242 *id_it = p.id();
243 ++id_it;
244 if (fields & MPIIO_OUT_POS) {
245 std::copy_n(std::begin(p.pos()), 3u, pos_it);
246 pos_it += 3u;
247 }
248 if (fields & MPIIO_OUT_VEL) {
249 std::copy_n(std::begin(p.v()), 3u, vel_it);
250 vel_it += 3u;
251 }
252 if (fields & MPIIO_OUT_TYP) {
253 *type_it = p.type();
254 ++type_it;
255 }
256 }
257
258 int rank;
259 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
260 if (rank == 0)
261 dump_info(prefix + ".head", fields, bonded_ias);
262 auto const pref_offset = static_cast<unsigned long>(rank);
263 mpiio_dump_array<unsigned long>(prefix + ".pref", &offset, 1ul, pref_offset,
264 MPI_UNSIGNED_LONG);
265 mpiio_dump_array<int>(prefix + ".id", id.data(), nlocalpart, offset, MPI_INT);
266 if (fields & MPIIO_OUT_POS)
267 mpiio_dump_array<double>(prefix + ".pos", pos.data(), 3ul * nlocalpart,
268 3ul * offset, MPI_DOUBLE);
269 if (fields & MPIIO_OUT_VEL)
270 mpiio_dump_array<double>(prefix + ".vel", vel.data(), 3ul * nlocalpart,
271 3ul * offset, MPI_DOUBLE);
272 if (fields & MPIIO_OUT_TYP)
273 mpiio_dump_array<int>(prefix + ".type", type.data(), nlocalpart, offset,
274 MPI_INT);
275
276 if (fields & MPIIO_OUT_BND) {
277 std::vector<char> bonds;
278
279 /* Construct archive that pushes back to the bond buffer */
280 {
281 namespace io = boost::iostreams;
282 io::stream_buffer<io::back_insert_device<std::vector<char>>> os{
283 io::back_inserter(bonds)};
284 boost::archive::binary_oarchive bond_archiver{os};
285
286 for (auto const &p : particles) {
287 bond_archiver << p.bonds();
288 }
289 }
290
291 // Determine the prefixes in the bond file
292 auto const bonds_size = static_cast<unsigned long>(bonds.size());
293 auto const bonds_offset = mpi_calculate_file_offset(bonds_size);
294
295 mpiio_dump_array<unsigned long>(prefix + ".boff", &bonds_size, 1ul,
296 pref_offset, MPI_UNSIGNED_LONG);
297 mpiio_dump_array<char>(prefix + ".bond", bonds.data(), bonds.size(),
298 bonds_offset, MPI_CHAR);
299 }
300}
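A hypothetical call site could look as follows; only the flag names and the function signature are taken from this file, while the bond map, the cell structure and its local_particles() accessor are assumed to be available from the caller.

// illustrative only: dump positions, velocities, types and bonds under prefix "1"
unsigned const fields = MPIIO_OUT_POS | MPIIO_OUT_VEL | MPIIO_OUT_TYP | MPIIO_OUT_BND;
Mpiio::write_buffers buffers;
Mpiio::mpi_mpiio_common_write("1", fields, bonded_ias,
                              cell_structure.local_particles(), buffers);
// afterwards 1.head, 1.pref, 1.id, 1.pos, 1.vel, 1.type, 1.boff and 1.bond
// exist on disk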
301
302/**
303 * @brief Get the number of elements in a file by its file size and @p elem_sz.
304 * That is, query the file size using stat(2) and divide it by @p elem_sz.
305 *
306 * @param fn The filename
307 * @param elem_sz Size of a single element
308 * @return The number of elements stored in the file
309 */
310static unsigned long get_num_elem(const std::string &fn, std::size_t elem_sz) {
311 // Could also be done via MPI_File_open, MPI_File_get_size,
312 // MPI_File_close.
313 struct stat st;
314 errno = 0;
315 if (stat(fn.c_str(), &st) != 0) {
316 auto const reason = strerror(errno);
317 fatal_error("Could not get file size of", fn, reason);
318 }
319 return static_cast<unsigned long>(st.st_size) / elem_sz;
320}
321
322/**
323 * @brief Read a previously dumped array of size @p len starting from prefix
324 * @p pref of type @p T using @p MPI_T as MPI datatype. Beware that
325 * @p T and @p MPI_T have to match!
326 *
327 * @param fn The file name to read from
328 * @param arr The array to populate
329 * @param len The number of elements to read
330 * @param pref The prefix for this process
331 * @param MPI_T The MPI datatype corresponding to the template parameter @p T
332 */
333template <typename T>
334static void mpiio_read_array(const std::string &fn, T *arr, std::size_t len,
335 std::size_t pref, MPI_Datatype MPI_T) {
336 MPI_File f;
337 int ret;
338 ret = MPI_File_open(MPI_COMM_WORLD, const_cast<char *>(fn.c_str()),
339 MPI_MODE_RDONLY, MPI_INFO_NULL, &f);
340 if (ret) {
341 fatal_error("Could not open file", fn, &f, ret);
342 }
343 auto const offset =
344 static_cast<MPI_Offset>(pref) * static_cast<MPI_Offset>(sizeof(T));
345 ret = MPI_File_set_view(f, offset, MPI_T, MPI_T, const_cast<char *>("native"),
346 MPI_INFO_NULL);
347
348 ret |= MPI_File_read_all(f, arr, static_cast<int>(len), MPI_T,
349 MPI_STATUS_IGNORE);
350 static_cast<void>(ret and fatal_error("Could not read file", fn, &f, ret));
351 MPI_File_close(&f);
352}
353
354/**
355 * @brief Read the header file and return the first value.
356 * To be called by all processes.
357 *
358 * @param fn Filename of the head file
359 * @param rank The rank of the current process in @c MPI_COMM_WORLD
360 */
361static unsigned read_head(const std::string &fn, int rank) {
362 unsigned n_fields = 0u;
363 FILE *f = nullptr;
364 if (rank == 0) {
365 f = fopen(fn.c_str(), "rb");
366 static_cast<void>(not f and fatal_error("Could not open file", fn));
367 auto const n = fread(static_cast<void *>(&n_fields), sizeof n_fields, 1, f);
368 static_cast<void>((n == 1) or fatal_error("Could not read file", fn));
369 }
370 MPI_Bcast(&n_fields, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
371 if (f) {
372 fclose(f);
373 }
374 return n_fields;
375}
376
377/**
378 * @brief Read the pref file.
379 * Needs to be called by all processes.
380 *
381 * @param fn The file name of the pref file
382 * @param rank The rank of the current process in @c MPI_COMM_WORLD
383 * @param size The size of @c MPI_COMM_WORLD
384 * @param nglobalpart The global number of particles
385 * @return The prefix and the local number of particles.
386 */
387static std::tuple<unsigned long, unsigned long>
388read_prefs(const std::string &fn, int rank, int size,
389 unsigned long nglobalpart) {
390 auto const pref_offset = static_cast<unsigned long>(rank);
391 unsigned long pref = 0ul;
392 unsigned long nlocalpart = 0ul;
393 mpiio_read_array<unsigned long>(fn, &pref, 1ul, pref_offset,
394 MPI_UNSIGNED_LONG);
395 if (rank > 0)
396 MPI_Send(&pref, 1, MPI_UNSIGNED_LONG, rank - 1, 0, MPI_COMM_WORLD);
397 if (rank < size - 1)
398 MPI_Recv(&nlocalpart, 1, MPI_UNSIGNED_LONG, rank + 1, MPI_ANY_TAG,
399 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
400 else
401 nlocalpart = nglobalpart;
402 nlocalpart -= pref;
403 return {pref, nlocalpart};
404}
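Continuing the illustrative numbers from above: every rank reads its own prefix, sends it to rank - 1 and receives the prefix of rank + 1; the difference is the local particle count, with the last rank falling back to nglobalpart.

  pref read per rank   : {0, 3, 8},  nglobalpart = 10
  received from rank+1 : {3, 8, -}   // the last rank receives nothing and uses nglobalpart
  nlocalpart           : {3, 5, 2}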
405
406void mpi_mpiio_common_read(const std::string &prefix, unsigned fields,
407 CellStructure &cell_structure) {
408 cell_structure.remove_all_particles();
409
410 int size, rank;
411 MPI_Comm_size(MPI_COMM_WORLD, &size);
412 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
413 auto const nproc = get_num_elem(prefix + ".pref", sizeof(unsigned long));
414 auto const nglobalpart = get_num_elem(prefix + ".id", sizeof(int));
415
416 if (rank == 0 && nproc != static_cast<unsigned long>(size)) {
417 fatal_error("Trying to read a file with a different COMM "
418 "size than at point of writing.");
419 }
420
421 // 1.head on head node:
422 // Read head to determine fields at time of writing.
423 // Compare this var to the current fields.
424 auto const avail_fields = read_head(prefix + ".head", rank);
425 if (rank == 0 && (fields & avail_fields) != fields) {
426 fatal_error("Requesting to read fields which were not dumped.");
427 }
428
429 // 1.pref on all nodes:
430 // Read own prefix (1 int at prefix rank).
431 // Communicate own prefix to rank-1
432 // Determine nlocalpart (prefix of rank+1 - own prefix) on every node.
433 auto const [pref, nlocalpart] =
434 read_prefs(prefix + ".pref", rank, size, nglobalpart);
435
436 std::vector<Particle> particles(nlocalpart);
437
438 {
439 // 1.id on all nodes:
440 // Read nlocalpart ints at defined prefix.
441 std::vector<int> id(nlocalpart);
442 auto id_it = id.begin();
443 mpiio_read_array<int>(prefix + ".id", id.data(), nlocalpart, pref, MPI_INT);
444
445 for (auto &p : particles) {
446 p.id() = *id_it;
447 ++id_it;
448 }
449 }
450
451 if (fields & MPIIO_OUT_POS) {
452 // 1.pos on all nodes:
453 // Read nlocalpart * 3 doubles at defined prefix * 3
454 std::vector<double> pos(3ul * nlocalpart);
455 auto pos_it = pos.begin();
456 mpiio_read_array<double>(prefix + ".pos", pos.data(), 3ul * nlocalpart,
457 3ul * pref, MPI_DOUBLE);
458
459 for (auto &p : particles) {
460 std::copy_n(pos_it, 3u, std::begin(p.pos()));
461 pos_it += 3u;
462 }
463 }
464
465 if (fields & MPIIO_OUT_TYP) {
466 // 1.type on all nodes:
467 // Read nlocalpart ints at defined prefix.
468 std::vector<int> type(nlocalpart);
469 auto type_it = type.begin();
470 mpiio_read_array<int>(prefix + ".type", type.data(), nlocalpart, pref,
471 MPI_INT);
472
473 for (auto &p : particles) {
474 p.type() = *type_it;
475 ++type_it;
476 }
477 }
478
479 if (fields & MPIIO_OUT_VEL) {
480 // 1.vel on all nodes:
481 // Read nlocalpart * 3 doubles at defined prefix * 3
482 std::vector<double> vel(3ul * nlocalpart);
483 auto vel_it = vel.begin();
484 mpiio_read_array<double>(prefix + ".vel", vel.data(), 3ul * nlocalpart,
485 3ul * pref, MPI_DOUBLE);
486
487 for (auto &p : particles) {
488 std::copy_n(vel_it, 3u, std::begin(p.v()));
489 vel_it += 3u;
490 }
491 }
492
493 if (fields & MPIIO_OUT_BND) {
494 // 1.boff
495 // 1 long int per process
496 auto const pref_offset = static_cast<unsigned long>(rank);
497 unsigned long bonds_size = 0u;
498 mpiio_read_array<unsigned long>(prefix + ".boff", &bonds_size, 1ul,
499 pref_offset, MPI_UNSIGNED_LONG);
500 auto const bonds_offset = mpi_calculate_file_offset(bonds_size);
501
502 // 1.bond
503 // nlocalbonds ints per process
504 std::vector<char> bond(bonds_size);
505 mpiio_read_array<char>(prefix + ".bond", bond.data(), bonds_size,
506 bonds_offset, MPI_CHAR);
507
508 boost::iostreams::array_source src(bond.data(), bond.size());
509 boost::iostreams::stream<boost::iostreams::array_source> ss(src);
510 boost::archive::binary_iarchive ia(ss);
511
512 for (auto &p : particles) {
513 ia >> p.bonds();
514 }
515 }
516
517 for (auto &p : particles) {
518 cell_structure.add_particle(std::move(p));
519 }
520}
521} // namespace Mpiio
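A matching restore would mirror the hypothetical write call shown earlier; only the signature of mpi_mpiio_common_read is taken from this file, the flag bitmask and cell structure are assumed to come from the caller.

// illustrative only: repopulate the cell structure from the files written under prefix "1"
unsigned const fields = MPIIO_OUT_POS | MPIIO_OUT_VEL | MPIIO_OUT_TYP | MPIIO_OUT_BND;
Mpiio::mpi_mpiio_common_read("1", fields, cell_structure);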