ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
mpiio.cpp
1/*
2 * Copyright (C) 2010-2022 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21/** @file
22 *
23 * Notes concerning the file layouts:
24 * - Scalar arrays are written like this:
25 * <tt>rank0 --- rank1 --- rank2 ...</tt>
26 * where each rank dumps its scalars in the ordering of the particles.
27 * - Vector arrays are written in the rank ordering like scalar arrays.
28 * The ordering of the vector data is: <tt>v[0] v[1] v[2]</tt>, so the data
29 * looks like this:
30 * <tt>v1[0] v1[1] v1[2] v2[0] v2[1] v2[2] v3[0] ...</tt>
31 *
32 * To be able to determine the rank boundaries in these arrays, the
33 * file 1.pref is written, which dumps the exclusive partial sums of
34 * @c nlocalparts, i.e. the prefixes of the scalar arrays (see the sketch below):
35 * - 1.pref looks like this:
36 * <tt>0 nlocalparts_rank0 nlocalparts_rank0+nlocalparts_rank1 ...</tt>
37 *
38 * Bonds are dumped as two files: 1.bond stores the bond data of each
39 * rank, serialized per particle with a Boost binary archive, and 1.boff
40 * stores the size of that serialized data.
41 * - 1.boff is a scalar array with one <tt>unsigned long</tt> per rank,
42 * namely the number of bytes the rank contributes to 1.bond.
43 * - The exclusive prefix sum over 1.boff (recomputed with @c MPI_Exscan
44 * when reading) is the byte offset at which each rank's serialized
45 * bond data starts inside 1.bond.
46 * - When reading, every rank deserializes the bond list of each of its
47 * local particles from its chunk of 1.bond, in particle order; see the
48 * bond sections of mpi_mpiio_common_write() and mpi_mpiio_common_read().
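 *
 * As an illustration, here is a minimal external post-processing sketch
 * (not part of ESPResSo; it assumes the dump prefix "1" and a machine with
 * the same endianness and integer sizes as the one that wrote the files)
 * that recovers the per-rank slices of the id array from 1.pref and 1.id:
 * @code{.cpp}
 * #include <cstddef>
 * #include <cstdio>
 * #include <fstream>
 * #include <vector>
 *
 * int main() {
 *   // 1.pref: one unsigned long per writing rank (exclusive prefix sum)
 *   std::ifstream pref_file("1.pref", std::ios::binary);
 *   std::vector<unsigned long> prefs;
 *   unsigned long pref;
 *   while (pref_file.read(reinterpret_cast<char *>(&pref), sizeof pref))
 *     prefs.push_back(pref);
 *
 *   // 1.id: the particle ids of all ranks, concatenated in rank order
 *   std::ifstream id_file("1.id", std::ios::binary);
 *   std::vector<int> ids;
 *   int id;
 *   while (id_file.read(reinterpret_cast<char *>(&id), sizeof id))
 *     ids.push_back(id);
 *
 *   for (std::size_t rank = 0; rank < prefs.size(); ++rank) {
 *     auto const begin = prefs[rank];
 *     auto const end = (rank + 1 < prefs.size())
 *                          ? prefs[rank + 1]
 *                          : static_cast<unsigned long>(ids.size());
 *     std::printf("rank %zu wrote %lu particle ids\n", rank, end - begin);
 *   }
 * }
 * @endcode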
49 */
50
51#include "mpiio.hpp"
52
53#include "Particle.hpp"
56#include "errorhandling.hpp"
57#include "system/System.hpp"
58
59#include <utils/Vector.hpp>
60
61#include <boost/archive/binary_iarchive.hpp>
62#include <boost/archive/binary_oarchive.hpp>
63#include <boost/iostreams/device/array.hpp>
64#include <boost/iostreams/device/back_inserter.hpp>
65#include <boost/iostreams/stream.hpp>
66
67#include <mpi.h>
68
69#include <algorithm>
70#include <cassert>
71#include <cerrno>
72#include <cstddef>
73#include <cstdio>
74#include <cstring>
75#include <sstream>
76#include <string>
77#include <sys/stat.h>
78#include <tuple>
79#include <utility>
80#include <vector>
81
82namespace Mpiio {
83
84#if defined(__clang__)
85#pragma clang diagnostic push
86#pragma clang diagnostic ignored "-Wunreachable-code-return"
87#endif
88/**
89 * @brief Fatal error handler.
90 * On 1 MPI rank the error is recoverable and an exception is thrown.
91 * On more than 1 MPI rank the error is not recoverable.
92 * @param msg Custom error message
93 * @param fn File path
94 * @param extra Extra context
95 */
96static bool fatal_error(char const *msg, std::string const &fn = "",
97 std::string const &extra = "") {
98 std::stringstream what;
99 what << "MPI-IO Error: " << msg;
100 if (not fn.empty()) {
101 what << " \"" << fn << "\"";
102 }
103 if (not extra.empty()) {
104 what << " :" << extra;
105 }
106 int size;
107 MPI_Comm_size(MPI_COMM_WORLD, &size);
108 if (size == 1) {
109 throw std::runtime_error(what.str());
110 }
111 fprintf(stderr, "%s\n", what.str().c_str());
112 errexit();
113 return false;
114}
115#if defined(__clang__)
116#pragma clang diagnostic pop
117#endif
118
119/**
120 * @brief Fatal error handler that closes an open file and queries the
121 * message associated with an MPI error code.
122 * On 1 MPI rank the error is recoverable and an exception is thrown.
123 * On more than 1 MPI rank the error is not recoverable.
124 * @param msg Custom error message
125 * @param fn File path
126 * @param fp File handle
127 * @param errnum MPI error code
128 */
129static bool fatal_error(char const *msg, std::string const &fn, MPI_File *fp,
130 int errnum) {
131 // get MPI error message
132 char buf[MPI_MAX_ERROR_STRING];
133 int buf_len;
134 MPI_Error_string(errnum, buf, &buf_len);
135 buf[buf_len] = '\0';
136 // close file handle
137 if (fp) {
138 MPI_File_close(fp);
139 }
140 return fatal_error(msg, fn, buf);
141}
142
143/**
144 * @brief Dump data @p arr of size @p len starting from prefix @p pref
145 * of type @p T using @p MPI_T as MPI datatype. Beware that @p T and
146 * @p MPI_T have to match!
147 *
148 * @param fn The file name to write to (must not already exist!)
149 * @param arr The array to dump
150 * @param len The number of elements to dump
151 * @param pref The prefix for this process
152 * @param MPI_T The MPI datatype corresponding to the template parameter @p T
153 */
154template <typename T>
155static void mpiio_dump_array(const std::string &fn, T const *arr,
156 std::size_t len, std::size_t pref,
157 MPI_Datatype MPI_T) {
158 MPI_File f;
159 int ret;
160 ret = MPI_File_open(MPI_COMM_WORLD, const_cast<char *>(fn.c_str()),
161 // MPI_MODE_EXCL: Prohibit overwriting
162 MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_EXCL,
163 MPI_INFO_NULL, &f);
164 if (ret) {
165 fatal_error("Could not open file", fn, &f, ret);
166 }
167 auto const offset =
168 static_cast<MPI_Offset>(pref) * static_cast<MPI_Offset>(sizeof(T));
169 ret = MPI_File_set_view(f, offset, MPI_T, MPI_T, const_cast<char *>("native"),
170 MPI_INFO_NULL);
171 ret |= MPI_File_write_all(f, arr, static_cast<int>(len), MPI_T,
172 MPI_STATUS_IGNORE);
173 static_cast<void>(ret and fatal_error("Could not write file", fn, &f, ret));
174 MPI_File_close(&f);
175}
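
/* Example: a std::vector<double> buffer must be dumped with MPI_DOUBLE,
 *   mpiio_dump_array<double>(fn, buf.data(), buf.size(), pref, MPI_DOUBLE);
 * (illustrative call only; fn, buf and pref are placeholders). Mismatching
 * the template parameter and the MPI datatype, e.g. a double buffer dumped
 * with MPI_FLOAT, would write the wrong bytes.
 */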
176
177/**
178 * @brief Calculate the file offset on the local node.
179 * @param n_items Number of items on the local node.
180 * @return The number of items on all nodes with lower rank.
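 * For example, with @p n_items = 3, 5 and 2 on ranks 0, 1 and 2, the
 * returned offsets are 0, 3 and 8, respectively (an exclusive prefix sum;
 * rank 0 keeps its initial value of 0).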
181 */
182static unsigned long mpi_calculate_file_offset(unsigned long n_items) {
183 unsigned long offset = 0ul;
184 MPI_Exscan(&n_items, &offset, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD);
185 return offset;
186}
187
188/**
189 * @brief Dump the fields and bond information.
190 * To be called by the head node only.
191 *
192 * @param fn The filename to write to
193 * @param fields The dumped fields
194 * @param bonded_ias The list of bonds
195 */
196static void dump_info(std::string const &fn, unsigned fields,
197 BondedInteractionsMap const &bonded_ias) {
198 // MPI-IO requires consecutive bond ids
199 auto const nbonds = bonded_ias.size();
200 assert(static_cast<std::size_t>(bonded_ias.get_next_key()) == nbonds);
201
202 FILE *f = fopen(fn.c_str(), "wb");
203 if (!f) {
204 fatal_error("Could not open file", fn);
205 }
206 std::vector<int> npartners;
207 bool success = (fwrite(&fields, sizeof(fields), 1u, f) == 1);
208 // Pack the number of partners. This is needed to interpret the bond IntList.
209 npartners.reserve(nbonds);
210 for (int bond_id = 0; bond_id < bonded_ias.get_next_key(); ++bond_id) {
211 if (bonded_ias.contains(bond_id)) {
212 npartners.emplace_back(number_of_partners(*bonded_ias.at(bond_id)));
213 }
214 }
215 success &= fwrite(&nbonds, sizeof(std::size_t), 1u, f) == 1;
216 success &= fwrite(npartners.data(), sizeof(int), nbonds, f) == nbonds;
217 fclose(f);
218 static_cast<void>(success or fatal_error("Could not write file", fn));
219}
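
/* A minimal sketch (not part of ESPResSo) of how a header written by
 * dump_info() could be parsed offline, assuming the reading machine has the
 * same endianness and the same sizeof(unsigned) and sizeof(std::size_t) as
 * the writing machine:
 *
 *   std::ifstream head("1.head", std::ios::binary);
 *   unsigned fields;
 *   std::size_t nbonds;
 *   head.read(reinterpret_cast<char *>(&fields), sizeof fields);
 *   head.read(reinterpret_cast<char *>(&nbonds), sizeof nbonds);
 *   std::vector<int> npartners(nbonds);
 *   head.read(reinterpret_cast<char *>(npartners.data()),
 *             static_cast<std::streamsize>(nbonds * sizeof(int)));
 */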
220
221void mpi_mpiio_common_write(std::string const &prefix, unsigned fields,
222 BondedInteractionsMap const &bonded_ias,
223 ParticleRange const &particles,
224 write_buffers &buffers) {
225 auto const nlocalpart = static_cast<unsigned long>(particles.size());
226 auto const offset = mpi_calculate_file_offset(nlocalpart);
227 // keep buffers in order to avoid allocating them on every function call
228 auto &pos = buffers.pos;
229 auto &vel = buffers.vel;
230 auto &id = buffers.id;
231 auto &type = buffers.type;
232
233 // Realloc static buffers if necessary
234 if (nlocalpart > id.size())
235 id.resize(nlocalpart);
236 if (fields & MPIIO_OUT_POS && 3ul * nlocalpart > pos.size())
237 pos.resize(3ul * nlocalpart);
238 if (fields & MPIIO_OUT_VEL && 3ul * nlocalpart > vel.size())
239 vel.resize(3ul * nlocalpart);
240 if (fields & MPIIO_OUT_TYP && nlocalpart > type.size())
241 type.resize(nlocalpart);
242
243 // Pack the necessary information
244 auto id_it = id.begin();
245 auto type_it = type.begin();
246 auto pos_it = pos.begin();
247 auto vel_it = vel.begin();
248 for (auto const &p : particles) {
249 *id_it = p.id();
250 ++id_it;
251 if (fields & MPIIO_OUT_POS) {
252 std::copy_n(std::begin(p.pos()), 3u, pos_it);
253 pos_it += 3u;
254 }
255 if (fields & MPIIO_OUT_VEL) {
256 std::copy_n(std::begin(p.v()), 3u, vel_it);
257 vel_it += 3u;
258 }
259 if (fields & MPIIO_OUT_TYP) {
260 *type_it = p.type();
261 ++type_it;
262 }
263 }
264
265 int rank;
266 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
267 if (rank == 0)
268 dump_info(prefix + ".head", fields, bonded_ias);
269 auto const pref_offset = static_cast<unsigned long>(rank);
270 mpiio_dump_array<unsigned long>(prefix + ".pref", &offset, 1ul, pref_offset,
271 MPI_UNSIGNED_LONG);
272 mpiio_dump_array<int>(prefix + ".id", id.data(), nlocalpart, offset, MPI_INT);
273 if (fields & MPIIO_OUT_POS)
274 mpiio_dump_array<double>(prefix + ".pos", pos.data(), 3ul * nlocalpart,
275 3ul * offset, MPI_DOUBLE);
276 if (fields & MPIIO_OUT_VEL)
277 mpiio_dump_array<double>(prefix + ".vel", vel.data(), 3ul * nlocalpart,
278 3ul * offset, MPI_DOUBLE);
279 if (fields & MPIIO_OUT_TYP)
280 mpiio_dump_array<int>(prefix + ".type", type.data(), nlocalpart, offset,
281 MPI_INT);
282
283 if (fields & MPIIO_OUT_BND) {
284 std::vector<char> bonds;
285
286 /* Construct archive that pushes back to the bond buffer */
287 {
288 namespace io = boost::iostreams;
289 io::stream_buffer<io::back_insert_device<std::vector<char>>> os{
290 io::back_inserter(bonds)};
291 boost::archive::binary_oarchive bond_archiver{os};
292
293 for (auto const &p : particles) {
294 bond_archiver << p.bonds();
295 }
296 }
297
298 // Determine the prefixes in the bond file
299 auto const bonds_size = static_cast<unsigned long>(bonds.size());
300 auto const bonds_offset = mpi_calculate_file_offset(bonds_size);
301
302 mpiio_dump_array<unsigned long>(prefix + ".boff", &bonds_size, 1ul,
303 pref_offset, MPI_UNSIGNED_LONG);
304 mpiio_dump_array<char>(prefix + ".bond", bonds.data(), bonds.size(),
305 bonds_offset, MPI_CHAR);
306 }
307}
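
/* For illustration only (the arguments are placeholders): a call like
 *   mpi_mpiio_common_write("dump", MPIIO_OUT_POS | MPIIO_OUT_BND,
 *                          bonded_ias, particles, buffers);
 * produces dump.head (written by rank 0 only), dump.pref, dump.id, dump.pos,
 * dump.boff and dump.bond; dump.vel and dump.type are additionally written
 * when MPIIO_OUT_VEL or MPIIO_OUT_TYP are set in the field bitmask.
 */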
308
309/**
310 * @brief Get the number of elements in a file by its file size and @p elem_sz.
311 * I.e. query the file size using stat(2) and divide it by @p elem_sz.
312 *
313 * @param fn The filename
314 * @param elem_sz Size of a single element
315 * @return The number of elements stored in the file
316 */
317static unsigned long get_num_elem(const std::string &fn, std::size_t elem_sz) {
318 // Could also be done via MPI_File_open, MPI_File_get_size,
319 // MPI_File_close.
320 struct stat st;
321 errno = 0;
322 if (stat(fn.c_str(), &st) != 0) {
323 auto const reason = strerror(errno);
324 fatal_error("Could not get file size of", fn, reason);
325 }
326 return static_cast<unsigned long>(st.st_size) / elem_sz;
327}
328
329/**
330 * @brief Read a previously dumped array of size @p len starting from prefix
331 * @p pref of type @p T using @p MPI_T as MPI datatype. Beware that
332 * @p T and @p MPI_T have to match!
333 *
334 * @param fn The file name to read from
335 * @param arr The array to populate
336 * @param len The number of elements to read
337 * @param pref The prefix for this process
338 * @param MPI_T The MPI datatype corresponding to the template parameter @p T
339 */
340template <typename T>
341static void mpiio_read_array(const std::string &fn, T *arr, std::size_t len,
342 std::size_t pref, MPI_Datatype MPI_T) {
343 MPI_File f;
344 int ret;
345 ret = MPI_File_open(MPI_COMM_WORLD, const_cast<char *>(fn.c_str()),
346 MPI_MODE_RDONLY, MPI_INFO_NULL, &f);
347 if (ret) {
348 fatal_error("Could not open file", fn, &f, ret);
349 }
350 auto const offset =
351 static_cast<MPI_Offset>(pref) * static_cast<MPI_Offset>(sizeof(T));
352 ret = MPI_File_set_view(f, offset, MPI_T, MPI_T, const_cast<char *>("native"),
353 MPI_INFO_NULL);
354
355 ret |= MPI_File_read_all(f, arr, static_cast<int>(len), MPI_T,
356 MPI_STATUS_IGNORE);
357 static_cast<void>(ret and fatal_error("Could not read file", fn, &f, ret));
358 MPI_File_close(&f);
359}
360
361/**
362 * @brief Read the header file and return the field bitmask it starts with.
363 * To be called by all processes.
364 *
365 * @param fn Filename of the head file
366 * @param rank The rank of the current process in @c MPI_COMM_WORLD
367 */
368static unsigned read_head(const std::string &fn, int rank) {
369 unsigned n_fields = 0u;
370 FILE *f = nullptr;
371 if (rank == 0) {
372 f = fopen(fn.c_str(), "rb");
373 static_cast<void>(not f and fatal_error("Could not open file", fn));
374 auto const n = fread(static_cast<void *>(&n_fields), sizeof n_fields, 1, f);
375 static_cast<void>((n == 1) or fatal_error("Could not read file", fn));
376 }
377 MPI_Bcast(&n_fields, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
378 if (f) {
379 fclose(f);
380 }
381 return n_fields;
382}
383
384/**
385 * @brief Read the pref file.
386 * Needs to be called by all processes.
387 *
388 * @param fn The file name of the prefs file
389 * @param rank The rank of the current process in @c MPI_COMM_WORLD
390 * @param size The size of @c MPI_COMM_WORLD
391 * @param nglobalpart The global number of particles
392 * @return The prefix and the local number of particles.
393 */
394static std::tuple<unsigned long, unsigned long>
395read_prefs(const std::string &fn, int rank, int size,
396 unsigned long nglobalpart) {
397 auto const pref_offset = static_cast<unsigned long>(rank);
398 unsigned long pref = 0ul;
399 unsigned long nlocalpart = 0ul;
400 mpiio_read_array<unsigned long>(fn, &pref, 1ul, pref_offset,
401 MPI_UNSIGNED_LONG);
402 if (rank > 0)
403 MPI_Send(&pref, 1, MPI_UNSIGNED_LONG, rank - 1, 0, MPI_COMM_WORLD);
404 if (rank < size - 1)
405 MPI_Recv(&nlocalpart, 1, MPI_UNSIGNED_LONG, rank + 1, MPI_ANY_TAG,
406 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
407 else
408 nlocalpart = nglobalpart;
409 nlocalpart -= pref;
410 return {pref, nlocalpart};
411}
412
413void mpi_mpiio_common_read(const std::string &prefix, unsigned fields,
414 CellStructure &cell_structure) {
415 cell_structure.remove_all_particles();
416
417 int size, rank;
418 MPI_Comm_size(MPI_COMM_WORLD, &size);
419 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
420 auto const nproc = get_num_elem(prefix + ".pref", sizeof(unsigned long));
421 auto const nglobalpart = get_num_elem(prefix + ".id", sizeof(int));
422
423 if (rank == 0 && nproc != static_cast<unsigned long>(size)) {
424 fatal_error("Trying to read a file with a different COMM "
425 "size than at point of writing.");
426 }
427
428 // 1.head on head node:
429 // Read head to determine fields at time of writing.
430 // Compare this var to the current fields.
431 auto const avail_fields = read_head(prefix + ".head", rank);
432 if (rank == 0 && (fields & avail_fields) != fields) {
433 fatal_error("Requesting to read fields which were not dumped.");
434 }
435
436 // 1.pref on all nodes:
437 // Read own prefix (one unsigned long at offset rank).
438 // Communicate own prefix to rank-1
439 // Determine nlocalpart (prefix of rank+1 - own prefix) on every node.
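  // (e.g. with prefixes 0, 3, 8 in 1.pref and nglobalpart = 10, ranks
  // 0, 1, 2 end up with nlocalpart = 3, 5, 2)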
440 auto const [pref, nlocalpart] =
441 read_prefs(prefix + ".pref", rank, size, nglobalpart);
442
443 std::vector<Particle> particles(nlocalpart);
444
445 {
446 // 1.id on all nodes:
447 // Read nlocalpart ints at defined prefix.
448 std::vector<int> id(nlocalpart);
449 auto id_it = id.begin();
450 mpiio_read_array<int>(prefix + ".id", id.data(), nlocalpart, pref, MPI_INT);
451
452 for (auto &p : particles) {
453 p.id() = *id_it;
454 ++id_it;
455 }
456 }
457
458 if (fields & MPIIO_OUT_POS) {
459 // 1.pos on all nodes:
460 // Read nlocalpart * 3 doubles at defined prefix * 3
461 std::vector<double> pos(3ul * nlocalpart);
462 auto pos_it = pos.begin();
463 mpiio_read_array<double>(prefix + ".pos", pos.data(), 3ul * nlocalpart,
464 3ul * pref, MPI_DOUBLE);
465
466 for (auto &p : particles) {
467 std::copy_n(pos_it, 3u, std::begin(p.pos()));
468 pos_it += 3u;
469 }
470 }
471
472 if (fields & MPIIO_OUT_TYP) {
473 // 1.type on all nodes:
474 // Read nlocalpart ints at defined prefix.
475 std::vector<int> type(nlocalpart);
476 auto type_it = type.begin();
477 mpiio_read_array<int>(prefix + ".type", type.data(), nlocalpart, pref,
478 MPI_INT);
479
480 for (auto &p : particles) {
481 p.type() = *type_it;
482 ++type_it;
483 }
484 }
485
486 if (fields & MPIIO_OUT_VEL) {
487 // 1.vel on all nodes:
488 // Read nlocalpart * 3 doubles at defined prefix * 3
489 std::vector<double> vel(3ul * nlocalpart);
490 auto vel_it = vel.begin();
491 mpiio_read_array<double>(prefix + ".vel", vel.data(), 3ul * nlocalpart,
492 3ul * pref, MPI_DOUBLE);
493
494 for (auto &p : particles) {
495 std::copy_n(vel_it, 3u, std::begin(p.v()));
496 vel_it += 3u;
497 }
498 }
499
500 if (fields & MPIIO_OUT_BND) {
501 // 1.boff
502 // one unsigned long per process: the size of its serialized bond data
503 auto const pref_offset = static_cast<unsigned long>(rank);
504 unsigned long bonds_size = 0u;
505 mpiio_read_array<unsigned long>(prefix + ".boff", &bonds_size, 1ul,
506 pref_offset, MPI_UNSIGNED_LONG);
507 auto const bonds_offset = mpi_calculate_file_offset(bonds_size);
508
509 // 1.bond
510 // bonds_size chars of serialized bond data per process
511 std::vector<char> bond(bonds_size);
512 mpiio_read_array<char>(prefix + ".bond", bond.data(), bonds_size,
513 bonds_offset, MPI_CHAR);
514
515 boost::iostreams::array_source src(bond.data(), bond.size());
516 boost::iostreams::stream<boost::iostreams::array_source> ss(src);
517 boost::archive::binary_iarchive ia(ss);
518
519 for (auto &p : particles) {
520 ia >> p.bonds();
521 }
522 }
523
524 for (auto &p : particles) {
525 cell_structure.add_particle(std::move(p));
526 }
527}
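
/* For illustration only (placeholder arguments): the dump from the write
 * example above could be restored with
 *   mpi_mpiio_common_read("dump", MPIIO_OUT_POS | MPIIO_OUT_BND,
 *                         cell_structure);
 * provided the simulation runs on the same number of MPI ranks as when the
 * dump was written and requests only fields that were actually dumped.
 */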
528} // namespace Mpiio
Vector implementation and trait types for boost qvm interoperability.
Data structures for bonded interactions.
int number_of_partners(Bonded_IA_Parameters const &iaparams)
Get the number of bonded partners for the specified bond.
container for bonded interactions.
mapped_type at(key_type const &key) const
bool contains(key_type const &key) const
Describes a cell structure / cell system.
Particle * add_particle(Particle &&p)
Add a particle.
void remove_all_particles()
Remove all particles from the cell system.
A range of particles.
base_type::size_type size() const
void errexit()
exit ungracefully, core dump if switched on.
This file contains the errorhandling code for severe errors, like a broken bond or illegal parameter ...
static unsigned long mpi_calculate_file_offset(unsigned long n_items)
Calculate the file offset on the local node.
Definition mpiio.cpp:182
void mpi_mpiio_common_write(std::string const &prefix, unsigned fields, BondedInteractionsMap const &bonded_ias, ParticleRange const &particles, write_buffers &buffers)
Parallel binary output using MPI-IO.
Definition mpiio.cpp:221
static std::tuple< unsigned long, unsigned long > read_prefs(const std::string &fn, int rank, int size, unsigned long nglobalpart)
Read the pref file.
Definition mpiio.cpp:395
static unsigned long get_num_elem(const std::string &fn, std::size_t elem_sz)
Get the number of elements in a file by its file size and elem_sz.
Definition mpiio.cpp:317
static void mpiio_read_array(const std::string &fn, T *arr, std::size_t len, std::size_t pref, MPI_Datatype MPI_T)
Read a previously dumped array of size len starting from prefix pref of type T using MPI_T as MPI dat...
Definition mpiio.cpp:341
static void mpiio_dump_array(const std::string &fn, T const *arr, std::size_t len, std::size_t pref, MPI_Datatype MPI_T)
Dump data arr of size len starting from prefix pref of type T using MPI_T as MPI datatype.
Definition mpiio.cpp:155
static unsigned read_head(const std::string &fn, int rank)
Read the header file and return the field bitmask it starts with.
Definition mpiio.cpp:368
void mpi_mpiio_common_read(const std::string &prefix, unsigned fields, CellStructure &cell_structure)
Parallel binary input using MPI-IO.
Definition mpiio.cpp:413
static void dump_info(std::string const &fn, unsigned fields, BondedInteractionsMap const &bonded_ias)
Dump the fields and bond information.
Definition mpiio.cpp:196
static bool fatal_error(char const *msg, std::string const &fn="", std::string const &extra="")
Fatal error handler.
Definition mpiio.cpp:96
std::vector< double > pos
std::vector< double > vel