ESPResSo
Extensible Simulation Package for Research on Soft Matter Systems
mpiio.cpp
1/*
2 * Copyright (C) 2010-2022 The ESPResSo project
3 * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010
4 * Max-Planck-Institute for Polymer Research, Theory Group
5 *
6 * This file is part of ESPResSo.
7 *
8 * ESPResSo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * ESPResSo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21/** @file
22 *
23 * Notes on the file layouts:
24 * - Scalar arrays are written like this:
25 * <tt>rank0 --- rank1 --- rank2 ...</tt>
26 * where each rank dumps its scalars in the ordering of the particles.
27 * - Vector arrays are written in the rank ordering like scalar arrays.
28 * The ordering of the vector data is: <tt>v[0] v[1] v[2]</tt>, so the data
29 * looks like this:
30 * <tt>v1[0] v1[1] v1[2] v2[0] v2[1] v2[2] v3[0] ...</tt>
31 *
32 * To be able to determine the rank boundaries, the file 1.pref is
33 * written; it contains the exclusive partial sums of @c nlocalparts,
34 * i.e. each rank's prefix into the scalar arrays:
35 * - 1.pref looks like this:
36 *   <tt>0 nlocalparts_rank0 nlocalparts_rank0+nlocalparts_rank1 ...</tt>
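 *   For example (illustrative numbers): with three ranks holding 2, 3 and 4
 *   particles, 1.pref contains <tt>0 2 5</tt>; rank 1 therefore starts at
 *   scalar index 2 and at vector index 6 in the vector data files.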
37 *
38 * Bonds are dumped as two files: 1.bond stores the bond information of
39 * the particles, serialized per rank with a Boost binary archive and
40 * concatenated in rank order, while 1.boff stores the size of each
41 * rank's chunk.
42 * - 1.boff is a scalar array with one entry per rank: the number of
43 *   bytes of serialized bond data written by that rank.
44 * - The byte offset of a rank's chunk in 1.bond is the exclusive prefix
45 *   sum of the 1.boff entries of all lower ranks.
46 * - Within a chunk, the bond list of particle <tt>id[i]</tt> is the
47 *   i-th object deserialized from the archive.
48 * - Take a look at the bond input code. It's easy to understand.
49 */
50
51#include "mpiio.hpp"
52
53#include "Particle.hpp"
54#include "bonded_interactions/bonded_interaction_data.hpp"
55#include "cell_system/CellStructure.hpp"
56#include "errorhandling.hpp"
57#include "system/System.hpp"
58
59#include <utils/Vector.hpp>
60
61#include <boost/archive/binary_iarchive.hpp>
62#include <boost/archive/binary_oarchive.hpp>
63#include <boost/iostreams/device/array.hpp>
64#include <boost/iostreams/device/back_inserter.hpp>
65#include <boost/iostreams/stream.hpp>
66
67#include <mpi.h>
68
69#include <algorithm>
70#include <cassert>
71#include <cerrno>
72#include <cstddef>
73#include <cstdio>
74#include <cstring>
75#include <sstream>
76#include <string>
77#include <sys/stat.h>
78#include <tuple>
79#include <utility>
80#include <vector>
81
82namespace Mpiio {
83
84/**
85 * @brief Fatal error handler.
86 * On 1 MPI rank the error is recoverable and an exception is thrown.
87 * On more than 1 MPI rank the error is not recoverable.
88 * @param msg Custom error message
89 * @param fn File path
90 * @param extra Extra context
91 */
92static bool fatal_error(char const *msg, std::string const &fn = "",
93 std::string const &extra = "") {
94 std::stringstream what;
95 what << "MPI-IO Error: " << msg;
96 if (not fn.empty()) {
97 what << " \"" << fn << "\"";
98 }
99 if (not extra.empty()) {
100 what << ": " << extra;
101 }
102 int size;
103 MPI_Comm_size(MPI_COMM_WORLD, &size);
104 if (size == 1) {
105 throw std::runtime_error(what.str());
106 }
107 fprintf(stderr, "%s\n", what.str().c_str());
108 errexit();
109 return false;
110}
111
112/**
113 * @brief Fatal error handler that closes an open file and queries the
114 * message associated with an MPI error code.
115 * On 1 MPI rank the error is recoverable and an exception is thrown.
116 * On more than 1 MPI rank the error is not recoverable.
117 * @param msg Custom error message
118 * @param fn File path
119 * @param fp File handle
120 * @param errnum MPI error code
121 */
122static bool fatal_error(char const *msg, std::string const &fn, MPI_File *fp,
123 int errnum) {
124 // get MPI error message
125 char buf[MPI_MAX_ERROR_STRING];
126 int buf_len;
127 MPI_Error_string(errnum, buf, &buf_len);
128 buf[buf_len] = '\0';
129 // close file handle
130 if (fp) {
131 MPI_File_close(fp);
132 }
133 return fatal_error(msg, fn, buf);
134}
135
136/**
137 * @brief Dump data @p arr of size @p len starting from prefix @p pref
138 * of type @p T using @p MPI_T as MPI datatype. Beware that @p T and
139 * @p MPI_T have to match!
140 *
141 * @param fn The file name to write to (must not already exist!)
142 * @param arr The array to dump
143 * @param len The number of elements to dump
144 * @param pref The prefix for this process
145 * @param MPI_T The MPI datatype corresponding to the template parameter @p T
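 *
 * A minimal usage sketch (illustrative data; the call is collective, so all
 * ranks must participate):
 * @code
 * std::vector<int> ids = {1, 2, 3};                        // local values
 * auto const pref = mpi_calculate_file_offset(ids.size()); // see below
 * mpiio_dump_array<int>("demo.id", ids.data(), ids.size(), pref, MPI_INT);
 * @endcode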
146 */
147template <typename T>
148static void mpiio_dump_array(const std::string &fn, T const *arr,
149 std::size_t len, std::size_t pref,
150 MPI_Datatype MPI_T) {
151 MPI_File f;
152 int ret;
153 ret = MPI_File_open(MPI_COMM_WORLD, const_cast<char *>(fn.c_str()),
154 // MPI_MODE_EXCL: Prohibit overwriting
155 MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_EXCL,
156 MPI_INFO_NULL, &f);
157 if (ret) {
158 fatal_error("Could not open file", fn, &f, ret);
159 }
160 auto const offset =
161 static_cast<MPI_Offset>(pref) * static_cast<MPI_Offset>(sizeof(T));
162 ret = MPI_File_set_view(f, offset, MPI_T, MPI_T, const_cast<char *>("native"),
163 MPI_INFO_NULL);
164 ret |= MPI_File_write_all(f, arr, static_cast<int>(len), MPI_T,
165 MPI_STATUS_IGNORE);
166 static_cast<void>(ret and fatal_error("Could not write file", fn, &f, ret));
167 MPI_File_close(&f);
168}
169
170/**
171 * @brief Calculate the file offset on the local node.
172 * @param n_items Number of items on the local node.
173 * @return The number of items on all nodes with lower rank.
174 */
175static unsigned long mpi_calculate_file_offset(unsigned long n_items) {
176 unsigned long offset = 0ul;
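  // MPI_Exscan computes an exclusive prefix sum over the ranks: each rank
  // receives the sum of n_items of all lower ranks. Rank 0 receives no
  // contribution, hence the explicit zero initialization above.
  // Example (illustrative): n_items = {2, 3, 4} -> offset = {0, 2, 5}.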
177 MPI_Exscan(&n_items, &offset, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD);
178 return offset;
179}
180
181/**
182 * @brief Dump the fields and bond information.
183 * To be called by the head node only.
184 *
185 * @param fn The filename to write to
186 * @param fields The dumped fields
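 *
 * Layout of the resulting .head file (native sizes of the writing platform):
 * <tt>[unsigned fields][std::size_t nbonds][int npartners[nbonds]]</tt>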
187 */
188static void dump_info(const std::string &fn, unsigned fields) {
189 // MPI-IO requires consecutive bond ids
190 auto const nbonds = bonded_ia_params.size();
191 assert(static_cast<std::size_t>(bonded_ia_params.get_next_key()) == nbonds);
192
193 FILE *f = fopen(fn.c_str(), "wb");
194 if (!f) {
195 fatal_error("Could not open file", fn);
196 }
197 static std::vector<int> npartners;
198 bool success = (fwrite(&fields, sizeof(fields), 1u, f) == 1);
199 // Pack the necessary information of bonded_ia_params:
200 // The number of partners. This is needed to interpret the bond IntList.
201 if (nbonds > npartners.size())
202 npartners.resize(nbonds);
203
204 auto npartners_it = npartners.begin();
205 for (int i = 0; i < bonded_ia_params.get_next_key(); ++i, ++npartners_it) {
206 *npartners_it = number_of_partners(*bonded_ia_params.at(i));
207 }
208 success = success && (fwrite(&nbonds, sizeof(std::size_t), 1u, f) == 1);
209 success =
210 success && (fwrite(npartners.data(), sizeof(int), nbonds, f) == nbonds);
211 fclose(f);
212 static_cast<void>(success or fatal_error("Could not write file", fn));
213}
214
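// Usage sketch (illustrative prefix and field selection): calling
//   mpi_mpiio_common_write("dump-0", MPIIO_OUT_POS | MPIIO_OUT_BND, particles);
// collectively on all ranks produces dump-0.head, dump-0.pref, dump-0.id,
// dump-0.pos, dump-0.boff and dump-0.bond. The MPI-IO files must not already
// exist, since they are opened with MPI_MODE_EXCL.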
215void mpi_mpiio_common_write(const std::string &prefix, unsigned fields,
216 const ParticleRange &particles) {
217 auto const nlocalpart = static_cast<unsigned long>(particles.size());
218 auto const offset = mpi_calculate_file_offset(nlocalpart);
219 // Keep static buffers in order to avoid allocating them on every
220 // function call
221 static std::vector<double> pos, vel;
222 static std::vector<int> id, type;
223
224 // Realloc static buffers if necessary
225 if (nlocalpart > id.size())
226 id.resize(nlocalpart);
227 if (fields & MPIIO_OUT_POS && 3ul * nlocalpart > pos.size())
228 pos.resize(3ul * nlocalpart);
229 if (fields & MPIIO_OUT_VEL && 3ul * nlocalpart > vel.size())
230 vel.resize(3ul * nlocalpart);
231 if (fields & MPIIO_OUT_TYP && nlocalpart > type.size())
232 type.resize(nlocalpart);
233
234 // Pack the necessary information
235 auto id_it = id.begin();
236 auto type_it = type.begin();
237 auto pos_it = pos.begin();
238 auto vel_it = vel.begin();
239 for (auto const &p : particles) {
240 *id_it = p.id();
241 ++id_it;
242 if (fields & MPIIO_OUT_POS) {
243 std::copy_n(std::begin(p.pos()), 3u, pos_it);
244 pos_it += 3u;
245 }
246 if (fields & MPIIO_OUT_VEL) {
247 std::copy_n(std::begin(p.v()), 3u, vel_it);
248 vel_it += 3u;
249 }
250 if (fields & MPIIO_OUT_TYP) {
251 *type_it = p.type();
252 ++type_it;
253 }
254 }
255
256 int rank;
257 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
258 if (rank == 0)
259 dump_info(prefix + ".head", fields);
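  // Each rank writes exactly one unsigned long into the .pref file, at
  // element index == rank; the value is this rank's particle prefix.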
260 auto const pref_offset = static_cast<unsigned long>(rank);
261 mpiio_dump_array<unsigned long>(prefix + ".pref", &offset, 1ul, pref_offset,
262 MPI_UNSIGNED_LONG);
263 mpiio_dump_array<int>(prefix + ".id", id.data(), nlocalpart, offset, MPI_INT);
264 if (fields & MPIIO_OUT_POS)
265 mpiio_dump_array<double>(prefix + ".pos", pos.data(), 3ul * nlocalpart,
266 3ul * offset, MPI_DOUBLE);
267 if (fields & MPIIO_OUT_VEL)
268 mpiio_dump_array<double>(prefix + ".vel", vel.data(), 3ul * nlocalpart,
269 3ul * offset, MPI_DOUBLE);
270 if (fields & MPIIO_OUT_TYP)
271 mpiio_dump_array<int>(prefix + ".type", type.data(), nlocalpart, offset,
272 MPI_INT);
273
274 if (fields & MPIIO_OUT_BND) {
275 std::vector<char> bonds;
276
277 /* Construct archive that pushes back to the bond buffer */
278 {
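      // The nested scope ensures the archive and the stream buffer are
      // destroyed, and therefore flushed into `bonds`, before the buffer
      // size is queried below.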
279 namespace io = boost::iostreams;
280 io::stream_buffer<io::back_insert_device<std::vector<char>>> os{
281 io::back_inserter(bonds)};
282 boost::archive::binary_oarchive bond_archiver{os};
283
284 for (auto const &p : particles) {
285 bond_archiver << p.bonds();
286 }
287 }
288
289 // Determine the prefixes in the bond file
290 auto const bonds_size = static_cast<unsigned long>(bonds.size());
291 auto const bonds_offset = mpi_calculate_file_offset(bonds_size);
292
293 mpiio_dump_array<unsigned long>(prefix + ".boff", &bonds_size, 1ul,
294 pref_offset, MPI_UNSIGNED_LONG);
295 mpiio_dump_array<char>(prefix + ".bond", bonds.data(), bonds.size(),
296 bonds_offset, MPI_CHAR);
297 }
298}
299
300/**
301 * @brief Get the number of elements in a file by its file size and @p elem_sz.
302 * I.e. query the file size using stat(2) and divide it by @p elem_sz.
303 *
304 * @param fn The filename
305 * @param elem_sz Size of a single element
306 * @return The number of elements stored in the file
307 */
308static unsigned long get_num_elem(const std::string &fn, std::size_t elem_sz) {
309 // Could also be done via MPI_File_open, MPI_File_get_size,
310 // MPI_File_close.
311 struct stat st;
312 errno = 0;
313 if (stat(fn.c_str(), &st) != 0) {
314 auto const reason = strerror(errno);
315 fatal_error("Could not get file size of", fn, reason);
316 }
317 return static_cast<unsigned long>(st.st_size) / elem_sz;
318}
319
320/**
321 * @brief Read a previously dumped array of size @p len starting from prefix
322 * @p pref of type @p T using @p MPI_T as MPI datatype. Beware that
323 * @p T and @p MPI_T have to match!
324 *
325 * @param fn The file name to read from
326 * @param arr The array to populate
327 * @param len The number of elements to read
328 * @param pref The prefix for this process
329 * @param MPI_T The MPI datatype corresponding to the template parameter @p T
330 */
331template <typename T>
332static void mpiio_read_array(const std::string &fn, T *arr, std::size_t len,
333 std::size_t pref, MPI_Datatype MPI_T) {
334 MPI_File f;
335 int ret;
336 ret = MPI_File_open(MPI_COMM_WORLD, const_cast<char *>(fn.c_str()),
337 MPI_MODE_RDONLY, MPI_INFO_NULL, &f);
338 if (ret) {
339 fatal_error("Could not open file", fn, &f, ret);
340 }
341 auto const offset =
342 static_cast<MPI_Offset>(pref) * static_cast<MPI_Offset>(sizeof(T));
343 ret = MPI_File_set_view(f, offset, MPI_T, MPI_T, const_cast<char *>("native"),
344 MPI_INFO_NULL);
345
346 ret |= MPI_File_read_all(f, arr, static_cast<int>(len), MPI_T,
347 MPI_STATUS_IGNORE);
348 static_cast<void>(ret and fatal_error("Could not read file", fn, &f, ret));
349 MPI_File_close(&f);
350}
351
352/**
353 * @brief Read the header file and return the first value.
354 * To be called by all processes.
355 *
356 * @param fn Filename of the head file
357 * @param rank The rank of the current process in @c MPI_COMM_WORLD
358 */
359static unsigned read_head(const std::string &fn, int rank) {
360 unsigned n_fields = 0u;
361 FILE *f = nullptr;
362 if (rank == 0) {
363 f = fopen(fn.c_str(), "rb");
364 static_cast<void>(not f and fatal_error("Could not open file", fn));
365 auto const n = fread(static_cast<void *>(&n_fields), sizeof n_fields, 1, f);
366 static_cast<void>((n == 1) or fatal_error("Could not read file", fn));
367 }
368 MPI_Bcast(&n_fields, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
369 if (f) {
370 fclose(f);
371 }
372 return n_fields;
373}
374
375/**
376 * @brief Read the pref file.
377 * Needs to be called by all processes.
378 *
379 * @param fn The file name of the prefs file
380 * @param rank The rank of the current process in @c MPI_COMM_WORLD
381 * @param size The size of @c MPI_COMM_WORLD
382 * @param nglobalpart The global number of particles
383 * @return The prefix and the local number of particles.
384 */
385static std::tuple<unsigned long, unsigned long>
386read_prefs(const std::string &fn, int rank, int size,
387 unsigned long nglobalpart) {
388 auto const pref_offset = static_cast<unsigned long>(rank);
389 unsigned long pref = 0ul;
390 unsigned long nlocalpart = 0ul;
391 mpiio_read_array<unsigned long>(fn, &pref, 1ul, pref_offset,
392 MPI_UNSIGNED_LONG);
393 if (rank > 0)
394 MPI_Send(&pref, 1, MPI_UNSIGNED_LONG, rank - 1, 0, MPI_COMM_WORLD);
395 if (rank < size - 1)
396 MPI_Recv(&nlocalpart, 1, MPI_UNSIGNED_LONG, rank + 1, MPI_ANY_TAG,
397 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
398 else
399 nlocalpart = nglobalpart;
400 nlocalpart -= pref;
401 return {pref, nlocalpart};
402}
403
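// Reading is collective: every rank of MPI_COMM_WORLD calls this with the
// prefix used for writing and a subset of the dumped fields, e.g.
// (illustrative) mpi_mpiio_common_read("dump-0", MPIIO_OUT_POS);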
404void mpi_mpiio_common_read(const std::string &prefix, unsigned fields) {
405 auto &cell_structure = *System::get_system().cell_structure;
406 cell_structure.remove_all_particles();
407
408 int size, rank;
409 MPI_Comm_size(MPI_COMM_WORLD, &size);
410 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
411 auto const nproc = get_num_elem(prefix + ".pref", sizeof(unsigned long));
412 auto const nglobalpart = get_num_elem(prefix + ".id", sizeof(int));
413
414 if (rank == 0 && nproc != static_cast<unsigned long>(size)) {
415 fatal_error("Trying to read a file with a different COMM "
416 "size than at point of writing.");
417 }
418
419 // 1.head on head node:
420 // Read head to determine fields at time of writing.
421 // Compare this var to the current fields.
422 auto const avail_fields = read_head(prefix + ".head", rank);
423 if (rank == 0 && (fields & avail_fields) != fields) {
424 fatal_error("Requesting to read fields which were not dumped.");
425 }
426
427 // 1.pref on all nodes:
428 // Read own prefix (one unsigned long at element index == rank).
429 // Communicate own prefix to rank-1.
430 // Determine nlocalpart (prefix of rank+1 - own prefix) on every node.
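  // Example (illustrative): prefixes {0, 2, 5} with nglobalpart = 9 yield
  // nlocalpart = {2, 3, 4} on ranks {0, 1, 2}.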
431 auto const [pref, nlocalpart] =
432 read_prefs(prefix + ".pref", rank, size, nglobalpart);
433
434 std::vector<Particle> particles(nlocalpart);
435
436 {
437 // 1.id on all nodes:
438 // Read nlocalpart ints at defined prefix.
439 std::vector<int> id(nlocalpart);
440 auto id_it = id.begin();
441 mpiio_read_array<int>(prefix + ".id", id.data(), nlocalpart, pref, MPI_INT);
442
443 for (auto &p : particles) {
444 p.id() = *id_it;
445 ++id_it;
446 }
447 }
448
449 if (fields & MPIIO_OUT_POS) {
450 // 1.pos on all nodes:
451 // Read nlocalpart * 3 doubles at defined prefix * 3
452 std::vector<double> pos(3ul * nlocalpart);
453 auto pos_it = pos.begin();
454 mpiio_read_array<double>(prefix + ".pos", pos.data(), 3ul * nlocalpart,
455 3ul * pref, MPI_DOUBLE);
456
457 for (auto &p : particles) {
458 std::copy_n(pos_it, 3u, std::begin(p.pos()));
459 pos_it += 3u;
460 }
461 }
462
463 if (fields & MPIIO_OUT_TYP) {
464 // 1.type on all nodes:
465 // Read nlocalpart ints at defined prefix.
466 std::vector<int> type(nlocalpart);
467 auto type_it = type.begin();
468 mpiio_read_array<int>(prefix + ".type", type.data(), nlocalpart, pref,
469 MPI_INT);
470
471 for (auto &p : particles) {
472 p.type() = *type_it;
473 ++type_it;
474 }
475 }
476
477 if (fields & MPIIO_OUT_VEL) {
478 // 1.vel on all nodes:
479 // Read nlocalpart * 3 doubles at defined prefix * 3
480 std::vector<double> vel(3ul * nlocalpart);
481 auto vel_it = vel.begin();
482 mpiio_read_array<double>(prefix + ".vel", vel.data(), 3ul * nlocalpart,
483 3ul * pref, MPI_DOUBLE);
484
485 for (auto &p : particles) {
486 std::copy_n(vel_it, 3u, std::begin(p.v()));
487 vel_it += 3u;
488 }
489 }
490
491 if (fields & MPIIO_OUT_BND) {
492 // 1.boff
493 // one unsigned long per process
494 auto const pref_offset = static_cast<unsigned long>(rank);
495 unsigned long bonds_size = 0u;
496 mpiio_read_array<unsigned long>(prefix + ".boff", &bonds_size, 1ul,
497 pref_offset, MPI_UNSIGNED_LONG);
498 auto const bonds_offset = mpi_calculate_file_offset(bonds_size);
499
500 // 1.bond
501 // bonds_size chars (serialized bond data) per process
502 std::vector<char> bond(bonds_size);
503 mpiio_read_array<char>(prefix + ".bond", bond.data(), bonds_size,
504 bonds_offset, MPI_CHAR);
505
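    // Wrap the raw bytes in a stream and deserialize the bond lists in the
    // same particle order in which they were written.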
506 boost::iostreams::array_source src(bond.data(), bond.size());
507 boost::iostreams::stream<boost::iostreams::array_source> ss(src);
508 boost::archive::binary_iarchive ia(ss);
509
510 for (auto &p : particles) {
511 ia >> p.bonds();
512 }
513 }
514
515 for (auto &p : particles) {
516 cell_structure.add_particle(std::move(p));
517 }
518}
519} // namespace Mpiio