Skip to content

Commit d64dbc2

Browse files
Parallel JSON (#1475)
* Plumberwork for MPI communicator in JSON backend * Parallel reading * ... and writing * Set padding according to MPI rank * Write README.txt file * Bug fix: don't double prepend base dir * Test parallel output in openpmd-pipe test * Bug fix: use mpi_rank_%i.toml when writing to TOML * Refactor `if` statement * Add documentation
1 parent 5fec415 commit d64dbc2

8 files changed

Lines changed: 294 additions & 31 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1370,7 +1370,7 @@ if(openPMD_BUILD_TESTING)
13701370
--outfile \
13711371
../samples/git-sample/thetaMode/data_%T.bp && \
13721372
\
1373-
${Python_EXECUTABLE} \
1373+
${MPI_TEST_EXE} ${Python_EXECUTABLE} \
13741374
${openPMD_RUNTIME_OUTPUT_DIRECTORY}/openpmd-pipe \
13751375
--infile ../samples/git-sample/thetaMode/data_%T.bp \
13761376
--outfile ../samples/git-sample/thetaMode/data%T.json \

docs/source/backends/json.rst

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ propagate the exception thrown by Niels Lohmann's library.
9292

9393
The key names ``"attributes"``, ``"data"`` and ``"datatype"`` are reserved and must not be used for base/mesh/particles paths, records and their components.
9494

95-
A parallel (i.e. MPI) implementation is *not* available.
9695

9796
TOML Restrictions
9897
-----------------
@@ -106,7 +105,41 @@ TOML does not support null values.
106105

107106
The key names ``"attributes"``, ``"data"`` and ``"datatype"`` are reserved and must not be used for base/mesh/particles paths, records and their components.
108107

109-
A parallel (i.e. MPI) implementation is *not* available.
108+
109+
Using in parallel (MPI)
110+
-----------------------
111+
112+
Parallel I/O is not a first-class citizen in the JSON and TOML backends, and neither backend will "go out of its way" to support parallel workflows.
113+
114+
However, there is a rudimentary form of read and write support in parallel:
115+
116+
Parallel reading
117+
................
118+
119+
In order not to overload the parallel filesystem with parallel reads, read access to JSON datasets is done by rank 0 and then broadcast to all other ranks.
120+
Note that there is no granularity whatsoever in reading a JSON file.
121+
A JSON file is always read into memory and broadcast to all other ranks in its entirety.
122+
123+
Parallel writing
124+
................
125+
126+
When executed in an MPI context, the JSON/TOML backends will not directly output a single text file, but instead a folder containing one file per MPI rank.
127+
Neither backend will perform any data aggregation at all.
128+
129+
.. note::
130+
131+
The parallel write support of the JSON/TOML backends is intended mainly for debugging and prototyping workflows.
132+
133+
The folder will use the specified Series name, but append the postfix ``.parallel``.
134+
(This is a deliberate indication that this folder cannot directly be opened again by the openPMD-api as a JSON/TOML dataset.)
135+
For each MPI rank *i*, this folder contains a file ``mpi_rank_<i>.json`` (resp. ``mpi_rank_<i>.toml``) containing the serial output of that rank.
136+
A ``README.txt`` with basic usage instructions is also written.
137+
138+
.. note::
139+
140+
There is no direct support in the openPMD-api to read a JSON/TOML dataset written in this parallel fashion. However, the individual files (e.g. ``data.json.parallel/mpi_rank_0.json``) are each valid openPMD files and can be read separately.
141+
142+
Note that the auxiliary function ``json::merge()`` (or in Python ``openpmd_api.merge_json()``) is not adequate for merging the single JSON/TOML files back into one, since it does not merge anything below the array level.
110143

111144

112145
Example

include/openPMD/IO/JSON/JSONIOHandler.hpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,30 @@
2424
#include "openPMD/IO/AbstractIOHandler.hpp"
2525
#include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp"
2626

27+
#if openPMD_HAVE_MPI
28+
#include <mpi.h>
29+
#endif
30+
2731
namespace openPMD
2832
{
2933
class JSONIOHandler : public AbstractIOHandler
3034
{
3135
public:
3236
JSONIOHandler(
33-
std::string const &path,
37+
std::string path,
38+
Access at,
39+
openPMD::json::TracingJSON config,
40+
JSONIOHandlerImpl::FileFormat,
41+
std::string originalExtension);
42+
#if openPMD_HAVE_MPI
43+
JSONIOHandler(
44+
std::string path,
3445
Access at,
46+
MPI_Comm,
3547
openPMD::json::TracingJSON config,
3648
JSONIOHandlerImpl::FileFormat,
3749
std::string originalExtension);
50+
#endif
3851

3952
~JSONIOHandler() override;
4053

include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131

3232
#include <istream>
3333
#include <nlohmann/json.hpp>
34+
#if openPMD_HAVE_MPI
35+
#include <mpi.h>
36+
#endif
3437

3538
#include <complex>
3639
#include <fstream>
@@ -70,6 +73,7 @@ struct File
7073

7174
std::string name;
7275
bool valid = true;
76+
bool printedReadmeWarningAlready = false;
7377
};
7478

7579
std::shared_ptr<FileState> fileState;
@@ -167,6 +171,15 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
167171
FileFormat,
168172
std::string originalExtension);
169173

174+
#if openPMD_HAVE_MPI
175+
JSONIOHandlerImpl(
176+
AbstractIOHandler *,
177+
MPI_Comm,
178+
openPMD::json::TracingJSON config,
179+
FileFormat,
180+
std::string originalExtension);
181+
#endif
182+
170183
~JSONIOHandlerImpl() override;
171184

172185
void
@@ -230,6 +243,10 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
230243
std::future<void> flush();
231244

232245
private:
246+
#if openPMD_HAVE_MPI
247+
std::optional<MPI_Comm> m_communicator;
248+
#endif
249+
233250
using FILEHANDLE = std::fstream;
234251

235252
// map each Writable to its associated file
@@ -323,7 +340,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
323340

324341
// write to disk the json contents associated with the file
325342
// remove from m_dirty if unsetDirty == true
326-
void putJsonContents(File const &, bool unsetDirty = true);
343+
auto putJsonContents(File const &, bool unsetDirty = true)
344+
-> decltype(m_jsonVals)::iterator;
327345

328346
// figure out the file position of the writable
329347
// (preferring the parent's file position) and extend it

src/IO/AbstractIOHandlerHelper.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,23 @@ std::unique_ptr<AbstractIOHandler> createIOHandler<json::TracingJSON>(
125125
"ssc",
126126
std::move(originalExtension));
127127
case Format::JSON:
128-
throw error::WrongAPIUsage(
129-
"JSON backend not available in parallel openPMD.");
128+
return constructIOHandler<JSONIOHandler, openPMD_HAVE_JSON>(
129+
"JSON",
130+
path,
131+
access,
132+
comm,
133+
std::move(options),
134+
JSONIOHandlerImpl::FileFormat::Json,
135+
std::move(originalExtension));
136+
case Format::TOML:
137+
return constructIOHandler<JSONIOHandler, openPMD_HAVE_JSON>(
138+
"JSON",
139+
path,
140+
access,
141+
comm,
142+
std::move(options),
143+
JSONIOHandlerImpl::FileFormat::Toml,
144+
std::move(originalExtension));
130145
default:
131146
throw error::WrongAPIUsage(
132147
"Unknown file format! Did you specify a file ending? Specified "

src/IO/JSON/JSONIOHandler.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,29 @@ namespace openPMD
2626
JSONIOHandler::~JSONIOHandler() = default;
2727

2828
JSONIOHandler::JSONIOHandler(
29-
std::string const &path,
29+
std::string path,
3030
Access at,
3131
openPMD::json::TracingJSON jsonCfg,
3232
JSONIOHandlerImpl::FileFormat format,
3333
std::string originalExtension)
34-
: AbstractIOHandler{path, at}
34+
: AbstractIOHandler{std::move(path), at}
3535
, m_impl{this, std::move(jsonCfg), format, std::move(originalExtension)}
3636
{}
3737

38+
#if openPMD_HAVE_MPI
39+
JSONIOHandler::JSONIOHandler(
40+
std::string path,
41+
Access at,
42+
MPI_Comm comm,
43+
openPMD::json::TracingJSON jsonCfg,
44+
JSONIOHandlerImpl::FileFormat format,
45+
std::string originalExtension)
46+
: AbstractIOHandler{std::move(path), at}
47+
, m_impl{JSONIOHandlerImpl{
48+
this, comm, std::move(jsonCfg), format, std::move(originalExtension)}}
49+
{}
50+
#endif
51+
3852
std::future<void> JSONIOHandler::flush(internal::ParsedFlushParams &)
3953
{
4054
return m_impl.flush();

0 commit comments

Comments
 (0)