Skip to content

Commit c2e0e83

Browse files
franzpoeschel and ax3l authored
Filename extensions: Allow specifying wildcards (#1584)
* Rough, but fundamentally functioning draft * Properly initialize * Only read extensions from disk if the access type is read * Avoid parameter shadowing * Temporary attempt to defer initialization todo: 1. temporarily initialize with dummy io handler 2. move the late initialization logic to IOHandler() * Without Cleanup: todos from previous commit * Cleanup for previous, to be squashed * More fine-grained deferral of initialization * Fixes that would theoretically allow deferring also CREATE mode * Initialize early if possible, defer only when needed * Access type related fixes * CI fixes for MSVC * Select ADIOS2 file ending more specifically * Little fix * Slight fixes * Testing * Add to examples * Better error messages * Little fix * Documentation * Cleanup * Fix test * ambiguous Co-authored-by: Franz Pöschel <franz.poeschel@gmail.com> * Include Order * Missing Includes Co-authored-by: Franz Pöschel <franz.poeschel@gmail.com> * First read/write --------- Co-authored-by: Axel Huebl <axel.huebl@plasma.ninja>
1 parent d64dbc2 commit c2e0e83

17 files changed

Lines changed: 659 additions & 110 deletions

docs/source/usage/firstread.rst

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ C++17
8686
.. code-block:: cpp
8787
8888
auto series = io::Series(
89-
"data%T.h5",
89+
"data_%T.h5",
9090
io::Access::READ_ONLY);
9191
9292
@@ -96,9 +96,20 @@ Python
9696
.. code-block:: python3
9797
9898
series = io.Series(
99-
"data%T.h5",
99+
"data_%T.h5",
100100
io.Access.read_only)
101101
102+
.. tip::
103+
104+
Replace the file ending ``.h5`` with a wildcard ``.%E`` to let openPMD autodetect the ending from the file system.
105+
Use the wildcard ``%T`` to match filename-encoded iterations.
106+
107+
.. tip::
108+
109+
More detailed options can be passed via JSON or TOML as a further constructor parameter.
110+
Try ``{"defer_iteration_parsing": true}`` to speed up the first access.
111+
(Remember to explicitly call ``it.open()`` on iterations in that case.)
112+
102113
Iteration
103114
---------
104115

docs/source/usage/firstwrite.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ Python
100100
Iteration
101101
---------
102102

103-
Grouping by an arbitrary, positive integer number ``<N>`` in a series:
103+
Grouping by an arbitrary, nonnegative integer number ``<N>`` in a series:
104104

105105
C++17
106106
^^^^^

examples/2a_read_thetaMode_serial.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ using namespace openPMD;
2929

3030
int main()
3131
{
32+
/* The pattern %E (e.g. `data%T.%E`) instructs the openPMD-api to determine
33+
* the file ending automatically. Here it is given explicitly: `data%T.h5`. */
3234
Series series =
3335
Series("../samples/git-sample/thetaMode/data%T.h5", Access::READ_ONLY);
3436

examples/2a_read_thetaMode_serial.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import openpmd_api as io
1010

1111
if __name__ == "__main__":
12+
# The pattern %E (e.g. `data%T.%E`) instructs the openPMD-api to determine
13+
# the file ending automatically. Here it is given explicitly: `data%T.h5`.
1214
series = io.Series("../samples/git-sample/thetaMode/data%T.h5",
1315
io.Access.read_only)
1416

include/openPMD/IO/AbstractIOHandler.hpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,12 @@ class AbstractIOHandler
218218
{}
219219
virtual ~AbstractIOHandler() = default;
220220

221+
AbstractIOHandler(AbstractIOHandler const &) = default;
222+
AbstractIOHandler(AbstractIOHandler &&) = default;
223+
224+
AbstractIOHandler &operator=(AbstractIOHandler const &) = default;
225+
AbstractIOHandler &operator=(AbstractIOHandler &&) = default;
226+
221227
/** Add provided task to queue according to FIFO.
222228
*
223229
* @param iotask Task to be executed after all previously enqueued
@@ -245,7 +251,7 @@ class AbstractIOHandler
245251
/** The currently used backend */
246252
virtual std::string backendName() const = 0;
247253

248-
std::string const directory;
254+
std::string directory;
249255
/*
250256
* Originally, the reason for distinguishing these two was that during
251257
* parsing in reading access modes, the access type would be temporarily
@@ -261,8 +267,8 @@ class AbstractIOHandler
261267
* which is entirely implemented by the frontend, which internally uses
262268
* the backend in CREATE mode.
263269
*/
264-
Access const m_backendAccess;
265-
Access const m_frontendAccess;
270+
Access m_backendAccess;
271+
Access m_frontendAccess;
266272
internal::SeriesStatus m_seriesStatus = internal::SeriesStatus::Default;
267273
std::queue<IOTask> m_work;
268274
/**

include/openPMD/IO/DummyIOHandler.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,5 +45,6 @@ class DummyIOHandler : public AbstractIOHandler
4545
* without IO.
4646
*/
4747
std::future<void> flush(internal::ParsedFlushParams &) override;
48+
std::string backendName() const override;
4849
}; // DummyIOHandler
4950
} // namespace openPMD

include/openPMD/IO/Format.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ enum class Format
3636
ADIOS2_SSC,
3737
JSON,
3838
TOML,
39+
GENERIC,
3940
DUMMY
4041
};
4142

include/openPMD/IO/IOTask.hpp

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -689,19 +689,10 @@ class OPENPMDAPI_EXPORT IOTask
689689
, parameter{std::move(p).to_heap()}
690690
{}
691691

692-
explicit IOTask(IOTask const &other)
693-
: writable{other.writable}
694-
, operation{other.operation}
695-
, parameter{other.parameter}
696-
{}
697-
698-
IOTask &operator=(IOTask const &other)
699-
{
700-
writable = other.writable;
701-
operation = other.operation;
702-
parameter = other.parameter;
703-
return *this;
704-
}
692+
IOTask(IOTask const &other);
693+
IOTask(IOTask &&other) noexcept;
694+
IOTask &operator=(IOTask const &other);
695+
IOTask &operator=(IOTask &&other) noexcept;
705696

706697
Writable *writable;
707698
Operation operation;

include/openPMD/Series.hpp

Lines changed: 84 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
#pragma once
2222

23+
#include "openPMD/Error.hpp"
2324
#include "openPMD/IO/AbstractIOHandler.hpp"
2425
#include "openPMD/IO/Access.hpp"
2526
#include "openPMD/IO/Format.hpp"
@@ -40,10 +41,14 @@
4041

4142
#include <cstdint> // uint64_t
4243
#include <deque>
44+
#include <functional>
4345
#include <map>
46+
#include <memory>
4447
#include <optional>
4548
#include <set>
49+
#include <stdexcept>
4650
#include <string>
51+
#include <tuple>
4752

4853
// expose private and protected members for invasive testing
4954
#ifndef OPENPMD_private
@@ -192,6 +197,9 @@ namespace internal
192197
*/
193198
std::optional<ParsePreference> m_parsePreference;
194199

200+
std::optional<std::function<AbstractIOHandler *(Series &)>>
201+
m_deferred_initialization = std::nullopt;
202+
195203
void close();
196204
}; // SeriesData
197205

@@ -221,6 +229,18 @@ class Series : public Attributable
221229
explicit Series();
222230

223231
#if openPMD_HAVE_MPI
232+
/**
233+
* @brief Construct a new Series
234+
*
235+
* For further details, refer to the documentation of the non-MPI overload.
236+
*
237+
* @param filepath The file path.
238+
* @param at Access mode.
239+
* @param comm The MPI communicator.
240+
* @param options Advanced backend configuration via JSON/TOML.
241+
* May be specified as a JSON/TOML-formatted string directly, or as a
242+
* path to a JSON/TOML textfile, prepended by an at sign '@'.
243+
*/
224244
Series(
225245
std::string const &filepath,
226246
Access at,
@@ -229,13 +249,50 @@ class Series : public Attributable
229249
#endif
230250

231251
/**
232-
* @brief Construct a new Series
233-
*
234-
* @param filepath The backend will be determined by the filepath extension.
252+
* @brief Construct a new Series.
253+
*
254+
* For details on access modes, JSON/TOML configuration and iteration
255+
* encoding, refer to:
256+
*
257+
* * https://openpmd-api.readthedocs.io/en/latest/usage/workflow.html#access-modes
258+
* * https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html
259+
* * https://openpmd-api.readthedocs.io/en/latest/usage/concepts.html#iteration-and-series
260+
*
261+
* In case of file-based iteration encoding, the file names for each
262+
* iteration are determined by an expansion pattern that must be specified.
263+
* It takes one out of two possible forms:
264+
*
265+
* 1. Simple form: %T is replaced with the iteration index, e.g.
266+
* `simData_%T.bp` becomes `simData_50.bp`.
267+
* 2. Padded form: e.g. %06T is replaced with the iteration index padded to
268+
* at least six digits. `simData_%06T.bp` becomes `simData_000050.bp`.
269+
*
270+
* The backend is determined:
271+
*
272+
* 1. Explicitly via the JSON/TOML parameter `backend`, e.g. `{"backend":
273+
* "adios2"}`.
274+
* 2. Otherwise implicitly from the filename extension, e.g.
275+
* `simData_%T.h5`.
276+
*
277+
* The filename extension can be replaced with a globbing pattern %E.
278+
* It will be replaced with an automatically determined file name extension:
279+
*
280+
* 1. In CREATE mode: The extension is set to a backend-specific default
281+
* extension. This requires that the backend is specified via JSON/TOML.
282+
* 2. In READ_ONLY, READ_WRITE and READ_LINEAR modes: These modes require
283+
* that files already exist on disk. The disk will be scanned for files
284+
* that match the pattern and the resulting file extension will be used.
285+
* If the result is ambiguous or no such file is found, an error is
286+
* raised.
287+
* 3. In APPEND mode: Like (2.), except if no matching file is found. In
288+
* that case, the procedure of (1.) is used, owing to the fact that
289+
* APPEND mode can be used to create new datasets.
290+
*
291+
* @param filepath The file path.
235292
* @param at Access mode.
236293
* @param options Advanced backend configuration via JSON/TOML.
237-
* May be specified as a JSON-formatted string directly, or as a path
238-
* to a JSON textfile, prepended by an at sign '@'.
294+
* May be specified as a JSON/TOML-formatted string directly, or as a
295+
* path to a JSON/TOML textfile, prepended by an at sign '@'.
239296
*/
240297
Series(
241298
std::string const &filepath,
@@ -502,6 +559,7 @@ class Series : public Attributable
502559
* @return String of a pattern for data backend.
503560
*/
504561
std::string backend() const;
562+
std::string backend();
505563

506564
/** Execute all required remaining IO operations to write or read data.
507565
*
@@ -636,7 +694,20 @@ OPENPMD_private
636694
void parseJsonOptions(TracingJSON &options, ParsedInput &);
637695
bool hasExpansionPattern(std::string filenameWithExtension);
638696
bool reparseExpansionPattern(std::string filenameWithExtension);
639-
void init(std::unique_ptr<AbstractIOHandler>, std::unique_ptr<ParsedInput>);
697+
template <typename... MPI_Communicator>
698+
void init(
699+
std::string const &filepath,
700+
Access at,
701+
std::string const &options,
702+
MPI_Communicator &&...);
703+
template <typename TracingJSON>
704+
std::tuple<std::unique_ptr<ParsedInput>, TracingJSON> initIOHandler(
705+
std::string const &filepath,
706+
std::string const &options,
707+
Access at,
708+
bool resolve_generic_extension);
709+
void initSeries(
710+
std::unique_ptr<AbstractIOHandler>, std::unique_ptr<ParsedInput>);
640711
void initDefaults(IterationEncoding, bool initAll = false);
641712
/**
642713
* @brief Internal call for flushing a Series.
@@ -688,7 +759,7 @@ OPENPMD_private
688759
* ReadIterations since those methods should be aware when the current step
689760
* is broken).
690761
*/
691-
std::optional<std::deque<IterationIndex_t> > readGorVBased(
762+
std::optional<std::deque<IterationIndex_t>> readGorVBased(
692763
bool do_always_throw_errors,
693764
bool init,
694765
std::set<IterationIndex_t> const &ignoreIterations = {});
@@ -758,7 +829,12 @@ OPENPMD_private
758829
* Returns the current content of the /data/snapshot attribute.
759830
* (We could also add this to the public API some time)
760831
*/
761-
std::optional<std::vector<IterationIndex_t> > currentSnapshot() const;
832+
std::optional<std::vector<IterationIndex_t>> currentSnapshot() const;
833+
834+
AbstractIOHandler *runDeferredInitialization();
835+
836+
AbstractIOHandler *IOHandler();
837+
AbstractIOHandler const *IOHandler() const;
762838
}; // Series
763839
} // namespace openPMD
764840

src/Format.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ Format determineFormat(std::string const &filename)
4545
return Format::JSON;
4646
if (auxiliary::ends_with(filename, ".toml"))
4747
return Format::TOML;
48+
if (auxiliary::ends_with(filename, ".%E"))
49+
return Format::GENERIC;
4850

4951
// Format might still be specified via JSON
5052
return Format::DUMMY;
@@ -70,6 +72,8 @@ std::string suffix(Format f)
7072
return ".json";
7173
case Format::TOML:
7274
return ".toml";
75+
case Format::GENERIC:
76+
return ".%E";
7377
default:
7478
return "";
7579
}

0 commit comments

Comments
 (0)