From 538c99a68d386bbedcea03d212568c0be6227850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Jul 2024 15:50:47 +0200 Subject: [PATCH 01/26] Add JSONMatcher class --- CMakeLists.txt | 1 + include/openPMD/auxiliary/JSONMatcher.hpp | 140 ++++++++++++ include/openPMD/auxiliary/JSON_internal.hpp | 3 +- src/IO/IOTask.cpp | 2 +- src/auxiliary/JSON.cpp | 13 +- src/auxiliary/JSONMatcher.cpp | 222 ++++++++++++++++++++ 6 files changed, 373 insertions(+), 8 deletions(-) create mode 100644 include/openPMD/auxiliary/JSONMatcher.hpp create mode 100644 src/auxiliary/JSONMatcher.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 367bc15e92..e8a60b7f6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -406,6 +406,7 @@ set(CORE_SOURCE src/auxiliary/Date.cpp src/auxiliary/Filesystem.cpp src/auxiliary/JSON.cpp + src/auxiliary/JSONMatcher.cpp src/auxiliary/Mpi.cpp src/backend/Attributable.cpp src/backend/BaseRecordComponent.cpp diff --git a/include/openPMD/auxiliary/JSONMatcher.hpp b/include/openPMD/auxiliary/JSONMatcher.hpp new file mode 100644 index 0000000000..266b6e3b3f --- /dev/null +++ b/include/openPMD/auxiliary/JSONMatcher.hpp @@ -0,0 +1,140 @@ +#pragma once + +/* Copyright 2021-2023 Franz Poeschel + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#include "openPMD/auxiliary/JSON_internal.hpp" + +#include +#include +#include + +namespace openPMD::json +{ +struct Pattern +{ + std::regex pattern; + nlohmann::json config; + + Pattern(std::string const &pattern_in, nlohmann::json config_in) + // we construct the patterns once and use them often, so let's ask for + // some optimization + : pattern{pattern_in, std::regex_constants::egrep | std::regex_constants::optimize} + , config{std::move(config_in)} + {} +}; + +/** + * @brief Matcher for dataset configurations per backend. + * + */ +class MatcherPerBackend +{ +private: + std::vector m_patterns; + + void init(TracingJSON config); + +public: + /** + * @brief For default construction. + */ + explicit MatcherPerBackend(); + + /** + * @brief Initialize one backend's JSON matcher from its configuration. + * + * This constructor will parse the given config. + * It will distinguish between ordinary openPMD JSON configurations + * and extended configurations as defined by PIConGPU. + * If an ordinary JSON configuration was detected, given regex + * patterns will be matched against "" (the empty string). + * + * @param config The JSON configuration for one backend. + * E.g. for ADIOS2, this will be the sub-object/array found + * under config["adios2"]["dataset"]. + */ + MatcherPerBackend(std::string backendName_in, TracingJSON config); + + std::string backendName; + + /** + * @brief Get the JSON config associated with a regex pattern. + * + * @param datasetPath The regex. + * @return The matched JSON configuration, as a string. + */ + auto get(std::string const &datasetPath) const -> nlohmann::json const &; +}; +/** + * @brief Class to handle extended JSON configurations as used by + * the openPMD plugin. + * + * This class handles parsing of the extended JSON patterns as well as + * selection of one JSON configuration by regex. 
+ * + */ +class JsonMatcher +{ +private: + std::vector m_perBackend; + TracingJSON m_entireConfig; + + auto init() -> void; + +public: + /** + * @brief For default construction. + */ + explicit JsonMatcher(); + + /** + * @brief Initialize JSON matcher from command line arguments. + * + * This constructor will parse the given config, after reading it + * from a file if needed. In this case, the constructor is + * MPI-collective. + * It will distinguish between ordinary openPMD JSON configurations + * and extended configurations as defined by PIConGPU. + * If an ordinary JSON configuration was detected, given regex + * patterns will be matched against "" (the empty string). + * + * @param config The JSON configuration, exactly as in + * --openPMD.json. + * @param comm MPI communicator for collective file reading, + * if needed. + */ + JsonMatcher(openPMD::json::TracingJSON); + + /** + * @brief Get the JSON config associated with a regex pattern. + * + * @param datasetPath The regex. + * @return The matched JSON configuration, as a string. + */ + auto get(std::string const &datasetPath) const -> ParsedConfig; + + /** + * @brief Get the default JSON config. + * + * @return The default JSON configuration. + */ + auto getDefault() -> TracingJSON; +}; +} // namespace openPMD::json diff --git a/include/openPMD/auxiliary/JSON_internal.hpp b/include/openPMD/auxiliary/JSON_internal.hpp index 0ce32f2b14..87e6a85452 100644 --- a/include/openPMD/auxiliary/JSON_internal.hpp +++ b/include/openPMD/auxiliary/JSON_internal.hpp @@ -261,7 +261,8 @@ namespace json * Vector containing the lower-case keys to the single backends' * configurations. 
*/ - extern std::vector backendKeys(); + constexpr std::array backendKeys{ + "adios2", "json", "toml", "hdf5"}; /** * Function that can be called after reading all global options from the diff --git a/src/IO/IOTask.cpp b/src/IO/IOTask.cpp index 0692547745..dfa227f5e1 100644 --- a/src/IO/IOTask.cpp +++ b/src/IO/IOTask.cpp @@ -50,7 +50,7 @@ void Parameter::warnUnusedParameters< auto shadow = config.invertShadow(); // The backends are supposed to deal with this // Only global options here - for (auto const &backendKey : json::backendKeys()) + for (auto const &backendKey : json::backendKeys) { if (backendKey != currentBackendName) { diff --git a/src/auxiliary/JSON.cpp b/src/auxiliary/JSON.cpp index 21d5e6c276..c0c5ca8909 100644 --- a/src/auxiliary/JSON.cpp +++ b/src/auxiliary/JSON.cpp @@ -578,6 +578,12 @@ nlohmann::json &lowerCase(nlohmann::json &json) * We use "\vnum" to indicate "any array index". */ "\vnum", + "parameters"}, + {"adios2", + "dataset", + "\vnum", + "operators", + "\vnum", "parameters"}}; for (auto const &ignored : ignoredPaths) { @@ -621,17 +627,12 @@ std::optional asLowerCaseStringDynamic(nlohmann::json const &value) return maybeString; } -std::vector backendKeys() -{ - return {"adios2", "json", "toml", "hdf5"}; -} - void warnGlobalUnusedOptions(TracingJSON const &config) { auto shadow = config.invertShadow(); // The backends are supposed to deal with this // Only global options here - for (auto const &backendKey : json::backendKeys()) + for (auto const &backendKey : json::backendKeys) { shadow.erase(backendKey); } diff --git a/src/auxiliary/JSONMatcher.cpp b/src/auxiliary/JSONMatcher.cpp new file mode 100644 index 0000000000..910434c011 --- /dev/null +++ b/src/auxiliary/JSONMatcher.cpp @@ -0,0 +1,222 @@ +#include "openPMD/auxiliary/JSONMatcher.hpp" +#include "openPMD/Error.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" +#include + +namespace openPMD::json +{ +// Anonymous namespace so these helpers don't get exported +namespace +{ + /** + * 
@brief Read a single JSON pattern of the form {"select": ..., "cfg": ...} + * + * The "select" key is optional, indicating the default configuration if it + * is missing. + * + * @param patterns Output parameter: Emplace a parsed pattern into this + * list. + * @param defaultConfig Output parameter: If the pattern was the default + * pattern, emplace it here. + * @param object The JSON object that is parsed as the pattern. + * @return Whether the pattern was the default configuration or not. + */ + auto readPattern( + std::vector &patterns, + std::optional &defaultConfig, + nlohmann::json object) -> void; +} // namespace + +void MatcherPerBackend::init(TracingJSON tracing_config) +{ + auto &config = tracing_config.json(); + if (config.is_object()) + { + return; + } + else if (config.is_array()) + { + std::optional defaultConfig; + // enhanced PIConGPU-defined layout + for (auto &value : config) + { + readPattern(m_patterns, defaultConfig, std::move(value)); + } + // now replace the pattern list with the default config + tracing_config.json() = + std::move(defaultConfig).value_or(nlohmann::json::object()); + } + else + { + throw std::runtime_error( + "[openPMD plugin] Expecting an object or an array as JSON " + "configuration."); + } +} + +/** + * @brief Get the JSON config associated with a regex pattern. + * + * @param datasetPath The regex. + * @return The matched JSON configuration, as a string. 
+ */ +nlohmann::json const & +MatcherPerBackend::get(std::string const &datasetPath) const +{ + for (auto const &pattern : m_patterns) + { + if (std::regex_match(datasetPath, pattern.pattern)) + { + return pattern.config; + } + } + static nlohmann::json const emptyConfig; // null + return emptyConfig; +} + +auto JsonMatcher::init() -> void +{ + // Copy required since the config will be modified + if (!m_entireConfig.json().is_object()) + { + throw error::BackendConfigSchema( + {}, "Expected an object for the JSON configuration."); + } + m_perBackend.reserve(backendKeys.size()); + for (auto it = m_entireConfig.json().begin(); + it != m_entireConfig.json().end(); + ++it) + { + std::string const &backendName = it.key(); + if (std::find(backendKeys.begin(), backendKeys.end(), backendName) == + backendKeys.end()) + { + // The key does not point to the configuration of a backend + // recognized by PIConGPU Ignore it. + continue; + } + if (!it.value().is_object()) + { + throw error::BackendConfigSchema( + {it.key()}, + "Each backend's configuration must be a JSON object (config " + "for backend " + + backendName + ")."); + } + if (it.value().contains("dataset")) + { + m_perBackend.emplace_back( + backendName, m_entireConfig[it.key()]["dataset"]); + } + } +} + +MatcherPerBackend::MatcherPerBackend() = default; + +MatcherPerBackend::MatcherPerBackend( + std::string backendName_in, TracingJSON config) + : backendName(std::move(backendName_in)) +{ + init(std::move(config)); +} + +JsonMatcher::JsonMatcher() = default; + +JsonMatcher::JsonMatcher(TracingJSON entireConfig) + : m_entireConfig(std::move(entireConfig)) +{ + init(); +} + +auto JsonMatcher::get(std::string const &datasetPath) const -> ParsedConfig +{ + nlohmann::json result = nlohmann::json::object(); + for (auto const &backend : m_perBackend) + { + auto const &datasetConfig = backend.get(datasetPath); + if (datasetConfig.empty()) + { + // ensure that there actually is an object to erase this from + 
result[backend.backendName]["dataset"] = {}; + result[backend.backendName].erase("dataset"); + } + else + { + result[backend.backendName]["dataset"] = datasetConfig; + } + } + return {result, m_entireConfig.originallySpecifiedAs}; +} + +auto JsonMatcher::getDefault() -> TracingJSON +{ + return m_entireConfig; +} + +namespace +{ + auto readPattern( + std::vector &patterns, + std::optional &defaultConfig, + nlohmann::json object) -> void + { + constexpr char const *errorMsg = &R"END( +Each single pattern in an extended JSON configuration must be a JSON object +with keys 'select' and 'cfg'. +The key 'select' is optional, indicating a default configuration if it is +not set. +The key 'select' must point to either a single string or an array of strings.)END" + [1]; + + if (!object.is_object()) + { + throw std::runtime_error(errorMsg); + } + try + { + nlohmann::json &cfg = object.at("cfg"); + if (!object.contains("select")) + { + if (defaultConfig.has_value()) + { + throw std::runtime_error( + "Specified more than one default configuration."); + } + defaultConfig.emplace(std::move(cfg)); + return; + } + else + { + nlohmann::json const &pattern = object.at("select"); + std::string pattern_str = [&]() -> std::string { + if (pattern.is_string()) + { + return pattern.get(); + } + else if (pattern.is_array()) + { + std::stringstream res; + res << "($^)"; + for (auto const &sub_pattern : pattern) + { + res << "|(" << sub_pattern.get() + << ")"; + } + return res.str(); + } + else + { + throw std::runtime_error(errorMsg); + } + }(); + patterns.emplace_back(pattern_str, std::move(cfg)); + return; + } + } + catch (nlohmann::json::out_of_range const &) + { + throw std::runtime_error(errorMsg); + } + } +} // namespace +} // namespace openPMD::json From eabefdcee3f5b5d5eef8acc2b6b956e0a2a275c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Jul 2024 16:30:19 +0200 Subject: [PATCH 02/26] Embed JSONMatcher into the backends --- 
include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp | 2 - include/openPMD/IO/AbstractIOHandler.hpp | 34 +++++++++++----- include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp | 5 +-- .../IO/HDF5/ParallelHDF5IOHandlerImpl.hpp | 3 +- include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp | 6 +-- include/openPMD/auxiliary/JSON_internal.hpp | 2 +- src/IO/ADIOS/ADIOS2IOHandler.cpp | 24 ++++------- src/IO/AbstractIOHandler.cpp | 40 +++++++++++++++++++ src/IO/DummyIOHandler.cpp | 7 +++- src/IO/HDF5/HDF5IOHandler.cpp | 11 ++--- src/IO/HDF5/ParallelHDF5IOHandler.cpp | 8 ++-- src/IO/JSON/JSONIOHandler.cpp | 10 ++--- src/IO/JSON/JSONIOHandlerImpl.cpp | 8 ++-- src/Series.cpp | 2 +- 14 files changed, 99 insertions(+), 63 deletions(-) diff --git a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index da63b1196a..1ef5fb8725 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -122,7 +122,6 @@ class ADIOS2IOHandlerImpl ADIOS2IOHandlerImpl( AbstractIOHandler *, MPI_Comm, - json::TracingJSON config, std::string engineType, std::string specifiedExtension); @@ -130,7 +129,6 @@ class ADIOS2IOHandlerImpl explicit ADIOS2IOHandlerImpl( AbstractIOHandler *, - json::TracingJSON config, std::string engineType, std::string specifiedExtension); diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp index 291bc405a2..0e520fd36b 100644 --- a/include/openPMD/IO/AbstractIOHandler.hpp +++ b/include/openPMD/IO/AbstractIOHandler.hpp @@ -40,6 +40,11 @@ namespace openPMD { +namespace json +{ + class JsonMatcher; +} + /** * @brief Determine what items should be flushed upon Series::flush() * @@ -202,27 +207,34 @@ class AbstractIOHandler friend class Series; friend class ADIOS2IOHandlerImpl; friend class JSONIOHandlerImpl; + friend class HDF5IOHandlerImpl; friend class detail::ADIOS2File; private: void setIterationEncoding(IterationEncoding encoding); +protected: + // Needs to be a 
pointer due to include structure, this header is + // transitively included in user code, but we don't reexport the JSON + // library + std::unique_ptr jsonMatcher; + public: #if openPMD_HAVE_MPI - AbstractIOHandler(std::string path, Access at, MPI_Comm) - : directory{std::move(path)}, m_backendAccess{at}, m_frontendAccess{at} - {} + template + AbstractIOHandler( + std::string path, Access at, TracingJSON &&jsonConfig, MPI_Comm); #endif - AbstractIOHandler(std::string path, Access at) - : directory{std::move(path)}, m_backendAccess{at}, m_frontendAccess{at} - {} - virtual ~AbstractIOHandler() = default; - AbstractIOHandler(AbstractIOHandler const &) = default; - AbstractIOHandler(AbstractIOHandler &&) = default; + template + AbstractIOHandler(std::string path, Access at, TracingJSON &&jsonConfig); + virtual ~AbstractIOHandler(); + + AbstractIOHandler(AbstractIOHandler const &) = delete; + AbstractIOHandler(AbstractIOHandler &&) noexcept; - AbstractIOHandler &operator=(AbstractIOHandler const &) = default; - AbstractIOHandler &operator=(AbstractIOHandler &&) = default; + AbstractIOHandler &operator=(AbstractIOHandler const &) = delete; + AbstractIOHandler &operator=(AbstractIOHandler &&) noexcept; /** Add provided task to queue according to FIFO. 
* diff --git a/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp b/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp index e4efc06ea6..a681217adf 100644 --- a/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp +++ b/include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp @@ -42,10 +42,7 @@ class HDF5IOHandlerImpl : public AbstractIOHandlerImpl friend class ParallelHDF5IOHandler; public: - HDF5IOHandlerImpl( - AbstractIOHandler *, - json::TracingJSON config, - bool do_warn_unused_params = true); + HDF5IOHandlerImpl(AbstractIOHandler *, bool do_warn_unused_params = true); ~HDF5IOHandlerImpl() override; void diff --git a/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp b/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp index 3b214b64cb..5f0570d217 100644 --- a/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp +++ b/include/openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp @@ -37,8 +37,7 @@ namespace openPMD class ParallelHDF5IOHandlerImpl : public HDF5IOHandlerImpl { public: - ParallelHDF5IOHandlerImpl( - AbstractIOHandler *, MPI_Comm, json::TracingJSON config); + ParallelHDF5IOHandlerImpl(AbstractIOHandler *, MPI_Comm); ~ParallelHDF5IOHandlerImpl() override; MPI_Comm m_mpiComm; diff --git a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp index 38966e3b82..9457abb89f 100644 --- a/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp @@ -166,16 +166,12 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl }; explicit JSONIOHandlerImpl( - AbstractIOHandler *, - openPMD::json::TracingJSON config, - FileFormat, - std::string originalExtension); + AbstractIOHandler *, FileFormat, std::string originalExtension); #if openPMD_HAVE_MPI JSONIOHandlerImpl( AbstractIOHandler *, MPI_Comm, - openPMD::json::TracingJSON config, FileFormat, std::string originalExtension); #endif diff --git a/include/openPMD/auxiliary/JSON_internal.hpp b/include/openPMD/auxiliary/JSON_internal.hpp index 
87e6a85452..3b1bd69806 100644 --- a/include/openPMD/auxiliary/JSON_internal.hpp +++ b/include/openPMD/auxiliary/JSON_internal.hpp @@ -48,7 +48,7 @@ namespace json struct ParsedConfig { - nlohmann::json config; + nlohmann::json config = nlohmann::json::object(); SupportedLanguages originallySpecifiedAs{SupportedLanguages::JSON}; }; diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 647c31d856..be2a2b7326 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -34,6 +34,7 @@ #include "openPMD/ThrowError.hpp" #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/JSONMatcher.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Mpi.hpp" #include "openPMD/auxiliary/StringManip.hpp" @@ -106,7 +107,6 @@ std::optional joinedDimension(adios2::Dims const &dims) ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( AbstractIOHandler *handler, MPI_Comm communicator, - json::TracingJSON cfg, std::string engineType, std::string specifiedExtension) : AbstractIOHandlerImplCommon(handler) @@ -116,7 +116,7 @@ ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( , m_userSpecifiedExtension{std::move(specifiedExtension)} { init( - std::move(cfg), + handler->jsonMatcher->getDefault(), /* callbackWriteAttributesFromRank = */ [communicator, this](nlohmann::json const &attribute_writing_ranks) { int rank = 0; @@ -158,7 +158,6 @@ ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( AbstractIOHandler *handler, - json::TracingJSON cfg, std::string engineType, std::string specifiedExtension) : AbstractIOHandlerImplCommon(handler) @@ -166,7 +165,7 @@ ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( , m_engineType(std::move(engineType)) , m_userSpecifiedExtension(std::move(specifiedExtension)) { - init(std::move(cfg), [](auto const &...) {}); + init(handler->jsonMatcher->getDefault(), [](auto const &...) 
{}); } ADIOS2IOHandlerImpl::~ADIOS2IOHandlerImpl() @@ -2355,13 +2354,8 @@ ADIOS2IOHandler::ADIOS2IOHandler( json::TracingJSON options, std::string engineType, std::string specifiedExtension) - : AbstractIOHandler(std::move(path), at, comm) - , m_impl{ - this, - comm, - std::move(options), - std::move(engineType), - std::move(specifiedExtension)} + : AbstractIOHandler(std::move(path), at, std::move(options), comm) + , m_impl{this, comm, std::move(engineType), std::move(specifiedExtension)} {} #endif @@ -2372,12 +2366,8 @@ ADIOS2IOHandler::ADIOS2IOHandler( json::TracingJSON options, std::string engineType, std::string specifiedExtension) - : AbstractIOHandler(std::move(path), at) - , m_impl{ - this, - std::move(options), - std::move(engineType), - std::move(specifiedExtension)} + : AbstractIOHandler(std::move(path), at, std::move(options)) + , m_impl{this, std::move(engineType), std::move(specifiedExtension)} {} std::future diff --git a/src/IO/AbstractIOHandler.cpp b/src/IO/AbstractIOHandler.cpp index c8d3412fe2..3284d9d1d6 100644 --- a/src/IO/AbstractIOHandler.cpp +++ b/src/IO/AbstractIOHandler.cpp @@ -23,6 +23,8 @@ #include "openPMD/Error.hpp" #include "openPMD/IO/FlushParametersInternal.hpp" +#include "openPMD/auxiliary/JSONMatcher.hpp" + #include namespace openPMD::auxiliary @@ -121,4 +123,42 @@ bool AbstractIOHandler::fullSupportForVariableBasedEncoding() const { return false; } + +#if openPMD_HAVE_MPI +template +AbstractIOHandler::AbstractIOHandler( + std::string path, Access at, TracingJSON &&jsonConfig, MPI_Comm) + : jsonMatcher(std::make_unique( + std::forward(jsonConfig))) + , directory{std::move(path)} + , m_backendAccess{at} + , m_frontendAccess{at} +{} + +template AbstractIOHandler::AbstractIOHandler( + std::string path, Access at, json::TracingJSON &&jsonConfig, MPI_Comm); +#endif + +template +AbstractIOHandler::AbstractIOHandler( + std::string path, Access at, TracingJSON &&jsonConfig) + : jsonMatcher(std::make_unique( + std::forward(jsonConfig))) + , 
directory{std::move(path)} + , m_backendAccess{at} + , m_frontendAccess{at} +{} + +template AbstractIOHandler::AbstractIOHandler( + std::string path, Access at, json::TracingJSON &&jsonConfig); + +AbstractIOHandler::~AbstractIOHandler() = default; + +// AbstractIOHandler::AbstractIOHandler(AbstractIOHandler const &) = default; +AbstractIOHandler::AbstractIOHandler(AbstractIOHandler &&) noexcept = default; + +// AbstractIOHandler & +// AbstractIOHandler::operator=(AbstractIOHandler const &) = default; +AbstractIOHandler & +AbstractIOHandler::operator=(AbstractIOHandler &&) noexcept = default; } // namespace openPMD diff --git a/src/IO/DummyIOHandler.cpp b/src/IO/DummyIOHandler.cpp index 7882c9d5e3..fbf18d4791 100644 --- a/src/IO/DummyIOHandler.cpp +++ b/src/IO/DummyIOHandler.cpp @@ -19,6 +19,7 @@ * If not, see . */ #include "openPMD/IO/DummyIOHandler.hpp" +#include "openPMD/auxiliary/JSON_internal.hpp" #include #include @@ -26,7 +27,11 @@ namespace openPMD { DummyIOHandler::DummyIOHandler(std::string path, Access at) - : AbstractIOHandler(std::move(path), at) + : AbstractIOHandler( + std::move(path), + at, + json::TracingJSON( + nlohmann::json::object(), json::SupportedLanguages::JSON)) {} void DummyIOHandler::enqueue(IOTask const &) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 48e24fd89b..d2cdf62832 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -37,6 +37,7 @@ #include "openPMD/IO/HDF5/HDF5FilePosition.hpp" #include "openPMD/IO/IOTask.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/JSONMatcher.hpp" #include "openPMD/auxiliary/Mpi.hpp" #include "openPMD/auxiliary/StringManip.hpp" #include "openPMD/auxiliary/TypeTraits.hpp" @@ -75,9 +76,7 @@ namespace openPMD #endif HDF5IOHandlerImpl::HDF5IOHandlerImpl( - AbstractIOHandler *handler, - json::TracingJSON config, - bool do_warn_unused_params) + AbstractIOHandler *handler, bool do_warn_unused_params) : 
AbstractIOHandlerImpl(handler) , m_datasetTransferProperty{H5P_DEFAULT} , m_fileAccessProperty{H5P_DEFAULT} @@ -143,6 +142,8 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( m_H5T_LONG_DOUBLE_80_LE >= 0, "[HDF5] Internal error: Failed to create 128-bit complex long double"); + auto config = handler->jsonMatcher->getDefault(); + // JSON option can overwrite env option: if (config.json().contains("hdf5")) { @@ -3008,8 +3009,8 @@ std::future HDF5IOHandlerImpl::flush(internal::ParsedFlushParams ¶ms) #if openPMD_HAVE_HDF5 HDF5IOHandler::HDF5IOHandler( std::string path, Access at, json::TracingJSON config) - : AbstractIOHandler(std::move(path), at) - , m_impl{new HDF5IOHandlerImpl(this, std::move(config))} + : AbstractIOHandler(std::move(path), at, std::move(config)) + , m_impl{new HDF5IOHandlerImpl(this)} {} HDF5IOHandler::~HDF5IOHandler() = default; diff --git a/src/IO/HDF5/ParallelHDF5IOHandler.cpp b/src/IO/HDF5/ParallelHDF5IOHandler.cpp index e1192c8a9c..0418b7b76c 100644 --- a/src/IO/HDF5/ParallelHDF5IOHandler.cpp +++ b/src/IO/HDF5/ParallelHDF5IOHandler.cpp @@ -59,8 +59,8 @@ namespace openPMD ParallelHDF5IOHandler::ParallelHDF5IOHandler( std::string path, Access at, MPI_Comm comm, json::TracingJSON config) - : AbstractIOHandler(std::move(path), at, comm) - , m_impl{new ParallelHDF5IOHandlerImpl(this, comm, std::move(config))} + : AbstractIOHandler(std::move(path), at, std::move(config), comm) + , m_impl{new ParallelHDF5IOHandlerImpl(this, comm)} {} ParallelHDF5IOHandler::~ParallelHDF5IOHandler() = default; @@ -83,8 +83,8 @@ ParallelHDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) } ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl( - AbstractIOHandler *handler, MPI_Comm comm, json::TracingJSON config) - : HDF5IOHandlerImpl{handler, std::move(config), /* do_warn_unused_params = */ false} + AbstractIOHandler *handler, MPI_Comm comm) + : HDF5IOHandlerImpl{handler, /* do_warn_unused_params = */ false} , m_mpiComm{comm} , m_mpiInfo{MPI_INFO_NULL} /* MPI 3.0+: 
MPI_INFO_ENV */ { diff --git a/src/IO/JSON/JSONIOHandler.cpp b/src/IO/JSON/JSONIOHandler.cpp index d2a6217eb5..d0261ba8de 100644 --- a/src/IO/JSON/JSONIOHandler.cpp +++ b/src/IO/JSON/JSONIOHandler.cpp @@ -31,8 +31,8 @@ JSONIOHandler::JSONIOHandler( openPMD::json::TracingJSON jsonCfg, JSONIOHandlerImpl::FileFormat format, std::string originalExtension) - : AbstractIOHandler{std::move(path), at} - , m_impl{this, std::move(jsonCfg), format, std::move(originalExtension)} + : AbstractIOHandler{std::move(path), at, std::move(jsonCfg)} + , m_impl{this, format, std::move(originalExtension)} {} #if openPMD_HAVE_MPI @@ -43,9 +43,9 @@ JSONIOHandler::JSONIOHandler( openPMD::json::TracingJSON jsonCfg, JSONIOHandlerImpl::FileFormat format, std::string originalExtension) - : AbstractIOHandler{std::move(path), at} - , m_impl{JSONIOHandlerImpl{ - this, comm, std::move(jsonCfg), format, std::move(originalExtension)}} + : AbstractIOHandler{std::move(path), at, std::move(jsonCfg)} + , m_impl{ + JSONIOHandlerImpl{this, comm, format, std::move(originalExtension)}} {} #endif diff --git a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index a432737188..4ea5fe7eab 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -21,12 +21,12 @@ #include "openPMD/IO/JSON/JSONIOHandlerImpl.hpp" #include "openPMD/Datatype.hpp" -#include "openPMD/DatatypeHelpers.hpp" #include "openPMD/Error.hpp" #include "openPMD/IO/AbstractIOHandler.hpp" #include "openPMD/IO/AbstractIOHandlerImpl.hpp" #include "openPMD/ThrowError.hpp" #include "openPMD/auxiliary/Filesystem.hpp" +#include "openPMD/auxiliary/JSONMatcher.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/auxiliary/Memory.hpp" #include "openPMD/auxiliary/StringManip.hpp" @@ -387,21 +387,19 @@ JSONIOHandlerImpl::getBackendConfig(openPMD::json::TracingJSON &config) const JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, - openPMD::json::TracingJSON config, FileFormat 
format, std::string originalExtension) : AbstractIOHandlerImpl(handler) , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { - init(std::move(config)); + init(handler->jsonMatcher->getDefault()); } #if openPMD_HAVE_MPI JSONIOHandlerImpl::JSONIOHandlerImpl( AbstractIOHandler *handler, MPI_Comm comm, - openPMD::json::TracingJSON config, FileFormat format, std::string originalExtension) : AbstractIOHandlerImpl(handler) @@ -409,7 +407,7 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { - init(std::move(config)); + init(handler->jsonMatcher->getDefault()); } #endif diff --git a/src/Series.cpp b/src/Series.cpp index c0733e34cc..e732293dbf 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -1064,7 +1064,7 @@ void Series::initSeries( * A temporary IOHandler has been used. In this case, copy the * values from that IOHandler over into the real one. */ - ioHandler->operator=(***writable.IOHandler); + ioHandler->operator=(std::move(***writable.IOHandler)); *writable.IOHandler = std::move(ioHandler); } else From 59545a9dc730f4bfbd4b060e9a4c2e510f7d24f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Jul 2024 18:04:35 +0200 Subject: [PATCH 03/26] First attempt: Dataset-specific configuration --- examples/13_write_dynamic_configuration.cpp | 18 +++++++++--------- include/openPMD/IO/IOTask.hpp | 8 ++++++++ include/openPMD/auxiliary/JSONMatcher.hpp | 7 +------ include/openPMD/backend/Attributable.hpp | 1 + include/openPMD/backend/Writable.hpp | 1 + src/IO/ADIOS/ADIOS2IOHandler.cpp | 3 ++- src/IO/HDF5/HDF5IOHandler.cpp | 5 +++-- src/IO/IOTask.cpp | 18 ++++++++++++++++++ src/auxiliary/JSONMatcher.cpp | 15 +++++++++++++++ 9 files changed, 58 insertions(+), 18 deletions(-) diff --git a/examples/13_write_dynamic_configuration.cpp b/examples/13_write_dynamic_configuration.cpp index b480cb2f00..479cc65354 100644 --- a/examples/13_write_dynamic_configuration.cpp +++ 
b/examples/13_write_dynamic_configuration.cpp @@ -34,7 +34,7 @@ int main() # be passed by adding an at-sign `@` in front of the path # The format will then be recognized by filename extension, i.e. .json or .toml -backend = "adios2" +backend = "hdf5" iteration_encoding = "group_based" # The following is only relevant in read mode defer_iteration_parsing = true @@ -57,13 +57,18 @@ parameters.clevel = 5 # type = "some other parameter" # # ... -[hdf5.dataset] -chunks = "auto" +[[hdf5.dataset]] +cfg.chunks = "auto" + +[[hdf5.dataset]] +select = "particles/e/.*" +cfg.chunks = [10] +cfg.chornks = [] )END"; // open file for writing Series series = - Series("../samples/dynamicConfig.bp", Access::CREATE, defaults); + Series("../samples/dynamicConfig.h5", Access::CREATE, defaults); Datatype datatype = determineDatatype(); constexpr unsigned long length = 10ul; @@ -100,11 +105,6 @@ chunks = "auto" std::string const differentCompressionSettings = R"END( { "resizable": true, - "adios1": { - "dataset": { - "transform": "blosc:compressor=zlib,shuffle=bit,lvl=1;nometa" - } - }, "adios2": { "dataset": { "operators": [ diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index fafe9c669b..7fef9fcf81 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -43,6 +43,10 @@ namespace openPMD { class Attributable; class Writable; +namespace json +{ + class JsonMatcher; +} Writable *getWritable(Attributable *); @@ -372,6 +376,10 @@ struct OPENPMDAPI_EXPORT Parameter TracingJSON &, std::string const ¤tBackendName, std::string const &warningMessage); + + template + TracingJSON + compileJSONConfig(Writable const *writable, json::JsonMatcher &) const; }; template <> diff --git a/include/openPMD/auxiliary/JSONMatcher.hpp b/include/openPMD/auxiliary/JSONMatcher.hpp index 266b6e3b3f..8c2cc88371 100644 --- a/include/openPMD/auxiliary/JSONMatcher.hpp +++ b/include/openPMD/auxiliary/JSONMatcher.hpp @@ -32,12 +32,7 @@ struct Pattern std::regex pattern; 
nlohmann::json config; - Pattern(std::string const &pattern_in, nlohmann::json config_in) - // we construct the patterns once and use them often, so let's ask for - // some optimization - : pattern{pattern_in, std::regex_constants::egrep | std::regex_constants::optimize} - , config{std::move(config_in)} - {} + Pattern(std::string const &pattern_in, nlohmann::json config_in); }; /** diff --git a/include/openPMD/backend/Attributable.hpp b/include/openPMD/backend/Attributable.hpp index a77d8fe524..732b2d1b5c 100644 --- a/include/openPMD/backend/Attributable.hpp +++ b/include/openPMD/backend/Attributable.hpp @@ -208,6 +208,7 @@ class Attributable template friend T &internal::makeOwning(T &self, Series); friend class StatefulSnapshotsContainer; + friend class internal::AttributableData; protected: // tag for internal constructor diff --git a/include/openPMD/backend/Writable.hpp b/include/openPMD/backend/Writable.hpp index be36f47758..73d5ba826f 100644 --- a/include/openPMD/backend/Writable.hpp +++ b/include/openPMD/backend/Writable.hpp @@ -103,6 +103,7 @@ class Writable final template friend class Span; friend void debug::printDirty(Series const &); + friend struct Parameter; private: Writable(internal::AttributableData *); diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index be2a2b7326..4c6ab2e653 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -785,7 +785,8 @@ void ADIOS2IOHandlerImpl::createDataset( std::vector operators; json::TracingJSON options = - json::parseOptions(parameters.options, /* considerFiles = */ false); + parameters.compileJSONConfig( + writable, *m_handler->jsonMatcher); if (options.json().contains("adios2")) { json::TracingJSON datasetConfig(options["adios2"]); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index d2cdf62832..fdf5b8b58f 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -503,8 +503,9 @@ void 
HDF5IOHandlerImpl::createDataset( } json::TracingJSON config = [&]() { - auto parsed_config = json::parseOptions( - parameters.options, /* considerFiles = */ false); + auto parsed_config = + parameters.compileJSONConfig( + writable, *m_handler->jsonMatcher); if (auto hdf5_config_it = parsed_config.config.find("hdf5"); hdf5_config_it != parsed_config.config.end()) { diff --git a/src/IO/IOTask.cpp b/src/IO/IOTask.cpp index dfa227f5e1..8edd8ff854 100644 --- a/src/IO/IOTask.cpp +++ b/src/IO/IOTask.cpp @@ -19,6 +19,7 @@ * If not, see . */ #include "openPMD/IO/IOTask.hpp" +#include "openPMD/auxiliary/JSONMatcher.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" #include "openPMD/backend/Attributable.hpp" @@ -74,6 +75,23 @@ void Parameter::warnUnusedParameters< } } +template <> +json::ParsedConfig Parameter::compileJSONConfig( + Writable const *writable, json::JsonMatcher &jsonMatcher) const +{ + auto attri = writable->attributable->asInternalCopyOf(); + auto path = attri.myPath().openPMDPath(); + auto base_config = jsonMatcher.get(path); + auto manual_config = + json::parseOptions(options, /* considerFiles = */ false); + json::merge(base_config.config, manual_config.config); + return json::ParsedConfig{ + std::move(base_config.config), + (options.empty() || options == "{}") + ? 
manual_config.originallySpecifiedAs + : base_config.originallySpecifiedAs}; +} + namespace internal { std::string operationAsString(Operation op) diff --git a/src/auxiliary/JSONMatcher.cpp b/src/auxiliary/JSONMatcher.cpp index 910434c011..3da52c3844 100644 --- a/src/auxiliary/JSONMatcher.cpp +++ b/src/auxiliary/JSONMatcher.cpp @@ -1,7 +1,9 @@ #include "openPMD/auxiliary/JSONMatcher.hpp" #include "openPMD/Error.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" + #include +#include namespace openPMD::json { @@ -27,6 +29,19 @@ namespace nlohmann::json object) -> void; } // namespace +Pattern::Pattern(std::string const &pattern_in, nlohmann::json config_in) + : config(std::move(config_in)) +{ + // transform the regex such that the path to the Iteration is optional + std::stringstream build_pattern; + build_pattern << "(/data/[0-9]+/)?(" << pattern_in << ")"; + // we construct the patterns once and use them often, so let's ask for + // some optimization + pattern = std::regex( + build_pattern.str(), + std::regex_constants::egrep | std::regex_constants::optimize); +} + void MatcherPerBackend::init(TracingJSON tracing_config) { auto &config = tracing_config.json(); From c2de046b1b146e8b7c4a211c1350ea7336406b7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 9 Jul 2024 18:39:56 +0200 Subject: [PATCH 04/26] Seems to work --- src/IO/IOTask.cpp | 4 ++-- src/Series.cpp | 2 +- src/backend/Attributable.cpp | 9 +++------ 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/IO/IOTask.cpp b/src/IO/IOTask.cpp index 8edd8ff854..10062ee463 100644 --- a/src/IO/IOTask.cpp +++ b/src/IO/IOTask.cpp @@ -88,8 +88,8 @@ json::ParsedConfig Parameter::compileJSONConfig( return json::ParsedConfig{ std::move(base_config.config), (options.empty() || options == "{}") - ? manual_config.originallySpecifiedAs - : base_config.originallySpecifiedAs}; + ? 
base_config.originallySpecifiedAs + : manual_config.originallySpecifiedAs}; } namespace internal diff --git a/src/Series.cpp b/src/Series.cpp index e732293dbf..126cccfaca 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -1081,7 +1081,7 @@ void Series::initSeries( } series.iterations.linkHierarchy(writable); - series.iterations.writable().ownKeyWithinParent = "iterations"; + series.iterations.writable().ownKeyWithinParent = "data"; series.m_rankTable.m_attributable.linkHierarchy(writable); series.m_name = input->name; diff --git a/src/backend/Attributable.cpp b/src/backend/Attributable.cpp index da9e09e2e0..ade77e24d5 100644 --- a/src/backend/Attributable.cpp +++ b/src/backend/Attributable.cpp @@ -213,17 +213,14 @@ std::string Attributable::MyPath::openPMDPath() const { if (group.empty()) { - return std::string(); + return std::string("/"); } else { std::stringstream res; - auto it = group.begin(); - auto end = group.end(); - res << *it++; - for (; it != end; ++it) + for (auto const &element : group) { - res << '/' << *it; + res << '/' << element; } return res.str(); } From 74715fd0ec157d9d159fad246e3c3966ec716c0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 10 Jul 2024 10:40:06 +0200 Subject: [PATCH 05/26] Adapt Coretest to new output of myPath() --- test/CoreTest.cpp | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index c5d98a73c5..0239e5bab9 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -207,81 +207,73 @@ TEST_CASE("myPath", "[core]") Series series("../samples/myPath.json", Access::CREATE); REQUIRE(pathOf(series) == vec_t{}); auto iteration = series.iterations[1234]; - REQUIRE(pathOf(iteration) == vec_t{"iterations", "1234"}); + REQUIRE(pathOf(iteration) == vec_t{"data", "1234"}); auto writeSomething = [](auto &recordComponent) { recordComponent.resetDataset({Datatype::INT, {100}}); recordComponent.template 
makeConstant(5678); }; - REQUIRE(pathOf(iteration.meshes) == vec_t{"iterations", "1234", "meshes"}); + REQUIRE(pathOf(iteration.meshes) == vec_t{"data", "1234", "meshes"}); auto scalarMesh = iteration.meshes["e_chargeDensity"]; REQUIRE( pathOf(scalarMesh) == - vec_t{"iterations", "1234", "meshes", "e_chargeDensity"}); + vec_t{"data", "1234", "meshes", "e_chargeDensity"}); auto scalarMeshComponent = scalarMesh[RecordComponent::SCALAR]; REQUIRE( pathOf(scalarMeshComponent) == - vec_t{"iterations", "1234", "meshes", "e_chargeDensity"}); + vec_t{"data", "1234", "meshes", "e_chargeDensity"}); writeSomething(scalarMeshComponent); auto vectorMesh = iteration.meshes["E"]; - REQUIRE(pathOf(vectorMesh) == vec_t{"iterations", "1234", "meshes", "E"}); + REQUIRE(pathOf(vectorMesh) == vec_t{"data", "1234", "meshes", "E"}); auto vectorMeshComponent = vectorMesh["x"]; REQUIRE( pathOf(vectorMeshComponent) == - vec_t{"iterations", "1234", "meshes", "E", "x"}); + vec_t{"data", "1234", "meshes", "E", "x"}); - REQUIRE( - pathOf(iteration.particles) == - vec_t{"iterations", "1234", "particles"}); + REQUIRE(pathOf(iteration.particles) == vec_t{"data", "1234", "particles"}); auto speciesE = iteration.particles["e"]; - REQUIRE(pathOf(speciesE) == vec_t{"iterations", "1234", "particles", "e"}); + REQUIRE(pathOf(speciesE) == vec_t{"data", "1234", "particles", "e"}); auto speciesPosition = speciesE["position"]; REQUIRE( pathOf(speciesPosition) == - vec_t{"iterations", "1234", "particles", "e", "position"}); + vec_t{"data", "1234", "particles", "e", "position"}); auto speciesPositionX = speciesPosition["x"]; REQUIRE( pathOf(speciesPositionX) == - vec_t{"iterations", "1234", "particles", "e", "position", "x"}); + vec_t{"data", "1234", "particles", "e", "position", "x"}); writeSomething(speciesPositionX); auto speciesWeighting = speciesE["weighting"]; REQUIRE( pathOf(speciesWeighting) == - vec_t{"iterations", "1234", "particles", "e", "weighting"}); + vec_t{"data", "1234", "particles", "e", 
"weighting"}); auto speciesWeightingX = speciesWeighting[RecordComponent::SCALAR]; REQUIRE( pathOf(speciesWeightingX) == - vec_t{"iterations", "1234", "particles", "e", "weighting"}); + vec_t{"data", "1234", "particles", "e", "weighting"}); writeSomething(speciesWeightingX); REQUIRE( pathOf(speciesE.particlePatches) == - vec_t{"iterations", "1234", "particles", "e", "particlePatches"}); + vec_t{"data", "1234", "particles", "e", "particlePatches"}); auto patchExtent = speciesE.particlePatches["extent"]; REQUIRE( pathOf(patchExtent) == - vec_t{ - "iterations", - "1234", - "particles", - "e", - "particlePatches", - "extent"}); + vec_t{"data", "1234", "particles", "e", "particlePatches", "extent"}); auto patchExtentX = patchExtent["x"]; REQUIRE( pathOf(patchExtentX) == vec_t{ - "iterations", + "data", "1234", "particles", "e", @@ -293,7 +285,7 @@ TEST_CASE("myPath", "[core]") REQUIRE( pathOf(patchNumParticles) == vec_t{ - "iterations", + "data", "1234", "particles", "e", @@ -305,7 +297,7 @@ TEST_CASE("myPath", "[core]") REQUIRE( pathOf(patchNumParticlesComponent) == vec_t{ - "iterations", + "data", "1234", "particles", "e", From d496b0e122d1e283f207c8301cda322f712bf783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 10 Jul 2024 11:36:01 +0200 Subject: [PATCH 06/26] Better error messages and some documentation inside example --- examples/13_write_dynamic_configuration.cpp | 33 ++++- src/auxiliary/JSONMatcher.cpp | 142 +++++++++++++------- 2 files changed, 125 insertions(+), 50 deletions(-) diff --git a/examples/13_write_dynamic_configuration.cpp b/examples/13_write_dynamic_configuration.cpp index 479cc65354..d507b4c8bd 100644 --- a/examples/13_write_dynamic_configuration.cpp +++ b/examples/13_write_dynamic_configuration.cpp @@ -10,7 +10,7 @@ using namespace openPMD; int main() { - if (!getVariants()["adios2"]) + if (!getVariants()["hdf5"]) { // Example configuration below selects the ADIOS2 backend return 0; @@ -57,13 +57,40 @@ 
parameters.clevel = 5 # type = "some other parameter" # # ... +# Sometimes, dataset configurations should not affect all datasets, but only +# specific ones, e.g. only particle data. +# Dataset configurations can be given as a list, here at the example of HDF5. +# In such lists, each entry is an object with two keys: +# +# 1. 'cfg': Mandatory key, this is the actual dataset configuration. +# 2. 'select': A Regex or a list of Regexes to match against the dataset name. +# +# This makes it possible to give dataset-specific configurations. +# The dataset name is the same as returned +# by `Attributable::myPath().openPMDPath()`. +# The regex must match against either the full path (e.g. "/data/1/meshes/E/x") +# or against the path within the iteration (e.g. "meshes/E/x"). + +# Example: +# Let HDF5 datasets be automatically chunked by default [[hdf5.dataset]] cfg.chunks = "auto" +# For particles, we can specify the chunking explicitly +[[hdf5.dataset]] +# Multiple selection regexes can be given as a list. +# They will be fused into a single regex '($^)|(regex1)|(regex2)|(regex3)|...'. +select = ["/data/1/particles/e/.*", "/data/2/particles/e/.*"] +cfg.chunks = [5] + +# Selecting a match works top-down, the order of list entries is important. [[hdf5.dataset]] +# Specifying only a single regex. +# The regex can match against the full dataset path +# or against the path within the Iteration. +# Capitalization is irrelevant. 
select = "particles/e/.*" -cfg.chunks = [10] -cfg.chornks = [] +CFG.CHUNKS = [10] )END"; // open file for writing diff --git a/src/auxiliary/JSONMatcher.cpp b/src/auxiliary/JSONMatcher.cpp index 3da52c3844..cabc269e61 100644 --- a/src/auxiliary/JSONMatcher.cpp +++ b/src/auxiliary/JSONMatcher.cpp @@ -2,8 +2,10 @@ #include "openPMD/Error.hpp" #include "openPMD/auxiliary/JSON_internal.hpp" +#include #include #include +#include namespace openPMD::json { @@ -16,6 +18,8 @@ namespace * The "select" key is optional, indicating the default configuration if it * is missing. * + * @param backend_name For error messages. + * @param index_in_list For error messages. * @param patterns Output parameter: Emplace a parsed pattern into this * list. * @param defaultConfig Output parameter: If the pattern was the default @@ -24,6 +28,8 @@ namespace * @return Whether the pattern was the default configuration or not. */ auto readPattern( + std::string const &backend_name, + size_t index_in_list, std::vector &patterns, std::optional &defaultConfig, nlohmann::json object) -> void; @@ -53,9 +59,14 @@ void MatcherPerBackend::init(TracingJSON tracing_config) { std::optional defaultConfig; // enhanced PIConGPU-defined layout - for (auto &value : config) + for (size_t i = 0; i < config.size(); ++i) { - readPattern(m_patterns, defaultConfig, std::move(value)); + readPattern( + backendName, + i, + m_patterns, + defaultConfig, + std::move(config.at(i))); } // now replace the pattern list with the default config tracing_config.json() = @@ -63,9 +74,8 @@ void MatcherPerBackend::init(TracingJSON tracing_config) } else { - throw std::runtime_error( - "[openPMD plugin] Expecting an object or an array as JSON " - "configuration."); + throw error::BackendConfigSchema( + {backendName, "dataset"}, "Expecting an object or an array."); } } @@ -171,66 +181,104 @@ auto JsonMatcher::getDefault() -> TracingJSON namespace { auto readPattern( + std::string const &backend_name, + size_t index_in_list, std::vector 
&patterns, std::optional &defaultConfig, nlohmann::json object) -> void { - constexpr char const *errorMsg = &R"END( -Each single pattern in an extended JSON configuration must be a JSON object -with keys 'select' and 'cfg'. -The key 'select' is optional, indicating a default configuration if it is -not set. -The key 'select' must point to either a single string or an array of strings.)END" - [1]; + constexpr char const *errorMsg = R"END( +Each single pattern in an dataset-specific JSON/TOML configuration must be +an object with mandatory key 'cfg' and optional key 'select'. +When the key 'select' is not specified, the given configuration is used +for setting up the default dataset configuration upon backend initialization. +The key 'select' must point to either a single string or an array of strings +and is interpreted as a regular expression against which the dataset name +(full path or path within an iteration) must match.)END"; + auto throw_up = [&](std::string const &additional_info, + auto &&...additional_path) { + throw error::BackendConfigSchema( + {backend_name, + "dataset", + std::to_string(index_in_list), + additional_path...}, + additional_info + errorMsg); + }; if (!object.is_object()) { - throw std::runtime_error(errorMsg); + throw_up("Not an object!"); + } + if (!object.contains("cfg")) + { + throw_up("Mandatory key missing: 'cfg'!"); } - try { - nlohmann::json &cfg = object.at("cfg"); - if (!object.contains("select")) + std::vector unrecognized_keys; + for (auto it = object.begin(); it != object.end(); ++it) { - if (defaultConfig.has_value()) + if (it.key() == "select" || it.key() == "cfg") { - throw std::runtime_error( - "Specified more than one default configuration."); + continue; } - defaultConfig.emplace(std::move(cfg)); - return; + unrecognized_keys.emplace_back(it.key()); } - else + if (!unrecognized_keys.empty()) { - nlohmann::json const &pattern = object.at("select"); - std::string pattern_str = [&]() -> std::string { - if 
(pattern.is_string()) - { - return pattern.get(); - } - else if (pattern.is_array()) + std::cerr << "[Warning] JSON/TOML config at '" << backend_name + << ".dataset." << index_in_list + << "' has unrecognized keys:"; + for (auto const &item : unrecognized_keys) + { + std::cerr << " '" << item << '\''; + } + std::cerr << '.' << std::endl; + } + } + + nlohmann::json &cfg = object.at("cfg"); + if (!object.contains("select")) + { + if (defaultConfig.has_value()) + { + throw_up("Specified more than one default configuration!"); + } + defaultConfig.emplace(std::move(cfg)); + return; + } + else + { + nlohmann::json const &pattern = object.at("select"); + std::string pattern_str = [&]() -> std::string { + if (pattern.is_string()) + { + return pattern.get(); + } + else if (pattern.is_array()) + { + std::stringstream res; + res << "($^)"; + for (auto const &sub_pattern : pattern) { - std::stringstream res; - res << "($^)"; - for (auto const &sub_pattern : pattern) + if (!sub_pattern.is_string()) { - res << "|(" << sub_pattern.get() - << ")"; + throw_up( + "Must be a string or an array of string!", + "select"); } - return res.str(); - } - else - { - throw std::runtime_error(errorMsg); + res << "|(" << sub_pattern.get() << ")"; } - }(); - patterns.emplace_back(pattern_str, std::move(cfg)); - return; - } - } - catch (nlohmann::json::out_of_range const &) - { - throw std::runtime_error(errorMsg); + return res.str(); + } + else + { + throw_up( + "Must be a string or an array of string!", "select"); + throw std::runtime_error("Unreachable!"); + } + }(); + patterns.emplace_back(pattern_str, std::move(cfg)); + return; } } } // namespace From 66ae9a82ccb1f908ef9326cbdcf5d9213dd4e48d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 10 Jul 2024 13:14:21 +0200 Subject: [PATCH 07/26] Adapt constructors for installation without ADIOS2/HDF5 --- src/IO/ADIOS/ADIOS2IOHandler.cpp | 10 ++++------ src/IO/HDF5/HDF5IOHandler.cpp | 7 ++----- 
src/IO/HDF5/ParallelHDF5IOHandler.cpp | 15 ++++----------- 3 files changed, 10 insertions(+), 22 deletions(-) diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 4c6ab2e653..6d6f215e5b 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -2384,13 +2384,12 @@ ADIOS2IOHandler::ADIOS2IOHandler( std::string path, Access at, MPI_Comm comm, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - json::TracingJSON, + json::TracingJSON config, // NOLINTNEXTLINE(performance-unnecessary-value-param) std::string, // NOLINTNEXTLINE(performance-unnecessary-value-param) std::string) - : AbstractIOHandler(std::move(path), at, comm) + : AbstractIOHandler(std::move(path), at, std::move(config), comm) {} #endif // openPMD_HAVE_MPI @@ -2398,13 +2397,12 @@ ADIOS2IOHandler::ADIOS2IOHandler( ADIOS2IOHandler::ADIOS2IOHandler( std::string path, Access at, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - json::TracingJSON, + json::TracingJSON config, // NOLINTNEXTLINE(performance-unnecessary-value-param) std::string, // NOLINTNEXTLINE(performance-unnecessary-value-param) std::string) - : AbstractIOHandler(std::move(path), at) + : AbstractIOHandler(std::move(path), at, std::move(config)) {} std::future ADIOS2IOHandler::flush(internal::ParsedFlushParams &) diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index fdf5b8b58f..c74a942de2 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -3023,11 +3023,8 @@ std::future HDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) #else HDF5IOHandler::HDF5IOHandler( - std::string path, - Access at, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] json::TracingJSON config) - : AbstractIOHandler(std::move(path), at) + std::string path, Access at, json::TracingJSON config) + : AbstractIOHandler(std::move(path), at, std::move(config)) { throw std::runtime_error("openPMD-api built without HDF5 
support"); } diff --git a/src/IO/HDF5/ParallelHDF5IOHandler.cpp b/src/IO/HDF5/ParallelHDF5IOHandler.cpp index 0418b7b76c..dd1816975a 100644 --- a/src/IO/HDF5/ParallelHDF5IOHandler.cpp +++ b/src/IO/HDF5/ParallelHDF5IOHandler.cpp @@ -422,22 +422,15 @@ ParallelHDF5IOHandlerImpl::flush(internal::ParsedFlushParams ¶ms) #if openPMD_HAVE_MPI ParallelHDF5IOHandler::ParallelHDF5IOHandler( - std::string path, - Access at, - MPI_Comm comm, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] json::TracingJSON config) - : AbstractIOHandler(std::move(path), at, comm) + std::string path, Access at, MPI_Comm comm, json::TracingJSON config) + : AbstractIOHandler(std::move(path), at, std::move(config), comm) { throw std::runtime_error("openPMD-api built without HDF5 support"); } #else ParallelHDF5IOHandler::ParallelHDF5IOHandler( - std::string const &path, - Access at, - // NOLINTNEXTLINE(performance-unnecessary-value-param) - [[maybe_unused]] json::TracingJSON config) - : AbstractIOHandler(path, at) + std::string const &path, Access at, json::TracingJSON config) + : AbstractIOHandler(path, at, std::move(config)) { throw std::runtime_error( "openPMD-api built without parallel support and without HDF5 support"); From 0b651789ba948fcf5ab0b87305069148dbaf5e7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 10 Jul 2024 16:10:29 +0200 Subject: [PATCH 08/26] CI fixes --- include/openPMD/IO/AbstractIOHandler.hpp | 4 +- include/openPMD/auxiliary/JSONMatcher.hpp | 50 +++++++++++------------ src/IO/AbstractIOHandler.cpp | 28 ++++--------- 3 files changed, 35 insertions(+), 47 deletions(-) diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp index 0e520fd36b..52c16a0b9a 100644 --- a/include/openPMD/IO/AbstractIOHandler.hpp +++ b/include/openPMD/IO/AbstractIOHandler.hpp @@ -231,7 +231,9 @@ class AbstractIOHandler virtual ~AbstractIOHandler(); AbstractIOHandler(AbstractIOHandler const &) = delete; - 
AbstractIOHandler(AbstractIOHandler &&) noexcept; + // std::queue::queue(queue&&) is not noexcept + // NOLINTNEXTLINE(performance-noexcept-move-constructor) + AbstractIOHandler(AbstractIOHandler &&) noexcept(false); AbstractIOHandler &operator=(AbstractIOHandler const &) = delete; AbstractIOHandler &operator=(AbstractIOHandler &&) noexcept; diff --git a/include/openPMD/auxiliary/JSONMatcher.hpp b/include/openPMD/auxiliary/JSONMatcher.hpp index 8c2cc88371..14b8724a79 100644 --- a/include/openPMD/auxiliary/JSONMatcher.hpp +++ b/include/openPMD/auxiliary/JSONMatcher.hpp @@ -1,21 +1,23 @@ #pragma once -/* Copyright 2021-2023 Franz Poeschel +/* Copyright 2021-2024 Franz Poeschel * - * This file is part of PIConGPU. + * This file is part of openPMD-api. * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by + * openPMD-api is free software: you can redistribute it and/or modify + * it under the terms of of either the GNU General Public License or + * the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * - * PIConGPU is distributed in the hope that it will be useful, + * openPMD-api is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. * * You should have received a copy of the GNU General Public License - * along with PIConGPU. + * and the GNU Lesser General Public License along with openPMD-api. * If not, see . */ @@ -57,15 +59,14 @@ class MatcherPerBackend * * This constructor will parse the given config. 
* It will distinguish between ordinary openPMD JSON configurations - * and extended configurations as defined by PIConGPU. - * If an ordinary JSON configuration was detected, given regex - * patterns will be matched against "" (the empty string). + * and dataset-specific configurations. * + * @param backendName The backend's JSON key. * @param config The JSON configuration for one backend. * E.g. for ADIOS2, this will be the sub-object/array found * under config["adios2"]["dataset"]. */ - MatcherPerBackend(std::string backendName_in, TracingJSON config); + MatcherPerBackend(std::string backendName, TracingJSON config); std::string backendName; @@ -78,8 +79,7 @@ class MatcherPerBackend auto get(std::string const &datasetPath) const -> nlohmann::json const &; }; /** - * @brief Class to handle extended JSON configurations as used by - * the openPMD plugin. + * @brief Class to handle default and dataset-specific JSON configurations. * * This class handles parsing of the extended JSON patterns as well as * selection of one JSON configuration by regex. @@ -100,22 +100,18 @@ class JsonMatcher explicit JsonMatcher(); /** - * @brief Initialize JSON matcher from command line arguments. + * @brief Initialize JSON matcher from a parsed JSON config. * - * This constructor will parse the given config, after reading it - * from a file if needed. In this case, the constructor is - * MPI-collective. - * It will distinguish between ordinary openPMD JSON configurations - * and extended configurations as defined by PIConGPU. - * If an ordinary JSON configuration was detected, given regex - * patterns will be matched against "" (the empty string). + * Will go through the backends' configurations (keys defined by + * `backendKeys` in JSON_internal.hpp) and check for dataset-specific + * configurations. It will then construct: + * + * 1. A default configuration. + * 2. Matchers for retrieving dataset-specific configurations. 
* - * @param config The JSON configuration, exactly as in - * --openPMD.json. - * @param comm MPI communicator for collective file reading, - * if needed. + * @param config The parsed JSON configuration as specified by the user. */ - JsonMatcher(openPMD::json::TracingJSON); + JsonMatcher(openPMD::json::TracingJSON config); /** * @brief Get the JSON config associated with a regex pattern. diff --git a/src/IO/AbstractIOHandler.cpp b/src/IO/AbstractIOHandler.cpp index 3284d9d1d6..e7464e348a 100644 --- a/src/IO/AbstractIOHandler.cpp +++ b/src/IO/AbstractIOHandler.cpp @@ -125,40 +125,30 @@ bool AbstractIOHandler::fullSupportForVariableBasedEncoding() const } #if openPMD_HAVE_MPI -template +template <> AbstractIOHandler::AbstractIOHandler( - std::string path, Access at, TracingJSON &&jsonConfig, MPI_Comm) - : jsonMatcher(std::make_unique( - std::forward(jsonConfig))) + std::string path, Access at, json::TracingJSON &&jsonConfig, MPI_Comm) + : jsonMatcher(std::make_unique(std::move(jsonConfig))) , directory{std::move(path)} , m_backendAccess{at} , m_frontendAccess{at} {} - -template AbstractIOHandler::AbstractIOHandler( - std::string path, Access at, json::TracingJSON &&jsonConfig, MPI_Comm); #endif -template +template <> AbstractIOHandler::AbstractIOHandler( - std::string path, Access at, TracingJSON &&jsonConfig) - : jsonMatcher(std::make_unique( - std::forward(jsonConfig))) + std::string path, Access at, json::TracingJSON &&jsonConfig) + : jsonMatcher(std::make_unique(std::move(jsonConfig))) , directory{std::move(path)} , m_backendAccess{at} , m_frontendAccess{at} {} -template AbstractIOHandler::AbstractIOHandler( - std::string path, Access at, json::TracingJSON &&jsonConfig); - AbstractIOHandler::~AbstractIOHandler() = default; +// std::queue::queue(queue&&) is not noexcept +// NOLINTNEXTLINE(performance-noexcept-move-constructor) +AbstractIOHandler::AbstractIOHandler(AbstractIOHandler &&) = default; -// AbstractIOHandler::AbstractIOHandler(AbstractIOHandler const &) 
= default; -AbstractIOHandler::AbstractIOHandler(AbstractIOHandler &&) noexcept = default; - -// AbstractIOHandler & -// AbstractIOHandler::operator=(AbstractIOHandler const &) = default; AbstractIOHandler & AbstractIOHandler::operator=(AbstractIOHandler &&) noexcept = default; } // namespace openPMD From 92c6ced885fb3f27b121538cde6482790b403253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 10 Jul 2024 16:58:34 +0200 Subject: [PATCH 09/26] Basic implementation --- include/openPMD/auxiliary/JSON.hpp | 13 ++++++++++ src/auxiliary/JSON.cpp | 34 ++++++++++++++++++++++--- src/binding/python/Series.cpp | 41 +++++++++++++++++++++++++----- 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/include/openPMD/auxiliary/JSON.hpp b/include/openPMD/auxiliary/JSON.hpp index 8c2551fe0a..223dfbde87 100644 --- a/include/openPMD/auxiliary/JSON.hpp +++ b/include/openPMD/auxiliary/JSON.hpp @@ -21,6 +21,12 @@ #pragma once +#include "openPMD/config.hpp" + +#if openPMD_HAVE_MPI +#include +#endif + #include namespace openPMD @@ -61,5 +67,12 @@ namespace json */ std::string merge(std::string const &defaultValue, std::string const &overwrite); + +#if openPMD_HAVE_MPI + std::string merge( + std::string const &defaultValue, + std::string const &overwrite, + MPI_Comm); +#endif } // namespace json } // namespace openPMD diff --git a/src/auxiliary/JSON.cpp b/src/auxiliary/JSON.cpp index c0c5ca8909..c28a66d107 100644 --- a/src/auxiliary/JSON.cpp +++ b/src/auxiliary/JSON.cpp @@ -694,11 +694,22 @@ merge(nlohmann::json &defaultVal, nlohmann::json const &overwrite) return defaultVal; } -std::string merge(std::string const &defaultValue, std::string const &overwrite) +template +std::string merge_impl( + std::string const &defaultValue, + std::string const &overwrite, + MPI_Comm_t &&...comm) { - auto [res, returnFormat] = - parseOptions(defaultValue, /* considerFiles = */ false); - merge(res, parseOptions(overwrite, /* considerFiles = */ false).config); + auto 
res = parseOptions( + defaultValue, + std::forward(comm)..., + /* considerFiles = */ true) + .config; + auto [second, returnFormat] = parseOptions( + overwrite, + std::forward(comm)..., + /* considerFiles = */ true); + merge(res, second); switch (returnFormat) { case SupportedLanguages::JSON: @@ -714,6 +725,21 @@ std::string merge(std::string const &defaultValue, std::string const &overwrite) throw std::runtime_error("Unreachable!"); } +std::string merge(std::string const &defaultValue, std::string const &overwrite) +{ + return merge_impl(defaultValue, overwrite); +} + +#if openPMD_HAVE_MPI +std::string merge( + std::string const &defaultValue, + std::string const &overwrite, + MPI_Comm comm) +{ + return merge_impl(defaultValue, overwrite, comm); +} +#endif + nlohmann::json & filterByTemplate(nlohmann::json &defaultVal, nlohmann::json const &positiveMask) { diff --git a/src/binding/python/Series.cpp b/src/binding/python/Series.cpp index d737dc8198..09bcdfee55 100644 --- a/src/binding/python/Series.cpp +++ b/src/binding/python/Series.cpp @@ -423,12 +423,7 @@ Look for the WriteIterations class for further documentation. return series; }); - m.def( - "merge_json", - &json::merge, - py::arg("default_value") = "{}", - py::arg("overwrite") = "{}", - R"END( + constexpr char const *docs_merge_json = &R"END( Merge two JSON/TOML datasets into one. Merging rules: @@ -460,5 +455,37 @@ users to overwrite default options, while keeping any other ones. * returns: The merged dataset, according to the above rules. If `defaultValue` was a JSON dataset, then as a JSON string, otherwise as a TOML string. 
- )END"); + )END"[1]; + + m.def( + "merge_json", + py::overload_cast( + &json::merge), + py::arg("default_value") = "{}", + py::arg("overwrite") = "{}", + docs_merge_json) +#if openPMD_HAVE_MPI + .def( + "merge_json", + [](std::string const &default_value, + std::string const &overwrite, + py::object &comm) { + auto variant = pythonObjectAsMpiComm(comm); + if (auto errorMsg = std::get_if(&variant)) + { + throw std::runtime_error("[merge_json] " + *errorMsg); + } + else + { + py::gil_scoped_release release; + return json::merge( + default_value, overwrite, std::get(variant)); + } + }, + py::arg("default_value") = "{}", + py::arg("overwrite") = "{}", + py::arg("comm"), + docs_merge_json) +#endif + ; } From 61d4de6b1676197db0a7114e3eb099ca58a1c0a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Thu, 11 Jul 2024 11:24:37 +0200 Subject: [PATCH 10/26] Update documentation and tests --- include/openPMD/auxiliary/JSON.hpp | 48 ++++++++++++++++++++++++++++-- src/binding/python/Series.cpp | 8 ++++- test/JSONTest.cpp | 4 +-- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/include/openPMD/auxiliary/JSON.hpp b/include/openPMD/auxiliary/JSON.hpp index 223dfbde87..26aa52281c 100644 --- a/include/openPMD/auxiliary/JSON.hpp +++ b/include/openPMD/auxiliary/JSON.hpp @@ -59,16 +59,58 @@ namespace json * users to overwrite default options, while keeping any other ones. * * @param defaultValue A string containing either a JSON or a TOML dataset. + * If the string begins with an `@`, the JSON/TOML dataset will be + * read from the filesystem at the specified path. * @param overwrite A string containing either a JSON or TOML dataset (does - * not need to be the same as `defaultValue`). + * not need to be the same as `defaultValue`). + * If the string begins with an `@`, the JSON/TOML dataset will be + * read from the filesystem at the specified path. * @return std::string The merged dataset, according to the above rules. 
If - * `defaultValue` was a JSON dataset, then as a JSON string, otherwise as a - * TOML string. + * `overwrite` was a JSON dataset, then as a JSON string, otherwise + * as a TOML string. */ std::string merge(std::string const &defaultValue, std::string const &overwrite); #if openPMD_HAVE_MPI + /** + * @brief Merge two JSON/TOML datasets into one. + * + * Merging rules: + * 1. If both `defaultValue` and `overwrite` are JSON/TOML objects, then the + * resulting JSON/TOML object will contain the union of both objects' + * keys. If a key is specified in both objects, the values corresponding + * to the key are merged recursively. Keys that point to a null value + * after this procedure will be pruned. + * 2. In any other case, the JSON/TOML dataset `defaultValue` is replaced in + * its entirety with the JSON/TOML dataset `overwrite`. + * + * Note that item 2 means that datasets of different type will replace each + * other without error. + * It also means that array types will replace each other without any notion + * of appending or merging. + * + * Possible use case: + * An application uses openPMD-api and wants to do the following: + * 1. Set some default backend options as JSON/TOML parameters. + * 2. Let its users specify custom backend options additionally. + * + * By using the json::merge() function, this application can then allow + * users to overwrite default options, while keeping any other ones. + * + * @param defaultValue A string containing either a JSON or a TOML dataset. + * If the string begins with an `@`, the JSON/TOML dataset will be + * read in parallel (using the MPI Communicator) + * from the filesystem at the specified path. + * @param overwrite A string containing either a JSON or TOML dataset (does + * not need to be the same as `defaultValue`). + * If the string begins with an `@`, the JSON/TOML dataset will be + * read in parallel (using the MPI Communicator) + * from the filesystem at the specified path. 
+ * @return std::string The merged dataset, according to the above rules. If + * `overwrite` was a JSON dataset, then as a JSON string, otherwise + * as a TOML string. + */ std::string merge( std::string const &defaultValue, std::string const &overwrite, diff --git a/src/binding/python/Series.cpp b/src/binding/python/Series.cpp index 09bcdfee55..11ad2ae651 100644 --- a/src/binding/python/Series.cpp +++ b/src/binding/python/Series.cpp @@ -450,10 +450,16 @@ users to overwrite default options, while keeping any other ones. Parameters: * default_value: A string containing either a JSON or a TOML dataset. + If the string begins with an `@`, the JSON/TOML dataset will be + read from the filesystem at the specified path. + An MPI communicator can be passed to read in parallel. * overwrite: A string containing either a JSON or TOML dataset (does not need to be the same as `defaultValue`). + If the string begins with an `@`, the JSON/TOML dataset will be + read from the filesystem at the specified path. + An MPI communicator can be passed to read in parallel. * returns: The merged dataset, according to the above rules. - If `defaultValue` was a JSON dataset, then as a JSON string, + If `overwrite` was a JSON dataset, then as a JSON string, otherwise as a TOML string. 
)END"[1]; diff --git a/test/JSONTest.cpp b/test/JSONTest.cpp index 067919bd89..304ad16e15 100644 --- a/test/JSONTest.cpp +++ b/test/JSONTest.cpp @@ -217,8 +217,8 @@ right = "val" }(); REQUIRE(json::merge(leftJson, rightJson) == resJson); - REQUIRE(json::merge(leftJson, rightToml) == resJson); - REQUIRE(sort_lines(json::merge(leftToml, rightJson)) == resToml); + REQUIRE(sort_lines(json::merge(leftJson, rightToml)) == resToml); + REQUIRE(json::merge(leftToml, rightJson) == resJson); REQUIRE(sort_lines(json::merge(leftToml, rightToml)) == resToml); } } From 6d74a53baace02aedee11f0e5e40369f9bb941ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 16 Jul 2024 15:59:14 +0200 Subject: [PATCH 11/26] Add a JSON translation of the config for NVHPC compilers might also be good for documentation purposes as JSON is more widely known --- examples/13_write_dynamic_configuration.cpp | 61 +++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/examples/13_write_dynamic_configuration.cpp b/examples/13_write_dynamic_configuration.cpp index d507b4c8bd..653d6f1160 100644 --- a/examples/13_write_dynamic_configuration.cpp +++ b/examples/13_write_dynamic_configuration.cpp @@ -17,6 +17,8 @@ int main() } using position_t = double; + +#if !__NVCOMPILER // see https://github.com/ToruNiina/toml11/issues/205 /* * This example demonstrates how to use JSON/TOML-based dynamic * configuration for openPMD. @@ -92,6 +94,65 @@ cfg.chunks = [5] select = "particles/e/.*" CFG.CHUNKS = [10] )END"; +#else + /* + * This is the same configuration in JSON. We need this in deprecated + * NVHPC-compilers due to problems that those compilers have with the + * toruniina::toml11 library. 
+ */ + std::string const defaults = R"( +{ + "backend": "hdf5", + "defer_iteration_parsing": true, + "iteration_encoding": "group_based", + + "adios2": { + "engine": { + "type": "bp4" + }, + "dataset": { + "operators": [ + { + "parameters": { + "clevel": 5 + }, + "type": "zlib" + } + ] + } + }, + + "hdf5": { + "dataset": [ + { + "cfg": { + "chunks": "auto" + } + }, + { + "select": [ + "/data/1/particles/e/.*", + "/data/2/particles/e/.*" + ], + "cfg": { + "chunks": [ + 5 + ] + } + }, + { + "select": "particles/e/.*", + "CFG": { + "CHUNKS": [ + 10 + ] + } + } + ] + } +} +)"; +#endif // open file for writing Series series = From 90d3a64e03f2394100d44616bd3b3d3196f090d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Feb 2025 12:02:24 +0100 Subject: [PATCH 12/26] Use dataset-specific config in tests --- test/ParallelIOTest.cpp | 49 +++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 59d8bf8570..99cceae7cf 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -274,11 +274,23 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]") MPI_Comm_rank(MPI_COMM_WORLD, &mpi_r); auto mpi_size = static_cast(mpi_s); auto mpi_rank = static_cast(mpi_r); + std::string chunking_config = "[" + std::to_string(mpi_size) + "]"; + // clang-format off + std::string config = R"( + [hdf5] + independent_stores = false + + [[hdf5.dataset]] + select = "particles/.*/position/.*" + cfg = {chunks = [1]} + + [[hdf5.dataset]] + select = "particles/.*/positionOffset/x" + cfg = {chunks = )" + chunking_config + R"(} + )"; + // clang-format on Series o = Series( - "../samples/parallel_write.h5", - Access::CREATE, - MPI_COMM_WORLD, - "hdf5.independent_stores = false"); + "../samples/parallel_write.h5", Access::CREATE, MPI_COMM_WORLD, config); o.setAuthor("Parallel HDF5"); ParticleSpecies &e = o.iterations[1].particles["e"]; @@ -291,10 +303,8 @@ 
TEST_CASE("hdf5_write_test", "[parallel][hdf5]") std::shared_ptr position_local(new double); *position_local = position_global[mpi_rank]; - e["position"]["x"].resetDataset(Dataset( - determineDatatype(position_local), - {mpi_size}, - "hdf5.dataset.chunks = [1]")); + e["position"]["x"].resetDataset( + Dataset(determineDatatype(position_local), {mpi_size})); e["position"]["x"].storeChunk(position_local, {mpi_rank}, {1}); o.flush("hdf5.independent_stores = true"); @@ -308,10 +318,8 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]") std::shared_ptr positionOffset_local(new uint64_t); *positionOffset_local = positionOffset_global[mpi_rank]; - e["positionOffset"]["x"].resetDataset(Dataset( - determineDatatype(positionOffset_local), - {mpi_size}, - "hdf5.dataset.chunks = [" + std::to_string(mpi_size) + "]")); + e["positionOffset"]["x"].resetDataset( + Dataset(determineDatatype(positionOffset_local), {mpi_size})); e["positionOffset"]["x"].storeChunk(positionOffset_local, {mpi_rank}, {1}); // Test that chunking settings are not carried over to other datasets. 
@@ -870,8 +878,18 @@ void file_based_write_read(std::string const &file_ending) }); { + std::string chunking_config = "[" + std::to_string(global_Nx) + ", " + + std::to_string(local_Nz) + "]"; + // clang-format off + std::string out_config = R"( + [[hdf5.dataset]] + select = "meshes/E/.*" + cfg = {chunks = )" + chunking_config + R"(} +)"; + // clang-format on + // open a parallel series - Series series(name, Access::CREATE, MPI_COMM_WORLD); + Series series(name, Access::CREATE, MPI_COMM_WORLD, out_config); series.setIterationEncoding(IterationEncoding::fileBased); int const last_step = 100; @@ -908,10 +926,7 @@ void file_based_write_read(std::string const &file_ending) }); auto dataset = io::Dataset( - io::determineDatatype(), - {global_Nx, global_Nz}, - "hdf5.dataset.chunks = [" + std::to_string(global_Nx) + ", " + - std::to_string(local_Nz) + "]"); + io::determineDatatype(), {global_Nx, global_Nz}); E_x.resetDataset(dataset); Offset chunk_offset = {0, size_t(local_Nz) * mpi_rank}; From d7ceaba92e592a65ee6f1f1f56a038b5bedb9eef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Feb 2025 14:28:28 +0100 Subject: [PATCH 13/26] Fix: do_prune parameter for merge() --- include/openPMD/auxiliary/JSON_internal.hpp | 6 ++++-- src/IO/HDF5/HDF5IOHandler.cpp | 6 ++++-- src/IO/HDF5/ParallelHDF5IOHandler.cpp | 3 ++- src/IO/IOTask.cpp | 3 ++- src/auxiliary/JSON.cpp | 17 ++++++++++------- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/include/openPMD/auxiliary/JSON_internal.hpp b/include/openPMD/auxiliary/JSON_internal.hpp index 3b1bd69806..502074f835 100644 --- a/include/openPMD/auxiliary/JSON_internal.hpp +++ b/include/openPMD/auxiliary/JSON_internal.hpp @@ -276,8 +276,10 @@ namespace json * Like merge() as defined in JSON.hpp, but this overload works directly * on nlohmann::json values. 
*/ - nlohmann::json & - merge(nlohmann::json &defaultVal, nlohmann::json const &overwrite); + nlohmann::json &merge( + nlohmann::json &defaultVal, + nlohmann::json const &overwrite, + bool do_prune); nlohmann::json &filterByTemplate( nlohmann::json &defaultVal, nlohmann::json const &positiveMask); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index c74a942de2..9a75b74040 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -175,7 +175,8 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( json::filterByTemplate( m_global_flush_config, nlohmann::json::parse(flush_cfg_mask)); auto init_json_shadow = nlohmann::json::parse(init_json_shadow_str); - json::merge(m_config.getShadow(), init_json_shadow); + json::merge( + m_config.getShadow(), init_json_shadow, /* do_prune = */ false); } // unused params @@ -510,7 +511,8 @@ void HDF5IOHandlerImpl::createDataset( hdf5_config_it != parsed_config.config.end()) { auto copy = m_global_dataset_config; - json::merge(copy, hdf5_config_it.value()); + json::merge( + copy, hdf5_config_it.value(), /* do_prune = */ true); hdf5_config_it.value() = std::move(copy); } else diff --git a/src/IO/HDF5/ParallelHDF5IOHandler.cpp b/src/IO/HDF5/ParallelHDF5IOHandler.cpp index dd1816975a..457e77e1c9 100644 --- a/src/IO/HDF5/ParallelHDF5IOHandler.cpp +++ b/src/IO/HDF5/ParallelHDF5IOHandler.cpp @@ -72,7 +72,8 @@ ParallelHDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) hdf5_config_it != params.backendConfig.json().end()) { auto copied_global_cfg = m_impl->m_global_flush_config; - json::merge(copied_global_cfg, hdf5_config_it.value()); + json::merge( + copied_global_cfg, hdf5_config_it.value(), /* do_prune = */ true); hdf5_config_it.value() = std::move(copied_global_cfg); } else diff --git a/src/IO/IOTask.cpp b/src/IO/IOTask.cpp index 10062ee463..f7467fe558 100644 --- a/src/IO/IOTask.cpp +++ b/src/IO/IOTask.cpp @@ -84,7 +84,8 @@ json::ParsedConfig Parameter::compileJSONConfig( auto base_config = 
jsonMatcher.get(path); auto manual_config = json::parseOptions(options, /* considerFiles = */ false); - json::merge(base_config.config, manual_config.config); + json::merge( + base_config.config, manual_config.config, /* do_prune = */ true); return json::ParsedConfig{ std::move(base_config.config), (options.empty() || options == "{}") diff --git a/src/auxiliary/JSON.cpp b/src/auxiliary/JSON.cpp index c28a66d107..8c6fe92ad1 100644 --- a/src/auxiliary/JSON.cpp +++ b/src/auxiliary/JSON.cpp @@ -657,8 +657,8 @@ void warnGlobalUnusedOptions(TracingJSON const &config) } } -nlohmann::json & -merge(nlohmann::json &defaultVal, nlohmann::json const &overwrite) +nlohmann::json &merge( + nlohmann::json &defaultVal, nlohmann::json const &overwrite, bool do_prune) { if (defaultVal.is_object() && overwrite.is_object()) { @@ -666,15 +666,18 @@ merge(nlohmann::json &defaultVal, nlohmann::json const &overwrite) for (auto it = overwrite.begin(); it != overwrite.end(); ++it) { auto &valueInDefault = defaultVal[it.key()]; - merge(valueInDefault, it.value()); - if (valueInDefault.is_null()) + merge(valueInDefault, it.value(), do_prune); + if (do_prune && valueInDefault.is_null()) { prunedKeys.push(it.key()); } } - for (; !prunedKeys.empty(); prunedKeys.pop()) + if (do_prune) { - defaultVal.erase(prunedKeys.front()); + for (; !prunedKeys.empty(); prunedKeys.pop()) + { + defaultVal.erase(prunedKeys.front()); + } } } else @@ -709,7 +712,7 @@ std::string merge_impl( overwrite, std::forward(comm)..., /* considerFiles = */ true); - merge(res, second); + merge(res, second, /* do_prune = */ true); switch (returnFormat) { case SupportedLanguages::JSON: From 9121e0baaa6c61cdd6458eedae33cb3f8e58edc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Feb 2025 14:39:35 +0100 Subject: [PATCH 14/26] Rename merge() -> merge_internal() Having the same name as the public function provoked errors due to conversion from nlohmann::json types. 
--- include/openPMD/auxiliary/JSON_internal.hpp | 2 +- src/IO/HDF5/HDF5IOHandler.cpp | 4 ++-- src/IO/HDF5/ParallelHDF5IOHandler.cpp | 2 +- src/IO/IOTask.cpp | 2 +- src/auxiliary/JSON.cpp | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/openPMD/auxiliary/JSON_internal.hpp b/include/openPMD/auxiliary/JSON_internal.hpp index 502074f835..c608bd7f9f 100644 --- a/include/openPMD/auxiliary/JSON_internal.hpp +++ b/include/openPMD/auxiliary/JSON_internal.hpp @@ -276,7 +276,7 @@ namespace json * Like merge() as defined in JSON.hpp, but this overload works directly * on nlohmann::json values. */ - nlohmann::json &merge( + nlohmann::json &merge_internal( nlohmann::json &defaultVal, nlohmann::json const &overwrite, bool do_prune); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 9a75b74040..b1a2f7166c 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -175,7 +175,7 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( json::filterByTemplate( m_global_flush_config, nlohmann::json::parse(flush_cfg_mask)); auto init_json_shadow = nlohmann::json::parse(init_json_shadow_str); - json::merge( + json::merge_internal( m_config.getShadow(), init_json_shadow, /* do_prune = */ false); } @@ -511,7 +511,7 @@ void HDF5IOHandlerImpl::createDataset( hdf5_config_it != parsed_config.config.end()) { auto copy = m_global_dataset_config; - json::merge( + json::merge_internal( copy, hdf5_config_it.value(), /* do_prune = */ true); hdf5_config_it.value() = std::move(copy); } diff --git a/src/IO/HDF5/ParallelHDF5IOHandler.cpp b/src/IO/HDF5/ParallelHDF5IOHandler.cpp index 457e77e1c9..9f166b23cb 100644 --- a/src/IO/HDF5/ParallelHDF5IOHandler.cpp +++ b/src/IO/HDF5/ParallelHDF5IOHandler.cpp @@ -72,7 +72,7 @@ ParallelHDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) hdf5_config_it != params.backendConfig.json().end()) { auto copied_global_cfg = m_impl->m_global_flush_config; - json::merge( + json::merge_internal( 
copied_global_cfg, hdf5_config_it.value(), /* do_prune = */ true); hdf5_config_it.value() = std::move(copied_global_cfg); } diff --git a/src/IO/IOTask.cpp b/src/IO/IOTask.cpp index f7467fe558..e7a8f2d2bd 100644 --- a/src/IO/IOTask.cpp +++ b/src/IO/IOTask.cpp @@ -84,7 +84,7 @@ json::ParsedConfig Parameter::compileJSONConfig( auto base_config = jsonMatcher.get(path); auto manual_config = json::parseOptions(options, /* considerFiles = */ false); - json::merge( + json::merge_internal( base_config.config, manual_config.config, /* do_prune = */ true); return json::ParsedConfig{ std::move(base_config.config), diff --git a/src/auxiliary/JSON.cpp b/src/auxiliary/JSON.cpp index 8c6fe92ad1..84e7006cbb 100644 --- a/src/auxiliary/JSON.cpp +++ b/src/auxiliary/JSON.cpp @@ -657,7 +657,7 @@ void warnGlobalUnusedOptions(TracingJSON const &config) } } -nlohmann::json &merge( +nlohmann::json &merge_internal( nlohmann::json &defaultVal, nlohmann::json const &overwrite, bool do_prune) { if (defaultVal.is_object() && overwrite.is_object()) @@ -666,7 +666,7 @@ nlohmann::json &merge( for (auto it = overwrite.begin(); it != overwrite.end(); ++it) { auto &valueInDefault = defaultVal[it.key()]; - merge(valueInDefault, it.value(), do_prune); + merge_internal(valueInDefault, it.value(), do_prune); if (do_prune && valueInDefault.is_null()) { prunedKeys.push(it.key()); @@ -712,7 +712,7 @@ std::string merge_impl( overwrite, std::forward(comm)..., /* considerFiles = */ true); - merge(res, second, /* do_prune = */ true); + merge_internal(res, second, /* do_prune = */ true); switch (returnFormat) { case SupportedLanguages::JSON: From 33f8e30988ab9a525eb0a43e6fc842a11d7f223d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Feb 2025 16:03:38 +0100 Subject: [PATCH 15/26] Don't compute the matchers for all backends --- include/openPMD/IO/IOTask.hpp | 6 +- include/openPMD/auxiliary/JSONMatcher.hpp | 13 +++- src/IO/ADIOS/ADIOS2IOHandler.cpp | 6 +- src/IO/HDF5/HDF5IOHandler.cpp 
| 4 +- src/IO/IOTask.cpp | 6 +- src/IO/JSON/JSONIOHandlerImpl.cpp | 4 +- src/auxiliary/JSONMatcher.cpp | 81 ++++++++++++----------- 7 files changed, 67 insertions(+), 53 deletions(-) diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index 7fef9fcf81..3ba6e78371 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -378,8 +378,10 @@ struct OPENPMDAPI_EXPORT Parameter std::string const &warningMessage); template - TracingJSON - compileJSONConfig(Writable const *writable, json::JsonMatcher &) const; + TracingJSON compileJSONConfig( + Writable const *writable, + json::JsonMatcher &, + std::string const &backendName) const; }; template <> diff --git a/include/openPMD/auxiliary/JSONMatcher.hpp b/include/openPMD/auxiliary/JSONMatcher.hpp index 14b8724a79..6ae43d9b6c 100644 --- a/include/openPMD/auxiliary/JSONMatcher.hpp +++ b/include/openPMD/auxiliary/JSONMatcher.hpp @@ -88,7 +88,11 @@ class MatcherPerBackend class JsonMatcher { private: - std::vector m_perBackend; + // Only one backend matcher is initialized lazily upon calling + // JsonMatcher::get() + // Usually only one backend is active, so initializing all of them + // is not necessary. + MatcherPerBackend m_backendMatcher; TracingJSON m_entireConfig; auto init() -> void; @@ -119,13 +123,16 @@ class JsonMatcher * @param datasetPath The regex. * @return The matched JSON configuration, as a string. */ - auto get(std::string const &datasetPath) const -> ParsedConfig; + auto get(std::string const &datasetPath, std::string const &backendName) + -> ParsedConfig; /** * @brief Get the default JSON config. * * @return The default JSON configuration. 
*/ - auto getDefault() -> TracingJSON; + auto getDefault(std::string const &backendName) -> TracingJSON; + + auto initBackendLazily(std::string const &backendName) -> void; }; } // namespace openPMD::json diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 6d6f215e5b..ca3b2ac49e 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -116,7 +116,7 @@ ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( , m_userSpecifiedExtension{std::move(specifiedExtension)} { init( - handler->jsonMatcher->getDefault(), + handler->jsonMatcher->getDefault("adios2"), /* callbackWriteAttributesFromRank = */ [communicator, this](nlohmann::json const &attribute_writing_ranks) { int rank = 0; @@ -165,7 +165,7 @@ ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( , m_engineType(std::move(engineType)) , m_userSpecifiedExtension(std::move(specifiedExtension)) { - init(handler->jsonMatcher->getDefault(), [](auto const &...) {}); + init(handler->jsonMatcher->getDefault("adios2"), [](auto const &...) 
{}); } ADIOS2IOHandlerImpl::~ADIOS2IOHandlerImpl() @@ -786,7 +786,7 @@ void ADIOS2IOHandlerImpl::createDataset( std::vector operators; json::TracingJSON options = parameters.compileJSONConfig( - writable, *m_handler->jsonMatcher); + writable, *m_handler->jsonMatcher, "adios2"); if (options.json().contains("adios2")) { json::TracingJSON datasetConfig(options["adios2"]); diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index b1a2f7166c..1c8b20852e 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -142,7 +142,7 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl( m_H5T_LONG_DOUBLE_80_LE >= 0, "[HDF5] Internal error: Failed to create 128-bit complex long double"); - auto config = handler->jsonMatcher->getDefault(); + auto config = handler->jsonMatcher->getDefault("hdf5"); // JSON option can overwrite env option: if (config.json().contains("hdf5")) @@ -506,7 +506,7 @@ void HDF5IOHandlerImpl::createDataset( json::TracingJSON config = [&]() { auto parsed_config = parameters.compileJSONConfig( - writable, *m_handler->jsonMatcher); + writable, *m_handler->jsonMatcher, "hdf5"); if (auto hdf5_config_it = parsed_config.config.find("hdf5"); hdf5_config_it != parsed_config.config.end()) { diff --git a/src/IO/IOTask.cpp b/src/IO/IOTask.cpp index e7a8f2d2bd..ab2a36a2ec 100644 --- a/src/IO/IOTask.cpp +++ b/src/IO/IOTask.cpp @@ -77,11 +77,13 @@ void Parameter::warnUnusedParameters< template <> json::ParsedConfig Parameter::compileJSONConfig( - Writable const *writable, json::JsonMatcher &jsonMatcher) const + Writable const *writable, + json::JsonMatcher &jsonMatcher, + std::string const &backendName) const { auto attri = writable->attributable->asInternalCopyOf(); auto path = attri.myPath().openPMDPath(); - auto base_config = jsonMatcher.get(path); + auto base_config = jsonMatcher.get(path, backendName); auto manual_config = json::parseOptions(options, /* considerFiles = */ false); json::merge_internal( diff --git 
a/src/IO/JSON/JSONIOHandlerImpl.cpp b/src/IO/JSON/JSONIOHandlerImpl.cpp index 4ea5fe7eab..fb33d1cd29 100644 --- a/src/IO/JSON/JSONIOHandlerImpl.cpp +++ b/src/IO/JSON/JSONIOHandlerImpl.cpp @@ -393,7 +393,7 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { - init(handler->jsonMatcher->getDefault()); + init(handler->jsonMatcher->getDefault(backendConfigKey())); } #if openPMD_HAVE_MPI @@ -407,7 +407,7 @@ JSONIOHandlerImpl::JSONIOHandlerImpl( , m_fileFormat{format} , m_originalExtension{std::move(originalExtension)} { - init(handler->jsonMatcher->getDefault()); + init(handler->jsonMatcher->getDefault(backendConfigKey())); } #endif diff --git a/src/auxiliary/JSONMatcher.cpp b/src/auxiliary/JSONMatcher.cpp index cabc269e61..689fbc8333 100644 --- a/src/auxiliary/JSONMatcher.cpp +++ b/src/auxiliary/JSONMatcher.cpp @@ -57,6 +57,7 @@ void MatcherPerBackend::init(TracingJSON tracing_config) } else if (config.is_array()) { + m_patterns.reserve(config.size()); std::optional defaultConfig; // enhanced PIConGPU-defined layout for (size_t i = 0; i < config.size(); ++i) @@ -107,33 +108,6 @@ auto JsonMatcher::init() -> void throw error::BackendConfigSchema( {}, "Expected an object for the JSON configuration."); } - m_perBackend.reserve(backendKeys.size()); - for (auto it = m_entireConfig.json().begin(); - it != m_entireConfig.json().end(); - ++it) - { - std::string const &backendName = it.key(); - if (std::find(backendKeys.begin(), backendKeys.end(), backendName) == - backendKeys.end()) - { - // The key does not point to the configuration of a backend - // recognized by PIConGPU Ignore it. 
- continue; - } - if (!it.value().is_object()) - { - throw error::BackendConfigSchema( - {it.key()}, - "Each backend's configuration must be a JSON object (config " - "for backend " + - backendName + ")."); - } - if (it.value().contains("dataset")) - { - m_perBackend.emplace_back( - backendName, m_entireConfig[it.key()]["dataset"]); - } - } } MatcherPerBackend::MatcherPerBackend() = default; @@ -153,31 +127,60 @@ JsonMatcher::JsonMatcher(TracingJSON entireConfig) init(); } -auto JsonMatcher::get(std::string const &datasetPath) const -> ParsedConfig +auto JsonMatcher::get( + std::string const &datasetPath, std::string const &backendName) + -> ParsedConfig { + initBackendLazily(backendName); + nlohmann::json result = nlohmann::json::object(); - for (auto const &backend : m_perBackend) + // might not have been initialized due to unspecified configuration + if (m_backendMatcher.backendName == backendName) { - auto const &datasetConfig = backend.get(datasetPath); - if (datasetConfig.empty()) + auto const &datasetConfig = m_backendMatcher.get(datasetPath); + if (!datasetConfig.empty()) { - // ensure that there actually is an object to erase this from - result[backend.backendName]["dataset"] = {}; - result[backend.backendName].erase("dataset"); - } - else - { - result[backend.backendName]["dataset"] = datasetConfig; + result[backendName]["dataset"] = datasetConfig; } } + return {result, m_entireConfig.originallySpecifiedAs}; } -auto JsonMatcher::getDefault() -> TracingJSON +auto JsonMatcher::getDefault(std::string const &backendName) -> TracingJSON { + initBackendLazily(backendName); return m_entireConfig; } +auto JsonMatcher::initBackendLazily(std::string const &backendName) -> void +{ + if (m_backendMatcher.backendName == backendName) + { + // already initialized + return; + } + if (!m_entireConfig.json().contains(backendName)) + { + return; + } + auto const &backendConfig = m_entireConfig.json({backendName}); + if (!backendConfig.is_object()) + { + throw 
error::BackendConfigSchema( + {backendName}, + "Each backend's configuration must be a JSON object (config " + "for backend " + + backendName + ")."); + } + else if (!backendConfig.contains("dataset")) + { + return; + } + m_backendMatcher = + MatcherPerBackend(backendName, m_entireConfig[backendName]["dataset"]); +} + namespace { auto readPattern( From 067b1a65fd12127e20768b827fed0bba3be1a3eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Feb 2025 17:19:08 +0100 Subject: [PATCH 16/26] Add default block to test configs --- test/ParallelIOTest.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index 99cceae7cf..9b44a25e9b 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -280,6 +280,10 @@ TEST_CASE("hdf5_write_test", "[parallel][hdf5]") [hdf5] independent_stores = false + # default config + [[hdf5.dataset]] + cfg = {chunks = "none"} + [[hdf5.dataset]] select = "particles/.*/position/.*" cfg = {chunks = [1]} @@ -882,6 +886,8 @@ void file_based_write_read(std::string const &file_ending) std::to_string(local_Nz) + "]"; // clang-format off std::string out_config = R"( + [[hdf5.dataset]] + cfg = {chunks = "auto"} [[hdf5.dataset]] select = "meshes/E/.*" cfg = {chunks = )" + chunking_config + R"(} From 2dce16538c7854cc4905630b1786ccacc3a057da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Feb 2025 18:23:23 +0100 Subject: [PATCH 17/26] Documentation --- docs/source/details/backendconfig.rst | 68 +++++++++++++++++++ .../details/openpmd_extended_config.json | 62 +++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 docs/source/details/openpmd_extended_config.json diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index ad0dccf623..8563130156 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -287,3 +287,71 @@ Explanation of the single 
keys: In "template" mode, only the dataset metadata (type, extent and attributes) are stored and no chunks can be written or read (i.e. write/read operations will be skipped). * ``json.attribute.mode`` / ``toml.attribute.mode``: One of ``"long"`` (default in openPMD 1.*) or ``"short"`` (default in openPMD 2.* and generally in TOML). The long format explicitly encodes the attribute type in the dataset on disk, the short format only writes the actual attribute as a JSON/TOML value, requiring readers to recover the type. + +Dataset-specific configuration +------------------------------ + +Some configuration options should be applicable on a per-dataset basis. +All dataset-specific configuration options supported by the openPMD-api are additionally backend-specific, being format-specific serialization instructions such as compression or chunking. +Dataset-specific configuration is hence specified under the key path ``<backend>.dataset``, e.g.: + +.. code-block:: json + + { + "adios2": { + "dataset": { + "operators": [] + } + }, + "hdf5": { + "dataset": { + "chunks": "auto" + } + } + } + +Dataset-specific configuration options can be configured in multiple ways: + +As part of the general JSON/TOML configuration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the simplest case, the dataset configuration is specified without any extra steps as part of the JSON/TOML configuration that is used to initialize the openPMD Series as part of the ``Series`` constructor. This does not allow specifying different configurations per dataset, but sets the default configuration for all datasets. + +As a separate JSON/TOML configuration during dataset initialization +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Similarly to the ``Series`` constructor, the ``Dataset`` constructor optionally receives a JSON/TOML configuration, used for setting options specifically only for those datasets initialized with this ``Dataset`` specification.
The default given in the ``Series`` constructor will be overridden. + +By pattern-matching the dataset names +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The above approach has the disadvantage that it has to be supported explicitly at the level of the downstream application, e.g. a simulation or data reader. As an alternative, the dataset configuration under ``<backend>.dataset`` can also be given as a list of alternatives that are matched against the dataset name in sequence, e.g. ``hdf5.dataset = [<pattern_1>, <pattern_2>, ...]``. + +Each such pattern ``<pattern_i>`` is a JSON object with key ``cfg`` and optional key ``select``: ``{"select": <regex>, "cfg": <cfg>}``. + +In here, ``<regex>`` is a regex or a list of regexes, as of type egrep as defined by the `C++ standard library `__. +``<cfg>`` is a configuration that will be forwarded as-is to openPMD. + +.. note:: + + To match lists of regular expressions ``select = [PATTERN_1, PATTERN_2, ..., PATTERN_n]``, the list is internally transformed into a single regular expression ``($^)|(PATTERN_1)|(PATTERN_2)|...|(PATTERN_n)``. + +The single patterns will be processed in top-down manner, selecting the first matching pattern found in the list. +The regexes will be matched against the openPMD dataset path either within the Iteration (e.g. ``meshes/E/x`` or ``particles/.*/position/.*``) or within the Series (e.g. ``/data/1/meshes/E/x`` or ``/data/.*/particles/.*/position/.*``), considering full matches only. + +.. note:: + + The dataset name is determined by the result of ``attributable.myPath().openPMDPath()`` where ``attributable`` is an object in the openPMD hierarchy. + +.. note:: + + To match against the path within the containing Iteration or within the containing Series, the specified regular expression is internally transformed into ``(/data/[0-9]+/)?(PATTERN)`` where ``PATTERN`` is the specified pattern, and then matched against the full dataset path. + +The **default configuration** is specified by omitting the ``select`` key.
+Specifying more than one default is an error. +If no pattern matches a dataset, the default configuration is chosen if specified, or an empty JSON object ``{}`` otherwise. + +A full example: + +.. literalinclude:: openpmd_extended_config.json + :language: json diff --git a/docs/source/details/openpmd_extended_config.json b/docs/source/details/openpmd_extended_config.json new file mode 100644 index 0000000000..b4aec29b93 --- /dev/null +++ b/docs/source/details/openpmd_extended_config.json @@ -0,0 +1,62 @@ +{ + "adios2": { + "engine": { + "parameters": { + "Profile": "On" + } + }, + "dataset": [ + { + "cfg": { + "operators": [ + { + "type": "blosc", + "parameters": { + "clevel": "1", + "doshuffle": "BLOSC_BITSHUFFLE" + } + } + ] + } + }, + { + "select": [ + ".*positionOffset.*", + ".*particlePatches.*" + ], + "cfg": { + "operators": [] + } + } + ] + }, + "hdf5": { + "independent_stores": false, + "dataset": [ + { + "cfg": { + "chunks": "auto" + } + }, + { + "select": [ + "/data/1/particles/e/.*", + "/data/2/particles/e/.*" + ], + "cfg": { + "chunks": [ + 5 + ] + } + }, + { + "select": "particles/e/.*", + "cfg": { + "chunks": [ + 10 + ] + } + } + ] + } +} From 59206bb475edaa35cc5fffee4a97a208deae7a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Feb 2025 18:36:46 +0100 Subject: [PATCH 18/26] Add TOML example --- docs/source/details/backendconfig.rst | 3 ++ .../details/openpmd_extended_config.toml | 44 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 docs/source/details/openpmd_extended_config.toml diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index 8563130156..aceac6fd3e 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -355,3 +355,6 @@ A full example: .. literalinclude:: openpmd_extended_config.json :language: json + +.. 
literalinclude:: openpmd_extended_config.toml + :language: toml diff --git a/docs/source/details/openpmd_extended_config.toml b/docs/source/details/openpmd_extended_config.toml new file mode 100644 index 0000000000..5bffc4ae63 --- /dev/null +++ b/docs/source/details/openpmd_extended_config.toml @@ -0,0 +1,44 @@ + +# ADIOS2 config + +[adios2.engine.parameters] +Profile = "On" + +# default configuration +[[adios2.dataset]] +# nested list as ADIOS2 can add multiple operators to a single dataset +[[adios2.dataset.cfg.operators]] +type = "blosc" +parameters.doshuffle = "BLOSC_BITSHUFFLE" +parameters.clevel = "1" + +# dataset-specific configuration to exclude some datasets +# from applying operators. +[[adios2.dataset]] +select = [".*positionOffset.*", ".*particlePatches.*"] +cfg.operators = [] + +# Now HDF5 + +[hdf5] +independent_stores = false + +# default configuration +# The position of the default configuration does not matter, but there must +# be only one single default configuration. +[[hdf5.dataset]] +cfg.chunks = "auto" + +# Dataset-specific configuration that specifies full paths, +# i.e. including the path to the Iteration. +# The non-default configurations are matched in top-down order, +# so the order is relevant. 
+[[hdf5.dataset]] +select = ["/data/1/particles/e/.*", "/data/2/particles/e/.*"] +cfg.chunks = [5] + +# dataset-specific configuration that specifies only the path +# within the Iteration +[[hdf5.dataset]] +select = "particles/e/.*" +cfg.chunks = [10] From 56336f37d538ae7ae1ec6d48a66a8a422b383462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Mon, 10 Feb 2025 19:11:53 +0100 Subject: [PATCH 19/26] Add Python binding for openPMD_path --- src/binding/python/Attributable.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/binding/python/Attributable.cpp b/src/binding/python/Attributable.cpp index 206e3741aa..59131704c0 100644 --- a/src/binding/python/Attributable.cpp +++ b/src/binding/python/Attributable.cpp @@ -502,7 +502,9 @@ void init_Attributable(py::module &m) "series_extension", &Attributable::MyPath::seriesExtension) .def_readonly("group", &Attributable::MyPath::group) .def_readonly("access", &Attributable::MyPath::access) - .def_property_readonly("file_path", &Attributable::MyPath::filePath); + .def_property_readonly("file_path", &Attributable::MyPath::filePath) + .def_property_readonly( + "openPMD_path", &Attributable::MyPath::openPMDPath); py::class_(m, "Attributable") .def(py::init()) From 46f480ab8324d6250f8d5b1b9ea0386aba63d517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 11 Feb 2025 10:22:31 +0100 Subject: [PATCH 20/26] Fix doxygen --- include/openPMD/auxiliary/JSONMatcher.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/openPMD/auxiliary/JSONMatcher.hpp b/include/openPMD/auxiliary/JSONMatcher.hpp index 6ae43d9b6c..e89ce19dfd 100644 --- a/include/openPMD/auxiliary/JSONMatcher.hpp +++ b/include/openPMD/auxiliary/JSONMatcher.hpp @@ -121,6 +121,7 @@ class JsonMatcher * @brief Get the JSON config associated with a regex pattern. * * @param datasetPath The regex. + * @param backendName The backend name for which to resolve the pattern. 
* @return The matched JSON configuration, as a string. */ auto get(std::string const &datasetPath, std::string const &backendName) @@ -128,6 +129,7 @@ class JsonMatcher /** * @brief Get the default JSON config. + * @param backendName The backend name for which to resolve the pattern. * * @return The default JSON configuration. */ From 3877f2f6f76d16a43b99901b03f721e32c16bb5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 11 Feb 2025 12:43:58 +0100 Subject: [PATCH 21/26] Read dataset-specific configuration also in ADIOS2::openDataset --- include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp | 8 ++- include/openPMD/IO/IOTask.hpp | 30 +++++---- include/openPMD/backend/Writable.hpp | 1 + src/IO/ADIOS/ADIOS2IOHandler.cpp | 71 +++++++++++++------- src/IO/IOTask.cpp | 57 ++++++++++++---- 5 files changed, 117 insertions(+), 50 deletions(-) diff --git a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index 1ef5fb8725..f5b4ebd790 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -343,6 +343,10 @@ class ADIOS2IOHandlerImpl // use m_config std::optional> getOperators(); + template + std::vector getDatasetOperators( + Parameter const &, Writable *, std::string const &varName); + std::string fileSuffix(bool verbose = true) const; /* @@ -585,7 +589,9 @@ namespace detail InvalidatableFile const &, std::string const &varName, Parameter &parameters, - std::optional stepSelection); + std::optional stepSelection, + std::vector const + &operators); static constexpr char const *errorMsg = "ADIOS2: openDataset()"; }; diff --git a/include/openPMD/IO/IOTask.hpp b/include/openPMD/IO/IOTask.hpp index 3ba6e78371..e637df2ede 100644 --- a/include/openPMD/IO/IOTask.hpp +++ b/include/openPMD/IO/IOTask.hpp @@ -104,6 +104,18 @@ struct OPENPMDAPI_EXPORT AbstractParameter virtual std::unique_ptr to_heap() && = 0; + /** Warn about unused JSON paramters + * + * Template parameter so
we don't have to include the JSON lib here. + * This function is useful for the createDataset() methods in, + * IOHandlerImpl's, so putting that here is the simplest way to make it + * available for them. */ + template + static void warnUnusedParameters( + TracingJSON &, + std::string const &currentBackendName, + std::string const &warningMessage); + protected: // avoid object slicing // by allow only child classes to use these things for defining their own @@ -365,18 +377,6 @@ struct OPENPMDAPI_EXPORT Parameter std::string options = "{}"; std::optional joinedDimension; - /** Warn about unused JSON paramters - * - * Template parameter so we don't have to include the JSON lib here. - * This function is useful for the createDataset() methods in, - * IOHandlerImpl's, so putting that here is the simplest way to make it - * available for them. */ - template - static void warnUnusedParameters( - TracingJSON &, - std::string const &currentBackendName, - std::string const &warningMessage); - template TracingJSON compileJSONConfig( Writable const *writable, @@ -419,6 +419,12 @@ struct OPENPMDAPI_EXPORT Parameter new Parameter(std::move(*this))); } + template + static TracingJSON compileJSONConfig( + Writable const *writable, + json::JsonMatcher &, + std::string const &backendName); + std::string name = ""; std::shared_ptr dtype = std::make_shared(); std::shared_ptr extent = std::make_shared(); diff --git a/include/openPMD/backend/Writable.hpp b/include/openPMD/backend/Writable.hpp index 73d5ba826f..bfb9c67e03 100644 --- a/include/openPMD/backend/Writable.hpp +++ b/include/openPMD/backend/Writable.hpp @@ -104,6 +104,7 @@ class Writable final friend class Span; friend void debug::printDirty(Series const &); friend struct Parameter; + friend struct Parameter; private: Writable(internal::AttributableData *); diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index ca3b2ac49e..c649ec2953 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++
b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -374,6 +374,35 @@ ADIOS2IOHandlerImpl::getOperators() return getOperators(m_config); } +template +auto ADIOS2IOHandlerImpl::getDatasetOperators( + Parameter const &parameters, Writable *writable, std::string const &varName) + -> std::vector +{ + std::vector operators; + json::TracingJSON options = + parameters.template compileJSONConfig( + writable, *m_handler->jsonMatcher, "adios2"); + if (options.json().contains("adios2")) + { + json::TracingJSON datasetConfig(options["adios2"]); + auto datasetOperators = getOperators(datasetConfig); + + operators = datasetOperators ? std::move(datasetOperators.value()) + : defaultOperators; + } + else + { + operators = defaultOperators; + } + parameters.warnUnusedParameters( + options, + "adios2", + "Warning: parts of the backend configuration for ADIOS2 dataset '" + + varName + "' remain unused:\n"); + return operators; +} + using AcceptedEndingsForEngine = std::map; std::string ADIOS2IOHandlerImpl::fileSuffix(bool verbose) const @@ -783,27 +812,8 @@ void ADIOS2IOHandlerImpl::createDataset( filePos->gd = GroupOrDataset::DATASET; auto const varName = nameOfVariable(writable); - std::vector operators; - json::TracingJSON options = - parameters.compileJSONConfig( - writable, *m_handler->jsonMatcher, "adios2"); - if (options.json().contains("adios2")) - { - json::TracingJSON datasetConfig(options["adios2"]); - auto datasetOperators = getOperators(datasetConfig); - - operators = datasetOperators ?
std::move(datasetOperators.value()) - : defaultOperators; - } - else - { - operators = defaultOperators; - } - parameters.warnUnusedParameters( - options, - "adios2", - "Warning: parts of the backend configuration for ADIOS2 dataset '" + - varName + "' remain unused:\n"); + std::vector operators = + getDatasetOperators(parameters, writable, varName); // cast from openPMD::Extent to adios2::Dims adios2::Dims shape(parameters.extent.begin(), parameters.extent.end()); @@ -1008,13 +1018,24 @@ void ADIOS2IOHandlerImpl::openDataset( auto &fileData = getFileData(file, IfFileNotOpen::ThrowError); *parameters.dtype = detail::fromADIOS2Type(fileData.m_IO.VariableType(varName)); + + /* + * Technically, the only reason to set read-time operators is for specifying + * decompression threads. This needs not happen at a per-dataset level. + * However, users may apply the same JSON/TOML config for writing and + * reading, so the dataset-specific configuration should still be explored + * here. + */ + std::vector operators = + getDatasetOperators(parameters, writable, varName); switchAdios2VariableType( *parameters.dtype, this, file, varName, parameters, - fileData.stepSelection()); + fileData.stepSelection(), + operators); writable->written = true; } @@ -2181,7 +2202,9 @@ namespace detail InvalidatableFile const &file, const std::string &varName, Parameter &parameters, - std::optional stepSelection) + std::optional stepSelection, + std::vector const + &operators) { auto &fileData = impl->getFileData( file, ADIOS2IOHandlerImpl::IfFileNotOpen::ThrowError); @@ -2224,7 +2247,7 @@ ERROR: Variable ')"[1] + varName + } // Operators in reading needed e.g.
for setting decompression threads - for (auto const &operation : impl->defaultOperators) + for (auto const &operation : operators) { if (operation.op) { diff --git a/src/IO/IOTask.cpp b/src/IO/IOTask.cpp index ab2a36a2ec..50fe180a40 100644 --- a/src/IO/IOTask.cpp +++ b/src/IO/IOTask.cpp @@ -24,6 +24,7 @@ #include "openPMD/backend/Attributable.hpp" #include // std::cerr +#include namespace openPMD { @@ -33,8 +34,7 @@ Writable *getWritable(Attributable *a) } template <> -void Parameter::warnUnusedParameters< - json::TracingJSON>( +void AbstractParameter::warnUnusedParameters( json::TracingJSON &config, std::string const &currentBackendName, std::string const &warningMessage) @@ -75,6 +75,24 @@ void Parameter::warnUnusedParameters< } } +namespace +{ + template + json::ParsedConfig doCompileJSONConfig( + Attributable const &attri, + json::JsonMatcher &jsonMatcher, + std::string const &backendName, + Functor &&transformResult) + { + auto path = attri.myPath().openPMDPath(); + auto base_config = jsonMatcher.get(path, backendName); + json::ParsedConfig res{ + std::move(base_config.config), base_config.originallySpecifiedAs}; + std::forward(transformResult)(res); + return res; + } +} // namespace + template <> json::ParsedConfig Parameter::compileJSONConfig( Writable const *writable, @@ -82,17 +100,30 @@ json::ParsedConfig Parameter::compileJSONConfig( std::string const &backendName) const { auto attri = writable->attributable->asInternalCopyOf(); - auto path = attri.myPath().openPMDPath(); - auto base_config = jsonMatcher.get(path, backendName); - auto manual_config = - json::parseOptions(options, /* considerFiles = */ false); - json::merge_internal( - base_config.config, manual_config.config, /* do_prune = */ true); - return json::ParsedConfig{ - std::move(base_config.config), - (options.empty() || options == "{}") - ?
base_config.originallySpecifiedAs - : manual_config.originallySpecifiedAs}; + return doCompileJSONConfig( + attri, jsonMatcher, backendName, [&](json::ParsedConfig &base_config) { + auto manual_config = + json::parseOptions(options, /* considerFiles = */ false); + json::merge_internal( + base_config.config, + manual_config.config, + /* do_prune = */ true); + base_config.originallySpecifiedAs = + (options.empty() || options == "{}") + ? base_config.originallySpecifiedAs + : manual_config.originallySpecifiedAs; + }); +} + +template <> +json::ParsedConfig Parameter::compileJSONConfig( + Writable const *writable, + json::JsonMatcher &jsonMatcher, + std::string const &backendName) +{ + auto attri = writable->attributable->asInternalCopyOf(); + return doCompileJSONConfig( + attri, jsonMatcher, backendName, [](auto const &) {}); } namespace internal From 63a3dad80414b3d94cc2a5aab288ef7358f5c58d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 11 Feb 2025 14:08:35 +0100 Subject: [PATCH 22/26] Cleanup --- docs/source/details/backendconfig.rst | 28 +++++++++++---------- examples/13_write_dynamic_configuration.cpp | 3 ++- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index aceac6fd3e..b5c90a7b6a 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -292,8 +292,8 @@ Dataset-specific configuration ------------------------------ Some configuration options should be applicable on a per-dataset basis. -All dataset-specific configuration options supported by the openPMD-api are additionally backend-specific, being format-specific serialization instructions such as compression or chunking. 
-Dataset-specific configuration is hence specified under the key path ``.dataset``, e.g.: +Most dataset-specific configuration options supported by the openPMD-api are additionally backend-specific, being format-specific serialization instructions such as compression or chunking. +Such dataset-specific and backend-specific configuration is hence specified under the key path ``.dataset``, e.g.: .. code-block:: json @@ -322,22 +322,24 @@ As a separate JSON/TOML configuration during dataset initialization Similarly to the ``Series`` constructor, the ``Dataset`` constructor optionally receives a JSON/TOML configuration, used for setting options specifically only for those datasets initialized with this ``Dataset`` specification. The default given in the ``Series`` constructor will be overridden. +This is the preferred way for configuring dataset-specific options that are *not* backend-specific (currently only ``{"resizable": true}``). + By pattern-matching the dataset names ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The above approach has the disadvantage that it has to be supported explicitly at the level of the downstream application, e.g. a simulation or data reader. As an alternative, the the dataset configuration under ``.dataset`` can also be given as a list of alternatives that are matched against the dataset name in sequence, e.g. ``hdf5.dataset = [, , ...]``. +The above approach has the disadvantage that it has to be supported explicitly at the level of the downstream application, e.g. a simulation or data reader. As an alternative, the backend-specific dataset configuration under ``.dataset`` can also be given as a list of alternatives that are matched against the dataset name in sequence, e.g. ``hdf5.dataset = [, , ...]``. -Each such pattern ```` is a JSON object with key ``cfg`` and optional key ``select``: ``{"select": , "cfg": }``. +Each such pattern ```` is a JSON object with key ``cfg`` and optional key ``select``: ``{"select": , "cfg": }``.
-In here, ```` is a regex or a list of regexes, as of type egrep as defined by the `C++ standard library `__. -```` is a configuration that will be forwarded as-is to openPMD. +In here, ```` is a regex or a list of regexes, of type egrep as defined by the `C++ standard library `__. +```` is a configuration that will be forwarded as a "regular" dataset configuration to the backend. .. note:: - To match lists of regular expressions ``select = [PATTERN_1, PATTERN_2, ..., PATTERN_n]``, the list is internally transformed into a single regular expression ``($^)|(PATTERN_1)|(PATTERN_2)|...|(PATTERN_n)``. + To match lists of regular expressions ``select = [REGEX_1, REGEX_2, ..., REGEX_n]``, the list is internally transformed into a single regular expression ``($^)|(REGEX_1)|(REGEX_2)|...|(REGEX_n)``. -The single patterns will be processed in top-down manner, selecting the first matching pattern found in the list. -The regexes will be matched against the openPMD dataset path either within the Iteration (e.g. ``meshes/E/x`` or ``particles/.*/position/.*``) or within the Series (e.g. ``/data/1/meshes/E/x`` or ``/data/.*/particles/.*/position/.*``), considering full matches only. +In a configuration such as ``hdf5.dataset = [, , ...]``, the single patterns will be processed in top-down manner, selecting the first matching pattern found in the list. +The specified regexes will be matched against the openPMD dataset path either within the Iteration (e.g. ``meshes/E/x`` or ``particles/.*/position/.*``) or within the Series (e.g. ``/data/1/meshes/E/x`` or ``/data/.*/particles/.*/position/.*``), considering full matches only. .. note:: @@ -345,7 +347,7 @@ The regexes will be matched against the openPMD dataset path either within the I .. 
note:: - To match against the path within the containing Iteration or within the containing Series, the specified regular expression is internally transformed into ``(/data/[0-9]+/)?(PATTERN)`` where ``PATTERN`` is the specified pattern, and then matched against the full dataset path. + To match against the path within the containing Iteration or within the containing Series, the specified regular expression is internally transformed into ``(/data/[0-9]+/)?(REGEX)`` where ``REGEX`` is the specified pattern, and then matched against the full dataset path. The **default configuration** is specified by omitting the ``select`` key. Specifying more than one default is an error. @@ -353,8 +355,8 @@ If no pattern matches a dataset, the default configuration is chosen if specifie A full example: -.. literalinclude:: openpmd_extended_config.json - :language: json - .. literalinclude:: openpmd_extended_config.toml :language: toml + +.. literalinclude:: openpmd_extended_config.json + :language: json diff --git a/examples/13_write_dynamic_configuration.cpp b/examples/13_write_dynamic_configuration.cpp index 653d6f1160..54b9ad0e8c 100644 --- a/examples/13_write_dynamic_configuration.cpp +++ b/examples/13_write_dynamic_configuration.cpp @@ -186,7 +186,8 @@ CFG.CHUNKS = [10] /* * We want different compression settings for this dataset, so we pass - * a dataset-specific configuration. + * a dataset-specific configuration. This will override any definition + * specified above. * Also showcase how to define an resizable dataset. * This time in JSON. 
*/ From 458b4c5cdcebde669def51703c3f523cb92617f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 11 Feb 2025 16:26:04 +0100 Subject: [PATCH 23/26] Fix initialization from Dummy IO Handler --- include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp | 2 + include/openPMD/IO/AbstractIOHandler.hpp | 15 ++++++- .../openPMD/IO/AbstractIOHandlerHelper.hpp | 3 ++ include/openPMD/IO/HDF5/HDF5IOHandler.hpp | 6 ++- .../openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp | 11 ++++- include/openPMD/IO/JSON/JSONIOHandler.hpp | 2 + src/IO/ADIOS/ADIOS2IOHandler.cpp | 24 ++++++++-- src/IO/AbstractIOHandler.cpp | 44 ++++++++++++++----- src/IO/AbstractIOHandlerHelper.cpp | 31 ++++++++++++- src/IO/DummyIOHandler.cpp | 2 + src/IO/HDF5/HDF5IOHandler.cpp | 16 +++++-- src/IO/HDF5/ParallelHDF5IOHandler.cpp | 33 +++++++++++--- src/IO/JSON/JSONIOHandler.cpp | 6 ++- src/Series.cpp | 30 ++++++------- test/AuxiliaryTest.cpp | 12 +++-- 15 files changed, 182 insertions(+), 55 deletions(-) diff --git a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index f5b4ebd790..9dee72e02e 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -862,6 +862,7 @@ class ADIOS2IOHandler : public AbstractIOHandler #if openPMD_HAVE_MPI ADIOS2IOHandler( + std::optional> initialize_from, std::string path, Access, MPI_Comm, @@ -872,6 +873,7 @@ class ADIOS2IOHandler : public AbstractIOHandler #endif ADIOS2IOHandler( + std::optional> initialize_from, std::string path, Access, json::TracingJSON options, diff --git a/include/openPMD/IO/AbstractIOHandler.hpp b/include/openPMD/IO/AbstractIOHandler.hpp index 52c16a0b9a..29b3de8bff 100644 --- a/include/openPMD/IO/AbstractIOHandler.hpp +++ b/include/openPMD/IO/AbstractIOHandler.hpp @@ -223,11 +223,22 @@ class AbstractIOHandler #if openPMD_HAVE_MPI template AbstractIOHandler( - std::string path, Access at, TracingJSON &&jsonConfig, MPI_Comm); + std::optional> 
initialize_from, + std::string path, + Access at, + TracingJSON &&jsonConfig, + MPI_Comm); #endif template - AbstractIOHandler(std::string path, Access at, TracingJSON &&jsonConfig); + AbstractIOHandler( + std::optional> initialize_from, + std::string path, + Access at, + TracingJSON &&jsonConfig); + + AbstractIOHandler(std::optional>); + virtual ~AbstractIOHandler(); AbstractIOHandler(AbstractIOHandler const &) = delete; diff --git a/include/openPMD/IO/AbstractIOHandlerHelper.hpp b/include/openPMD/IO/AbstractIOHandlerHelper.hpp index a5ce7a39be..9314b2b61a 100644 --- a/include/openPMD/IO/AbstractIOHandlerHelper.hpp +++ b/include/openPMD/IO/AbstractIOHandlerHelper.hpp @@ -47,6 +47,7 @@ namespace openPMD */ template std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, @@ -74,6 +75,7 @@ std::unique_ptr createIOHandler( */ template std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, @@ -83,6 +85,7 @@ std::unique_ptr createIOHandler( // version without configuration to use in AuxiliaryTest std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, diff --git a/include/openPMD/IO/HDF5/HDF5IOHandler.hpp b/include/openPMD/IO/HDF5/HDF5IOHandler.hpp index e81996b389..e661edbc2d 100644 --- a/include/openPMD/IO/HDF5/HDF5IOHandler.hpp +++ b/include/openPMD/IO/HDF5/HDF5IOHandler.hpp @@ -34,7 +34,11 @@ class HDF5IOHandlerImpl; class HDF5IOHandler : public AbstractIOHandler { public: - HDF5IOHandler(std::string path, Access, json::TracingJSON config); + HDF5IOHandler( + std::optional> initialize_from, + std::string path, + Access, + json::TracingJSON config); ~HDF5IOHandler() override; std::string backendName() const override diff --git a/include/openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp b/include/openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp index cd951be5d2..66518b5d0c 100644 --- 
a/include/openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp +++ b/include/openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp @@ -37,10 +37,17 @@ class ParallelHDF5IOHandler : public AbstractIOHandler public: #if openPMD_HAVE_MPI ParallelHDF5IOHandler( - std::string path, Access, MPI_Comm, json::TracingJSON config); + std::optional> initialize_from, + std::string path, + Access, + MPI_Comm, + json::TracingJSON config); #else ParallelHDF5IOHandler( - std::string const &path, Access, json::TracingJSON config); + std::optional> initialize_from, + std::string const &path, + Access, + json::TracingJSON config); #endif ~ParallelHDF5IOHandler() override; diff --git a/include/openPMD/IO/JSON/JSONIOHandler.hpp b/include/openPMD/IO/JSON/JSONIOHandler.hpp index e22fdb93d1..e3f9b24bac 100644 --- a/include/openPMD/IO/JSON/JSONIOHandler.hpp +++ b/include/openPMD/IO/JSON/JSONIOHandler.hpp @@ -35,6 +35,7 @@ class JSONIOHandler : public AbstractIOHandler { public: JSONIOHandler( + std::optional> initialize_from, std::string path, Access at, openPMD::json::TracingJSON config, @@ -42,6 +43,7 @@ class JSONIOHandler : public AbstractIOHandler std::string originalExtension); #if openPMD_HAVE_MPI JSONIOHandler( + std::optional> initialize_from, std::string path, Access at, MPI_Comm, diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index c649ec2953..d90fe7c8c7 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -2372,25 +2372,33 @@ ERROR: Variable ')"[1] + varName + #if openPMD_HAVE_MPI ADIOS2IOHandler::ADIOS2IOHandler( + std::optional> initialize_from, std::string path, openPMD::Access at, MPI_Comm comm, json::TracingJSON options, std::string engineType, std::string specifiedExtension) - : AbstractIOHandler(std::move(path), at, std::move(options), comm) + : AbstractIOHandler( + std::move(initialize_from), + std::move(path), + at, + std::move(options), + comm) , m_impl{this, comm, std::move(engineType), std::move(specifiedExtension)} {} 
#endif ADIOS2IOHandler::ADIOS2IOHandler( + std::optional> initialize_from, std::string path, Access at, json::TracingJSON options, std::string engineType, std::string specifiedExtension) - : AbstractIOHandler(std::move(path), at, std::move(options)) + : AbstractIOHandler( + std::move(initialize_from), std::move(path), at, std::move(options)) , m_impl{this, std::move(engineType), std::move(specifiedExtension)} {} @@ -2404,6 +2412,7 @@ ADIOS2IOHandler::flush(internal::ParsedFlushParams &flushParams) #if openPMD_HAVE_MPI ADIOS2IOHandler::ADIOS2IOHandler( + std::optional> initialize_from, std::string path, Access at, MPI_Comm comm, @@ -2412,12 +2421,18 @@ ADIOS2IOHandler::ADIOS2IOHandler( std::string, // NOLINTNEXTLINE(performance-unnecessary-value-param) std::string) - : AbstractIOHandler(std::move(path), at, std::move(config), comm) + : AbstractIOHandler( + std::move(initialize_from), + std::move(path), + at, + std::move(config), + comm) {} #endif // openPMD_HAVE_MPI ADIOS2IOHandler::ADIOS2IOHandler( + std::optional> initialize_from, std::string path, Access at, json::TracingJSON config, @@ -2425,7 +2440,8 @@ ADIOS2IOHandler::ADIOS2IOHandler( std::string, // NOLINTNEXTLINE(performance-unnecessary-value-param) std::string) - : AbstractIOHandler(std::move(path), at, std::move(config)) + : AbstractIOHandler( + std::move(initialize_from), std::move(path), at, std::move(config)) {} std::future ADIOS2IOHandler::flush(internal::ParsedFlushParams &) diff --git a/src/IO/AbstractIOHandler.cpp b/src/IO/AbstractIOHandler.cpp index e7464e348a..f2a30b9974 100644 --- a/src/IO/AbstractIOHandler.cpp +++ b/src/IO/AbstractIOHandler.cpp @@ -127,22 +127,42 @@ bool AbstractIOHandler::fullSupportForVariableBasedEncoding() const #if openPMD_HAVE_MPI template <> AbstractIOHandler::AbstractIOHandler( - std::string path, Access at, json::TracingJSON &&jsonConfig, MPI_Comm) - : jsonMatcher(std::make_unique(std::move(jsonConfig))) - , directory{std::move(path)} - , m_backendAccess{at} - , 
m_frontendAccess{at} -{} + std::optional> initialize_from, + std::string path, + Access at, + json::TracingJSON &&jsonConfig, + MPI_Comm) + : AbstractIOHandler(std::move(initialize_from)) +{ + jsonMatcher = std::make_unique(std::move(jsonConfig)); + directory = std::move(path); + m_backendAccess = at; + m_frontendAccess = at; +} #endif template <> AbstractIOHandler::AbstractIOHandler( - std::string path, Access at, json::TracingJSON &&jsonConfig) - : jsonMatcher(std::make_unique(std::move(jsonConfig))) - , directory{std::move(path)} - , m_backendAccess{at} - , m_frontendAccess{at} -{} + std::optional> initialize_from, + std::string path, + Access at, + json::TracingJSON &&jsonConfig) + : AbstractIOHandler(std::move(initialize_from)) +{ + jsonMatcher = std::make_unique(std::move(jsonConfig)); + directory = std::move(path); + m_backendAccess = at; + m_frontendAccess = at; +} + +AbstractIOHandler::AbstractIOHandler( + std::optional> initialize_from) +{ + if (initialize_from.has_value() && *initialize_from) + { + this->operator=(std::move(**initialize_from)); + } +} AbstractIOHandler::~AbstractIOHandler() = default; // std::queue::queue(queue&&) is not noexcept diff --git a/src/IO/AbstractIOHandlerHelper.cpp b/src/IO/AbstractIOHandlerHelper.cpp index 8576343e5d..55e7f9cb5d 100644 --- a/src/IO/AbstractIOHandlerHelper.cpp +++ b/src/IO/AbstractIOHandlerHelper.cpp @@ -65,6 +65,7 @@ namespace #if openPMD_HAVE_MPI template <> std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, @@ -78,10 +79,16 @@ std::unique_ptr createIOHandler( { case Format::HDF5: return constructIOHandler( - "HDF5", path, access, comm, std::move(options)); + "HDF5", + std::move(initialize_from), + path, + access, + comm, + std::move(options)); case Format::ADIOS2_BP: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, comm, @@ -91,6 +98,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_BP4: 
return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, comm, @@ -100,6 +108,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_BP5: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, comm, @@ -109,6 +118,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_SST: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, comm, @@ -118,6 +128,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_SSC: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, comm, @@ -127,6 +138,7 @@ std::unique_ptr createIOHandler( case Format::JSON: return constructIOHandler( "JSON", + std::move(initialize_from), path, access, comm, @@ -136,6 +148,7 @@ std::unique_ptr createIOHandler( case Format::TOML: return constructIOHandler( "JSON", + std::move(initialize_from), path, access, comm, @@ -153,6 +166,7 @@ std::unique_ptr createIOHandler( template <> std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, @@ -165,10 +179,15 @@ std::unique_ptr createIOHandler( { case Format::HDF5: return constructIOHandler( - "HDF5", path, access, std::move(options)); + "HDF5", + std::move(initialize_from), + path, + access, + std::move(options)); case Format::ADIOS2_BP: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -177,6 +196,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_BP4: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -185,6 +205,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_BP5: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -193,6 +214,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_SST: return constructIOHandler( "ADIOS2", + 
std::move(initialize_from), std::move(path), access, std::move(options), @@ -201,6 +223,7 @@ std::unique_ptr createIOHandler( case Format::ADIOS2_SSC: return constructIOHandler( "ADIOS2", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -209,6 +232,7 @@ std::unique_ptr createIOHandler( case Format::JSON: return constructIOHandler( "JSON", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -217,6 +241,7 @@ std::unique_ptr createIOHandler( case Format::TOML: return constructIOHandler( "JSON", + std::move(initialize_from), std::move(path), access, std::move(options), @@ -231,12 +256,14 @@ std::unique_ptr createIOHandler( } std::unique_ptr createIOHandler( + std::optional> initialize_from, std::string path, Access access, Format format, std::string originalExtension) { return createIOHandler( + std::move(initialize_from), std::move(path), access, format, diff --git a/src/IO/DummyIOHandler.cpp b/src/IO/DummyIOHandler.cpp index fbf18d4791..9d8a7fac1a 100644 --- a/src/IO/DummyIOHandler.cpp +++ b/src/IO/DummyIOHandler.cpp @@ -22,12 +22,14 @@ #include "openPMD/auxiliary/JSON_internal.hpp" #include +#include #include namespace openPMD { DummyIOHandler::DummyIOHandler(std::string path, Access at) : AbstractIOHandler( + std::nullopt, std::move(path), at, json::TracingJSON( diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp index 1c8b20852e..f852049463 100644 --- a/src/IO/HDF5/HDF5IOHandler.cpp +++ b/src/IO/HDF5/HDF5IOHandler.cpp @@ -3011,8 +3011,12 @@ std::future HDF5IOHandlerImpl::flush(internal::ParsedFlushParams ¶ms) #if openPMD_HAVE_HDF5 HDF5IOHandler::HDF5IOHandler( - std::string path, Access at, json::TracingJSON config) - : AbstractIOHandler(std::move(path), at, std::move(config)) + std::optional> initialize_from, + std::string path, + Access at, + json::TracingJSON config) + : AbstractIOHandler( + std::move(initialize_from), std::move(path), at, std::move(config)) , m_impl{new 
HDF5IOHandlerImpl(this)} {} @@ -3025,8 +3029,12 @@ std::future HDF5IOHandler::flush(internal::ParsedFlushParams ¶ms) #else HDF5IOHandler::HDF5IOHandler( - std::string path, Access at, json::TracingJSON config) - : AbstractIOHandler(std::move(path), at, std::move(config)) + std::optional> initialize_from, + std::string path, + Access at, + json::TracingJSON config) + : AbstractIOHandler( + std::move(initialize_from), std::move(path), at, std::move(config)) { throw std::runtime_error("openPMD-api built without HDF5 support"); } diff --git a/src/IO/HDF5/ParallelHDF5IOHandler.cpp b/src/IO/HDF5/ParallelHDF5IOHandler.cpp index 9f166b23cb..404b33c888 100644 --- a/src/IO/HDF5/ParallelHDF5IOHandler.cpp +++ b/src/IO/HDF5/ParallelHDF5IOHandler.cpp @@ -58,8 +58,17 @@ namespace openPMD #endif ParallelHDF5IOHandler::ParallelHDF5IOHandler( - std::string path, Access at, MPI_Comm comm, json::TracingJSON config) - : AbstractIOHandler(std::move(path), at, std::move(config), comm) + std::optional> initialize_from, + std::string path, + Access at, + MPI_Comm comm, + json::TracingJSON config) + : AbstractIOHandler( + std::move(initialize_from), + std::move(path), + at, + std::move(config), + comm) , m_impl{new ParallelHDF5IOHandlerImpl(this, comm)} {} @@ -423,15 +432,27 @@ ParallelHDF5IOHandlerImpl::flush(internal::ParsedFlushParams ¶ms) #if openPMD_HAVE_MPI ParallelHDF5IOHandler::ParallelHDF5IOHandler( - std::string path, Access at, MPI_Comm comm, json::TracingJSON config) - : AbstractIOHandler(std::move(path), at, std::move(config), comm) + std::optional> initialize_from, + std::string path, + Access at, + MPI_Comm comm, + json::TracingJSON config) + : AbstractIOHandler( + std::move(initialize_from), + std::move(path), + at, + std::move(config), + comm) { throw std::runtime_error("openPMD-api built without HDF5 support"); } #else ParallelHDF5IOHandler::ParallelHDF5IOHandler( - std::string const &path, Access at, json::TracingJSON config) - : AbstractIOHandler(path, at, 
std::move(config)) + std::optional> initialize_from, + std::string const &path, + Access at, + json::TracingJSON config) + : AbstractIOHandler(std::move(initialize_from), path, at, std::move(config)) { throw std::runtime_error( "openPMD-api built without parallel support and without HDF5 support"); diff --git a/src/IO/JSON/JSONIOHandler.cpp b/src/IO/JSON/JSONIOHandler.cpp index d0261ba8de..2102837142 100644 --- a/src/IO/JSON/JSONIOHandler.cpp +++ b/src/IO/JSON/JSONIOHandler.cpp @@ -26,24 +26,26 @@ namespace openPMD JSONIOHandler::~JSONIOHandler() = default; JSONIOHandler::JSONIOHandler( + std::optional> initialize_from, std::string path, Access at, openPMD::json::TracingJSON jsonCfg, JSONIOHandlerImpl::FileFormat format, std::string originalExtension) - : AbstractIOHandler{std::move(path), at, std::move(jsonCfg)} + : AbstractIOHandler{std::move(initialize_from), std::move(path), at, std::move(jsonCfg)} , m_impl{this, format, std::move(originalExtension)} {} #if openPMD_HAVE_MPI JSONIOHandler::JSONIOHandler( + std::optional> initialize_from, std::string path, Access at, MPI_Comm comm, openPMD::json::TracingJSON jsonCfg, JSONIOHandlerImpl::FileFormat format, std::string originalExtension) - : AbstractIOHandler{std::move(path), at, std::move(jsonCfg)} + : AbstractIOHandler{std::move(initialize_from), std::move(path), at, std::move(jsonCfg)} , m_impl{ JSONIOHandlerImpl{this, comm, format, std::move(originalExtension)}} {} diff --git a/src/Series.cpp b/src/Series.cpp index 126cccfaca..47a642a88d 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -841,6 +841,7 @@ void Series::init( std::unique_ptr parsed_input, json::TracingJSON tracing_json) { auto io_handler = createIOHandler( + std::nullopt, parsed_input->path, at, parsed_input->format, @@ -882,7 +883,11 @@ void Series::init( true, std::forward(comm)...); + auto &writable = s.get()->m_writable; + auto io_handler = createIOHandler( + writable.IOHandler ? 
std::move(*writable.IOHandler) + : std::nullopt, parsed_input->path, at, parsed_input->format, @@ -1053,25 +1058,18 @@ void Series::initSeries( auto &writable = series->m_writable; /* - * In Access modes READ_LINEAR and APPEND, the Series constructor might have - * emplaced a temporary IOHandler. Check if this is the case. + * In access modes APPEND and READ_LINEAR, a dummy IO Handler might have + * been emplaced. The real IO Handler (ioHandler) was created from this + * intermediate handler, moving from it. + * In that case, the pointer is still valid, it just points to an empty + * optional at the moment. Reuse the pointer, so that any objects that + * might have been initialized with the old pointer are still valid. */ if (writable.IOHandler) { - if (writable.IOHandler->has_value()) - { - /* - * A temporary IOHandler has been used. In this case, copy the - * values from that IOHandler over into the real one. - */ - ioHandler->operator=(std::move(***writable.IOHandler)); - *writable.IOHandler = std::move(ioHandler); - } - else - { - throw error::Internal( - "Control flow error. 
This should not happen."); - } + *writable.IOHandler = + std::make_optional>( + std::move(ioHandler)); } else { diff --git a/test/AuxiliaryTest.cpp b/test/AuxiliaryTest.cpp index 7cc88f3b65..e863dd1527 100644 --- a/test/AuxiliaryTest.cpp +++ b/test/AuxiliaryTest.cpp @@ -35,7 +35,8 @@ struct TestHelper : public Attributable { writable().IOHandler = std::make_shared>>( - createIOHandler(".", Access::CREATE, Format::JSON, ".json")); + createIOHandler( + std::nullopt, ".", Access::CREATE, Format::JSON, ".json")); } }; } // namespace openPMD::test @@ -150,7 +151,8 @@ TEST_CASE("container_default_test", "[auxiliary]") Container c = Container(); c.writable().IOHandler = std::make_shared>>( - createIOHandler(".", Access::CREATE, Format::JSON, ".json")); + createIOHandler( + std::nullopt, ".", Access::CREATE, Format::JSON, ".json")); REQUIRE(c.empty()); REQUIRE(c.erase("nonExistentKey") == false); @@ -189,7 +191,8 @@ TEST_CASE("container_retrieve_test", "[auxiliary]") Container c = Container(); c.writable().IOHandler = std::make_shared>>( - createIOHandler(".", Access::CREATE, Format::JSON, ".json")); + createIOHandler( + std::nullopt, ".", Access::CREATE, Format::JSON, ".json")); structure s; std::string text = @@ -263,7 +266,8 @@ TEST_CASE("container_access_test", "[auxiliary]") Container c = Container(); c.writable().IOHandler = std::make_shared>>( - createIOHandler(".", Access::CREATE, Format::JSON, ".json")); + createIOHandler( + std::nullopt, ".", Access::CREATE, Format::JSON, ".json")); c["1firstWidget"] = Widget(0); REQUIRE(c.size() == 1); From 9c7efb4d570d79df41d6be866e2132e434b55b99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 12 Feb 2025 17:54:52 +0100 Subject: [PATCH 24/26] Fix Doxygen --- include/openPMD/IO/AbstractIOHandlerHelper.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/openPMD/IO/AbstractIOHandlerHelper.hpp b/include/openPMD/IO/AbstractIOHandlerHelper.hpp index 9314b2b61a..5b39a288b3 100644 --- 
 a/include/openPMD/IO/AbstractIOHandlerHelper.hpp +++ b/include/openPMD/IO/AbstractIOHandlerHelper.hpp @@ -30,6 +30,9 @@ namespace openPMD /** Construct an appropriate specific IOHandler for the desired IO mode that may be MPI-aware. * + * @param initialize_from Optionally initialize the IOHandler from a previous + * interim IOHandler, which is to be replaced by the handler + * now being initialized. * @param path Path to root folder for all operations associated with the desired handler. * @param access Access mode describing desired operations and @@ -59,6 +62,9 @@ std::unique_ptr createIOHandler( /** Construct an appropriate specific IOHandler for the desired IO mode. * + * @param initialize_from Optionally initialize the IOHandler from a previous + * interim IOHandler, which is to be replaced by the handler + * now being initialized. * @param path Path to root folder for all operations associated with * the desired handler. * @param access Access describing desired operations and permissions From 75df9870196ca329c35b451be6e901f7d51da79f Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 20 Mar 2025 16:51:44 -0700 Subject: [PATCH 25/26] Documentation Update --- docs/source/details/backendconfig.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index b5c90a7b6a..123b0a58e0 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -291,9 +291,11 @@ Explanation of the single keys: Dataset-specific configuration ------------------------------ -Some configuration options should be applicable on a per-dataset basis. +Sometimes it is beneficial to set configuration options for specific datasets. Most dataset-specific configuration options supported by the openPMD-api are additionally backend-specific, being format-specific serialization instructions such as compression or chunking. 
-Such dataset-specific and backend-specific configuration is hence specified under the key path ``.dataset``, e.g.: + +All dataset-specific and backend-specific configuration is specified under the key path ``.dataset``. +Without filtering by dataset name (see the ``select`` key below) this looks like: .. code-block:: json From 5e1e1765485163a972fe3f270140188b28e29ee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 25 Mar 2025 18:11:39 +0100 Subject: [PATCH 26/26] Fix NVCOMPILER macro in example --- examples/13_write_dynamic_configuration.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/13_write_dynamic_configuration.cpp b/examples/13_write_dynamic_configuration.cpp index 54b9ad0e8c..10a5bde40f 100644 --- a/examples/13_write_dynamic_configuration.cpp +++ b/examples/13_write_dynamic_configuration.cpp @@ -18,7 +18,8 @@ int main() using position_t = double; -#if !__NVCOMPILER // see https://github.com/ToruNiina/toml11/issues/205 +// see https://github.com/ToruNiina/toml11/issues/205 +#if !defined(__NVCOMPILER_MAJOR__) || __NVCOMPILER_MAJOR__ >= 23 /* * This example demonstrates how to use JSON/TOML-based dynamic * configuration for openPMD.