From 18b75408cef6ef0caa00beeb7d63aac0c3891d99 Mon Sep 17 00:00:00 2001
From: Thiago Carneiro
Date: Fri, 25 Jul 2025 13:50:08 -0300
Subject: [PATCH 1/4] fix missing inlines and incorrect pointers in 'from_variable'

---
 mdio/dataset.h           |  8 ++++----
 mdio/dataset_factory.h   | 18 +++++++++---------
 mdio/dataset_validator.h |  8 ++++----
 mdio/utils/delete.h      |  2 +-
 mdio/variable.h          |  6 +++---
 5 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/mdio/dataset.h b/mdio/dataset.h
index da96a59..150940b 100644
--- a/mdio/dataset.h
+++ b/mdio/dataset.h
@@ -53,7 +53,7 @@ namespace internal {
  * @return An `mdio::Result` containing the .zarray JSON metadata on success, or
  * an error on failure.
  */
-Result<nlohmann::json> get_zarray(const ::nlohmann::json metadata) {
+inline Result<nlohmann::json> get_zarray(const ::nlohmann::json metadata) {
   // derive .zarray json metadata (without reading it).
   auto json =
       metadata;  // Why am I doing this? It's an extra copy that does nothing!
@@ -125,7 +125,7 @@ Result<nlohmann::json> get_zarray(const ::nlohmann::json metadata) {
  * @param json_variables The JSON variables.
  * @return An `mdio::Future` representing the asynchronous write.
  */
-Future write_zmetadata(
+inline Future write_zmetadata(
     const ::nlohmann::json& dataset_metadata,
     const std::vector<::nlohmann::json>& json_variables) {
   // header material at the root of the dataset ...
@@ -252,7 +252,7 @@ Future write_zmetadata(
  * It will default to the "file" driver if no prefix is found.
  * @param dataset_path The path to the dataset.
  */
-Future dataset_kvs_store(
+inline Future dataset_kvs_store(
     const std::string& dataset_path) {
   // the tensorstore driver needs a bucket field
   ::nlohmann::json kvstore;
@@ -298,7 +298,7 @@ Future dataset_kvs_store(
  * @return An `mdio::Future` containing the .zmetadata JSON on success, or an
  * error on failure.
  */
-Future>>
+inline Future>>
 from_zmetadata(const std::string& dataset_path) {
dataset_path = "zarrs/acceptance/"; // FIXME - enable async diff --git a/mdio/dataset_factory.h b/mdio/dataset_factory.h index 96b5b8a..5b5656c 100644 --- a/mdio/dataset_factory.h +++ b/mdio/dataset_factory.h @@ -36,7 +36,7 @@ * @param raw A string to be encoded * @return A string encoded in base64 */ -std::string encode_base64(const std::string raw) { +inline std::string encode_base64(const std::string raw) { std::string encoded = absl::Base64Escape(raw); return encoded; } @@ -50,7 +50,7 @@ std::string encode_base64(const std::string raw) { * @return A string representing the dtype in numpy format limited to the dtypes * supported by MDIO Dataset */ -tensorstore::Result to_zarr_dtype(const std::string dtype) { +inline tensorstore::Result to_zarr_dtype(const std::string dtype) { // Convert the input dtype to Zarr dtype if (dtype == "int8") { return " to_zarr_dtype(const std::string dtype) { * @return OkStatus if successful, InvalidArgumentError if dtype is not * supported */ -absl::Status transform_dtype(nlohmann::json& input /*NOLINT*/, +inline absl::Status transform_dtype(nlohmann::json& input /*NOLINT*/, nlohmann::json& variable /*NOLINT*/) { if (input["dataType"].contains("fields")) { nlohmann::json dtypeFields = nlohmann::json::array(); @@ -124,7 +124,7 @@ absl::Status transform_dtype(nlohmann::json& input /*NOLINT*/, * @return OkStatus if successful, InvalidArgumentError if compressor is invalid * for MDIO */ -absl::Status transform_compressor(nlohmann::json& input /*NOLINT*/, +inline absl::Status transform_compressor(nlohmann::json& input /*NOLINT*/, nlohmann::json& variable /*NOLINT*/) { if (input.contains("compressor")) { if (input["compressor"].contains("name")) { @@ -182,7 +182,7 @@ absl::Status transform_compressor(nlohmann::json& input /*NOLINT*/, * before this step This presumes that the user does not attempt to use these * functions directly */ -void transform_shape( +inline void transform_shape( nlohmann::json& input /*NOLINT*/, nlohmann::json& variable /*NOLINT*/, std::unordered_map& dimensionMap /*NOLINT*/) { if (input["dimensions"][0].is_object()) { @@ -208,7 +208,7 @@ void transform_shape( * @param variable A Variable stub (Will be modified) * @return OkStatus if successful, InvalidArgumentError if the path is invalid */ -absl::Status transform_metadata(const std::string& path, +inline absl::Status transform_metadata(const std::string& path, nlohmann::json& variable /*NOLINT*/) { std::string bucket = "NULL"; // Default value, if is NULL don't add a bucket field @@ -261,7 +261,7 @@ absl::Status transform_metadata(const std::string& path, * @param dimensionMap A map of dimension names to sizes * @return A Variable spec or an error if the Variable spec is invalid */ -tensorstore::Result from_json_to_spec( +inline tensorstore::Result from_json_to_spec( nlohmann::json& json /*NOLINT*/, std::unordered_map& dimensionMap /*NOLINT*/, const std::string& path) { @@ -404,7 +404,7 @@ tensorstore::Result from_json_to_spec( * @return A map of dimension names to sizes or error if the dimensions are not * consistently sized */ -tensorstore::Result> get_dimensions( +inline tensorstore::Result> get_dimensions( nlohmann::json& spec /*NOLINT*/) { std::unordered_map dimensions; for (auto& variable : spec["variables"]) { @@ -438,7 +438,7 @@ tensorstore::Result> get_dimensions( * @param spec A Dataset spec * @return A vector of Variable specs or an error if the Dataset spec is invalid */ -tensorstore::Result>> +inline tensorstore::Result>> Construct(nlohmann::json& spec /*NOLINT*/, const 
std::string& path) { // Validation should only return status codes. If it returns data then it // should be a "constructor" diff --git a/mdio/dataset_validator.h b/mdio/dataset_validator.h index 99f0c14..494cfb6 100644 --- a/mdio/dataset_validator.h +++ b/mdio/dataset_validator.h @@ -30,7 +30,7 @@ * @brief Checks if a key exists in a map * Specific for our case of {coordinate: index} mapping */ -bool contains(const std::unordered_set& set, +inline bool contains(const std::unordered_set& set, const std::string key) { return set.count(key); } @@ -42,7 +42,7 @@ bool contains(const std::unordered_set& set, * @return OkStatus if valid, NotFoundError if schema file load fails, * InvalidArgumentError if validation fails for any reason */ -absl::Status validate_schema(nlohmann::json& spec /*NOLINT*/) { +inline absl::Status validate_schema(nlohmann::json& spec /*NOLINT*/) { nlohmann::json targetSchema = nlohmann::json::parse(kDatasetSchema, nullptr, false); if (targetSchema.is_discarded()) { @@ -72,7 +72,7 @@ absl::Status validate_schema(nlohmann::json& spec /*NOLINT*/) { * @return OkStatus if valid, InvalidArgumentError if a coordinate does not have * a matching Variable. */ -absl::Status validate_coordinates_present(const nlohmann::json& spec) { +inline absl::Status validate_coordinates_present(const nlohmann::json& spec) { // Build a mapping of all the dimension coordinates std::unordered_set dimension; // name of all 1-d Variables who's name matches the dimension @@ -145,7 +145,7 @@ absl::Status validate_coordinates_present(const nlohmann::json& spec) { * reason */ -absl::Status validate_dataset(nlohmann::json& spec /*NOLINT*/) { +inline absl::Status validate_dataset(nlohmann::json& spec /*NOLINT*/) { absl::Status schemaStatus = validate_schema(spec); if (!schemaStatus.ok()) { return schemaStatus; diff --git a/mdio/utils/delete.h b/mdio/utils/delete.h index 7f1313e..ecb5b1a 100644 --- a/mdio/utils/delete.h +++ b/mdio/utils/delete.h @@ -34,7 +34,7 @@ namespace utils { * @return OK result if the dataset was valid and deleted successfully, * otherwise an error result */ -Result DeleteDataset(const std::string dataset_path) { +inline Result DeleteDataset(const std::string dataset_path) { // Open the dataset // This is to ensure that what is getting deleted by MDIO is a valid MDIO // dataset itself. diff --git a/mdio/variable.h b/mdio/variable.h index 60a7954..0b087e0 100644 --- a/mdio/variable.h +++ b/mdio/variable.h @@ -214,7 +214,7 @@ namespace internal { * @return A driver specific message if the status is a missing driver message, * otherwise the original status. 
  */
-absl::Status CheckMissingDriverStatus(const absl::Status& status) {
+inline absl::Status CheckMissingDriverStatus(const absl::Status& status) {
   std::string error(status.message());
   if (error.find("Error parsing object member \"driver\"") !=
       std::string::npos) {
@@ -1837,10 +1837,10 @@ Result> from_variable(
     size_t element_size = variable.dtype().size();
     if (variable.dtype() == constants::kFloat32) {
-      auto* data = reinterpret_cast<float*>(_array.data());
+      auto* data = reinterpret_cast<float*>(_array.byte_strided_origin_pointer().get());
       std::fill_n(data, num_elements, std::numeric_limits<float>::quiet_NaN());
     } else {  // double
-      auto* data = reinterpret_cast<double*>(_array.data());
+      auto* data = reinterpret_cast<double*>(_array.byte_strided_origin_pointer().get());
       std::fill_n(data, num_elements, std::numeric_limits<double>::quiet_NaN());
     }
   }

From a2cc9e315e7e10ba501b5a1b50f5474040afced1 Mon Sep 17 00:00:00 2001
From: Brian Michell
Date: Wed, 6 Aug 2025 08:37:42 -0500
Subject: [PATCH 2/4] clang-format

---
 mdio/dataset_factory.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/mdio/dataset_factory.h b/mdio/dataset_factory.h
index 5b5656c..1824800 100644
--- a/mdio/dataset_factory.h
+++ b/mdio/dataset_factory.h
@@ -94,7 +94,7 @@ inline tensorstore::Result<std::string> to_zarr_dtype(const std::string dtype) {
  * supported
  */
 inline absl::Status transform_dtype(nlohmann::json& input /*NOLINT*/,
-                             nlohmann::json& variable /*NOLINT*/) {
+                                    nlohmann::json& variable /*NOLINT*/) {
   if (input["dataType"].contains("fields")) {
     nlohmann::json dtypeFields = nlohmann::json::array();
     for (const auto& field : input["dataType"]["fields"]) {
@@ -125,7 +125,7 @@ inline absl::Status transform_dtype(nlohmann::json& input /*NOLINT*/,
  * for MDIO
  */
 inline absl::Status transform_compressor(nlohmann::json& input /*NOLINT*/,
-                                  nlohmann::json& variable /*NOLINT*/) {
+                                         nlohmann::json& variable /*NOLINT*/) {
   if (input.contains("compressor")) {
     if (input["compressor"].contains("name")) {
       if (input["compressor"]["name"] != "blosc") {
@@ -209,7 +209,7 @@ inline void transform_shape(
  * @return OkStatus if successful, InvalidArgumentError if the path is invalid
  */
 inline absl::Status transform_metadata(const std::string& path,
-                                nlohmann::json& variable /*NOLINT*/) {
+                                       nlohmann::json& variable /*NOLINT*/) {
   std::string bucket =
       "NULL";  // Default value, if is NULL don't add a bucket field
   std::string driver = "file";
@@ -404,8 +404,8 @@ inline tensorstore::Result from_json_to_spec(
  * @return A map of dimension names to sizes or error if the dimensions are not
  * consistently sized
  */
-inline tensorstore::Result> get_dimensions(
-    nlohmann::json& spec /*NOLINT*/) {
+inline tensorstore::Result>
+get_dimensions(nlohmann::json& spec /*NOLINT*/) {
   std::unordered_map dimensions;
   for (auto& variable : spec["variables"]) {
     if (variable["dimensions"][0].is_object()) {
@@ -438,7 +438,8 @@ inline tensorstore::Result> get_dimens
  * @param spec A Dataset spec
  * @return A vector of Variable specs or an error if the Dataset spec is invalid
  */
-inline tensorstore::Result>>
+inline tensorstore::Result<
+    std::tuple>>
 Construct(nlohmann::json& spec /*NOLINT*/, const std::string& path) {
   // Validation should only return status codes. If it returns data then it
   // should be a "constructor"

From 05b2bfbc5623338c0cd22a50524129ed6f35673e Mon Sep 17 00:00:00 2001
From: Brian Michell
Date: Wed, 6 Aug 2025 08:38:13 -0500
Subject: [PATCH 3/4] clang-format

---
 mdio/dataset_validator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mdio/dataset_validator.h b/mdio/dataset_validator.h
index 494cfb6..7c83c36 100644
--- a/mdio/dataset_validator.h
+++ b/mdio/dataset_validator.h
@@ -31,7 +31,7 @@
  * Specific for our case of {coordinate: index} mapping
  */
 inline bool contains(const std::unordered_set<std::string>& set,
-              const std::string key) {
+                     const std::string key) {
   return set.count(key);
 }

From 62ec3ceb31c37508e8f63c17fa086794746bc19d Mon Sep 17 00:00:00 2001
From: Brian Michell
Date: Wed, 6 Aug 2025 08:38:50 -0500
Subject: [PATCH 4/4] clang-format

---
 mdio/variable.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mdio/variable.h b/mdio/variable.h
index 0b087e0..cd1c1c7 100644
--- a/mdio/variable.h
+++ b/mdio/variable.h
@@ -1837,10 +1837,12 @@ Result> from_variable(
     size_t element_size = variable.dtype().size();
     if (variable.dtype() == constants::kFloat32) {
-      auto* data = reinterpret_cast<float*>(_array.byte_strided_origin_pointer().get());
+      auto* data =
+          reinterpret_cast<float*>(_array.byte_strided_origin_pointer().get());
       std::fill_n(data, num_elements, std::numeric_limits<float>::quiet_NaN());
     } else {  // double
-      auto* data = reinterpret_cast<double*>(_array.byte_strided_origin_pointer().get());
+      auto* data =
+          reinterpret_cast<double*>(_array.byte_strided_origin_pointer().get());
       std::fill_n(data, num_elements, std::numeric_limits<double>::quiet_NaN());
     }
   }
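
Note on the `inline` additions in patch 1: these are free functions defined in headers of a header-only library, so without `inline` every translation unit that includes the header emits its own external definition and the link step fails with multiple-definition errors as soon as two .cc files include the same header. A minimal sketch of the failure mode and the fix, using hypothetical file and function names rather than the MDIO headers:

  // util.h (hypothetical) -- header included from more than one .cc file
  #pragma once
  #include <string>

  // Without `inline`, every TU that includes util.h emits an external
  // definition of greet(), and linking a.o with b.o fails with
  // "multiple definition of `greet(...)'":
  //   std::string greet(const std::string& name) { return "hello " + name; }

  // Marking the definition inline (or making it a template) allows the
  // duplicate definitions to be merged, which is what patch 1 does for the
  // mdio header functions.
  inline std::string greet(const std::string& name) { return "hello " + name; }

  // a.cc:  #include "util.h"   void a() { greet("a"); }
  // b.cc:  #include "util.h"   void b() { greet("b"); }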
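
On the "incorrect pointers" half of patch 1: from_variable fills the freshly allocated array with NaN, and the old code took the fill destination from _array.data(). As I read the change, the array can carry a non-zero origin, in which case the plain data pointer (the address that index 0 would have) is not the start of the allocation, while byte_strided_origin_pointer() points at the element sitting at the array's origin, i.e. the first element that actually exists. A toy model of the hazard (deliberately not the tensorstore API):

  #include <algorithm>
  #include <cstddef>
  #include <limits>
  #include <vector>

  // Toy offset-origin array: the allocation holds indices [origin, origin+n),
  // with storage[0] representing element(origin).
  struct ToyOffsetArray {
    std::vector<float> storage;
    std::ptrdiff_t origin = 0;

    float* origin_pointer() { return storage.data(); }  // first real element
    float& element(std::ptrdiff_t i) { return storage[i - origin]; }
  };

  int main() {
    ToyOffsetArray a{std::vector<float>(10), /*origin=*/100};

    // Correct: start the fill at the origin pointer, which is what switching
    // to byte_strided_origin_pointer().get() accomplishes in the patch.
    std::fill_n(a.origin_pointer(), a.storage.size(),
                std::numeric_limits<float>::quiet_NaN());

    // Filling from the address index 0 would occupy (origin_pointer() - 100 in
    // this toy) would write far outside the allocation whenever origin != 0,
    // so the toy class simply never hands out such a pointer.
    return 0;
  }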