diff --git a/CMakeLists.txt b/CMakeLists.txt index 82fa673..ce31c4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,6 +75,8 @@ include(CMakeDependentOption) # provides a method to download dependencies: include(FetchContent) include(CMakeHelpers/MdioHelpers) +# optional code coverage support: +include(CodeCoverage) list(APPEND mdio_DEFAULT_COPTS "-Wno-deprecated-declarations" @@ -108,6 +110,7 @@ if(NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(mdio_INTERNAL_DEPS tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file tensorstore::stack diff --git a/cmake/CMakeHelpers/MdioHelpers.cmake b/cmake/CMakeHelpers/MdioHelpers.cmake index c2162d7..e51b49f 100644 --- a/cmake/CMakeHelpers/MdioHelpers.cmake +++ b/cmake/CMakeHelpers/MdioHelpers.cmake @@ -297,6 +297,12 @@ function(mdio_cc_test) PRIVATE ${mdio_CC_TEST_COPTS} ) + # Add coverage flags only to mdio test targets (not dependencies) + if(MDIO_ENABLE_COVERAGE) + target_compile_options(${_NAME} PRIVATE ${MDIO_COVERAGE_COMPILE_FLAGS}) + target_link_options(${_NAME} PRIVATE ${MDIO_COVERAGE_LINK_FLAGS}) + endif() + target_link_libraries(${_NAME} PUBLIC ${mdio_CC_TEST_DEPS} PRIVATE ${mdio_CC_TEST_LINKOPTS} diff --git a/cmake/CodeCoverage.cmake b/cmake/CodeCoverage.cmake new file mode 100644 index 0000000..120da7f --- /dev/null +++ b/cmake/CodeCoverage.cmake @@ -0,0 +1,166 @@ +# Copyright 2024 TGS +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ============================================================================== +# Code Coverage Support (gcov + lcov) +# ============================================================================== +# +# Coverage instrumentation is applied ONLY to mdio test targets, not to +# dependencies like Tensorstore. This keeps build times and test execution fast. +# +# REQUIRED CMAKE FLAGS: +# -DMDIO_ENABLE_COVERAGE=ON Enable coverage instrumentation +# -DCMAKE_BUILD_TYPE=Debug Recommended for meaningful line coverage +# +# REQUIRED SYSTEM TOOLS: +# - gcov (usually bundled with GCC) +# - lcov (install via: apt install lcov / brew install lcov) +# - genhtml (included with lcov) +# +# USAGE: +# 1. Configure and build with coverage enabled: +# cd build +# cmake .. -DMDIO_ENABLE_COVERAGE=ON -DCMAKE_BUILD_TYPE=Debug +# make +# +# 2. Run tests to generate coverage data (coverage accumulates across runs): +# ./mdio/mdio_variable_test # single test +# ./mdio/mdio_variable_test && ./mdio/mdio_dataset_test # multiple tests +# ctest # all registered tests +# +# 3. Generate HTML coverage report: +# make coverage-capture +# +# 4. 
View the report: +# Open build/coverage_report/index.html in a browser +# +# AVAILABLE TARGETS: +# make coverage - Reset counters, capture data, generate report +# make coverage-capture - Capture current data and generate report (no reset) +# make coverage-reset - Zero out all coverage counters +# +# ============================================================================== + +option(MDIO_ENABLE_COVERAGE "Enable code coverage instrumentation (requires GCC or Clang)" OFF) + +if(MDIO_ENABLE_COVERAGE) + message(STATUS "Code coverage enabled") + + # Check for supported compiler + if(NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + message(FATAL_ERROR "Code coverage requires GCC or Clang compiler") + endif() + + # Coverage compiler/linker flags - exported for use in MdioHelpers.cmake + # These are applied only to mdio targets, not to dependencies like Tensorstore + set(MDIO_COVERAGE_COMPILE_FLAGS "-fprofile-arcs;-ftest-coverage" CACHE INTERNAL "Coverage compile flags") + set(MDIO_COVERAGE_LINK_FLAGS "--coverage" CACHE INTERNAL "Coverage link flags") + + # Find required tools + find_program(LCOV_PATH lcov) + find_program(GENHTML_PATH genhtml) + + if(NOT LCOV_PATH) + message(WARNING "lcov not found - coverage report generation will not be available") + endif() + + if(NOT GENHTML_PATH) + message(WARNING "genhtml not found - coverage report generation will not be available") + endif() + + # Create coverage report target if tools are available + if(LCOV_PATH AND GENHTML_PATH) + # Custom target to generate coverage report + add_custom_target(coverage + COMMENT "Generating code coverage report..." + + # Clear previous coverage data + COMMAND ${LCOV_PATH} --directory ${CMAKE_BINARY_DIR} --zerocounters + + # Run all tests (ctest must be run separately or you can uncomment below) + # COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure + + # Capture coverage data + COMMAND ${LCOV_PATH} + --directory ${CMAKE_BINARY_DIR} + --capture + --output-file ${CMAKE_BINARY_DIR}/coverage.info + --ignore-errors mismatch,negative + + # Remove coverage data for external dependencies + COMMAND ${LCOV_PATH} + --remove ${CMAKE_BINARY_DIR}/coverage.info + '/usr/*' + '${CMAKE_BINARY_DIR}/_deps/*' + '*/googletest/*' + '*/gtest/*' + '*/gmock/*' + '*_test.cc' + --output-file ${CMAKE_BINARY_DIR}/coverage.info + --ignore-errors unused,negative + + # Generate HTML report + COMMAND ${GENHTML_PATH} + ${CMAKE_BINARY_DIR}/coverage.info + --output-directory ${CMAKE_BINARY_DIR}/coverage_report + --title "MDIO Code Coverage" + --legend + --show-details + + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + + # Target to just capture coverage (without zeroing first) + add_custom_target(coverage-capture + COMMENT "Capturing coverage data..." + + COMMAND ${LCOV_PATH} + --directory ${CMAKE_BINARY_DIR} + --capture + --output-file ${CMAKE_BINARY_DIR}/coverage.info + --ignore-errors mismatch,negative + + COMMAND ${LCOV_PATH} + --remove ${CMAKE_BINARY_DIR}/coverage.info + '/usr/*' + '${CMAKE_BINARY_DIR}/_deps/*' + '*/googletest/*' + '*/gtest/*' + '*/gmock/*' + '*_test.cc' + --output-file ${CMAKE_BINARY_DIR}/coverage.info + --ignore-errors unused,negative + + COMMAND ${GENHTML_PATH} + ${CMAKE_BINARY_DIR}/coverage.info + --output-directory ${CMAKE_BINARY_DIR}/coverage_report + --title "MDIO Code Coverage" + --legend + --show-details + + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + + # Target to reset coverage counters + add_custom_target(coverage-reset + COMMENT "Resetting coverage counters..." 
+ COMMAND ${LCOV_PATH} --directory ${CMAKE_BINARY_DIR} --zerocounters + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + + message(STATUS "Coverage targets available: 'coverage', 'coverage-capture', 'coverage-reset'") + message(STATUS "Coverage report will be generated at: ${CMAKE_BINARY_DIR}/coverage_report/index.html") + endif() +endif() + diff --git a/cmake/FindEXT_TENSORSTORE.cmake b/cmake/FindEXT_TENSORSTORE.cmake index 250f4fc..101b38b 100644 --- a/cmake/FindEXT_TENSORSTORE.cmake +++ b/cmake/FindEXT_TENSORSTORE.cmake @@ -3,11 +3,18 @@ IF ( NOT TARGET tensorstore ) include(FetchContent) - FetchContent_Declare( - tensorstore - GIT_REPOSITORY - https://github.com/brian-michell/tensorstore.git - GIT_TAG v0.1.63_latest +# FetchContent_Declare( +# tensorstore +# GIT_REPOSITORY +# https://github.com/brian-michell/tensorstore.git +# GIT_TAG v0.1.63_latest +# ) + +FetchContent_Declare( + tensorstore + GIT_REPOSITORY + https://github.com/BrianMichell/tensorstore.git + GIT_TAG v3_structs ) FetchContent_MakeAvailable(tensorstore) diff --git a/mdio/CMakeLists.txt b/mdio/CMakeLists.txt index 47fc89f..c838d3c 100644 --- a/mdio/CMakeLists.txt +++ b/mdio/CMakeLists.txt @@ -101,6 +101,7 @@ mdio_cc_test( GTest::gmock_main tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file tensorstore::stack @@ -125,6 +126,7 @@ mdio_cc_test( GTest::gmock_main tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file tensorstore::stack @@ -134,6 +136,8 @@ mdio_cc_test( tensorstore::util_status_testutil nlohmann_json_schema_validator tensorstore::kvstore_s3 + absl::log_initialize + absl::log_globals ) mdio_cc_test( @@ -186,6 +190,7 @@ mdio_cc_test( GTest::gmock_main tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file tensorstore::stack @@ -209,6 +214,7 @@ mdio_cc_test( GTest::gmock_main tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file tensorstore::stack @@ -232,6 +238,7 @@ mdio_cc_test( GTest::gmock_main tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file tensorstore::tensorstore @@ -255,6 +262,7 @@ mdio_cc_test( GTest::gmock_main tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file tensorstore::stack @@ -277,6 +285,7 @@ mdio_cc_test( GTest::gmock_main tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file tensorstore::stack @@ -301,6 +310,7 @@ mdio_cc_test( GTest::gmock_main tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file tensorstore::stack @@ -321,14 +331,31 @@ mdio_cc_test( ${mdio_DEFAULT_COPTS} LINKOPTS ${mdio_DEFAULT_LINKOPTS} + DEPS + GTest::gmock_main + tensorstore::driver_json + tensorstore::tensorstore + nlohmann_json_schema_validator +) + +mdio_cc_test( + NAME + coordinate_selector_test + SRCS + coordinate_selector_test.cc + COPTS + ${mdio_DEFAULT_COPTS} + LINKOPTS + ${mdio_DEFAULT_LINKOPTS} DEPS GTest::gmock_main tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file - tensorstore::stack tensorstore::tensorstore + tensorstore::stack 
tensorstore::index_space_dim_expression tensorstore::index_space_index_transform tensorstore::util_status_testutil @@ -337,9 +364,9 @@ mdio_cc_test( mdio_cc_test( NAME - coordinate_selector_test + zarr_test SRCS - coordinate_selector_test.cc + zarr/zarr_test.cc COPTS ${mdio_DEFAULT_COPTS} LINKOPTS @@ -348,6 +375,7 @@ mdio_cc_test( GTest::gmock_main tensorstore::driver_array tensorstore::driver_zarr + tensorstore::driver_zarr3 tensorstore::driver_json tensorstore::kvstore_file tensorstore::tensorstore @@ -357,3 +385,4 @@ mdio_cc_test( tensorstore::util_status_testutil nlohmann_json_schema_validator ) + diff --git a/mdio/acceptance_test.cc b/mdio/acceptance_test.cc index c464291..6691dc7 100644 --- a/mdio/acceptance_test.cc +++ b/mdio/acceptance_test.cc @@ -12,6 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +/** + * @file acceptance_test.cc + * @brief Unified acceptance tests for both Zarr V2 and V3 driver support. + * + * This file contains parameterized tests that verify the MDIO library works + * correctly with both Zarr V2 and Zarr V3 formats, including variable creation, + * reading, writing, and dataset operations. + */ + #include #include #include @@ -23,37 +32,329 @@ #include "mdio/dataset.h" #include "mdio/dataset_factory.h" +#include "mdio/zarr/zarr.h" namespace { -constexpr char PYTHON_EXECUTABLE[] = "python3"; // Don't specify absolute path +constexpr char PYTHON_EXECUTABLE[] = "python3"; constexpr char PROJECT_BASE_PATH_ENV[] = "PROJECT_BASE_PATH"; constexpr char DEFAULT_BASE_PATH[] = "../.."; -constexpr char ZARR_SCRIPT_RELATIVE_PATH[] = - "/mdio/regression_tests/zarr_compatibility.py"; constexpr char XARRAY_SCRIPT_RELATIVE_PATH[] = "/mdio/regression_tests/xarray_compatibility_test.py"; -constexpr char FILE_PATH_BASE[] = "./zarrs/acceptance/"; constexpr int ERROR_CODE = EXIT_FAILURE; constexpr int SUCCESS_CODE = EXIT_SUCCESS; +using float16_t = mdio::dtypes::float_16_t; + +/** + * @brief Test variable definition with dtype info for V2 and V3. + */ +struct TestVariableDef { + std::string name; + std::string dtype_v2; + std::string dtype_v3; + std::string long_name; + mdio::DataType expected_dtype; +}; + +// Common test variable definitions used across multiple tests +const std::vector kTestVariables = { + {"i2", "> OpenTestVariable(const std::string& name, + mdio::zarr::ZarrVersion version, + const std::string& base_path) { + return mdio::Variable<>::Open(CreateBaseSpec(version, name, base_path), + mdio::constants::kOpen); +} + +/** + * @brief Gets the Python script base path from environment. + */ +const char* GetPythonBasePath() { + const char* basePath = std::getenv(PROJECT_BASE_PATH_ENV); + if (!basePath) { + std::cout << "PROJECT_BASE_PATH environment variable not set. Expecting to " + "be in the 'build/mdio' directory." + << std::endl; + basePath = DEFAULT_BASE_PATH; + } + return basePath; +} + +/** + * @brief Returns the dataset manifest JSON for the given version. + * Both versions use the same manifest structure including struct arrays. + */ +std::string GetDatasetManifest(mdio::zarr::ZarrVersion version) { + std::string name = + version == mdio::zarr::ZarrVersion::kV3 ? "campos_3d_v3" : "campos_3d"; + + // clang-format off + std::string manifest = R"( +{ + "metadata": { + "name": ")" + name + R"(", + "apiVersion": "1.0.0", + "createdOn": "2023-12-12T15:02:06.413469-06:00", + "attributes": { + "textHeader": [ + "C01 .......................... ", + "C02 .......................... 
", + "C03 .......................... " + ], + "foo": "bar" + } + }, + "variables": [ + { + "name": "image", + "dataType": "float32", + "dimensions": [ + {"name": "inline", "size": 256}, + {"name": "crossline", "size": 512}, + {"name": "depth", "size": 384} + ], + "metadata": { + "chunkGrid": { + "name": "regular", + "configuration": { "chunkShape": [128, 128, 128] } + }, + "statsV1": { + "count": 100, + "sum": 1215.1, + "sumSquares": 125.12, + "min": 5.61, + "max": 10.84, + "histogram": {"binCenters": [1, 2], "counts": [10, 15]} + }, + "attributes": { + "fizz": "buzz" + } + }, + "coordinates": ["inline", "crossline", "depth", "cdp-x", "cdp-y"], + "compressor": {"name": "blosc", "algorithm": "zstd"} + }, + { + "name": "velocity", + "dataType": "float64", + "dimensions": ["inline", "crossline", "depth"], + "metadata": { + "chunkGrid": { + "name": "regular", + "configuration": { "chunkShape": [128, 128, 128] } + }, + "unitsV1": {"speed": "m/s"} + }, + "coordinates": ["inline", "crossline", "depth", "cdp-x", "cdp-y"] + }, + { + "name": "image_inline", + "dataType": "int16", + "dimensions": ["inline", "crossline", "depth"], + "longName": "inline optimized version of 3d_stack", + "compressor": {"name": "blosc", "algorithm": "zstd"}, + "metadata": { + "chunkGrid": { + "name": "regular", + "configuration": { "chunkShape": [128, 128, 128] } + } + }, + "coordinates": ["inline", "crossline", "depth", "cdp-x", "cdp-y"] + }, + { + "name": "image_headers", + "dataType": { + "fields": [ + {"name": "cdp-x", "format": "int32"}, + {"name": "cdp-y", "format": "int32"}, + {"name": "elevation", "format": "float16"}, + {"name": "some_scalar", "format": "float16"} + ] + }, + "dimensions": ["inline", "crossline"], + "metadata": { + "chunkGrid": { + "name": "regular", + "configuration": { "chunkShape": [128, 128] } + } + }, + "coordinates": ["inline", "crossline", "cdp-x", "cdp-y"] + }, + { + "name": "inline", + "dataType": "uint32", + "dimensions": [{"name": "inline", "size": 256}] + }, + { + "name": "crossline", + "dataType": "uint32", + "dimensions": [{"name": "crossline", "size": 512}] + }, + { + "name": "depth", + "dataType": "uint32", + "dimensions": [{"name": "depth", "size": 384}], + "metadata": { + "unitsV1": { "length": "m" } + } + }, + { + "name": "cdp-x", + "dataType": "float32", + "dimensions": [ + {"name": "inline", "size": 256}, + {"name": "crossline", "size": 512} + ], + "metadata": { + "unitsV1": { "length": "m" } + } + }, + { + "name": "cdp-y", + "dataType": "float32", + "dimensions": [ + {"name": "inline", "size": 256}, + {"name": "crossline", "size": 512} + ], + "metadata": { + "unitsV1": { "length": "m" } + } + } + ] +} + )"; + // clang-format on + return manifest; +} + /** * @brief Executes the Python regression tests - * - * @param scriptPath Which script to run, should be either - * ZARR_SCRIPT_RELATIVE_PATH or XARRAY_SCRIPT_RELATIVE_PATH - * @param arg The argument to pass to the script - * @return int The status code of the script execution */ int executePythonScript(const std::string& scriptPath, const std::vector& args) { - // Ensure scriptPath and args are sanitized if (scriptPath.empty() || args.empty()) { std::cerr << "Error: scriptPath or args are empty." << std::endl; return ERROR_CODE; } - // Ensure the scriptPath is an absolute path or default base path if (scriptPath[0] != '/' && scriptPath.find(DEFAULT_BASE_PATH) != 0) { std::cerr << "Error: PROJECT_BASE_PATH must be an absolute path or not be set." 
@@ -61,1018 +362,279 @@ int executePythonScript(const std::string& scriptPath, return ERROR_CODE; } - // Prepare arguments for execvp std::vector execArgs = {PYTHON_EXECUTABLE, scriptPath.c_str()}; for (const auto& arg : args) { execArgs.push_back(arg.c_str()); } execArgs.push_back(nullptr); - // Execute the Python script if (execvp(execArgs[0], const_cast(execArgs.data())) == -1) { perror("execvp failed"); return ERROR_CODE; } - return SUCCESS_CODE; // This line will never be reached if execvp is - // successful + return SUCCESS_CODE; } -namespace VariableTesting { - -using float16_t = mdio::dtypes::float_16_t; - -// TODO(BrianMichell): Extend test coverage to include uint -nlohmann::json i2Base = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/i2" - } - } -)"_json; - -nlohmann::json i4Base = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/i4" - } - } -)"_json; - -nlohmann::json i8Base = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/i8" - } - } -)"_json; - -nlohmann::json f2Base = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/f2" - } - } -)"_json; +/** + * @brief Runs Python scripts with fork/wait and checks results. + * @return true if all scripts passed, false otherwise. + */ +bool RunPythonScripts(const std::string& script_path, + const std::vector>& arg_sets, + const std::string& skip_message) { + std::vector pids; -nlohmann::json f4Base = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/f4" - } + for (const auto& args : arg_sets) { + pid_t pid = fork(); + if (pid == 0) { + int result = executePythonScript(script_path, args); + if (result == 0xfd00) { + exit(SUCCESS_CODE); + } + exit(result); + } else if (pid > 0) { + pids.push_back(pid); + } else { + perror("fork failed"); + return false; } -)"_json; + } -nlohmann::json f8Base = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/f8" - } + bool all_passed = true; + for (pid_t pid : pids) { + int status; + if (waitpid(pid, &status, 0) == -1) { + perror("waitpid failed"); + return false; } -)"_json; - -nlohmann::json voidedBase = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/voided" - } + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + if (WIFEXITED(status) && WEXITSTATUS(status) == 0xfd00) { + // Import error - will be handled by caller + continue; + } + all_passed = false; } -)"_json; + } + return all_passed; +} -nlohmann::json u2Base = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/u2" - } - } -)"_json; +// ============================================================================ +// Parameterized Variable Tests +// ============================================================================ -nlohmann::json u4Base = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/u4" - } - } -)"_json; +class VariableTest : public ::testing::TestWithParam { + protected: + void SetUp() override { + version_ = GetParam(); + base_path_ = GetBasePath(version_); + driver_ = GetTestDriverName(version_); + } -// Test to set up some pre-existing elements for testing -TEST(Variable, SETUP) { - mdio::TransactionalOpenOptions options; - auto opt = options.Set(std::move(mdio::constants::kCreateClean)); - nlohmann::json i2Spec = R"( - { - "driver": "zarr", - "kvstore": { 
- "driver": "file", - "path": "zarrs/acceptance/i2" - }, - "metadata": { - "dtype": "::Open(i2Base, mdio::constants::kOpen).status().ok()); - EXPECT_TRUE( - mdio::Variable<>::Open(i4Base, mdio::constants::kOpen).status().ok()); - EXPECT_TRUE( - mdio::Variable<>::Open(i8Base, mdio::constants::kOpen).status().ok()); - EXPECT_TRUE( - mdio::Variable<>::Open(f2Base, mdio::constants::kOpen).status().ok()); - EXPECT_TRUE( - mdio::Variable<>::Open(f4Base, mdio::constants::kOpen).status().ok()); - EXPECT_TRUE( - mdio::Variable<>::Open(f8Base, mdio::constants::kOpen).status().ok()); - EXPECT_TRUE( - mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen).status().ok()); +TEST_P(VariableTest, open) { + for (const auto& def : kTestVariables) { + auto var = OpenTestVariable(def.name, version_, base_path_); + EXPECT_TRUE(var.status().ok()) + << "Failed to open " << def.name << ": " << var.status(); + } } -TEST(Variable, name) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); - ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); - - EXPECT_EQ(i2.value().get_variable_name(), "i2") - << i2.value().get_variable_name(); - EXPECT_EQ(i4.value().get_variable_name(), "i4") - << i4.value().get_variable_name(); - EXPECT_EQ(i8.value().get_variable_name(), "i8") - << i8.value().get_variable_name(); - EXPECT_EQ(f2.value().get_variable_name(), "f2") - << f2.value().get_variable_name(); - EXPECT_EQ(f4.value().get_variable_name(), "f4") - << f4.value().get_variable_name(); - EXPECT_EQ(f8.value().get_variable_name(), "f8") - << f8.value().get_variable_name(); - EXPECT_EQ(voided.value().get_variable_name(), "voided") - << voided.value().get_variable_name(); +TEST_P(VariableTest, name) { + for (const auto& def : kTestVariables) { + auto var = OpenTestVariable(def.name, version_, base_path_); + ASSERT_TRUE(var.status().ok()) << var.status(); + EXPECT_EQ(var.value().get_variable_name(), def.name); + } } -TEST(Variable, longName) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); - ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); - - EXPECT_EQ(i2.value().get_long_name(), "2-byte integer test") - 
<< i2.value().get_long_name(); - EXPECT_EQ(i4.value().get_long_name(), "4-byte integer test") - << i4.value().get_long_name(); - EXPECT_EQ(i8.value().get_long_name(), "8-byte integer test") - << i8.value().get_long_name(); - EXPECT_EQ(f2.value().get_long_name(), "2-byte float test") - << f2.value().get_long_name(); - EXPECT_EQ(f4.value().get_long_name(), "4-byte float test") - << f4.value().get_long_name(); - EXPECT_EQ(f8.value().get_long_name(), "8-byte float test") - << f8.value().get_long_name(); - EXPECT_EQ(voided.value().get_long_name(), "struct array test") - << voided.value().get_long_name(); +TEST_P(VariableTest, longName) { + for (const auto& def : kTestVariables) { + auto var = OpenTestVariable(def.name, version_, base_path_); + ASSERT_TRUE(var.status().ok()) << var.status(); + EXPECT_EQ(var.value().get_long_name(), def.long_name); + } } -TEST(Variable, optionalAttrs) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); +TEST_P(VariableTest, optionalAttrs) { + auto i2 = OpenTestVariable("i2", version_, base_path_); ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); - - EXPECT_EQ(i2.value().GetAttributes()["attributes"]["foo"], "bar") - << i2.value().GetAttributes(); - EXPECT_EQ(i4.value().GetAttributes()["attributes"]["foo"], "bar") - << i4.value().GetAttributes(); - EXPECT_EQ(i8.value().GetAttributes()["attributes"]["foo"], "bar") - << i8.value().GetAttributes(); - EXPECT_EQ(f2.value().GetAttributes()["attributes"]["foo"], "bar") - << f2.value().GetAttributes(); - EXPECT_EQ(f4.value().GetAttributes()["attributes"]["foo"], "bar") - << f4.value().GetAttributes(); - EXPECT_EQ(f8.value().GetAttributes()["attributes"]["foo"], "bar") - << f8.value().GetAttributes(); - EXPECT_EQ(voided.value().GetAttributes()["attributes"]["foo"], "bar") - << voided.value().GetAttributes(); + EXPECT_EQ(i2.value().GetAttributes()["attributes"]["foo"], "bar"); } -TEST(Variable, namedDimensions) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); +TEST_P(VariableTest, namedDimensions) { + auto i2 = OpenTestVariable("i2", version_, base_path_); ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << 
voided.status(); - - EXPECT_EQ(i2.value().getMetadata()["dimension_names"].size(), 2) - << i2.value().getMetadata(); - EXPECT_EQ(i4.value().getMetadata()["dimension_names"].size(), 2) - << i4.value().getMetadata(); - EXPECT_EQ(i8.value().getMetadata()["dimension_names"].size(), 2) - << i8.value().getMetadata(); - EXPECT_EQ(f2.value().getMetadata()["dimension_names"].size(), 2) - << f2.value().getMetadata(); - EXPECT_EQ(f4.value().getMetadata()["dimension_names"].size(), 2) - << f4.value().getMetadata(); - EXPECT_EQ(f8.value().getMetadata()["dimension_names"].size(), 2) - << f8.value().getMetadata(); - EXPECT_EQ(voided.value().getMetadata()["dimension_names"].size(), 2) - << voided.value().getMetadata(); + EXPECT_EQ(i2.value().getMetadata()["dimension_names"].size(), 2); } -TEST(Variable, sliceByDimIdx) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); +TEST_P(VariableTest, sliceByDimIdx) { + auto i2 = OpenTestVariable("i2", version_, base_path_); + auto f4 = OpenTestVariable("f4", version_, base_path_); ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); mdio::RangeDescriptor zeroIdxSlice = {0, 0, 5, 1}; mdio::RangeDescriptor oneIdxSlice = {1, 0, 5, 1}; auto i2Slice = i2.value().slice(zeroIdxSlice, oneIdxSlice); - auto i4Slice = i4.value().slice(zeroIdxSlice, oneIdxSlice); - auto i8Slice = i8.value().slice(zeroIdxSlice, oneIdxSlice); - auto f2Slice = f2.value().slice(zeroIdxSlice, oneIdxSlice); auto f4Slice = f4.value().slice(zeroIdxSlice, oneIdxSlice); - auto f8Slice = f8.value().slice(zeroIdxSlice, oneIdxSlice); - auto voidedSlice = voided.value().slice(zeroIdxSlice, oneIdxSlice); EXPECT_TRUE(i2Slice.status().ok()) << i2Slice.status(); - EXPECT_TRUE(i4Slice.status().ok()) << i4Slice.status(); - EXPECT_TRUE(i8Slice.status().ok()) << i8Slice.status(); - EXPECT_TRUE(f2Slice.status().ok()) << f2Slice.status(); EXPECT_TRUE(f4Slice.status().ok()) << f4Slice.status(); - EXPECT_TRUE(f8Slice.status().ok()) << f8Slice.status(); - EXPECT_TRUE(voidedSlice.status().ok()) << voidedSlice.status(); EXPECT_THAT(i2Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << i2Slice.value().dimensions(); - EXPECT_THAT(i4Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << i4Slice.value().dimensions(); - EXPECT_THAT(i8Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << i8Slice.value().dimensions(); - EXPECT_THAT(f2Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << f2Slice.value().dimensions(); + ::testing::ElementsAre(5, 5)); EXPECT_THAT(f4Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << f4Slice.value().dimensions(); - EXPECT_THAT(f8Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << f8Slice.value().dimensions(); - EXPECT_THAT(voidedSlice.value().dimensions().shape(), - 
::testing::ElementsAre(5, 5, 14)) - << voidedSlice.value().dimensions(); + ::testing::ElementsAre(5, 5)); } -TEST(Variable, zarrCompatibility) { - const char* basePath = std::getenv(PROJECT_BASE_PATH_ENV); - if (!basePath) { - std::cout << "PROJECT_BASE_PATH environment variable not set. Expecting to " - "be in the 'build/mdio' directory." - << std::endl; - basePath = DEFAULT_BASE_PATH; - } - - // Resolve the absolute path for the script - std::string srcPath = std::string(basePath) + ZARR_SCRIPT_RELATIVE_PATH; - - // Ensure that srcPath is valid and points to an existing file - if (access(srcPath.c_str(), F_OK) == -1) { - std::cerr << "Error: Python script not found at " << srcPath << std::endl; - FAIL() << "Script not found: " << srcPath; - } - - std::vector args = {"i2", "i4", "i8", "f2", - "f4", "f8", "voided"}; - std::vector pids; - - for (const auto& arg : args) { - pid_t pid = fork(); - if (pid == 0) { - // Child process - int result = executePythonScript(srcPath, {FILE_PATH_BASE + arg}); - if (result == 0xfd00) { // 0xfd from Python is 0xfd00 in C++ - GTEST_SKIP() << "Zarr compatibility skipped due to import error for " - "required library"; - exit(SUCCESS_CODE); - } - exit(result); - } else if (pid > 0) { - // Parent process - pids.push_back(pid); - } else { - // Fork failed - perror("fork failed"); - FAIL() << "fork failed"; - } - } +TEST_P(VariableTest, sliceByDimName) { + auto i2 = OpenTestVariable("i2", version_, base_path_); + ASSERT_TRUE(i2.status().ok()) << i2.status(); - // Wait for all child processes - for (pid_t pid : pids) { - int status; - if (waitpid(pid, &status, 0) == -1) { - perror("waitpid failed"); - FAIL() << "waitpid failed"; - } - if (WIFEXITED(status) && WEXITSTATUS(status) == 0xfd00) { - GTEST_SKIP() << "Zarr compatibility skipped due to import error for " - "required library"; - } - EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) - << "Failed to read one of the arguments\n\tMore detailed error " - "expected above..."; - } -} + mdio::RangeDescriptor inlineSlice = {"inline", 0, 5, 1}; + mdio::RangeDescriptor crosslineSpec = {"crossline", 0, 5, 1}; + auto i2Slice = i2.value().slice(inlineSlice, crosslineSpec); -TEST(Variable, dimensionUnits) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); - ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); - - EXPECT_TRUE(i2.value().getMetadata().contains("dimension_units")) - << i2.value().getMetadata(); - EXPECT_TRUE(i4.value().getMetadata().contains("dimension_units")) - << i4.value().getMetadata(); - EXPECT_TRUE(i8.value().getMetadata().contains("dimension_units")) - << i8.value().getMetadata(); - EXPECT_TRUE(f2.value().getMetadata().contains("dimension_units")) - << f2.value().getMetadata(); - EXPECT_TRUE(f4.value().getMetadata().contains("dimension_units")) - << f4.value().getMetadata(); - 
EXPECT_TRUE(f8.value().getMetadata().contains("dimension_units")) - << f8.value().getMetadata(); - EXPECT_TRUE(voided.value().getMetadata().contains("dimension_units")) - << voided.value().getMetadata(); + EXPECT_TRUE(i2Slice.status().ok()) << i2Slice.status(); + EXPECT_THAT(i2Slice.value().dimensions().shape(), + ::testing::ElementsAre(5, 5)); } -TEST(Variable, chunkSize) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); +TEST_P(VariableTest, dimensionUnits) { + auto i2 = OpenTestVariable("i2", version_, base_path_); ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); - - EXPECT_TRUE(i2.value().get_chunk_shape().status().ok()) - << i2.value().get_chunk_shape().status(); - EXPECT_TRUE(i4.value().get_chunk_shape().status().ok()) - << i4.value().get_chunk_shape().status(); - EXPECT_TRUE(i8.value().get_chunk_shape().status().ok()) - << i8.value().get_chunk_shape().status(); - EXPECT_TRUE(f2.value().get_chunk_shape().status().ok()) - << f2.value().get_chunk_shape().status(); - EXPECT_TRUE(f4.value().get_chunk_shape().status().ok()) - << f4.value().get_chunk_shape().status(); - EXPECT_TRUE(f8.value().get_chunk_shape().status().ok()) - << f8.value().get_chunk_shape().status(); - EXPECT_TRUE(voided.value().get_chunk_shape().status().ok()) - << voided.value().get_chunk_shape().status(); + EXPECT_TRUE(i2.value().getMetadata().contains("dimension_units")); } -TEST(Variable, getCompressor) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); +TEST_P(VariableTest, chunkSize) { + auto i2 = OpenTestVariable("i2", version_, base_path_); ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); - - auto i2Json = i2.value().get_spec(); - auto i4Json = i4.value().get_spec(); - auto i8Json = i8.value().get_spec(); - auto f2Json = f2.value().get_spec(); - auto f4Json = f4.value().get_spec(); - auto f8Json = f8.value().get_spec(); - auto voidedJson = voided.value().get_spec(); - - ASSERT_TRUE(i2Json.status().ok()) << i2Json.status(); - ASSERT_TRUE(i4Json.status().ok()) << i4Json.status(); - ASSERT_TRUE(i8Json.status().ok()) << i8Json.status(); - ASSERT_TRUE(f2Json.status().ok()) << 
f2Json.status(); - ASSERT_TRUE(f4Json.status().ok()) << f4Json.status(); - ASSERT_TRUE(f8Json.status().ok()) << f8Json.status(); - ASSERT_TRUE(voidedJson.status().ok()) << voidedJson.status(); - - EXPECT_TRUE(i2Json.value()["metadata"].contains("compressor")) - << i2Json.value(); - EXPECT_TRUE(i4Json.value()["metadata"].contains("compressor")) - << i4Json.value(); - EXPECT_TRUE(i8Json.value()["metadata"].contains("compressor")) - << i8Json.value(); - EXPECT_TRUE(f2Json.value()["metadata"].contains("compressor")) - << f2Json.value(); - EXPECT_TRUE(f4Json.value()["metadata"].contains("compressor")) - << f4Json.value(); - EXPECT_TRUE(f8Json.value()["metadata"].contains("compressor")) - << f8Json.value(); - EXPECT_TRUE(voidedJson.value()["metadata"].contains("compressor")) - << voidedJson.value(); + EXPECT_TRUE(i2.value().get_chunk_shape().status().ok()); } -TEST(Variable, shape) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); +TEST_P(VariableTest, shape) { + auto i2 = OpenTestVariable("i2", version_, base_path_); ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); - - EXPECT_THAT(i2.value().dimensions().shape(), ::testing::ElementsAre(10, 10)) - << i2.value().dimensions(); - EXPECT_THAT(i4.value().dimensions().shape(), ::testing::ElementsAre(10, 10)) - << i4.value().dimensions(); - EXPECT_THAT(i8.value().dimensions().shape(), ::testing::ElementsAre(10, 10)) - << i8.value().dimensions(); - EXPECT_THAT(f2.value().dimensions().shape(), ::testing::ElementsAre(10, 10)) - << f2.value().dimensions(); - EXPECT_THAT(f4.value().dimensions().shape(), ::testing::ElementsAre(10, 10)) - << f4.value().dimensions(); - EXPECT_THAT(f8.value().dimensions().shape(), ::testing::ElementsAre(10, 10)) - << f8.value().dimensions(); - EXPECT_THAT(voided.value().dimensions().shape(), - ::testing::ElementsAre(10, 10, 14)) - << voided.value().dimensions(); + EXPECT_THAT(i2.value().dimensions().shape(), ::testing::ElementsAre(10, 10)); } -TEST(Variable, dtype) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); - ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); - - EXPECT_EQ(i2.value().dtype(), 
mdio::constants::kInt16) << i2.value().dtype(); - EXPECT_EQ(i4.value().dtype(), mdio::constants::kInt32) << i4.value().dtype(); - EXPECT_EQ(i8.value().dtype(), mdio::constants::kInt64) << i8.value().dtype(); - EXPECT_EQ(f2.value().dtype(), mdio::constants::kFloat16) - << f2.value().dtype(); - EXPECT_EQ(f4.value().dtype(), mdio::constants::kFloat32) - << f4.value().dtype(); - EXPECT_EQ(f8.value().dtype(), mdio::constants::kFloat64) - << f8.value().dtype(); - EXPECT_EQ(voided.value().dtype(), mdio::constants::kByte) - << voided.value().dtype(); +TEST_P(VariableTest, dtype) { + for (const auto& def : kTestVariables) { + auto var = OpenTestVariable(def.name, version_, base_path_); + ASSERT_TRUE(var.status().ok()) << var.status(); + EXPECT_EQ(var.value().dtype(), def.expected_dtype) + << "dtype mismatch for " << def.name; + } } -TEST(Variable, domain) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); +TEST_P(VariableTest, domain) { + auto i2 = OpenTestVariable("i2", version_, base_path_); ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); const mdio::Index EXPECTED_SHAPE = 10; - EXPECT_EQ(i2.value().dimensions().shape()[0], EXPECTED_SHAPE) - << i2.value().dimensions(); - EXPECT_EQ(i4.value().dimensions().shape()[0], EXPECTED_SHAPE) - << i4.value().dimensions(); - EXPECT_EQ(i8.value().dimensions().shape()[0], EXPECTED_SHAPE) - << i8.value().dimensions(); - EXPECT_EQ(f2.value().dimensions().shape()[0], EXPECTED_SHAPE) - << f2.value().dimensions(); - EXPECT_EQ(f4.value().dimensions().shape()[0], EXPECTED_SHAPE) - << f4.value().dimensions(); - EXPECT_EQ(f8.value().dimensions().shape()[0], EXPECTED_SHAPE) - << f8.value().dimensions(); - EXPECT_EQ(voided.value().dimensions().shape()[0], EXPECTED_SHAPE) - << voided.value().dimensions(); - - EXPECT_EQ(i2.value().dimensions().rank(), 2) << i2.value().dimensions(); - EXPECT_EQ(i4.value().dimensions().rank(), 2) << i4.value().dimensions(); - EXPECT_EQ(i8.value().dimensions().rank(), 2) << i8.value().dimensions(); - EXPECT_EQ(f2.value().dimensions().rank(), 2) << f2.value().dimensions(); - EXPECT_EQ(f4.value().dimensions().rank(), 2) << f4.value().dimensions(); - EXPECT_EQ(f8.value().dimensions().rank(), 2) << f8.value().dimensions(); - EXPECT_EQ(voided.value().dimensions().rank(), 3) - << voided.value().dimensions(); -} - -TEST(Variable, sliceByDimName) { - auto i2 = mdio::Variable<>::Open(i2Base, mdio::constants::kOpen); - auto i4 = mdio::Variable<>::Open(i4Base, mdio::constants::kOpen); - auto i8 = mdio::Variable<>::Open(i8Base, mdio::constants::kOpen); - auto f2 = mdio::Variable<>::Open(f2Base, mdio::constants::kOpen); - auto f4 = mdio::Variable<>::Open(f4Base, mdio::constants::kOpen); - auto f8 = mdio::Variable<>::Open(f8Base, mdio::constants::kOpen); - auto voided = mdio::Variable<>::Open(voidedBase, mdio::constants::kOpen); - 
ASSERT_TRUE(i2.status().ok()) << i2.status(); - ASSERT_TRUE(i4.status().ok()) << i4.status(); - ASSERT_TRUE(i8.status().ok()) << i8.status(); - ASSERT_TRUE(f2.status().ok()) << f2.status(); - ASSERT_TRUE(f4.status().ok()) << f4.status(); - ASSERT_TRUE(f8.status().ok()) << f8.status(); - ASSERT_TRUE(voided.status().ok()) << voided.status(); - - mdio::RangeDescriptor inlineSlice = {"inline", 0, 5, 1}; - mdio::RangeDescriptor crosslineSpec = {"crossline", 0, 5, 1}; - auto i2Slice = i2.value().slice(inlineSlice, crosslineSpec); - auto i4Slice = i4.value().slice(inlineSlice, crosslineSpec); - auto i8Slice = i8.value().slice(inlineSlice, crosslineSpec); - auto f2Slice = f2.value().slice(inlineSlice, crosslineSpec); - auto f4Slice = f4.value().slice(inlineSlice, crosslineSpec); - auto f8Slice = f8.value().slice(inlineSlice, crosslineSpec); - auto voidedSlice = voided.value().slice(inlineSlice, crosslineSpec); - - EXPECT_TRUE(i2Slice.status().ok()) << i2.status(); - EXPECT_TRUE(i4Slice.status().ok()) << i4Slice.status(); - EXPECT_TRUE(i8Slice.status().ok()) << i8Slice.status(); - EXPECT_TRUE(f2Slice.status().ok()) << f2Slice.status(); - EXPECT_TRUE(f4Slice.status().ok()) << f4Slice.status(); - EXPECT_TRUE(f8Slice.status().ok()) << f8Slice.status(); - EXPECT_TRUE(voidedSlice.status().ok()) << voidedSlice.status(); - EXPECT_THAT(i2Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << i2Slice.value().dimensions(); - EXPECT_THAT(i4Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << i4Slice.value().dimensions(); - EXPECT_THAT(i8Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << i8Slice.value().dimensions(); - EXPECT_THAT(f2Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << f2Slice.value().dimensions(); - EXPECT_THAT(f4Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << f4Slice.value().dimensions(); - EXPECT_THAT(f8Slice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5)) - << f8Slice.value().dimensions(); - EXPECT_THAT(voidedSlice.value().dimensions().shape(), - ::testing::ElementsAre(5, 5, 14)) - << voidedSlice.value().dimensions(); + EXPECT_EQ(i2.value().dimensions().shape()[0], EXPECTED_SHAPE); + EXPECT_EQ(i2.value().dimensions().rank(), 2); } -// A test to clean up after the test suite -TEST(Variable, TEARDOWN) { - std::filesystem::remove_all("zarrs/acceptance/i2"); - std::filesystem::remove_all("zarrs/acceptance/i4"); - std::filesystem::remove_all("zarrs/acceptance/i8"); - std::filesystem::remove_all("zarrs/acceptance/f2"); - std::filesystem::remove_all("zarrs/acceptance/f4"); - std::filesystem::remove_all("zarrs/acceptance/f8"); - std::filesystem::remove_all("zarrs/acceptance/voided"); - std::filesystem::remove_all("zarrs/acceptance"); - ASSERT_TRUE(true); +TEST_P(VariableTest, TEARDOWN) { + for (const auto& def : kTestVariables) { + std::filesystem::remove_all(base_path_ + "/" + def.name); + } } -} // namespace VariableTesting - -namespace VariableDataTest { +INSTANTIATE_TEST_SUITE_P( + ZarrVersions, VariableTest, + ::testing::Values(mdio::zarr::ZarrVersion::kV2, + mdio::zarr::ZarrVersion::kV3), + [](const ::testing::TestParamInfo& info) { + return ZarrVersionToString(info.param); + }); + +// ============================================================================ +// Parameterized VariableData Tests +// ============================================================================ + +class VariableDataTest + : public ::testing::TestWithParam { + protected: + void SetUp() override 
{ + version_ = GetParam(); + base_path_ = GetBasePath(version_); + } -template -mdio::Variable getVariable() { - nlohmann::json i2Spec = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/i2" - } - } - )"_json; - auto i2 = mdio::Variable<>::Open(i2Spec, (mdio::constants::kOpen)); - if (!i2.status().ok()) { - std::cout << "Error opening i2: " << i2.status() << std::endl; - return mdio::Variable(); + mdio::Variable<> getVariable() { + auto var = OpenTestVariable("i2", version_, base_path_); + if (!var.status().ok()) { + std::cout << "Error opening i2: " << var.status() << std::endl; + return mdio::Variable<>(); + } + return var.value(); } - return i2.value(); -} -TEST(VariableData, SETUP) { - mdio::TransactionalOpenOptions options; - auto opt = options.Set(std::move(mdio::constants::kCreateClean)); - nlohmann::json i2Spec = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/i2" - }, - "metadata": { - "dtype": " zeroIdxSlice = {0, 0, 5, 1}; mdio::RangeDescriptor oneIdxSlice = {1, 0, 5, 1}; auto slicedVariableData = variableData.slice(zeroIdxSlice, oneIdxSlice); ASSERT_TRUE(slicedVariableData.status().ok()) << slicedVariableData.status(); EXPECT_THAT(slicedVariableData.value().domain().shape(), - ::testing::ElementsAre(5, 5)) - << slicedVariableData.value().domain(); + ::testing::ElementsAre(5, 5)); } -TEST(VariableData, sliceByDimName) { +TEST_P(VariableDataTest, sliceByDimName) { auto variableData = getVariable().Read().value(); mdio::RangeDescriptor inlineSlice = {"inline", 0, 5, 1}; mdio::RangeDescriptor crosslineSpec = {"crossline", 0, 5, 1}; auto slicedVariableData = variableData.slice(inlineSlice, crosslineSpec); ASSERT_TRUE(slicedVariableData.status().ok()) << slicedVariableData.status(); EXPECT_THAT(slicedVariableData.value().domain().shape(), - ::testing::ElementsAre(5, 5)) - << slicedVariableData.value().domain(); + ::testing::ElementsAre(5, 5)); } -TEST(VariableData, writeToStore) { +TEST_P(VariableDataTest, writeToStore) { auto variable = getVariable(); auto variableData = variable.Read().value(); auto data = @@ -1085,374 +647,104 @@ TEST(VariableData, writeToStore) { auto variableCheck = getVariable().Read().value(); auto dataCheck = reinterpret_cast(variableCheck.get_data_accessor().data()); - EXPECT_EQ(dataCheck[0], 0xff) << dataCheck[0]; + EXPECT_EQ(dataCheck[0], 0xff); } -TEST(VariableData, dimensionUnits) { +TEST_P(VariableDataTest, dimensionUnits) { auto variableData = getVariable().Read().value(); - EXPECT_EQ(variableData.metadata["dimension_units"].size(), 2) - << variableData.metadata; + EXPECT_EQ(variableData.metadata["dimension_units"].size(), 2); } -TEST(VariableData, TEARDOWN) { - std::filesystem::remove_all("zarrs/acceptance/i2"); +TEST_P(VariableDataTest, TEARDOWN) { + std::filesystem::remove_all(base_path_ + "/i2"); ASSERT_TRUE(true); } -} // namespace VariableDataTest - -namespace DatasetTest { +INSTANTIATE_TEST_SUITE_P( + ZarrVersions, VariableDataTest, + ::testing::Values(mdio::zarr::ZarrVersion::kV2, + mdio::zarr::ZarrVersion::kV3), + [](const ::testing::TestParamInfo& info) { + return ZarrVersionToString(info.param); + }); + +// ============================================================================ +// Parameterized Dataset Tests +// ============================================================================ + +class DatasetTest : public ::testing::TestWithParam { + protected: + void SetUp() override { + version_ = GetParam(); + base_path_ = GetBasePath(version_); + 
dataset_manifest_ = GetDatasetManifest(version_); + expected_var_count_ = 9; // Both versions support struct arrays + } -// clang-format off -/*NOLINT*/ std::string datasetManifest = R"( -{ - "metadata": { - "name": "campos_3d", - "apiVersion": "1.0.0", - "createdOn": "2023-12-12T15:02:06.413469-06:00", - "attributes": { - "textHeader": [ - "C01 .......................... ", - "C02 .......................... ", - "C03 .......................... " - ], - "foo": "bar" - } - }, - "variables": [ - { - "name": "image", - "dataType": "float32", - "dimensions": [ - {"name": "inline", "size": 256}, - {"name": "crossline", "size": 512}, - {"name": "depth", "size": 384} - ], - "metadata": { - "chunkGrid": { - "name": "regular", - "configuration": { "chunkShape": [128, 128, 128] } - }, - "statsV1": { - "count": 100, - "sum": 1215.1, - "sumSquares": 125.12, - "min": 5.61, - "max": 10.84, - "histogram": {"binCenters": [1, 2], "counts": [10, 15]} - }, - "attributes": { - "fizz": "buzz" - } - }, - "coordinates": ["inline", "crossline", "depth", "cdp-x", "cdp-y"], - "compressor": {"name": "blosc", "algorithm": "zstd"} - }, - { - "name": "velocity", - "dataType": "float64", - "dimensions": ["inline", "crossline", "depth"], - "metadata": { - "chunkGrid": { - "name": "regular", - "configuration": { "chunkShape": [128, 128, 128] } - }, - "unitsV1": {"speed": "m/s"} - }, - "coordinates": ["inline", "crossline", "depth", "cdp-x", "cdp-y"] - }, - { - "name": "image_inline", - "dataType": "int16", - "dimensions": ["inline", "crossline", "depth"], - "longName": "inline optimized version of 3d_stack", - "compressor": {"name": "blosc", "algorithm": "zstd"}, - "metadata": { - "chunkGrid": { - "name": "regular", - "configuration": { "chunkShape": [128, 128, 128] } - } - }, - "coordinates": ["inline", "crossline", "depth", "cdp-x", "cdp-y"] - }, - { - "name": "image_headers", - "dataType": { - "fields": [ - {"name": "cdp-x", "format": "int32"}, - {"name": "cdp-y", "format": "int32"}, - {"name": "elevation", "format": "float16"}, - {"name": "some_scalar", "format": "float16"} - ] - }, - "dimensions": ["inline", "crossline"], - "metadata": { - "chunkGrid": { - "name": "regular", - "configuration": { "chunkShape": [128, 128] } - } - }, - "coordinates": ["inline", "crossline", "cdp-x", "cdp-y"] - }, - { - "name": "inline", - "dataType": "uint32", - "dimensions": [{"name": "inline", "size": 256}] - }, - { - "name": "crossline", - "dataType": "uint32", - "dimensions": [{"name": "crossline", "size": 512}] - }, - { - "name": "depth", - "dataType": "uint32", - "dimensions": [{"name": "depth", "size": 384}], - "metadata": { - "unitsV1": { "length": "m" } - } - }, - { - "name": "cdp-x", - "dataType": "float32", - "dimensions": [ - {"name": "inline", "size": 256}, - {"name": "crossline", "size": 512} - ], - "metadata": { - "unitsV1": { "length": "m" } - } - }, - { - "name": "cdp-y", - "dataType": "float32", - "dimensions": [ - {"name": "inline", "size": 256}, - {"name": "crossline", "size": 512} - ], - "metadata": { - "unitsV1": { "length": "m" } - } - } - ] -} - )"; -// clang-format on + mdio::zarr::ZarrVersion version_; + std::string base_path_; + std::string dataset_manifest_; + size_t expected_var_count_; +}; -TEST(DatasetSpec, valid) { - nlohmann::json j = nlohmann::json::parse(datasetManifest); - auto res = Construct(j, "zarrs/acceptance"); +TEST_P(DatasetTest, specValid) { + nlohmann::json j = nlohmann::json::parse(dataset_manifest_); + auto res = Construct(j, base_path_, version_); ASSERT_TRUE(res.status().ok()) << 
res.status(); std::tuple> parsed = res.value(); std::vector variables = std::get<1>(parsed); - EXPECT_EQ(variables.size(), 9) << variables.size(); + EXPECT_EQ(variables.size(), expected_var_count_); + + // Verify driver name + std::string expected_driver = GetTestDriverName(version_); + for (const auto& var : variables) { + EXPECT_EQ(var["driver"], expected_driver); + } } -TEST(DatasetSpec, invalid) { - // manifest["variables"][0] missing "name" field - // Compressor for image_inline is also invalid - std::string manifest = R"( -{ - "metadata": { - "name": "campos_3d", - "apiVersion": "1.0.0", - "createdOn": "2023-12-12T15:02:06.413469-06:00", - "attributes": { - "textHeader": [ - "C01 .......................... ", - "C02 .......................... ", - "C03 .......................... " - ], - "foo": "bar" - } - }, - "variables": [ - { - "dataType": "float32", - "dimensions": [ - {"name": "inline", "size": 256}, - {"name": "crossline", "size": 512}, - {"name": "depth", "size": 384} - ], - "metadata": { - "chunkGrid": { - "name": "regular", - "configuration": { "chunkShape": [128, 128, 128] } - }, - "statsV1": { - "count": 100, - "sum": 1215.1, - "sumSquares": 125.12, - "min": 5.61, - "max": 10.84, - "histogram": {"binCenters": [1, 2], "counts": [10, 15]} - }, - "attributes": { - "fizz": "buzz" - } - }, - "coordinates": ["inline", "crossline", "depth", "cdp-x", "cdp-y"], - "compressor": {"name": "blosc", "algorithm": "zstd"} - }, - { - "name": "velocity", - "dataType": "float16", - "dimensions": ["inline", "crossline", "depth"], - "metadata": { - "chunkGrid": { - "name": "regular", - "configuration": { "chunkShape": [128, 128, 128] } - }, - "unitsV1": {"speed": "m/s"} - }, - "coordinates": ["inline", "crossline", "depth", "cdp-x", "cdp-y"] - }, - { - "name": "image_inline", - "dataType": "float32", - "dimensions": ["inline", "crossline", "depth"], - "longName": "inline optimized version of 3d_stack", - "compressor": {"name": "zfp", "mode": "fixed_accuracy", "tolerance": 0.05}, - "metadata": { - "chunkGrid": { - "name": "regular", - "configuration": { "chunkShape": [4, 512, 512] } - } - }, - "coordinates": ["inline", "crossline", "depth", "cdp-x", "cdp-y"] - }, - { - "name": "image_headers", - "dataType": { - "fields": [ - {"name": "cdp-x", "format": "int32"}, - {"name": "cdp-y", "format": "int32"}, - {"name": "elevation", "format": "float16"}, - {"name": "some_scalar", "format": "float16"} - ] - }, - "dimensions": ["inline", "crossline"], - "metadata": { - "chunkGrid": { - "name": "regular", - "configuration": { "chunkShape": [128, 128] } - } - }, - "coordinates": ["inline", "crossline", "cdp-x", "cdp-y"] - }, - { - "name": "inline", - "dataType": "uint32", - "dimensions": [{"name": "inline", "size": 256}] - }, - { - "name": "crossline", - "dataType": "uint32", - "dimensions": [{"name": "crossline", "size": 512}] - }, - { - "name": "depth", - "dataType": "uint32", - "dimensions": [{"name": "depth", "size": 384}], - "metadata": { - "unitsV1": { "length": "m" } - } - }, - { - "name": "cdp-x", - "dataType": "float32", - "dimensions": [ - {"name": "inline", "size": 256}, - {"name": "crossline", "size": 512} - ], - "metadata": { - "unitsV1": { "length": "m" } - } - }, - { - "name": "cdp-y", - "dataType": "float32", - "dimensions": [ - {"name": "inline", "size": 256}, - {"name": "crossline", "size": 512} - ], - "metadata": { - "unitsV1": { "length": "m" } - } - } - ] -} - )"; - - nlohmann::json j = nlohmann::json::parse(manifest); - auto res = Construct(j, "zarrs/acceptance"); - 
ASSERT_FALSE(res.status().ok()) << res.status(); -} - -TEST(Dataset, fillValue) { - nlohmann::json j = nlohmann::json::parse(datasetManifest); - auto ds = mdio::Dataset::from_json(j, "zarrs/acceptance", - mdio::constants::kCreateClean); - ASSERT_TRUE(ds.status().ok()) << ds.status(); - - std::string key = "image_headers"; - auto var = ds.value().get_variable(key); - ASSERT_TRUE(var.status().ok()) << var.status(); - auto vdf = var.value().Read(); - ASSERT_TRUE(vdf.status().ok()) << vdf.status(); - auto vd = vdf.value(); - - auto data = - reinterpret_cast(vd.get_data_accessor().data()); - std::byte zero = std::byte(0); - for (int i = 0; i < 1000000; i++) { - ASSERT_EQ(data[i], zero) << "Expected 0 at byte " << i << " but got " - << static_cast(data[i]); - } - - // This still doesn't work. We end up with the same empty fill values {, , , , - // , , , , , , , } auto dataRes = - // tensorstore::StaticDataTypeCast(vd.get_data_accessor()); - // ASSERT_TRUE(dataRes.status().ok()) << dataRes.status(); - // auto d = dataRes.value(); - // std::cout << d[0][0] << std::endl; -} - -TEST(Dataset, open) { - nlohmann::json j = nlohmann::json::parse(datasetManifest); - auto construct = Construct(j, "zarrs/acceptance"); +TEST_P(DatasetTest, open) { + nlohmann::json j = nlohmann::json::parse(dataset_manifest_); + auto construct = Construct(j, base_path_, version_); ASSERT_TRUE(construct.status().ok()) << construct.status(); - std::tuple> parsed = - construct.value(); - nlohmann::json metadata = std::get<0>(parsed); - std::vector variables = std::get<1>(parsed); - + auto [metadata, variables] = construct.value(); auto dataset = mdio::Dataset::Open(metadata, variables, mdio::constants::kCreateClean); ASSERT_TRUE(dataset.status().ok()) << dataset.status(); } -TEST(Dataset, condensed) { - std::string path = "zarrs/acceptance/"; - auto ds = mdio::Dataset::Open(path, mdio::constants::kOpen); - ASSERT_TRUE(ds.status().ok()) << ds.status(); +TEST_P(DatasetTest, condensed) { + // First create the dataset + nlohmann::json j = nlohmann::json::parse(dataset_manifest_); + auto construct = Construct(j, base_path_, version_); + ASSERT_TRUE(construct.status().ok()) << construct.status(); - std::string missingSlashPath = "zarrs/acceptance"; - auto ds2 = mdio::Dataset::Open(missingSlashPath, mdio::constants::kOpen); - ASSERT_TRUE(ds2.status().ok()) << ds2.status(); + auto [metadata, variables] = construct.value(); + auto createDs = + mdio::Dataset::Open(metadata, variables, mdio::constants::kCreateClean); + ASSERT_TRUE(createDs.status().ok()) << createDs.status(); + + // Now test opening with just the path + auto ds = mdio::Dataset::Open(base_path_, mdio::constants::kOpen); + ASSERT_TRUE(ds.status().ok()) + << "Failed to open with trailing slash: " << ds.status(); + + // Test without trailing slash + std::string path_no_slash = base_path_; + if (!path_no_slash.empty() && path_no_slash.back() == '/') { + path_no_slash.pop_back(); + } + auto ds2 = mdio::Dataset::Open(path_no_slash, mdio::constants::kOpen); + ASSERT_TRUE(ds2.status().ok()) + << "Failed to open without trailing slash: " << ds2.status(); } -TEST(Dataset, read) { - nlohmann::json j = nlohmann::json::parse(datasetManifest); - auto construct = Construct(j, "zarrs/acceptance"); +TEST_P(DatasetTest, read) { + nlohmann::json j = nlohmann::json::parse(dataset_manifest_); + auto construct = Construct(j, base_path_, version_); ASSERT_TRUE(construct.status().ok()) << construct.status(); - std::tuple> parsed = - construct.value(); - nlohmann::json metadata = 
std::get<0>(parsed); - std::vector variables = std::get<1>(parsed); - + auto [metadata, variables] = construct.value(); auto dataset = mdio::Dataset::Open(metadata, variables, mdio::constants::kOpen); ASSERT_TRUE(dataset.status().ok()) << dataset.status(); @@ -1465,17 +757,15 @@ TEST(Dataset, read) { } } -TEST(Dataset, write) { - nlohmann::json j = nlohmann::json::parse(datasetManifest); - auto construct = Construct(j, "zarrs/acceptance"); - - std::tuple> parsed = - construct.value(); - nlohmann::json metadata = std::get<0>(parsed); - std::vector variables = std::get<1>(parsed); +TEST_P(DatasetTest, write) { + nlohmann::json j = nlohmann::json::parse(dataset_manifest_); + auto construct = Construct(j, base_path_, version_); + ASSERT_TRUE(construct.status().ok()) << construct.status(); + auto [metadata, variables] = construct.value(); auto dataset = mdio::Dataset::Open(metadata, variables, mdio::constants::kOpen); + ASSERT_TRUE(dataset.status().ok()) << dataset.status(); auto ds = dataset.value(); std::vector names = ds.variables.get_keys(); @@ -1568,50 +858,32 @@ TEST(Dataset, write) { ASSERT_TRUE(w.status().ok()) << w.status(); } + // Test SelectField and negative case for struct arrays std::string fielded = "image_headers"; ASSERT_TRUE(ds.SelectField(fielded, "cdp-x").status().ok()); auto wf = ds.get_variable(fielded).value().Write(readVariables[4]); ASSERT_FALSE(wf.status().ok()) << wf.status(); - nlohmann::json imageJson = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/image" - } - } - )"_json; - - nlohmann::json velocityJson = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/velocity" - } - } - )"_json; - - nlohmann::json imageInlineJson = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/image_inline" - } - } - )"_json; - - nlohmann::json imageHeadersJson = R"( - { - "driver": "zarr", - "kvstore": { - "driver": "file", - "path": "zarrs/acceptance/image_headers" - } - } - )"_json; + std::string driver = GetTestDriverName(version_); + nlohmann::json imageJson; + imageJson["driver"] = driver; + imageJson["kvstore"]["driver"] = "file"; + imageJson["kvstore"]["path"] = base_path_ + "/image"; + + nlohmann::json velocityJson; + velocityJson["driver"] = driver; + velocityJson["kvstore"]["driver"] = "file"; + velocityJson["kvstore"]["path"] = base_path_ + "/velocity"; + + nlohmann::json imageInlineJson; + imageInlineJson["driver"] = driver; + imageInlineJson["kvstore"]["driver"] = "file"; + imageInlineJson["kvstore"]["path"] = base_path_ + "/image_inline"; + + nlohmann::json imageHeadersJson; + imageHeadersJson["driver"] = driver; + imageHeadersJson["kvstore"]["driver"] = "file"; + imageHeadersJson["kvstore"]["path"] = base_path_ + "/image_headers"; auto image = mdio::Variable<>::Open(imageJson, mdio::constants::kOpen); auto velocity = mdio::Variable<>::Open(velocityJson, mdio::constants::kOpen); @@ -1641,174 +913,154 @@ TEST(Dataset, write) { velocityData.value().get_data_accessor().data()); auto castedImageInline = reinterpret_cast( imageInlineData.value().get_data_accessor().data()); - auto castedImageHeaders = reinterpret_cast( - imageHeadersData.value().get_data_accessor().data()); EXPECT_EQ(castedImage[0], 3.14f) << castedImage[0]; EXPECT_EQ(castedVelociy[0], 2.71828) << castedVelociy[0]; EXPECT_EQ(castedImageInline[0], 0xff) << castedImageInline[0]; - // EXPECT_EQ(castedImageHeaders[0], std::byte(0xffffffffffff)) << "Struct - // array was not correct value"; } 
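// A minimal sketch of the GetTestDriverName helper referenced by the
// parameterized tests above and below. The helper is presumably defined
// earlier in this test file (outside this excerpt); assuming Zarr V3 stores
// use TensorStore's "zarr3" driver and V2 stores use the classic "zarr"
// driver, it could look like:
inline std::string GetTestDriverName(mdio::zarr::ZarrVersion version) {
  // kV3 maps to TensorStore's "zarr3" driver; anything else falls back to
  // the original V2 "zarr" driver used by the pre-existing tests.
  return version == mdio::zarr::ZarrVersion::kV3 ? "zarr3" : "zarr";
}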
-TEST(Dataset, name) { - nlohmann::json j = nlohmann::json::parse(datasetManifest); - auto construct = Construct(j, "zarrs/acceptance"); +TEST_P(DatasetTest, name) { + nlohmann::json j = nlohmann::json::parse(dataset_manifest_); + auto construct = Construct(j, base_path_, version_); ASSERT_TRUE(construct.status().ok()) << construct.status(); - std::tuple> parsed = - construct.value(); - nlohmann::json metadata = std::get<0>(parsed); - std::vector variables = std::get<1>(parsed); - + auto [metadata, variables] = construct.value(); auto dataset = mdio::Dataset::Open(metadata, variables, mdio::constants::kOpen); ASSERT_TRUE(dataset.status().ok()) << dataset.status(); - auto ds = dataset.value(); - - EXPECT_EQ(ds.getMetadata()["name"], "campos_3d") << ds.getMetadata(); + std::string expected_name = + version_ == mdio::zarr::ZarrVersion::kV3 ? "campos_3d_v3" : "campos_3d"; + EXPECT_EQ(dataset.value().getMetadata()["name"], expected_name); } -TEST(Dataset, optionalAttrs) { - nlohmann::json j = nlohmann::json::parse(datasetManifest); - auto construct = Construct(j, "zarrs/acceptance"); +TEST_P(DatasetTest, optionalAttrs) { + nlohmann::json j = nlohmann::json::parse(dataset_manifest_); + auto construct = Construct(j, base_path_, version_); ASSERT_TRUE(construct.status().ok()) << construct.status(); - std::tuple> parsed = - construct.value(); - nlohmann::json metadata = std::get<0>(parsed); - std::vector variables = std::get<1>(parsed); - + auto [metadata, variables] = construct.value(); auto dataset = mdio::Dataset::Open(metadata, variables, mdio::constants::kOpen); ASSERT_TRUE(dataset.status().ok()) << dataset.status(); - auto ds = dataset.value(); - - EXPECT_TRUE(ds.getMetadata().contains("name")) << ds.getMetadata(); + EXPECT_TRUE(dataset.value().getMetadata().contains("name")); } -TEST(Dataset, isel) { - std::string path = "zarrs/acceptance"; - auto dataset = mdio::Dataset::Open(path, mdio::constants::kOpen); +TEST_P(DatasetTest, isel) { + nlohmann::json j = nlohmann::json::parse(dataset_manifest_); + auto construct = Construct(j, base_path_, version_); + ASSERT_TRUE(construct.status().ok()) << construct.status(); + auto [metadata, variables] = construct.value(); + auto dataset = + mdio::Dataset::Open(metadata, variables, mdio::constants::kOpen); ASSERT_TRUE(dataset.status().ok()) << dataset.status(); auto ds = dataset.value(); mdio::RangeDescriptor desc1 = {"inline", 0, 5, 1}; auto slice = ds.isel(desc1); - ASSERT_TRUE(slice.status().ok()); + ASSERT_TRUE(slice.status().ok()) << slice.status(); auto domain = slice->domain; ASSERT_EQ(domain.rank(), 3) << "This should have a rank of 3..."; - // Check depth range auto depthRange = domain[1]; - EXPECT_EQ(depthRange.interval().inclusive_min(), 0) - << "Depth range should start at 0"; - EXPECT_EQ(depthRange.interval().exclusive_max(), 384) - << "Depth range should end at 384"; + EXPECT_EQ(depthRange.interval().inclusive_min(), 0); + EXPECT_EQ(depthRange.interval().exclusive_max(), 384); - // Check crossline range auto crosslineRange = domain[0]; - EXPECT_EQ(crosslineRange.interval().inclusive_min(), 0) - << "Crossline range should start at 0"; - EXPECT_EQ(crosslineRange.interval().exclusive_max(), 512) - << "Crossline range should end at 512"; + EXPECT_EQ(crosslineRange.interval().inclusive_min(), 0); + EXPECT_EQ(crosslineRange.interval().exclusive_max(), 512); - // Check inline range auto inlineRange = domain[2]; - EXPECT_EQ(inlineRange.interval().inclusive_min(), 0) - << "Inline range should start at 0"; - 
EXPECT_EQ(inlineRange.interval().exclusive_max(), 5) - << "Inline range should end at 5"; + EXPECT_EQ(inlineRange.interval().inclusive_min(), 0); + EXPECT_EQ(inlineRange.interval().exclusive_max(), 5); } -TEST(Dataset, xarrayCompatible) { - const char* basePath = std::getenv(PROJECT_BASE_PATH_ENV); - if (!basePath) { - std::cout << "PROJECT_BASE_PATH environment variable not set. Expecting to " - "be in the 'build/mdio' directory." - << std::endl; - basePath = DEFAULT_BASE_PATH; - } - - // Resolve the absolute path for the script - std::string srcPath = std::string(basePath) + XARRAY_SCRIPT_RELATIVE_PATH; - - // Ensure that srcPath is valid and points to an existing file - if (access(srcPath.c_str(), F_OK) == -1) { - std::cerr << "Error: Python script not found at " << srcPath << std::endl; - FAIL() << "Script not found: " << srcPath; - } - - std::vector metadataOptions = {"False", "True"}; - std::vector pids; +TEST_P(DatasetTest, listVars) { + nlohmann::json j = nlohmann::json::parse(dataset_manifest_); + auto construct = Construct(j, base_path_, version_); + ASSERT_TRUE(construct.status().ok()) << construct.status(); - for (const auto& option : metadataOptions) { - pid_t pid = fork(); - if (pid == 0) { - // Child process - int result = executePythonScript(srcPath, {FILE_PATH_BASE, option}); - if (result == 0xfd00) { // 0xfd from Python is 0xfd00 in C++ - GTEST_SKIP() - << "Xarray compatibility skipped due to import error for xarray"; - exit(SUCCESS_CODE); - } - exit(result); - } else if (pid > 0) { - // Parent process - pids.push_back(pid); - } else { - // Fork failed - perror("fork failed"); - FAIL() << "fork failed"; - } - } + auto [metadata, variables] = construct.value(); + auto dataset = + mdio::Dataset::Open(metadata, variables, mdio::constants::kOpen); + ASSERT_TRUE(dataset.status().ok()) << dataset.status(); - // Wait for all child processes - for (pid_t pid : pids) { - int status; - if (waitpid(pid, &status, 0) == -1) { - perror("waitpid failed"); - FAIL() << "waitpid failed"; - } - if (WIFEXITED(status) && WEXITSTATUS(status) == 0xfd00) { - GTEST_SKIP() - << "Xarray compatibility skipped due to import error for xarray"; - } - ASSERT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) - << "xarray compatibility test failed with one of the metadata " - "options\n\tThere was some expected output above..."; - } + std::vector varList = dataset.value().variables.get_keys(); + EXPECT_EQ(varList.size(), expected_var_count_); } -TEST(Dataset, listVars) { - nlohmann::json j = nlohmann::json::parse(datasetManifest); - auto construct = Construct(j, "zarrs/acceptance"); - ASSERT_TRUE(construct.status().ok()) << construct.status(); - - std::tuple> parsed = - construct.value(); - nlohmann::json metadata = std::get<0>(parsed); - std::vector variables = std::get<1>(parsed); +TEST_P(DatasetTest, fromJson) { + std::filesystem::remove_all(base_path_ + "/from_json_test"); + nlohmann::json j = nlohmann::json::parse(dataset_manifest_); auto dataset = - mdio::Dataset::Open(metadata, variables, mdio::constants::kOpen); + mdio::Dataset::from_json(j, base_path_ + "/from_json_test", version_, + mdio::constants::kCreateClean); ASSERT_TRUE(dataset.status().ok()) << dataset.status(); std::vector varList = dataset.value().variables.get_keys(); - EXPECT_TRUE(varList.size() == 9) << "No variables found in dataset"; + EXPECT_EQ(varList.size(), expected_var_count_); + + std::filesystem::remove_all(base_path_ + "/from_json_test"); } -TEST(Dataset, selectField) { - std::string path = "zarrs/acceptance"; - auto 
dataset = mdio::Dataset::Open(path, mdio::constants::kOpen); - std::string name = "image_headers"; +TEST_P(DatasetTest, selectField) { + std::string manifest = R"( +{ + "metadata": { + "name": "select_field_test", + "apiVersion": "1.0.0", + "createdOn": "2023-12-12T15:02:06.413469-06:00" + }, + "variables": [ + { + "name": "image_headers", + "dataType": { + "fields": [ + {"name": "cdp-x", "format": "int32"}, + {"name": "cdp-y", "format": "int32"}, + {"name": "elevation", "format": "float16"}, + {"name": "some_scalar", "format": "float16"} + ] + }, + "dimensions": [ + {"name": "inline", "size": 128}, + {"name": "crossline", "size": 128} + ], + "metadata": { + "chunkGrid": { + "name": "regular", + "configuration": { "chunkShape": [64, 64] } + } + }, + "coordinates": ["inline", "crossline"] + }, + { + "name": "inline", + "dataType": "uint32", + "dimensions": [{"name": "inline", "size": 128}] + }, + { + "name": "crossline", + "dataType": "uint32", + "dimensions": [{"name": "crossline", "size": 128}] + } + ] +} + )"; + std::string test_path = base_path_ + "/select_field_test"; + std::filesystem::remove_all(test_path); + nlohmann::json j = nlohmann::json::parse(manifest); + auto dataset = mdio::Dataset::from_json(j, test_path, version_, + mdio::constants::kCreateClean); ASSERT_TRUE(dataset.status().ok()) << dataset.status(); + auto ds = dataset.value(); + std::string name = "image_headers"; EXPECT_TRUE(ds.get_variable(name).value().dtype() == mdio::constants::kByte) << "Failed to pull byte array from image_headers"; @@ -1845,13 +1097,378 @@ TEST(Dataset, selectField) { EXPECT_FALSE(ds.SelectField("image_headers", "NotAField").status().ok()) << "Somehow pulled NotAField from image_headers"; + + std::filesystem::remove_all(test_path); +} + +TEST_P(DatasetTest, fillValue) { + std::string manifest = R"( +{ + "metadata": { + "name": "fill_value_test", + "apiVersion": "1.0.0", + "createdOn": "2023-12-12T15:02:06.413469-06:00" + }, + "variables": [ + { + "name": "image_headers", + "dataType": { + "fields": [ + {"name": "cdp-x", "format": "int32"}, + {"name": "cdp-y", "format": "int32"}, + {"name": "elevation", "format": "float16"}, + {"name": "some_scalar", "format": "float16"} + ] + }, + "dimensions": [ + {"name": "inline", "size": 256}, + {"name": "crossline", "size": 512} + ], + "metadata": { + "chunkGrid": { + "name": "regular", + "configuration": { "chunkShape": [128, 128] } + } + } + }, + { + "name": "inline", + "dataType": "uint32", + "dimensions": [{"name": "inline", "size": 256}] + }, + { + "name": "crossline", + "dataType": "uint32", + "dimensions": [{"name": "crossline", "size": 512}] + } + ] } + )"; -TEST(Dataset, TEARDOWN) { - std::filesystem::remove_all("zarrs/acceptance"); + std::string test_path = base_path_ + "/fill_value_test"; + std::filesystem::remove_all(test_path); + nlohmann::json j = nlohmann::json::parse(manifest); + auto ds = mdio::Dataset::from_json(j, test_path, version_, + mdio::constants::kCreateClean); + ASSERT_TRUE(ds.status().ok()) << ds.status(); + + std::string key = "image_headers"; + auto var = ds.value().get_variable(key); + ASSERT_TRUE(var.status().ok()) << var.status(); + auto vdf = var.value().Read(); + ASSERT_TRUE(vdf.status().ok()) << vdf.status(); + auto vd = vdf.value(); + + auto data = + reinterpret_cast(vd.get_data_accessor().data()); + std::byte zero = std::byte(0); + for (int i = 0; i < 1000; i++) { + ASSERT_EQ(data[i], zero) << "Expected 0 at byte " << i << " but got " + << static_cast(data[i]); + } + + std::filesystem::remove_all(test_path); +} + 
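// Worked arithmetic behind the fillValue byte scan above, assuming the usual
// field widths (int32 = 4 bytes, float16 = 2 bytes): each "image_headers"
// element packs cdp-x + cdp-y + elevation + some_scalar = 4 + 4 + 2 + 2 = 12
// bytes, so the 256 x 512 struct array occupies 256 * 512 * 12 = 1,572,864
// bytes and the first 1000 bytes checked by the loop sit well inside it.
constexpr std::size_t kHeaderElementBytes = 4 + 4 + 2 + 2;              // 12
constexpr std::size_t kHeaderArrayBytes = 256 * 512 * kHeaderElementBytes;
static_assert(kHeaderArrayBytes == 1572864,
              "expected 256*512 elements at 12 bytes each");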
+TEST_P(DatasetTest, TEARDOWN) { + std::filesystem::remove_all(base_path_); ASSERT_TRUE(true); } -} // namespace DatasetTest +INSTANTIATE_TEST_SUITE_P( + ZarrVersions, DatasetTest, + ::testing::Values(mdio::zarr::ZarrVersion::kV2, + mdio::zarr::ZarrVersion::kV3), + [](const ::testing::TestParamInfo& info) { + return ZarrVersionToString(info.param); + }); + +// ============================================================================ +// Parameterized Python/Xarray Dataset Compatibility Tests +// ============================================================================ + +class XarrayCompatibilityTest + : public ::testing::TestWithParam { + protected: + void SetUp() override { + version_ = GetParam(); + base_path_ = GetBasePath(version_); + } + + mdio::zarr::ZarrVersion version_; + std::string base_path_; +}; + +TEST_P(XarrayCompatibilityTest, datasetCompatible) { + // This test verifies that a Dataset created by MDIO can be opened by xarray. + // The dataset is created fresh for this test to ensure isolation. + std::string manifest = R"( +{ + "metadata": { + "name": "xarray_compat_test", + "apiVersion": "1.0.0", + "createdOn": "2023-12-12T15:02:06.413469-06:00" + }, + "variables": [ + { + "name": "data", + "dataType": "float32", + "dimensions": [ + {"name": "x", "size": 10}, + {"name": "y", "size": 10} + ], + "metadata": { + "chunkGrid": { + "name": "regular", + "configuration": { "chunkShape": [5, 5] } + } + } + }, + { + "name": "x", + "dataType": "int32", + "dimensions": [{"name": "x", "size": 10}] + }, + { + "name": "y", + "dataType": "int32", + "dimensions": [{"name": "y", "size": 10}] + } + ] +} + )"; + + std::string test_path = base_path_ + "/xarray_compat"; + std::filesystem::remove_all(test_path); + + nlohmann::json j = nlohmann::json::parse(manifest); + auto ds = mdio::Dataset::from_json(j, test_path, version_, + mdio::constants::kCreateClean); + ASSERT_TRUE(ds.status().ok()) << ds.status(); + + std::string srcPath = + std::string(GetPythonBasePath()) + XARRAY_SCRIPT_RELATIVE_PATH; + + if (access(srcPath.c_str(), F_OK) == -1) { + std::cerr << "Error: Python script not found at " << srcPath << std::endl; + std::filesystem::remove_all(test_path); + FAIL() << "Script not found: " << srcPath; + } + + // Test without consolidated metadata (both versions) + // Note: Consolidated metadata is only supported for V2 + std::vector> arg_sets = { + {test_path + "/", "False"}, + }; + if (version_ == mdio::zarr::ZarrVersion::kV2) { + // Also test with consolidated metadata for V2 + arg_sets.push_back({test_path + "/", "True"}); + } + + std::string version_name = ZarrVersionToString(version_); + EXPECT_TRUE(RunPythonScripts( + srcPath, arg_sets, "Xarray compatibility skipped due to import error")) + << "xarray " << version_name << " compatibility test failed"; + + // std::filesystem::remove_all(test_path); +} + +INSTANTIATE_TEST_SUITE_P( + ZarrVersions, XarrayCompatibilityTest, + ::testing::Values(mdio::zarr::ZarrVersion::kV2, + mdio::zarr::ZarrVersion::kV3), + [](const ::testing::TestParamInfo& info) { + return ZarrVersionToString(info.param); + }); + +// ============================================================================ +// Dataset::from_json with Version Parameter Tests +// ============================================================================ + +class DatasetFromJsonTest + : public ::testing::TestWithParam { + protected: + void SetUp() override { + version_ = GetParam(); + base_path_ = GetBasePath(version_) + "/from_json"; + std::filesystem::remove_all(base_path_); + } + 
+ void TearDown() override { std::filesystem::remove_all(base_path_); } + + std::string GetSimpleManifest() { + return R"( +{ + "metadata": { + "name": "from_json_test", + "apiVersion": "1.0.0", + "createdOn": "2023-12-12T15:02:06.413469-06:00" + }, + "variables": [ + { + "name": "data", + "dataType": "float32", + "dimensions": [ + {"name": "x", "size": 32}, + {"name": "y", "size": 32} + ], + "metadata": { + "chunkGrid": { + "name": "regular", + "configuration": { "chunkShape": [16, 16] } + } + } + }, + { + "name": "x", + "dataType": "int32", + "dimensions": [{"name": "x", "size": 32}] + }, + { + "name": "y", + "dataType": "int32", + "dimensions": [{"name": "y", "size": 32}] + } + ] +} + )"; + } + + mdio::zarr::ZarrVersion version_; + std::string base_path_; +}; + +TEST_P(DatasetFromJsonTest, createWithExplicitVersion) { + nlohmann::json j = nlohmann::json::parse(GetSimpleManifest()); + auto dataset = mdio::Dataset::from_json(j, base_path_, version_, + mdio::constants::kCreateClean); + ASSERT_TRUE(dataset.status().ok()) << dataset.status(); + + auto ds = dataset.value(); + EXPECT_EQ(ds.getMetadata()["name"], "from_json_test"); + + std::vector varList = ds.variables.get_keys(); + EXPECT_EQ(varList.size(), 3); +} + +TEST_P(DatasetFromJsonTest, createWithOptionalVersion) { + nlohmann::json j = nlohmann::json::parse(GetSimpleManifest()); + auto dataset = mdio::Dataset::from_json( + j, base_path_, std::optional(version_), + mdio::constants::kCreateClean); + ASSERT_TRUE(dataset.status().ok()) << dataset.status(); + + auto ds = dataset.value(); + EXPECT_EQ(ds.getMetadata()["name"], "from_json_test"); +} + +TEST_P(DatasetFromJsonTest, readWrite) { + nlohmann::json j = nlohmann::json::parse(GetSimpleManifest()); + auto datasetRes = mdio::Dataset::from_json(j, base_path_, version_, + mdio::constants::kCreateClean); + ASSERT_TRUE(datasetRes.status().ok()) << datasetRes.status(); + auto ds = datasetRes.value(); + + auto dataVarRes = ds.variables.get("data"); + ASSERT_TRUE(dataVarRes.status().ok()) << dataVarRes.status(); + auto dataVar = dataVarRes.value(); + + auto dataRes = dataVar.Read(); + ASSERT_TRUE(dataRes.status().ok()) << dataRes.status(); + auto data = dataRes.value(); + + auto accessor = data.get_data_accessor().data(); + accessor[0] = 42.0f; + accessor[1] = 43.0f; + + auto writeFut = dataVar.Write(data); + ASSERT_TRUE(writeFut.status().ok()) << writeFut.status(); + + auto rereadFut = dataVar.Read(); + ASSERT_TRUE(rereadFut.status().ok()) << rereadFut.status(); + auto rereadData = rereadFut.value(); + auto rereadAccessor = rereadData.get_data_accessor().data(); + + EXPECT_FLOAT_EQ(rereadAccessor[0], 42.0f); + EXPECT_FLOAT_EQ(rereadAccessor[1], 43.0f); +} + +TEST_P(DatasetFromJsonTest, isel) { + nlohmann::json j = nlohmann::json::parse(GetSimpleManifest()); + auto datasetRes = mdio::Dataset::from_json(j, base_path_, version_, + mdio::constants::kCreateClean); + ASSERT_TRUE(datasetRes.status().ok()) << datasetRes.status(); + auto ds = datasetRes.value(); + + mdio::RangeDescriptor desc1 = {"x", 0, 10, 1}; + mdio::RangeDescriptor desc2 = {"y", 5, 15, 1}; + auto sliceRes = ds.isel(desc1, desc2); + + ASSERT_TRUE(sliceRes.status().ok()) << sliceRes.status(); + auto slice = sliceRes.value(); + + auto domain = slice.domain; + ASSERT_EQ(domain.rank(), 2); +} + +TEST_P(DatasetFromJsonTest, intervals) { + nlohmann::json j = nlohmann::json::parse(GetSimpleManifest()); + auto datasetRes = mdio::Dataset::from_json(j, base_path_, version_, + mdio::constants::kCreateClean); + 
ASSERT_TRUE(datasetRes.status().ok()) << datasetRes.status(); + auto ds = datasetRes.value(); + + auto intervalRes = ds.get_intervals(); + ASSERT_TRUE(intervalRes.ok()) << intervalRes.status(); + auto intervals = intervalRes.value(); + + EXPECT_GE(intervals.size(), 2); +} + +INSTANTIATE_TEST_SUITE_P( + ZarrVersions, DatasetFromJsonTest, + ::testing::Values(mdio::zarr::ZarrVersion::kV2, + mdio::zarr::ZarrVersion::kV3), + [](const ::testing::TestParamInfo& info) { + return ZarrVersionToString(info.param); + }); + +// Test nullopt version (should default to V2) +TEST(DatasetFromJsonNullopt, createWithNulloptVersion) { + std::filesystem::remove_all("zarrs/from_json_nullopt"); + + std::string manifest = R"( +{ + "metadata": { + "name": "nullopt_test", + "apiVersion": "1.0.0", + "createdOn": "2023-12-12T15:02:06.413469-06:00" + }, + "variables": [ + { + "name": "data", + "dataType": "float32", + "dimensions": [{"name": "x", "size": 10}] + }, + { + "name": "x", + "dataType": "int32", + "dimensions": [{"name": "x", "size": 10}] + } + ] +} + )"; + + nlohmann::json j = nlohmann::json::parse(manifest); + std::optional version = std::nullopt; + auto dataset = mdio::Dataset::from_json(j, "zarrs/from_json_nullopt", version, + mdio::constants::kCreateClean); + ASSERT_TRUE(dataset.status().ok()) << dataset.status(); + + auto ds = dataset.value(); + EXPECT_EQ(ds.getMetadata()["name"], "nullopt_test"); + + std::filesystem::remove_all("zarrs/from_json_nullopt"); +} } // namespace diff --git a/mdio/coordinate_selector_test.cc b/mdio/coordinate_selector_test.cc index 93eaec9..7652daa 100644 --- a/mdio/coordinate_selector_test.cc +++ b/mdio/coordinate_selector_test.cc @@ -26,6 +26,7 @@ #include "mdio/dataset.h" #include "mdio/dataset_factory.h" +#include "mdio/zarr/zarr.h" #include "tensorstore/driver/driver.h" #include "tensorstore/driver/registry.h" #include "tensorstore/index_space/dim_expression.h" @@ -43,8 +44,24 @@ namespace { -mdio::Result SetupDataset() { - std::string ds_path = "generic_with_coords.mdio"; +/** + * @brief Returns a string representation of the Zarr version for naming. + */ +std::string ZarrVersionToString(mdio::zarr::ZarrVersion version) { + return version == mdio::zarr::ZarrVersion::kV3 ? "V3" : "V2"; +} + +/** + * @brief Returns the base path for test data based on Zarr version. + */ +std::string GetBasePath(mdio::zarr::ZarrVersion version) { + return version == mdio::zarr::ZarrVersion::kV3 ? 
"generic_with_coords_v3.mdio" + : "generic_with_coords.mdio"; +} + +mdio::Result SetupDataset( + mdio::zarr::ZarrVersion version = mdio::zarr::ZarrVersion::kV2) { + std::string ds_path = GetBasePath(version); std::string schema_str = R"( { "metadata": { @@ -141,8 +158,8 @@ mdio::Result SetupDataset() { })"; auto schema = ::nlohmann::json::parse(schema_str); - auto dsFut = - mdio::Dataset::from_json(schema, ds_path, mdio::constants::kCreate); + auto dsFut = mdio::Dataset::from_json(schema, ds_path, version, + mdio::constants::kCreateClean); if (!dsFut.status().ok()) { return ds_path; } @@ -210,13 +227,31 @@ mdio::Result SetupDataset() { return ds_path; } -TEST(Intersection, SETUP) { - auto pathResult = SetupDataset(); +// ============================================================================ +// Parameterized Coordinate Selector Tests +// ============================================================================ + +class CoordinateSelectorTest + : public ::testing::TestWithParam { + protected: + void SetUp() override { + version_ = GetParam(); + base_path_ = GetBasePath(version_); + } + + void TearDown() override { std::filesystem::remove_all(base_path_); } + + mdio::zarr::ZarrVersion version_; + std::string base_path_; +}; + +TEST_P(CoordinateSelectorTest, SETUP) { + auto pathResult = SetupDataset(version_); ASSERT_TRUE(pathResult.status().ok()) << pathResult.status(); } -TEST(Intersection, constructor) { - auto pathResult = SetupDataset(); +TEST_P(CoordinateSelectorTest, constructor) { + auto pathResult = SetupDataset(version_); ASSERT_TRUE(pathResult.status().ok()) << pathResult.status(); auto path = pathResult.value(); @@ -227,8 +262,8 @@ TEST(Intersection, constructor) { mdio::CoordinateSelector cs(ds); } -TEST(Intersection, add_selection) { - auto pathResult = SetupDataset(); +TEST_P(CoordinateSelectorTest, add_selection) { + auto pathResult = SetupDataset(version_); ASSERT_TRUE(pathResult.status().ok()) << pathResult.status(); auto path = pathResult.value(); @@ -248,8 +283,8 @@ TEST(Intersection, add_selection) { // map but got " << selections.size(); } -TEST(Intersection, range_descriptors) { - auto pathResult = SetupDataset(); +TEST_P(CoordinateSelectorTest, range_descriptors) { + auto pathResult = SetupDataset(version_); ASSERT_TRUE(pathResult.status().ok()) << pathResult.status(); auto path = pathResult.value(); @@ -285,8 +320,8 @@ TEST(Intersection, range_descriptors) { // to have a step of 1"; } -TEST(Intersection, get_inline_range) { - auto pathResult = SetupDataset(); +TEST_P(CoordinateSelectorTest, get_inline_range) { + auto pathResult = SetupDataset(version_); ASSERT_TRUE(pathResult.status().ok()) << pathResult.status(); auto path = pathResult.value(); @@ -311,8 +346,8 @@ TEST(Intersection, get_inline_range) { // } } -TEST(Intersection, get_inline_range_dead) { - auto pathResult = SetupDataset(); +TEST_P(CoordinateSelectorTest, get_inline_range_dead) { + auto pathResult = SetupDataset(version_); ASSERT_TRUE(pathResult.status().ok()) << pathResult.status(); auto path = pathResult.value(); @@ -338,4 +373,12 @@ TEST(Intersection, get_inline_range_dead) { // } } +INSTANTIATE_TEST_SUITE_P( + ZarrVersions, CoordinateSelectorTest, + ::testing::Values(mdio::zarr::ZarrVersion::kV2, + mdio::zarr::ZarrVersion::kV3), + [](const ::testing::TestParamInfo& info) { + return ZarrVersionToString(info.param); + }); + } // namespace diff --git a/mdio/dataset.h b/mdio/dataset.h index 150940b..23de5c1 100644 --- a/mdio/dataset.h +++ b/mdio/dataset.h @@ -22,6 +22,7 @@ #include #include 
#include +#include #include #include #include @@ -34,8 +35,11 @@ #include "mdio/dataset_factory.h" #include "mdio/variable.h" #include "mdio/variable_collection.h" +#include "mdio/zarr/zarr.h" #include "tensorstore/driver/zarr/metadata.h" #include "tensorstore/util/future.h" +#include "tensorstore/util/option.h" +#include "tensorstore/util/status.h" // clang-format off #include // NOLINT @@ -48,201 +52,41 @@ namespace internal { * @brief Retrieves the .zarray JSON metadata from the given `metadata`. * * This function derives the .zarray JSON metadata without actually reading it. + * This is a compatibility wrapper that delegates to the zarr V2 implementation. * * @param metadata The input JSON metadata. * @return An `mdio::Result` containing the .zarray JSON metadata on success, or * an error on failure. + * @deprecated Use zarr::v2::GetZarray directly for V2 stores. */ inline Result get_zarray(const ::nlohmann::json metadata) { - // derive .zarray json metadata (without reading it). - auto json = - metadata; // Why am I doing this? It's an extra copy that does nothing! - nlohmann::json zarray; - if (!json.contains("metadata")) { - json["metadata"] = nlohmann::json::object(); // Just add an empty object - json["metadata"]["attributes"] = - nlohmann::json::object(); // We need attributes as well - } - - // these fields can have defaults: - if (!json["metadata"].contains("order")) { - zarray["order"] = "C"; - } else { - zarray["order"] = json["metadata"]["order"]; - } - if (!json["metadata"].contains("filters")) { - zarray["filters"] = nullptr; - } else { - zarray["filters"] = json["metadata"]["filters"]; - } - - if (!json["metadata"].contains("fill_value")) { - zarray["fill_value"] = nullptr; - } else { - zarray["fill_value"] = json["metadata"]["fill_value"]; - } - - if (!json["metadata"].contains("zarr_format")) { - zarray["zarr_format"] = 2; - } else { - zarray["zarr_format"] = json["metadata"]["zarr_format"]; - } - - if (!json["metadata"].contains("chunks") && - json["metadata"].contains("shape")) { - zarray["chunks"] = json["metadata"]["shape"]; - } else { - zarray["chunks"] = json["metadata"]["chunks"]; - } - - if (!json["metadata"].contains("compressor")) { - zarray["compressor"] = nullptr; - } else { - zarray["compressor"] = json["metadata"]["compressor"]; - } - - if (!json["metadata"].contains("dimension_separator")) { - zarray["dimension_separator"] = "/"; - } else { - zarray["dimension_separator"] = json["metadata"]["dimension_separator"]; - } - - zarray["shape"] = json["metadata"]["shape"]; - zarray["dtype"] = json["metadata"]["dtype"]; - - // fixme chunks must be configured ... - MDIO_ASSIGN_OR_RETURN( - auto zarr_metadata, - tensorstore::internal_zarr::ZarrMetadata::FromJson(zarray)) - - return ::nlohmann::json(zarr_metadata); + return zarr::v2::GetZarray(metadata); } /** - * @brief Writes the zmetadata for the dataset. + * @brief Writes the metadata for the dataset. + * + * For Zarr V2, writes consolidated metadata (.zmetadata, .zgroup, .zattrs). + * For Zarr V3, writes only root zarr.json (no consolidated metadata support). + * + * This overload auto-detects the Zarr version from the first variable's + * driver specification. * * @param dataset_metadata The metadata for the dataset. * @param json_variables The JSON variables. + * @param context Optional TensorStore context for credentials/configuration. * @return An `mdio::Future` representing the asynchronous write. 
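 *
 * @details \b Usage (an illustrative sketch; `dataset_metadata` and
 * `json_variables` are assumed to be the pair produced by a prior call to
 * Construct, as in Dataset::from_json below):
 * @code
 * // The Zarr version (V2 vs V3) is auto-detected from the first variable
 * // spec's driver field before the metadata is written.
 * auto write_future =
 *     mdio::internal::write_zmetadata(dataset_metadata, json_variables);
 * if (!write_future.status().ok()) {
 *   // Handle or propagate the metadata-write failure here.
 * }
 * @endcode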
*/ inline Future write_zmetadata( const ::nlohmann::json& dataset_metadata, - const std::vector<::nlohmann::json>& json_variables) { - // header material at the root of the dataset ... - // Configure a kvstore (we can't deduce if it's in memory etc). - // { - // "kvstore", - // { - // {"driver", "file"}, - // {"path", "name"} - // } - //} - auto zattrs = dataset_metadata; - - // FIXME - generalize for zarr v3 - ::nlohmann::json zgroup; - zgroup["zarr_format"] = 2; - - // The consolidated metadata for the datset - ::nlohmann::json zmetadata; - - // FIXME - don't hard code here ... - zmetadata["zarr_consolidated_format"] = 1; - - zmetadata["metadata"][".zattrs"] = zattrs; - zmetadata["metadata"][".zgroup"] = zgroup; - - std::string zarray_key; - std::string zattrs_key; - std::string driver = - json_variables[0]["kvstore"]["driver"].get(); - - for (const auto& json : json_variables) { - zarray_key = - std::filesystem::path(json["kvstore"]["path"]).stem() / ".zarray"; - zattrs_key = - std::filesystem::path(json["kvstore"]["path"]).stem() / ".zattrs"; - - MDIO_ASSIGN_OR_RETURN(zmetadata["metadata"][zarray_key], get_zarray(json)) - - nlohmann::json fixedJson = json["attributes"]; - fixedJson["_ARRAY_DIMENSIONS"] = fixedJson["dimension_names"]; - fixedJson.erase("dimension_names"); - // We do not want to be seralizing the variable_name. It should be - // self-describing - if (fixedJson.contains("variable_name")) { - fixedJson.erase("variable_name"); - } - if (fixedJson.contains("long_name") && - fixedJson["long_name"].get() == "") { - fixedJson.erase("long_name"); - } - if (fixedJson.contains("metadata")) { - if (fixedJson["metadata"].contains("chunkGrid")) { - fixedJson["metadata"].erase("chunkGrid"); - } - for (auto& item : fixedJson["metadata"].items()) { - fixedJson[item.key()] = std::move(item.value()); - } - fixedJson.erase("metadata"); - } - // Case where an empty array of coordinates were provided - if (fixedJson.contains("coordinates")) { - auto coords = fixedJson["coordinates"]; - if (coords.empty() || - (coords.is_string() && coords.get() == "")) { - fixedJson.erase("coordinates"); - } - } - zmetadata["metadata"][zattrs_key] = fixedJson; - } - - nlohmann::json kvstore = nlohmann::json::object(); - kvstore["driver"] = driver; - std::vector file_parts = absl::StrSplit( - json_variables[0]["kvstore"]["path"].get(), '/'); - size_t toRemove = file_parts.back().size(); - std::string strippedPath = - json_variables[0]["kvstore"]["path"].get().substr( - 0, json_variables[0]["kvstore"]["path"].get().size() - - toRemove - 1); - kvstore["path"] = strippedPath; - - if (driver == "gcs" || driver == "s3") { - kvstore["bucket"] = - json_variables[0]["kvstore"]["bucket"].get(); - std::string cloudPath = kvstore["path"].get(); - kvstore["path"] = cloudPath; + const std::vector<::nlohmann::json>& json_variables, + tensorstore::Context context = tensorstore::Context::Default()) { + zarr::ZarrVersion version = zarr::ZarrVersion::kV2; + if (!json_variables.empty()) { + version = zarr::GetVersionFromSpec(json_variables[0]); } - - auto kvs_future = tensorstore::kvstore::Open(kvstore); - - auto zattrs_future = tensorstore::MapFutureValue( - tensorstore::InlineExecutor{}, - [zattrs = std::move(zattrs)](const tensorstore::KvStore& kvstore) { - return tensorstore::kvstore::Write(kvstore, "/.zattrs", - absl::Cord(zattrs.dump(4))); - }, - kvs_future); - - auto zmetadata_future = tensorstore::MapFutureValue( - tensorstore::InlineExecutor{}, - [zmetadata = std::move(zmetadata)](const tensorstore::KvStore& kvstore) { 
- return tensorstore::kvstore::Write(kvstore, "/.zmetadata", - absl::Cord(zmetadata.dump(4))); - }, - kvs_future); - - auto zgroup_future = tensorstore::MapFutureValue( - tensorstore::InlineExecutor{}, - [zgroup = std::move(zgroup)](const tensorstore::KvStore& kvstore) { - return tensorstore::kvstore::Write(kvstore, "/.zgroup", - absl::Cord(zgroup.dump(4))); - }, - kvs_future); - - return tensorstore::WaitAllFuture(zattrs_future, zmetadata_future, - zgroup_future); + return zarr::WriteDatasetMetadata(version, dataset_metadata, json_variables, + context); } /** @@ -251,143 +95,84 @@ inline Future write_zmetadata( * It will also attempt to infer the driver based on the prefix of the path. * It will default to the "file" driver if no prefix is found. * @param dataset_path The path to the dataset. + * @param context Optional TensorStore context for credentials/configuration. */ inline Future dataset_kvs_store( - const std::string& dataset_path) { - // the tensorstore driver needs a bucket field + const std::string& dataset_path, + tensorstore::Context context = tensorstore::Context::Default()) { ::nlohmann::json kvstore; - absl::string_view output_file = dataset_path; - - if (absl::StartsWith(output_file, "gs://")) { - absl::ConsumePrefix(&output_file, "gs://"); - kvstore["driver"] = "gcs"; - } else if (absl::StartsWith(output_file, "s3://")) { - absl::ConsumePrefix(&output_file, "s3://"); - kvstore["driver"] = "s3"; - } else { - kvstore["driver"] = "file"; - kvstore["path"] = output_file; - return tensorstore::kvstore::Open(kvstore); - } // FIXME - we need azure support ... - - std::vector file_parts = absl::StrSplit(output_file, '/'); - if (file_parts.size() < 2) { + // Use shared utility to infer driver from path prefix + std::string driver = zarr::InferDriverFromPath(dataset_path); + kvstore["driver"] = driver; + + if (driver == "file") { + // Local file system - just normalize with trailing slash + kvstore["path"] = zarr::NormalizePathWithSlash(dataset_path); + return tensorstore::kvstore::Open(kvstore, context); + } + + // Cloud storage (GCS or S3) - extract bucket and path + auto [bucket, path] = zarr::ExtractCloudPath(dataset_path); + if (bucket.empty()) { return absl::InvalidArgumentError( "gcs/s3 drivers requires [s3/gs]://[bucket]/[path_to_file]"); } - std::string bucket = file_parts[0]; - std::string filepath(file_parts[1]); - for (std::size_t i = 2; i < file_parts.size(); ++i) { - filepath += "/" + file_parts[i]; - } - // update the bucket and path ... kvstore["bucket"] = bucket; - kvstore["path"] = filepath; + kvstore["path"] = zarr::NormalizePathWithSlash(path); - return tensorstore::kvstore::Open(kvstore); + return tensorstore::kvstore::Open(kvstore, context); } /** - * @brief Retrieves the .zmetadata for the dataset. - * This is for executing a read on the dataset's consolidated metadata. - * It will also attempt to infer the driver based on the prefix of the path. - * It will default to the "file" driver if no prefix is found. + * @brief Retrieves the metadata for the dataset with auto-detection. + * This version auto-detects the Zarr version by checking for V3 markers first. * @param dataset_path The path to the dataset. - * @return An `mdio::Future` containing the .zmetadata JSON on success, or an + * @param context Optional TensorStore context for credentials/configuration. + * @return An `mdio::Future` containing the metadata JSON on success, or an * error on failure. */ inline Future>> -from_zmetadata(const std::string& dataset_path) { - // e.g. 
dataset_path = "zarrs/acceptance/"; - // FIXME - enable async - auto kvs_future = mdio::internal::dataset_kvs_store(dataset_path).result(); - - if (!kvs_future.ok()) { - return internal::CheckMissingDriverStatus(kvs_future.status()); - } - auto kvs_read_result = - tensorstore::kvstore::Read(kvs_future.value(), ".zmetadata").result(); - if (!kvs_read_result.ok()) { - return internal::CheckMissingDriverStatus(kvs_read_result.status()); - } - - ::nlohmann::json zmetadata; - try { - zmetadata = - ::nlohmann::json::parse(std::string(kvs_read_result.value().value)); - } catch (const nlohmann::json::parse_error& e) { - // It's a common error to not have a trailing slash on the dataset path. - if (!dataset_path.empty() && dataset_path.back() != '/') { - std::string fixPath = dataset_path + "/"; - return mdio::internal::from_zmetadata(fixPath); - } - return absl::Status(absl::StatusCode::kInvalidArgument, e.what()); - } - - if (!zmetadata.contains("metadata")) { - return absl::Status(absl::StatusCode::kInvalidArgument, - "zmetadata does not contain metadata."); - } - - if (!zmetadata["metadata"].contains(".zattrs")) { - return absl::Status(absl::StatusCode::kInvalidArgument, - "zmetadata does not contain dataset metadata."); - } - - auto dataset_metadata = zmetadata["metadata"][".zattrs"]; - - std::string driver = "file"; - // TODO(BrianMichell): Make this more robust. May be invalid if the stored - // path gets mangled somehow. Infer the driver - if (dataset_path.length() > 5) { - if (dataset_path.substr(0, 5) == "gs://") { - driver = "gcs"; - } else if (dataset_path.substr(0, 5) == "s3://") { - driver = "s3"; - } - } - - std::string bucket; - std::string cloudPath; - if (driver != "file") { - std::string providedPath = - dataset_path.substr(5); // Strip the gs:// or s3:// - size_t bucketLen = providedPath.find_first_of('/'); - bucket = providedPath.substr(0, bucketLen); // Extract the bucket name - cloudPath = providedPath.substr( - bucketLen + 1, providedPath.length() - 2); // Extract the path - } - - // Remove .zattrs from metadata - zmetadata["metadata"].erase(".zattrs"); - std::vector json_vars_from_zmeta; - // Assemble a list of json for opening the variables in the dataset. 
- for (auto& element : zmetadata["metadata"].items()) { - // FIXME - remove hard code .zarray - if (element.key().substr(element.key().find_last_of(".") + 1) == "zarray") { - std::string variable_name = - element.key().substr(0, element.key().find("/")); - nlohmann::json new_dict = { - {"driver", "zarr"}, - {"kvstore", - {{"driver", driver}, {"path", dataset_path + "/" + variable_name}}}}; - if (driver != "file") { - new_dict["kvstore"]["bucket"] = bucket; - new_dict["kvstore"]["path"] = cloudPath + variable_name; - } - json_vars_from_zmeta.push_back(new_dict); - } - } - if (!json_vars_from_zmeta.size()) { - return absl::Status(absl::StatusCode::kInvalidArgument, - "Not variables found in zmetadata."); - } +from_zmetadata(const std::string& dataset_path, + tensorstore::Context context = tensorstore::Context::Default()) { + auto kvs_future = mdio::internal::dataset_kvs_store(dataset_path, context); + + auto pair = tensorstore::PromiseFuturePair< + std::tuple<::nlohmann::json, std::vector<::nlohmann::json>>>::Make(); + + kvs_future.ExecuteWhenReady( + [promise = std::move(pair.promise), + dataset_path](tensorstore::ReadyFuture ready_kvs) { + if (!ready_kvs.result().ok()) { + promise.SetResult( + internal::CheckMissingDriverStatus(ready_kvs.result().status())); + return; + } - return tensorstore::ReadyFuture< - std::tuple<::nlohmann::json, std::vector<::nlohmann::json>>>( - std::make_tuple(dataset_metadata, json_vars_from_zmeta)); + // Auto-detect version + auto version_future = zarr::DetectVersion(ready_kvs.value()); + version_future.ExecuteWhenReady( + [promise = std::move(promise), dataset_path, + kvs = ready_kvs.value()]( + tensorstore::ReadyFuture version_ready) { + zarr::ZarrVersion version = zarr::ZarrVersion::kV2; + if (version_ready.result().ok()) { + version = version_ready.value(); + } + auto result_future = zarr::ReadDatasetMetadata( + version, dataset_path, + tensorstore::MakeReadyFuture(kvs)); + + result_future.ExecuteWhenReady( + [promise = std::move(promise)]( + tensorstore::ReadyFuture>> + result) { promise.SetResult(result.result()); }); + }); + }); + + return pair.future; } } // namespace internal @@ -402,11 +187,13 @@ class Dataset { public: Dataset(const nlohmann::json& metadata, const VariableCollection& variables, const coordinate_map& coordinates, - const tensorstore::IndexDomain<>& domain) + const tensorstore::IndexDomain<>& domain, + tensorstore::Context context = tensorstore::Context::Default()) : metadata(metadata), variables(variables), coordinates(coordinates), - domain(domain) {} + domain(domain), + context_(context) {} friend std::ostream& operator<<(std::ostream& os, const Dataset& dataset) { // Output metadata @@ -483,13 +270,99 @@ class Dataset { return intervals; } + /** + * @brief Constructs a Dataset from a JSON schema with explicit Zarr version. + * This method will validate the JSON schema against the MDIO Dataset schema + * and use the specified Zarr format version. + * @param json_schema The JSON schema to validate. + * @param path The path to create/open the dataset. + * @param zarr_version The Zarr format version to use (kV2 or kV3). + * @param options Variadic options for dataset creation/opening. 
+ * @details \b Usage + * + * Create a Zarr V3 dataset given a schema and a path: + * @code + * auto dataset_future = mdio::Dataset::from_json( + * json_spec, + * dataset_path, + * mdio::zarr::ZarrVersion::kV3, + * mdio::constants::kCreate + * ); + * @endcode + * + * @return An `mdio::Future` resolves to a Dataset if successful, or an error + * if the schema is invalid. + */ + template + static Future from_json(::nlohmann::json& json_schema /*NOLINT*/, + const std::string& path, + zarr::ZarrVersion zarr_version, + Option&&... options) { + // json describing the vars ... + MDIO_ASSIGN_OR_RETURN(auto validated_schema, + Construct(json_schema, path, zarr_version)) + auto [dataset_metadata, json_vars] = validated_schema; + + return mdio::Dataset::Open(dataset_metadata, json_vars, + std::forward