From a0ffbb39dd5791fd1994372feaec4d3ee581a3a3 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 16 Mar 2026 15:12:49 +0000 Subject: [PATCH 1/7] Implement official support for `structured` and `struct` dtypes according to new extension. --- src/zarr/codecs/bytes.py | 17 +- src/zarr/core/array.py | 6 +- src/zarr/core/dtype/npy/structured.py | 99 +++++++++--- tests/test_dtype/test_npy/test_structured.py | 158 +++++++++++++++++-- 4 files changed, 239 insertions(+), 41 deletions(-) diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 1fbdeef497..23ae626385 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -1,6 +1,7 @@ from __future__ import annotations import sys +import warnings from dataclasses import dataclass, replace from enum import Enum from typing import TYPE_CHECKING @@ -9,6 +10,7 @@ from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON, parse_enum, parse_named_configuration from zarr.core.dtype.common import HasEndianness +from zarr.core.dtype.npy.structured import Structured if TYPE_CHECKING: from typing import Self @@ -56,7 +58,20 @@ def to_dict(self) -> dict[str, JSON]: return {"name": "bytes", "configuration": {"endian": self.endian.value}} def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: - if not isinstance(array_spec.dtype, HasEndianness): + if isinstance(array_spec.dtype, Structured): + if array_spec.dtype.has_multi_byte_fields(): + if self.endian is None: + warnings.warn( + "Missing 'endian' for structured dtype with multi-byte fields. " + "Assuming little-endian for legacy compatibility.", + UserWarning, + stacklevel=2, + ) + return replace(self, endian=Endian.little) + else: + if self.endian is not None: + return replace(self, endian=None) + elif not isinstance(array_spec.dtype, HasEndianness): if self.endian is not None: return replace(self, endian=None) elif self.endian is None: diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 564d0e915a..f6a1bc77d4 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -66,6 +66,7 @@ ) from zarr.core.config import config as zarr_config from zarr.core.dtype import ( + Structured, VariableLengthBytes, VariableLengthUTF8, ZDType, @@ -5054,10 +5055,13 @@ def default_serializer_v3(dtype: ZDType[Any, Any]) -> ArrayBytesCodec: length strings and variable length bytes have hard-coded serializers -- ``VLenUTF8Codec`` and ``VLenBytesCodec``, respectively. + Structured data types with multi-byte fields use ``BytesCodec`` with little-endian encoding. """ serializer: ArrayBytesCodec = BytesCodec(endian=None) - if isinstance(dtype, HasEndianness): + if isinstance(dtype, HasEndianness) or ( + isinstance(dtype, Structured) and dtype.has_multi_byte_fields() + ): serializer = BytesCodec(endian="little") elif isinstance(dtype, HasObjectCodec): if dtype.object_codec_id == "vlen-bytes": diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py index 8bedee07ef..6a22701df6 100644 --- a/src/zarr/core/dtype/npy/structured.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -58,28 +58,32 @@ class StructuredJSON_V2(DTypeConfig_V2[StructuredName_V2, None]): class StructuredJSON_V3( - NamedConfig[Literal["structured"], dict[str, Sequence[Sequence[str | DTypeJSON]]]] + NamedConfig[Literal["struct", "structured"], dict[str, Sequence[dict[str, str | DTypeJSON]]]] ): """ A JSON representation of a structured data type in Zarr V3. References ---------- - This representation is not currently defined in an external specification. + The Zarr V3 specification for this data type is defined in the zarr-extensions repository: + https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/struct Examples -------- ```python { - "name": "structured", + "name": "struct", "configuration": { "fields": [ - ["f0", "int32"], - ["f1", "float64"], + {"name": "f0", "data_type": "int32"}, + {"name": "f1", "data_type": "float64"}, ] } } ``` + + The legacy tuple format ``[["f0", "int32"], ["f1", "float64"]]`` is also + accepted when reading for backward compatibility. """ @@ -98,12 +102,14 @@ class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): References ---------- - This data type does not have a Zarr V3 specification. + The Zarr V3 specification for this data type is defined in the zarr-extensions repository: + https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/struct The Zarr V2 data type specification can be found [here](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). """ - _zarr_v3_name: ClassVar[Literal["structured"]] = "structured" + _zarr_v3_name: ClassVar[Literal["struct"]] = "struct" + _zarr_v3_names: ClassVar[tuple[str, ...]] = ("struct", "structured") dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] fields: tuple[tuple[str, ZDType[TBaseDType, TBaseScalar]], ...] @@ -234,11 +240,10 @@ def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[StructuredJSON_V3]: True if the input is a valid JSON representation of a structured data type for Zarr V3, False otherwise. """ - return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} - and data["name"] == cls._zarr_v3_name + and data["name"] in cls._zarr_v3_names and isinstance(data["configuration"], dict) and set(data["configuration"].keys()) == {"fields"} ) @@ -274,12 +279,24 @@ def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): config = data["configuration"] meta_fields = config["fields"] - return cls( - fields=tuple( + dtype_name = data["name"] + parsed_fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] + for field in meta_fields: + if dtype_name == "struct": + if not isinstance(field, dict): + msg = f"Invalid field format for 'struct' dtype. Expected object with 'name' and 'data_type' keys, got {field!r}" + raise DataTypeValidationError(msg) + f_name = field["name"] + f_dtype = field["data_type"] + else: + if isinstance(field, dict): + msg = f"Invalid field format for 'structured' dtype. Expected [name, dtype] tuple, got {field!r}" + raise DataTypeValidationError(msg) + f_name, f_dtype = field + parsed_fields.append( (f_name, get_data_type_from_json(f_dtype, zarr_format=3)) # type: ignore[misc] - for f_name, f_dtype in meta_fields ) - ) + return cls(fields=tuple(parsed_fields)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON object with the key {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @@ -317,7 +334,7 @@ def to_json(self, zarr_format: ZarrFormat) -> StructuredJSON_V2 | StructuredJSON elif zarr_format == 3: v3_unstable_dtype_warning(self) fields = [ - [f_name, f_dtype.to_json(zarr_format=zarr_format)] # type: ignore[list-item] + {"name": f_name, "data_type": f_dtype.to_json(zarr_format=zarr_format)} for f_name, f_dtype in self.fields ] base_dict = { @@ -425,7 +442,9 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: Parameters ---------- data : JSON - The JSON-serializable value. + The JSON-serializable value. Can be either: + - A dict mapping field names to values (primary format for V3) + - A base64-encoded string (legacy format, for backward compatibility) zarr_format : ZarrFormat The zarr format version. @@ -437,17 +456,27 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: Raises ------ TypeError - If the input is not a base64-encoded string. + If the input is not a dict or base64-encoded string. """ - if check_json_str(data): + if isinstance(data, dict): + field_values = [] + for field_name, field_dtype in self.fields: + if field_name in data: + field_values.append( + field_dtype.from_json_scalar(data[field_name], zarr_format=zarr_format) + ) + else: + field_values.append(field_dtype.default_scalar()) + return self._cast_scalar_unchecked(tuple(field_values)) + elif check_json_str(data): as_bytes = bytes_from_json(data, zarr_format=zarr_format) dtype = self.to_native_dtype() return cast("np.void", np.array([as_bytes]).view(dtype)[0]) - raise TypeError(f"Invalid type: {data}. Expected a string.") + raise TypeError(f"Invalid type: {data}. Expected a dict or base64-encoded string.") - def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str | dict[str, JSON]: """ - Convert a scalar to a JSON-serializable string representation. + Convert a scalar to a JSON-serializable representation. Parameters ---------- @@ -458,11 +487,19 @@ def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: Returns ------- - str - A string representation of the scalar, which is a base64-encoded - string of the bytes that make up the scalar. + str | dict[str, JSON] + For V2: A base64-encoded string of the bytes that make up the scalar. + For V3: A dict mapping field names to their JSON-serialized values. """ - return bytes_to_json(self.cast_scalar(data).tobytes(), zarr_format) + scalar = self.cast_scalar(data) + if zarr_format == 2: + return bytes_to_json(scalar.tobytes(), zarr_format) + result: dict[str, JSON] = {} + for field_name, field_dtype in self.fields: + result[field_name] = field_dtype.to_json_scalar( + scalar[field_name], zarr_format=zarr_format + ) + return result @property def item_size(self) -> int: @@ -475,3 +512,17 @@ def item_size(self) -> int: The size of a single scalar in bytes. """ return self.to_native_dtype().itemsize + + def has_multi_byte_fields(self) -> bool: + """ + Check if this structured dtype has any fields with item_size > 1. + + Returns + ------- + bool + True if any field has item_size > 1, False otherwise. + """ + return any( + isinstance(field_dtype, HasItemSize) and field_dtype.item_size > 1 + for _, field_dtype in self.fields + ) diff --git a/tests/test_dtype/test_npy/test_structured.py b/tests/test_dtype/test_npy/test_structured.py index e2cd2a6dfe..d2cacd1502 100644 --- a/tests/test_dtype/test_npy/test_structured.py +++ b/tests/test_dtype/test_npy/test_structured.py @@ -12,7 +12,9 @@ Int32, Int64, Structured, + UInt8, ) +from zarr.core.dtype.common import DataTypeValidationError class TestStructured(BaseTestZDType): @@ -32,29 +34,32 @@ class TestStructured(BaseTestZDType): ) valid_json_v3 = ( { - "name": "structured", + "name": "struct", "configuration": { "fields": [ - ["field1", "int32"], - ["field2", "float64"], + {"name": "field1", "data_type": "int32"}, + {"name": "field2", "data_type": "float64"}, ] }, }, { - "name": "structured", + "name": "struct", "configuration": { "fields": [ - [ - "field1", - { + { + "name": "field1", + "data_type": { "name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 1}, }, - ], - [ - "field2", - {"name": "fixed_length_utf32", "configuration": {"length_bytes": 32}}, - ], + }, + { + "name": "field2", + "data_type": { + "name": "fixed_length_utf32", + "configuration": {"length_bytes": 32}, + }, + }, ] }, }, @@ -65,7 +70,7 @@ class TestStructured(BaseTestZDType): ) invalid_json_v3 = ( { - "name": "structured", + "name": "struct", "configuration": { "fields": [ ("field1", {"name": "int32", "configuration": {"endianness": "invalid"}}), @@ -81,8 +86,11 @@ class TestStructured(BaseTestZDType): (Structured(fields=(("field1", Float16()), ("field2", Int32()))), "AQAAAAAA"), ) scalar_v3_params = ( - (Structured(fields=(("field1", Int32()), ("field2", Float64()))), "AQAAAAAAAAAAAPA/"), - (Structured(fields=(("field1", Int64()), ("field2", Int32()))), "AQAAAAAAAAAAAPA/"), + ( + Structured(fields=(("field1", Int32()), ("field2", Float64()))), + {"field1": 1, "field2": 1.0}, + ), + (Structured(fields=(("field1", Int64()), ("field2", Int32()))), {"field1": 1, "field2": 1}), ) cast_value_params = ( @@ -122,3 +130,123 @@ def test_invalid_size() -> None: msg = f"must have at least one field. Got {fields!r}" with pytest.raises(ValueError, match=msg): Structured(fields=fields) + + +@pytest.mark.filterwarnings("ignore::zarr.errors.UnstableSpecificationWarning") +def test_struct_name_is_primary() -> None: + """ + Test that 'struct' is the primary name written to JSON. + """ + dtype = Structured(fields=(("field1", Int32()), ("field2", Float64()))) + json_v3 = dtype.to_json(zarr_format=3) + assert json_v3["name"] == "struct" + + +def test_structured_legacy_name_with_tuple_format() -> None: + """ + Test that the legacy 'structured' name with tuple field format is accepted. + """ + json_v3 = { + "name": "structured", + "configuration": { + "fields": [ + ["field1", "int32"], + ["field2", "float64"], + ] + }, + } + dtype = Structured.from_json(json_v3, zarr_format=3) + assert dtype.fields[0][0] == "field1" + assert dtype.fields[1][0] == "field2" + + +def test_struct_rejects_tuple_format() -> None: + """ + Test that 'struct' dtype rejects the legacy tuple field format. + """ + json_v3 = { + "name": "struct", + "configuration": { + "fields": [ + ["field1", "int32"], + ["field2", "float64"], + ] + }, + } + with pytest.raises(DataTypeValidationError, match="Invalid field format for 'struct'"): + Structured.from_json(json_v3, zarr_format=3) + + +def test_structured_rejects_object_format() -> None: + """ + Test that 'structured' dtype rejects the new object field format. + """ + json_v3 = { + "name": "structured", + "configuration": { + "fields": [ + {"name": "field1", "data_type": "int32"}, + {"name": "field2", "data_type": "float64"}, + ] + }, + } + with pytest.raises(DataTypeValidationError, match="Invalid field format for 'structured'"): + Structured.from_json(json_v3, zarr_format=3) + + +def test_fill_value_dict_form() -> None: + """ + Test that dict form fill values are properly parsed. + """ + dtype = Structured(fields=(("x", Int32()), ("y", Float64()))) + fill_value = dtype.from_json_scalar({"x": 42, "y": 3.14}, zarr_format=3) + assert fill_value["x"] == 42 + assert fill_value["y"] == 3.14 + + +def test_fill_value_dict_form_missing_fields() -> None: + """ + Test that missing fields in dict form fill values use defaults. + """ + dtype = Structured(fields=(("x", Int32()), ("y", Float64()))) + fill_value = dtype.from_json_scalar({"x": 42}, zarr_format=3) + assert fill_value["x"] == 42 + assert fill_value["y"] == 0.0 + + +def test_fill_value_legacy_base64() -> None: + """ + Test that legacy base64-encoded fill values are still readable. + """ + dtype = Structured(fields=(("field1", Int32()), ("field2", Float64()))) + fill_value = dtype.from_json_scalar("AQAAAAAAAAAAAPA/", zarr_format=3) + assert fill_value["field1"] == 1 + assert fill_value["field2"] == 1.0 + + +def test_fill_value_to_json_dict_form() -> None: + """ + Test that fill values are serialized as dict form. + """ + dtype = Structured(fields=(("x", Int32()), ("y", Float64()))) + scalar = np.array((42, 3.14), dtype=[("x", np.int32), ("y", np.float64)])[()] + json_val = dtype.to_json_scalar(scalar, zarr_format=3) + assert isinstance(json_val, dict) + assert json_val["x"] == 42 + assert json_val["y"] == 3.14 + + +def test_has_multi_byte_fields_true() -> None: + """ + Test that has_multi_byte_fields returns True for dtypes with multi-byte fields. + """ + dtype = Structured(fields=(("field1", Int32()), ("field2", Float64()))) + assert dtype.has_multi_byte_fields() is True + + +def test_has_multi_byte_fields_false() -> None: + """ + Test that has_multi_byte_fields returns False for dtypes with only single-byte fields. + """ + dtype = Structured(fields=(("field1", UInt8()), ("field2", UInt8()))) + assert dtype.has_multi_byte_fields() is False From 653476320bc1689cbac3c2f0441c43c1dd048649 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 16 Mar 2026 15:20:50 +0000 Subject: [PATCH 2/7] Update docs and add changelog --- changes/3781.feature.md | 8 ++++++++ docs/user-guide/data_types.md | 31 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 changes/3781.feature.md diff --git a/changes/3781.feature.md b/changes/3781.feature.md new file mode 100644 index 0000000000..4a83124ac1 --- /dev/null +++ b/changes/3781.feature.md @@ -0,0 +1,8 @@ +Updated structured dtype implementation to match the merged zarr-extensions spec for `struct` data types. + +Key changes: +- The primary V3 name is now `struct` (previously `structured`) +- Fields use object format: `{"name": "x", "data_type": "int32"}` instead of tuples +- Fill values use dict format: `{"x": 1, "y": 2.0}` instead of base64 +- The `bytes` codec requires explicit `endian` for structured types with multi-byte fields +- Legacy `structured` name with tuple format is accepted for backward compatibility when reading diff --git a/docs/user-guide/data_types.md b/docs/user-guide/data_types.md index aa19baf891..3cdafb5f28 100644 --- a/docs/user-guide/data_types.md +++ b/docs/user-guide/data_types.md @@ -229,6 +229,37 @@ here, it's possible to create it yourself: see [Adding New Data Types](#adding-n #### Struct-like - [Structured][zarr.dtype.Structured] +!!! note "Zarr V3 Structured Data Types" + + In Zarr V3, structured data types are specified using the `struct` extension defined in the + [zarr-extensions repository](https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/struct). + The JSON representation uses an object format for fields: + + ```json + { + "name": "struct", + "configuration": { + "fields": [ + {"name": "x", "data_type": "float32"}, + {"name": "y", "data_type": "int64"} + ] + } + } + ``` + + For backward compatibility, Zarr Python also accepts the legacy `structured` name with + tuple-format fields when reading existing data. + + Fill values for structured types are represented as JSON objects mapping field names to values: + + ```json + {"x": 1.5, "y": 42} + ``` + + When using structured types with multi-byte fields, the `bytes` codec must specify an + explicit `endian` parameter. If omitted, Zarr Python assumes little-endian for legacy + compatibility but emits a warning. + ### Example Usage This section will demonstrates the basic usage of Zarr data types. From 5dd651311027ff28e3821e99aed5bc4d8bb98a3d Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 16 Mar 2026 16:28:21 +0000 Subject: [PATCH 3/7] Implement `struct` subclass instead of modifying `structured` --- changes/3781.feature.md | 9 +- src/zarr/core/dtype/__init__.py | 19 +- src/zarr/core/dtype/npy/structured.py | 264 +++++++++++++------ tests/test_dtype/conftest.py | 9 +- tests/test_dtype/test_npy/test_structured.py | 177 +++++++------ 5 files changed, 295 insertions(+), 183 deletions(-) diff --git a/changes/3781.feature.md b/changes/3781.feature.md index 4a83124ac1..191ca5ed4a 100644 --- a/changes/3781.feature.md +++ b/changes/3781.feature.md @@ -1,8 +1 @@ -Updated structured dtype implementation to match the merged zarr-extensions spec for `struct` data types. - -Key changes: -- The primary V3 name is now `struct` (previously `structured`) -- Fields use object format: `{"name": "x", "data_type": "int32"}` instead of tuples -- Fill values use dict format: `{"x": 1, "y": 2.0}` instead of base64 -- The `bytes` codec requires explicit `endian` for structured types with multi-byte fields -- Legacy `structured` name with tuple format is accepted for backward compatibility when reading +Added `Struct` class (subclass of `Structured`) implementing the zarr-extensions `struct` dtype spec. Uses object-style field format and dict fill values. Legacy `Structured` remains available for backward compatibility. diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 1049a2063f..3df5034276 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -21,7 +21,13 @@ from zarr.core.dtype.npy.complex import Complex64, Complex128 from zarr.core.dtype.npy.float import Float16, Float32, Float64 from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 -from zarr.core.dtype.npy.structured import Structured, StructuredJSON_V2, StructuredJSON_V3 +from zarr.core.dtype.npy.structured import ( + Struct, + StructJSON_V3, + Structured, + StructuredJSON_V2, + StructuredJSON_V3, +) from zarr.core.dtype.npy.time import ( DateTime64, DateTime64JSON_V2, @@ -75,6 +81,8 @@ "RawBytes", "RawBytesJSON_V2", "RawBytesJSON_V3", + "Struct", + "StructJSON_V3", "Structured", "StructuredJSON_V2", "StructuredJSON_V3", @@ -125,6 +133,7 @@ | StringDType | BytesDType | Structured + | Struct | TimeDType | VariableLengthBytes ) @@ -137,7 +146,7 @@ *COMPLEX_FLOAT_DTYPE, *STRING_DTYPE, *BYTES_DTYPE, - Structured, + Struct, *TIME_DTYPE, VariableLengthBytes, ) @@ -155,6 +164,10 @@ # mypy does not know that all the elements of ANY_DTYPE are subclasses of ZDType data_type_registry.register(dtype._zarr_v3_name, dtype) # type: ignore[arg-type] +# Register Structured for reading legacy "structured" format JSON, but don't include it in +# ANY_DTYPE since it doesn't support native dtype matching (use Struct instead). +data_type_registry.register(Structured._zarr_v3_name, Structured) + # TODO: find a better name for this function def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[TBaseDType, TBaseScalar]: @@ -268,7 +281,7 @@ def parse_dtype( # First attempt to interpret the input as JSON if isinstance(dtype_spec, Mapping | str | Sequence): try: - return get_data_type_from_json(dtype_spec, zarr_format=zarr_format) # type: ignore[arg-type] + return get_data_type_from_json(dtype_spec, zarr_format=zarr_format) except ValueError: # no data type matched this JSON-like input pass diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py index 6a22701df6..2cb1be3295 100644 --- a/src/zarr/core/dtype/npy/structured.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -58,10 +58,35 @@ class StructuredJSON_V2(DTypeConfig_V2[StructuredName_V2, None]): class StructuredJSON_V3( - NamedConfig[Literal["struct", "structured"], dict[str, Sequence[dict[str, str | DTypeJSON]]]] + NamedConfig[Literal["structured"], dict[str, Sequence[Sequence[str | DTypeJSON]]]] ): """ - A JSON representation of a structured data type in Zarr V3. + A JSON representation of a structured data type in Zarr V3 (legacy format). + + This is the legacy format using tuple-style field definitions. + For the canonical format, see ``StructJSON_V3``. + + Examples + -------- + ```python + { + "name": "structured", + "configuration": { + "fields": [ + ["f0", "int32"], + ["f1", "float64"], + ] + } + } + ``` + """ + + +class StructJSON_V3( + NamedConfig[Literal["struct"], dict[str, Sequence[dict[str, str | DTypeJSON]]]] +): + """ + A JSON representation of a structured data type in Zarr V3 (canonical format). References ---------- @@ -81,20 +106,20 @@ class StructuredJSON_V3( } } ``` - - The legacy tuple format ``[["f0", "int32"], ["f1", "float64"]]`` is also - accepted when reading for backward compatibility. """ @dataclass(frozen=True, kw_only=True) class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): """ - A Zarr data type for arrays containing structured scalars, AKA "record arrays". + A Zarr data type for arrays containing structured scalars, AKA "record arrays" (legacy format). Wraps the NumPy `np.dtypes.VoidDType` if the data type has fields. Scalars for this data type are instances of `np.void`, with a ``fields`` attribute. + This class handles the legacy "structured" format with tuple-style field definitions. + For the canonical "struct" format, see ``Struct``. + Attributes ---------- fields : Sequence[tuple[str, ZDType]] @@ -102,14 +127,10 @@ class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): References ---------- - The Zarr V3 specification for this data type is defined in the zarr-extensions repository: - https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/struct - The Zarr V2 data type specification can be found [here](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). """ - _zarr_v3_name: ClassVar[Literal["struct"]] = "struct" - _zarr_v3_names: ClassVar[tuple[str, ...]] = ("struct", "structured") + _zarr_v3_name: ClassVar[Literal["structured"]] = "structured" dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] fields: tuple[tuple[str, ZDType[TBaseDType, TBaseScalar]], ...] @@ -119,61 +140,11 @@ def __post_init__(self) -> None: @classmethod def _check_native_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: - """ - Check that this dtype is a numpy structured dtype - - Parameters - ---------- - dtype : np.dtypes.DTypeLike - The dtype to check. - - Returns - ------- - TypeGuard[np.dtypes.VoidDType] - True if the dtype matches, False otherwise. - """ - return isinstance(dtype, cls.dtype_cls) and dtype.fields is not None + return False @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: - """ - Create a Structured ZDType from a native NumPy data type. - - Parameters - ---------- - dtype : TBaseDType - The native data type. - - Returns - ------- - Self - An instance of this data type. - - Raises - ------ - DataTypeValidationError - If the input data type is not an instance of np.dtypes.VoidDType with a non-null - ``fields`` attribute. - - Notes - ----- - This method attempts to resolve the fields of the structured dtype using the data type - registry. - """ - from zarr.core.dtype import get_data_type_from_native_dtype - - fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] - if cls._check_native_dtype(dtype): - # fields of a structured numpy dtype are either 2-tuples or 3-tuples. we only - # care about the first element in either case. - for key, (dtype_instance, *_) in dtype.fields.items(): # type: ignore[union-attr] - dtype_wrapped = get_data_type_from_native_dtype(dtype_instance) - fields.append((key, dtype_wrapped)) - - return cls(fields=tuple(fields)) - raise DataTypeValidationError( - f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" - ) + raise DataTypeValidationError(f"Use 'Struct' for native dtype matching. Got: {dtype}") def to_native_dtype(self) -> np.dtypes.VoidDType[int]: """ @@ -243,20 +214,16 @@ def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[StructuredJSON_V3]: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} - and data["name"] in cls._zarr_v3_names + and data["name"] == cls._zarr_v3_name and isinstance(data["configuration"], dict) and set(data["configuration"].keys()) == {"fields"} ) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: - # avoid circular import from zarr.core.dtype import get_data_type_from_json if cls._check_json_v2(data): - # structured dtypes are constructed directly from a list of lists - # note that we do not handle the object codec here! this will prevent structured - # dtypes from containing object dtypes. return cls( fields=tuple( # type: ignore[misc] ( # type: ignore[misc] @@ -273,29 +240,18 @@ def _from_json_v2(cls, data: DTypeJSON) -> Self: @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: - # avoid circular import from zarr.core.dtype import get_data_type_from_json if cls._check_json_v3(data): config = data["configuration"] meta_fields = config["fields"] - dtype_name = data["name"] parsed_fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] for field in meta_fields: - if dtype_name == "struct": - if not isinstance(field, dict): - msg = f"Invalid field format for 'struct' dtype. Expected object with 'name' and 'data_type' keys, got {field!r}" - raise DataTypeValidationError(msg) - f_name = field["name"] - f_dtype = field["data_type"] - else: - if isinstance(field, dict): - msg = f"Invalid field format for 'structured' dtype. Expected [name, dtype] tuple, got {field!r}" - raise DataTypeValidationError(msg) - f_name, f_dtype = field - parsed_fields.append( - (f_name, get_data_type_from_json(f_dtype, zarr_format=3)) # type: ignore[misc] - ) + if isinstance(field, dict): + msg = f"Invalid field format for 'structured' dtype. Expected [name, dtype] tuple, got {field!r}" + raise DataTypeValidationError(msg) + f_name, f_dtype = field + parsed_fields.append((f_name, get_data_type_from_json(f_dtype, zarr_format=3))) # type: ignore[arg-type] return cls(fields=tuple(parsed_fields)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON object with the key {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @@ -334,7 +290,7 @@ def to_json(self, zarr_format: ZarrFormat) -> StructuredJSON_V2 | StructuredJSON elif zarr_format == 3: v3_unstable_dtype_warning(self) fields = [ - {"name": f_name, "data_type": f_dtype.to_json(zarr_format=zarr_format)} + [f_name, f_dtype.to_json(zarr_format=zarr_format)] # type: ignore[list-item] for f_name, f_dtype in self.fields ] base_dict = { @@ -345,7 +301,6 @@ def to_json(self, zarr_format: ZarrFormat) -> StructuredJSON_V2 | StructuredJSON raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[StructuredScalarLike]: - # TODO: implement something more precise here! """ Check that the input is a valid scalar value for this structured data type. @@ -526,3 +481,140 @@ def has_multi_byte_fields(self) -> bool: isinstance(field_dtype, HasItemSize) and field_dtype.item_size > 1 for _, field_dtype in self.fields ) + + +@dataclass(frozen=True, kw_only=True) +class Struct(Structured): + """ + A Zarr data type for arrays containing structured scalars, AKA "record arrays". + + Wraps the NumPy `np.dtypes.VoidDType` if the data type has fields. Scalars for this data + type are instances of `np.void`, with a ``fields`` attribute. + + This class handles the canonical "struct" format with object-style field definitions. + For the legacy "structured" format, see ``Structured``. + + Attributes + ---------- + fields : Sequence[tuple[str, ZDType]] + The fields of the structured dtype. + + References + ---------- + The Zarr V3 specification for this data type is defined in the zarr-extensions repository: + https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/struct + + The Zarr V2 data type specification can be found [here](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). + """ + + _zarr_v3_name: ClassVar[Literal["struct"]] = "struct" # type: ignore[assignment] + + @classmethod + def _check_native_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: + """ + Check that this dtype is a numpy structured dtype. + + Parameters + ---------- + dtype : np.dtypes.DTypeLike + The dtype to check. + + Returns + ------- + TypeGuard[np.dtypes.VoidDType] + True if the dtype matches, False otherwise. + """ + return isinstance(dtype, cls.dtype_cls) and dtype.fields is not None + + @classmethod + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + """ + Create a Struct ZDType from a native NumPy data type. + + Parameters + ---------- + dtype : TBaseDType + The native data type. + + Returns + ------- + Self + An instance of this data type. + + Raises + ------ + DataTypeValidationError + If the input data type is not an instance of np.dtypes.VoidDType with a non-null + ``fields`` attribute. + + Notes + ----- + This method attempts to resolve the fields of the structured dtype using the data type + registry. + """ + from zarr.core.dtype import get_data_type_from_native_dtype + + fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] + if cls._check_native_dtype(dtype): + for key, (dtype_instance, *_) in dtype.fields.items(): # type: ignore[union-attr] + dtype_wrapped = get_data_type_from_native_dtype(dtype_instance) + fields.append((key, dtype_wrapped)) + + return cls(fields=tuple(fields)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" + ) + + @classmethod + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[StructJSON_V3]: # type: ignore[override] + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and isinstance(data["configuration"], dict) + and set(data["configuration"].keys()) == {"fields"} + ) + + @classmethod + def _from_json_v3(cls, data: DTypeJSON) -> Self: + from zarr.core.dtype import get_data_type_from_json + + if cls._check_json_v3(data): + config = data["configuration"] + meta_fields = config["fields"] + parsed_fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] + for field in meta_fields: + if not isinstance(field, dict): + msg = f"Invalid field format for 'struct' dtype. Expected object with 'name' and 'data_type' keys, got {field!r}" # type: ignore[unreachable] + raise DataTypeValidationError(msg) + f_name = field["name"] + f_dtype = field["data_type"] + parsed_fields.append((f_name, get_data_type_from_json(f_dtype, zarr_format=3))) # type: ignore[arg-type] + return cls(fields=tuple(parsed_fields)) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON object with the key {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) + + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> StructuredJSON_V2: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> StructJSON_V3: ... + + def to_json(self, zarr_format: ZarrFormat) -> StructuredJSON_V2 | StructJSON_V3: + if zarr_format == 2: + fields_v2 = [ + [f_name, f_dtype.to_json(zarr_format=zarr_format)["name"]] + for f_name, f_dtype in self.fields + ] + return {"name": fields_v2, "object_codec_id": None} + elif zarr_format == 3: + v3_unstable_dtype_warning(self) + fields_v3 = [ + {"name": f_name, "data_type": f_dtype.to_json(zarr_format=zarr_format)} + for f_name, f_dtype in self.fields + ] + return cast( + "StructJSON_V3", + {"name": self._zarr_v3_name, "configuration": {"fields": fields_v3}}, + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/tests/test_dtype/conftest.py b/tests/test_dtype/conftest.py index 0650d143c6..7bc309a371 100644 --- a/tests/test_dtype/conftest.py +++ b/tests/test_dtype/conftest.py @@ -6,19 +6,22 @@ from zarr.core.dtype import data_type_registry from zarr.core.dtype.common import HasLength -from zarr.core.dtype.npy.structured import Structured +from zarr.core.dtype.npy.structured import Struct, Structured from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 from zarr.core.dtype.wrapper import ZDType zdtype_examples: tuple[ZDType[Any, Any], ...] = () for wrapper_cls in data_type_registry.contents.values(): - # The Structured dtype has to be constructed with some actual fields - if wrapper_cls is Structured: + # The Struct dtype has to be constructed with some actual fields + if wrapper_cls is Struct: with warnings.catch_warnings(): warnings.simplefilter("ignore") zdtype_examples += ( wrapper_cls.from_native_dtype(np.dtype([("a", np.float64), ("b", np.int8)])), ) + # The legacy Structured dtype doesn't support native dtype matching, skip it + elif wrapper_cls is Structured: + continue elif issubclass(wrapper_cls, HasLength): zdtype_examples += (wrapper_cls(length=1),) elif issubclass(wrapper_cls, DateTime64 | TimeDelta64): diff --git a/tests/test_dtype/test_npy/test_structured.py b/tests/test_dtype/test_npy/test_structured.py index d2cacd1502..63ac98d1a4 100644 --- a/tests/test_dtype/test_npy/test_structured.py +++ b/tests/test_dtype/test_npy/test_structured.py @@ -11,14 +11,17 @@ Float64, Int32, Int64, + Struct, Structured, UInt8, ) from zarr.core.dtype.common import DataTypeValidationError -class TestStructured(BaseTestZDType): - test_cls = Structured +class TestStruct(BaseTestZDType): + """Test the canonical 'struct' dtype format.""" + + test_cls = Struct valid_dtype = ( np.dtype([("field1", np.int32), ("field2", np.float64)]), np.dtype([("field1", np.int64), ("field2", np.int32)]), @@ -82,38 +85,38 @@ class TestStructured(BaseTestZDType): ) scalar_v2_params = ( - (Structured(fields=(("field1", Int32()), ("field2", Float64()))), "AQAAAAAAAAAAAPA/"), - (Structured(fields=(("field1", Float16()), ("field2", Int32()))), "AQAAAAAA"), + (Struct(fields=(("field1", Int32()), ("field2", Float64()))), "AQAAAAAAAAAAAPA/"), + (Struct(fields=(("field1", Float16()), ("field2", Int32()))), "AQAAAAAA"), ) scalar_v3_params = ( ( - Structured(fields=(("field1", Int32()), ("field2", Float64()))), + Struct(fields=(("field1", Int32()), ("field2", Float64()))), {"field1": 1, "field2": 1.0}, ), - (Structured(fields=(("field1", Int64()), ("field2", Int32()))), {"field1": 1, "field2": 1}), + (Struct(fields=(("field1", Int64()), ("field2", Int32()))), {"field1": 1, "field2": 1}), ) cast_value_params = ( ( - Structured(fields=(("field1", Int32()), ("field2", Float64()))), + Struct(fields=(("field1", Int32()), ("field2", Float64()))), (1, 2.0), np.array((1, 2.0), dtype=[("field1", np.int32), ("field2", np.float64)]), ), ( - Structured(fields=(("field1", Int64()), ("field2", Int32()))), + Struct(fields=(("field1", Int64()), ("field2", Int32()))), (3, 4.5), np.array((3, 4.5), dtype=[("field1", np.int64), ("field2", np.int32)]), ), ) item_size_params = ( - Structured(fields=(("field1", Int32()), ("field2", Float64()))), - Structured(fields=(("field1", Int64()), ("field2", Int32()))), + Struct(fields=(("field1", Int32()), ("field2", Float64()))), + Struct(fields=(("field1", Int64()), ("field2", Int32()))), ) invalid_scalar_params = ( - (Structured(fields=(("field1", Int32()), ("field2", Float64()))), "i am a string"), - (Structured(fields=(("field1", Int32()), ("field2", Float64()))), {"type": "dict"}), + (Struct(fields=(("field1", Int32()), ("field2", Float64()))), "i am a string"), + (Struct(fields=(("field1", Int32()), ("field2", Float64()))), {"type": "dict"}), ) def scalar_equals(self, scalar1: Any, scalar2: Any) -> bool: @@ -122,48 +125,77 @@ def scalar_equals(self, scalar1: Any, scalar2: Any) -> bool: return super().scalar_equals(scalar1, scalar2) +class TestStructured: + """Test the legacy 'structured' dtype format.""" + + def test_invalid_size(self) -> None: + """Test that it's impossible to create a data type that has no fields.""" + fields = () + msg = f"must have at least one field. Got {fields!r}" + with pytest.raises(ValueError, match=msg): + Structured(fields=fields) + + def test_structured_legacy_name_with_tuple_format(self) -> None: + """Test that the legacy 'structured' name with tuple field format is accepted.""" + json_v3 = { + "name": "structured", + "configuration": { + "fields": [ + ["field1", "int32"], + ["field2", "float64"], + ] + }, + } + dtype = Structured.from_json(json_v3, zarr_format=3) + assert dtype.fields[0][0] == "field1" + assert dtype.fields[1][0] == "field2" + + def test_structured_rejects_object_format(self) -> None: + """Test that 'structured' dtype rejects the new object field format.""" + json_v3 = { + "name": "structured", + "configuration": { + "fields": [ + {"name": "field1", "data_type": "int32"}, + {"name": "field2", "data_type": "float64"}, + ] + }, + } + with pytest.raises(DataTypeValidationError, match="Invalid field format for 'structured'"): + Structured.from_json(json_v3, zarr_format=3) + + @pytest.mark.filterwarnings("ignore::zarr.errors.UnstableSpecificationWarning") + def test_structured_writes_tuple_format(self) -> None: + """Test that 'structured' writes the tuple field format.""" + dtype = Structured(fields=(("field1", Int32()), ("field2", Float64()))) + json_v3 = dtype.to_json(zarr_format=3) + assert json_v3["name"] == "structured" + assert json_v3["configuration"]["fields"][0] == ["field1", "int32"] + + def test_structured_no_native_dtype_matching(self) -> None: + dtype = np.dtype([("field1", np.int32), ("field2", np.float64)]) + with pytest.raises(DataTypeValidationError, match="Use 'Struct' for native dtype matching"): + Structured.from_native_dtype(dtype) + + def test_invalid_size() -> None: - """ - Test that it's impossible to create a data type that has no fields - """ + """Test that it's impossible to create a data type that has no fields.""" fields = () msg = f"must have at least one field. Got {fields!r}" with pytest.raises(ValueError, match=msg): - Structured(fields=fields) + Struct(fields=fields) @pytest.mark.filterwarnings("ignore::zarr.errors.UnstableSpecificationWarning") def test_struct_name_is_primary() -> None: - """ - Test that 'struct' is the primary name written to JSON. - """ - dtype = Structured(fields=(("field1", Int32()), ("field2", Float64()))) + """Test that 'struct' is the primary name written to JSON.""" + dtype = Struct(fields=(("field1", Int32()), ("field2", Float64()))) json_v3 = dtype.to_json(zarr_format=3) assert json_v3["name"] == "struct" -def test_structured_legacy_name_with_tuple_format() -> None: - """ - Test that the legacy 'structured' name with tuple field format is accepted. - """ - json_v3 = { - "name": "structured", - "configuration": { - "fields": [ - ["field1", "int32"], - ["field2", "float64"], - ] - }, - } - dtype = Structured.from_json(json_v3, zarr_format=3) - assert dtype.fields[0][0] == "field1" - assert dtype.fields[1][0] == "field2" - - def test_struct_rejects_tuple_format() -> None: - """ - Test that 'struct' dtype rejects the legacy tuple field format. - """ + """Test that 'struct' dtype rejects the legacy tuple field format.""" json_v3 = { "name": "struct", "configuration": { @@ -174,61 +206,36 @@ def test_struct_rejects_tuple_format() -> None: }, } with pytest.raises(DataTypeValidationError, match="Invalid field format for 'struct'"): - Structured.from_json(json_v3, zarr_format=3) - - -def test_structured_rejects_object_format() -> None: - """ - Test that 'structured' dtype rejects the new object field format. - """ - json_v3 = { - "name": "structured", - "configuration": { - "fields": [ - {"name": "field1", "data_type": "int32"}, - {"name": "field2", "data_type": "float64"}, - ] - }, - } - with pytest.raises(DataTypeValidationError, match="Invalid field format for 'structured'"): - Structured.from_json(json_v3, zarr_format=3) + Struct.from_json(json_v3, zarr_format=3) def test_fill_value_dict_form() -> None: - """ - Test that dict form fill values are properly parsed. - """ - dtype = Structured(fields=(("x", Int32()), ("y", Float64()))) + """Test that dict form fill values are properly parsed.""" + dtype = Struct(fields=(("x", Int32()), ("y", Float64()))) fill_value = dtype.from_json_scalar({"x": 42, "y": 3.14}, zarr_format=3) assert fill_value["x"] == 42 assert fill_value["y"] == 3.14 def test_fill_value_dict_form_missing_fields() -> None: - """ - Test that missing fields in dict form fill values use defaults. - """ - dtype = Structured(fields=(("x", Int32()), ("y", Float64()))) + """Test that missing fields in dict form fill values use defaults.""" + dtype = Struct(fields=(("x", Int32()), ("y", Float64()))) fill_value = dtype.from_json_scalar({"x": 42}, zarr_format=3) assert fill_value["x"] == 42 assert fill_value["y"] == 0.0 def test_fill_value_legacy_base64() -> None: - """ - Test that legacy base64-encoded fill values are still readable. - """ - dtype = Structured(fields=(("field1", Int32()), ("field2", Float64()))) + """Test that legacy base64-encoded fill values are still readable.""" + dtype = Struct(fields=(("field1", Int32()), ("field2", Float64()))) fill_value = dtype.from_json_scalar("AQAAAAAAAAAAAPA/", zarr_format=3) assert fill_value["field1"] == 1 assert fill_value["field2"] == 1.0 def test_fill_value_to_json_dict_form() -> None: - """ - Test that fill values are serialized as dict form. - """ - dtype = Structured(fields=(("x", Int32()), ("y", Float64()))) + """Test that fill values are serialized as dict form.""" + dtype = Struct(fields=(("x", Int32()), ("y", Float64()))) scalar = np.array((42, 3.14), dtype=[("x", np.int32), ("y", np.float64)])[()] json_val = dtype.to_json_scalar(scalar, zarr_format=3) assert isinstance(json_val, dict) @@ -237,16 +244,20 @@ def test_fill_value_to_json_dict_form() -> None: def test_has_multi_byte_fields_true() -> None: - """ - Test that has_multi_byte_fields returns True for dtypes with multi-byte fields. - """ - dtype = Structured(fields=(("field1", Int32()), ("field2", Float64()))) + """Test that has_multi_byte_fields returns True for dtypes with multi-byte fields.""" + dtype = Struct(fields=(("field1", Int32()), ("field2", Float64()))) assert dtype.has_multi_byte_fields() is True def test_has_multi_byte_fields_false() -> None: - """ - Test that has_multi_byte_fields returns False for dtypes with only single-byte fields. - """ - dtype = Structured(fields=(("field1", UInt8()), ("field2", UInt8()))) + """Test that has_multi_byte_fields returns False for dtypes with only single-byte fields.""" + dtype = Struct(fields=(("field1", UInt8()), ("field2", UInt8()))) assert dtype.has_multi_byte_fields() is False + + +def test_struct_from_native_dtype() -> None: + """Test that Struct can be created from native numpy dtype.""" + dtype = np.dtype([("field1", np.int32), ("field2", np.float64)]) + struct = Struct.from_native_dtype(dtype) + assert struct.fields[0][0] == "field1" + assert struct.fields[1][0] == "field2" From 87dd5fccd7c01fcb598c264f0538e5bfc1df835e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 16 Mar 2026 18:51:37 +0000 Subject: [PATCH 4/7] Remove Structured from AnyDType to fix test_match_dtype_unique --- src/zarr/core/dtype/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 3df5034276..9f4690c1cd 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -132,7 +132,6 @@ | ComplexFloatDType | StringDType | BytesDType - | Structured | Struct | TimeDType | VariableLengthBytes From 6b15718af248aa317b03aff76568bccd9b5c4bf9 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 16 Mar 2026 19:41:25 +0000 Subject: [PATCH 5/7] Revert removal of linting ignore flag --- src/zarr/core/dtype/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 9f4690c1cd..290a51d287 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -280,7 +280,7 @@ def parse_dtype( # First attempt to interpret the input as JSON if isinstance(dtype_spec, Mapping | str | Sequence): try: - return get_data_type_from_json(dtype_spec, zarr_format=zarr_format) + return get_data_type_from_json(dtype_spec, zarr_format=zarr_format) # type: ignore[arg-type] except ValueError: # no data type matched this JSON-like input pass From 8cb3abf774631afee5df8553b23bcb33317fb61b Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 16 Mar 2026 20:00:58 +0000 Subject: [PATCH 6/7] Fix dtype support --- src/zarr/dtype.py | 4 ++++ tests/test_v2.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/zarr/dtype.py b/src/zarr/dtype.py index 2c7eb651b0..f75219aab8 100644 --- a/src/zarr/dtype.py +++ b/src/zarr/dtype.py @@ -22,6 +22,8 @@ RawBytes, RawBytesJSON_V2, RawBytesJSON_V3, + Struct, + StructJSON_V3, Structured, StructuredJSON_V2, StructuredJSON_V3, @@ -68,6 +70,8 @@ "RawBytes", "RawBytesJSON_V2", "RawBytesJSON_V3", + "Struct", + "StructJSON_V3", "Structured", "StructuredJSON_V2", "StructuredJSON_V3", diff --git a/tests/test_v2.py b/tests/test_v2.py index cb990f6159..ce699ab7e8 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -14,7 +14,8 @@ from zarr import config from zarr.abc.store import Store from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.dtype import FixedLengthUTF32, Structured, VariableLengthUTF8 +from zarr.core.dtype import FixedLengthUTF32, VariableLengthUTF8 +from zarr.core.dtype.npy.structured import Struct from zarr.core.dtype.npy.bytes import NullTerminatedBytes from zarr.core.dtype.wrapper import ZDType from zarr.core.group import Group @@ -283,7 +284,7 @@ def test_structured_dtype_roundtrip(fill_value: float | bytes, tmp_path: Path) - def test_parse_structured_fill_value_valid( fill_value: Any, dtype: np.dtype[Any], expected_result: Any ) -> None: - zdtype = Structured.from_native_dtype(dtype) + zdtype = Struct.from_native_dtype(dtype) result = zdtype.cast_scalar(fill_value) assert result.dtype == expected_result.dtype assert result == expected_result From ff8e29f64f6f3d071e2835e62112c4a0c423a33e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 16 Mar 2026 20:19:52 +0000 Subject: [PATCH 7/7] Fix import sort order -- Linting --- tests/test_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_v2.py b/tests/test_v2.py index ce699ab7e8..3a063ac509 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -15,8 +15,8 @@ from zarr.abc.store import Store from zarr.core.buffer.core import default_buffer_prototype from zarr.core.dtype import FixedLengthUTF32, VariableLengthUTF8 -from zarr.core.dtype.npy.structured import Struct from zarr.core.dtype.npy.bytes import NullTerminatedBytes +from zarr.core.dtype.npy.structured import Struct from zarr.core.dtype.wrapper import ZDType from zarr.core.group import Group from zarr.core.sync import sync