diff --git a/pixi.toml b/pixi.toml index 6990777b42d..4e1195f31c3 100644 --- a/pixi.toml +++ b/pixi.toml @@ -159,7 +159,7 @@ pyarrow = { version = "*", index = "https://pypi.anaconda.org/scientific-python- dask = { git = "https://github.com/dask/dask" } distributed = { git = "https://github.com/dask/distributed" } -zarr = { git = "https://github.com/zarr-developers/zarr-python" } +zarr = { git = "https://github.com/jhamman/zarr-python", branch = "feature/rectilinear-chunk-grid" } numcodecs = { git = "https://github.com/zarr-developers/numcodecs" } cftime = { git = "https://github.com/Unidata/cftime" } # packaging = { git = "https://github.com/pypa/packaging"} #? Pixi warns if this is enabled diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 410a6a49a7b..a5c43a689ee 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -45,6 +45,13 @@ from xarray.core.datatree import DataTree from xarray.core.types import ZarrArray, ZarrGroup +try: + from zarr import RectilinearChunks, RegularChunks # noqa: F401 + + has_variable_chunk_support = True +except ImportError: + has_variable_chunk_support = False + def _get_mappers(*, storage_options, store, chunk_store): # expand str and path-like arguments @@ -280,7 +287,7 @@ async def async_getitem(self, key): ) -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): +def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, zarr_format): """ Given encoding chunks (possibly None or []) and variable chunks (possibly None or []). @@ -302,18 +309,24 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): # while dask chunks can be variable sized # https://dask.pydata.org/en/latest/array-design.html#chunks if var_chunks and not enc_chunks: + if zarr_format == 3 and has_variable_chunk_support: + return tuple(var_chunks) + if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( - "Zarr requires uniform chunk sizes except for final chunk. " + "Zarr v2 requires uniform chunk sizes except for final chunk. " f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. " - "Consider rechunking using `chunk()`." + "Consider rechunking using `chunk()`, or switching to the " + "zarr v3 format with zarr-python>=3.2." ) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): raise ValueError( - "Final chunk of Zarr array must be the same size or smaller " - f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}." - "Consider either rechunking using `chunk()` or instead deleting " - "or modifying `encoding['chunks']`." + "Final chunk of a Zarr v2 array or a Zarr v3 array without the " + "rectilinear chunks extension must be the same size or smaller " + f"than the first. Variable named {name!r} has incompatible Dask " + f"chunks {var_chunks!r}. " + "Consider switching to Zarr v3 with the rectilinear chunks extension, " + "rechunking using `chunk()` or deleting or modifying `encoding['chunks']`." ) # return the first chunk for each dimension return tuple(chunk[0] for chunk in var_chunks) @@ -476,6 +489,7 @@ def extract_zarr_variable_encoding( var_chunks=variable.chunks, ndim=variable.ndim, name=name, + zarr_format=zarr_format, ) if _zarr_v3() and chunks is None: chunks = "auto" @@ -854,9 +868,12 @@ def open_store_variable(self, name): ) attributes = dict(attributes) + chunks = tuple(zarr_array.chunks) + preferred_chunks = dict(zip(dimensions, chunks, strict=True)) + encoding = { - "chunks": zarr_array.chunks, - "preferred_chunks": dict(zip(dimensions, zarr_array.chunks, strict=True)), + "chunks": chunks, + "preferred_chunks": preferred_chunks, } if _zarr_v3():