diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d7bf0c0..6ca3479 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -39,7 +39,7 @@ repos:
       - id: yamllint
         exclude: pre-commit-config.yaml
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.14.13"
+    rev: "v0.14.14"
    hooks:
      - id: ruff-format
      - id: ruff-check
diff --git a/src/ome_arrow/core.py b/src/ome_arrow/core.py
index 402701d..3aeeb75 100644
--- a/src/ome_arrow/core.py
+++ b/src/ome_arrow/core.py
@@ -161,7 +161,8 @@ def __init__(
         # --- 5) Plain dict matching the schema -----------------------------------
         elif isinstance(data, dict):
-            self.data = pa.scalar(data, type=OME_ARROW_STRUCT)
+            record = {f.name: data.get(f.name) for f in OME_ARROW_STRUCT}
+            self.data = pa.scalar(record, type=OME_ARROW_STRUCT)
 
             if image_type is not None:
                 self.data = self._wrap_with_image_type(self.data, image_type)
@@ -239,7 +240,8 @@ def export(  # noqa: PLR0911
             compression / compression_level / tile:
                 OME-TIFF options (passed through to tifffile via BioIO).
             chunks / zarr_compressor / zarr_level :
-                OME-Zarr options (chunk shape, compressor hint, level).
+                OME-Zarr options (chunk shape, compressor hint, level). If chunks is
+                None, a TCZYX default is chosen (1,1,<=4,<=512,<=512).
             use_channel_colors:
                 Try to embed per-channel display colors when safe; otherwise omitted.
             parquet_*:
diff --git a/src/ome_arrow/export.py b/src/ome_arrow/export.py
index a3fd96e..8d70a97 100644
--- a/src/ome_arrow/export.py
+++ b/src/ome_arrow/export.py
@@ -21,7 +21,8 @@ def to_numpy(
     Convert an OME-Arrow record into a NumPy array shaped (T,C,Z,Y,X).
 
     The OME-Arrow "planes" are flattened YX slices indexed by (z, t, c).
-    This function reconstitutes them into a dense TCZYX ndarray.
+    This function reconstitutes them into a dense TCZYX ndarray. When chunks
+    are present, the array is rebuilt from the chunked pixels instead.
 
     Args:
         data:
@@ -58,7 +59,7 @@ def to_numpy(
     if sx <= 0 or sy <= 0 or sz <= 0 or sc <= 0 or st <= 0:
         raise ValueError("All size_* fields must be positive integers.")
 
-    expected_len = sx * sy
+    expected_plane_len = sx * sy
 
     # Prepare target array (T,C,Z,Y,X), zero-filled by default.
     out = np.zeros((st, sc, sz, sy, sx), dtype=dtype)
@@ -78,6 +79,52 @@ def _cast_plane(a: np.ndarray) -> np.ndarray:
             a = np.clip(a, lo, hi)
         return a.astype(dtype, copy=False)
 
+    chunks = data.get("chunks") or []
+    if chunks:
+        chunk_grid = data.get("chunk_grid") or {}
+        chunk_order = str(chunk_grid.get("chunk_order") or "ZYX").upper()
+        if chunk_order != "ZYX":
+            raise ValueError("Only chunk_order='ZYX' is supported for now.")
+
+        for i, ch in enumerate(chunks):
+            t = int(ch["t"])
+            c = int(ch["c"])
+            z = int(ch["z"])
+            y = int(ch["y"])
+            x = int(ch["x"])
+            shape_z = int(ch["shape_z"])
+            shape_y = int(ch["shape_y"])
+            shape_x = int(ch["shape_x"])
+
+            if not (0 <= t < st and 0 <= c < sc and 0 <= z < sz):
+                raise ValueError(
+                    f"chunks[{i}] index out of range: (t,c,z)=({t},{c},{z})"
+                )
+            if y < 0 or x < 0 or shape_z <= 0 or shape_y <= 0 or shape_x <= 0:
+                raise ValueError(f"chunks[{i}] has invalid shape or origin.")
+
+            pix = ch["pixels"]
+            try:
+                n = len(pix)
+            except Exception as e:
+                raise ValueError(f"chunks[{i}].pixels is not a sequence") from e
+
+            expected_len = shape_z * shape_y * shape_x
+            if n != expected_len:
+                if strict:
+                    raise ValueError(
+                        f"chunks[{i}].pixels length {n} != expected {expected_len}"
+                    )
+                if n > expected_len:
+                    pix = pix[:expected_len]
+                else:
+                    pix = list(pix) + [0] * (expected_len - n)
+
+            arr3d = np.asarray(pix).reshape(shape_z, shape_y, shape_x)
+            arr3d = _cast_plane(arr3d)
+            out[t, c, z : z + shape_z, y : y + shape_y, x : x + shape_x] = arr3d
+        return out
+
     # Fill planes.
     for i, p in enumerate(data.get("planes", [])):
         z = int(p["z"])
@@ -94,16 +141,17 @@ def _cast_plane(a: np.ndarray) -> np.ndarray:
         except Exception as e:
             raise ValueError(f"planes[{i}].pixels is not a sequence") from e
 
-        if n != expected_len:
+        if n != expected_plane_len:
             if strict:
                 raise ValueError(
-                    f"planes[{i}].pixels length {n} != size_x*size_y {expected_len}"
+                    f"planes[{i}].pixels length {n} != size_x*size_y "
+                    f"{expected_plane_len}"
                 )
             # Lenient mode: fix length by truncation or zero-pad.
-            if n > expected_len:
-                pix = pix[:expected_len]
+            if n > expected_plane_len:
+                pix = pix[:expected_plane_len]
             else:
-                pix = list(pix) + [0] * (expected_len - n)
+                pix = list(pix) + [0] * (expected_plane_len - n)
 
         # Reshape to (Y,X) and cast.
         arr2d = np.asarray(pix).reshape(sy, sx)
@@ -113,6 +161,128 @@ def _cast_plane(a: np.ndarray) -> np.ndarray:
     return out
 
 
+def plane_from_chunks(
+    data: Dict[str, Any] | pa.StructScalar,
+    *,
+    t: int,
+    c: int,
+    z: int,
+    dtype: np.dtype = np.uint16,
+    strict: bool = True,
+    clamp: bool = False,
+) -> np.ndarray:
+    """Extract a single (t, c, z) plane using chunked pixels when available.
+
+    Args:
+        data: OME-Arrow data as a Python dict or a `pa.StructScalar`.
+        t: Time index for the plane.
+        c: Channel index for the plane.
+        z: Z index for the plane.
+        dtype: Output dtype (default: np.uint16).
+        strict: When True, raise if chunk pixels are malformed.
+        clamp: If True, clamp values to the valid range of the target dtype.
+
+    Returns:
+        np.ndarray: 2D array with shape (Y, X).
+
+    Raises:
+        KeyError: If required OME-Arrow fields are missing.
+        ValueError: If indices are out of range or pixels are malformed.
+ """ + if isinstance(data, pa.StructScalar): + data = data.as_py() + + pm = data["pixels_meta"] + sx, sy = int(pm["size_x"]), int(pm["size_y"]) + sz, sc, st = int(pm["size_z"]), int(pm["size_c"]), int(pm["size_t"]) + if not (0 <= t < st and 0 <= c < sc and 0 <= z < sz): + raise ValueError(f"Requested plane (t={t}, c={c}, z={z}) out of range.") + + if np.issubdtype(dtype, np.integer): + info = np.iinfo(dtype) + lo, hi = info.min, info.max + elif np.issubdtype(dtype, np.floating): + lo, hi = -np.inf, np.inf + else: + lo, hi = -np.inf, np.inf + + def _cast_plane(a: np.ndarray) -> np.ndarray: + if clamp: + a = np.clip(a, lo, hi) + return a.astype(dtype, copy=False) + + chunks = data.get("chunks") or [] + if chunks: + chunk_grid = data.get("chunk_grid") or {} + chunk_order = str(chunk_grid.get("chunk_order") or "ZYX").upper() + if chunk_order != "ZYX": + raise ValueError("Only chunk_order='ZYX' is supported for now.") + + plane = np.zeros((sy, sx), dtype=dtype) + for i, ch in enumerate(chunks): + if int(ch["t"]) != t or int(ch["c"]) != c: + continue + z0 = int(ch["z"]) + szc = int(ch["shape_z"]) + if not (z0 <= z < z0 + szc): + continue + y0 = int(ch["y"]) + x0 = int(ch["x"]) + syc = int(ch["shape_y"]) + sxc = int(ch["shape_x"]) + pix = ch["pixels"] + try: + n = len(pix) + except Exception as e: + raise ValueError(f"chunks[{i}].pixels is not a sequence") from e + expected_len = szc * syc * sxc + if n != expected_len: + if strict: + raise ValueError( + f"chunks[{i}].pixels length {n} != expected {expected_len}" + ) + if n > expected_len: + pix = pix[:expected_len] + else: + pix = list(pix) + [0] * (expected_len - n) + + slab = np.asarray(pix).reshape(szc, syc, sxc) + slab = _cast_plane(slab) + zi = z - z0 + plane[y0 : y0 + syc, x0 : x0 + sxc] = slab[zi] + + return plane + + # Fallback to planes list if chunks are absent. + target = next( + ( + p + for p in data.get("planes", []) + if int(p["t"]) == t and int(p["c"]) == c and int(p["z"]) == z + ), + None, + ) + if target is None: + raise ValueError(f"plane (t={t}, c={c}, z={z}) not found") + + pix = target["pixels"] + try: + n = len(pix) + except Exception as e: + raise ValueError("plane pixels is not a sequence") from e + expected_len = sx * sy + if n != expected_len: + if strict: + raise ValueError(f"plane pixels length {n} != size_x*size_y {expected_len}") + if n > expected_len: + pix = pix[:expected_len] + else: + pix = list(pix) + [0] * (expected_len - n) + + arr2d = np.asarray(pix).reshape(sy, sx) + return _cast_plane(arr2d) + + def to_ome_tiff( data: Dict[str, Any] | pa.StructScalar, out_path: str, @@ -255,6 +425,7 @@ def to_ome_zarr( - Creates level shapes for a multiscale pyramid (if multiscale_levels>1). - Chooses Blosc codec compatible with zarr_format (v2 vs v3). - Populates axes names/types/units and physical pixel sizes from pixels_meta. + - Uses default TCZYX chunks if none are provided. 
""" # --- local import to avoid hard deps at module import time # Use the class you showed @@ -317,6 +488,15 @@ def to_ome_zarr( def _down(a: int, f: int) -> int: return max(1, a // f) + def _default_chunks_tcxyz( + shape: Tuple[int, int, int, int, int], + ) -> Tuple[int, int, int, int, int]: + _t, _c, z, y, x = shape + cz = min(z, 4) if z > 1 else 1 + cy = min(y, 512) + cx = min(x, 512) + return (1, 1, cz, cy, cx) + def _level_shapes_tcxyz(levels: int) -> List[Tuple[int, int, int, int, int]]: shapes = [(st, sc, sz, sy, sx)] for _ in range(levels - 1): @@ -340,6 +520,8 @@ def _level_shapes_tcxyz(levels: int) -> List[Tuple[int, int, int, int, int]]: # 5) Chunking / shards (can be single-shape or per-level; # we pass single-shape if provided) chunk_shape: Optional[List[Tuple[int, ...]]] = None + if chunks is None: + chunks = _default_chunks_tcxyz((st, sc, sz, sy, sx)) if chunks is not None: chunk_shape = [tuple(int(v) for v in chunks)] * multiscale_levels @@ -393,7 +575,8 @@ def to_ome_parquet( record_dict = data.as_py() else: # Validate by round-tripping through a typed scalar, then back to dict. - record_dict = pa.scalar(data, type=OME_ARROW_STRUCT).as_py() + record_dict = {f.name: data.get(f.name) for f in OME_ARROW_STRUCT} + record_dict = pa.scalar(record_dict, type=OME_ARROW_STRUCT).as_py() # 2) Build a single-row struct array from the dict, explicitly passing the schema struct_array = pa.array([record_dict], type=OME_ARROW_STRUCT) # len=1 @@ -456,7 +639,8 @@ def to_ome_vortex( record_dict = data.as_py() else: # Validate by round-tripping through a typed scalar, then back to dict. - record_dict = pa.scalar(data, type=OME_ARROW_STRUCT).as_py() + record_dict = {f.name: data.get(f.name) for f in OME_ARROW_STRUCT} + record_dict = pa.scalar(record_dict, type=OME_ARROW_STRUCT).as_py() # 2) Build a single-row struct array from the dict, explicitly passing the schema struct_array = pa.array([record_dict], type=OME_ARROW_STRUCT) # len=1 diff --git a/src/ome_arrow/ingest.py b/src/ome_arrow/ingest.py index 25ea49b..fe86346 100644 --- a/src/ome_arrow/ingest.py +++ b/src/ome_arrow/ingest.py @@ -50,7 +50,7 @@ def _ome_arrow_from_table( # 1) Locate the OME-Arrow column def _struct_matches_ome_fields(t: pa.StructType) -> bool: ome_fields = {f.name for f in OME_ARROW_STRUCT} - required_fields = ome_fields - {"image_type"} + required_fields = ome_fields - {"image_type", "chunk_grid", "chunks"} col_fields = {f.name for f in t} return required_fields.issubset(col_fields) @@ -249,6 +249,105 @@ def _read_ngff_scale(zarr_path: Path) -> tuple[float, float, float, str | None] return psize_x, psize_y, psize_z, unit +def _normalize_chunk_shape( + chunk_shape: Optional[Tuple[int, int, int]], + size_z: int, + size_y: int, + size_x: int, +) -> Tuple[int, int, int]: + """Normalize a chunk shape against image bounds. + + Args: + chunk_shape: Desired chunk shape as (Z, Y, X), or None. + size_z: Total Z size of the image. + size_y: Total Y size of the image. + size_x: Total X size of the image. + + Returns: + Tuple[int, int, int]: Normalized (Z, Y, X) chunk shape. 
+ """ + if chunk_shape is None: + chunk_shape = (1, 512, 512) + cz = max(1, min(int(chunk_shape[0]), int(size_z))) + cy = max(1, min(int(chunk_shape[1]), int(size_y))) + cx = max(1, min(int(chunk_shape[2]), int(size_x))) + return cz, cy, cx + + +def _build_chunks_from_planes( + *, + planes: List[Dict[str, Any]], + size_t: int, + size_c: int, + size_z: int, + size_y: int, + size_x: int, + chunk_shape: Optional[Tuple[int, int, int]], + chunk_order: str = "ZYX", +) -> List[Dict[str, Any]]: + """Build chunked pixels from a list of flattened planes. + + Args: + planes: List of plane dicts with keys z, t, c, and pixels. + size_t: Total T size of the image. + size_c: Total C size of the image. + size_z: Total Z size of the image. + size_y: Total Y size of the image. + size_x: Total X size of the image. + chunk_shape: Desired chunk shape as (Z, Y, X). + chunk_order: Flattening order for chunk pixels (default "ZYX"). + + Returns: + List[Dict[str, Any]]: Chunk list with pixels stored as flat lists. + + Raises: + ValueError: If an unsupported chunk_order is requested. + """ + if str(chunk_order).upper() != "ZYX": + raise ValueError("Only chunk_order='ZYX' is supported for now.") + + cz, cy, cx = _normalize_chunk_shape(chunk_shape, size_z, size_y, size_x) + + plane_map: Dict[Tuple[int, int, int], np.ndarray] = {} + for p in planes: + z = int(p["z"]) + t = int(p["t"]) + c = int(p["c"]) + pix = p["pixels"] + arr2d = np.asarray(pix).reshape(size_y, size_x) + plane_map[(t, c, z)] = arr2d + + chunks: List[Dict[str, Any]] = [] + for t in range(size_t): + for c in range(size_c): + for z0 in range(0, size_z, cz): + sz = min(cz, size_z - z0) + for y0 in range(0, size_y, cy): + sy = min(cy, size_y - y0) + for x0 in range(0, size_x, cx): + sx = min(cx, size_x - x0) + slab = np.zeros((sz, sy, sx), dtype=np.uint16) + for zi in range(sz): + plane = plane_map.get((t, c, z0 + zi)) + if plane is None: + continue + slab[zi] = plane[y0 : y0 + sy, x0 : x0 + sx] + chunks.append( + { + "t": t, + "c": c, + "z": z0, + "y": y0, + "x": x0, + "shape_z": sz, + "shape_y": sy, + "shape_x": sx, + "pixels": slab.reshape(-1).tolist(), + } + ) + return chunks + + def to_ome_arrow( type_: str = OME_ARROW_TAG_TYPE, version: str = OME_ARROW_TAG_VERSION, @@ -269,6 +368,10 @@ def to_ome_arrow( physical_size_unit: str = "µm", channels: Optional[List[Dict[str, Any]]] = None, planes: Optional[List[Dict[str, Any]]] = None, + chunks: Optional[List[Dict[str, Any]]] = None, + chunk_shape: Optional[Tuple[int, int, int]] = (1, 512, 512), # (Z, Y, X) + chunk_order: str = "ZYX", + build_chunks: bool = True, masks: Any = None, ) -> pa.StructScalar: """ @@ -294,6 +397,12 @@ def to_ome_arrow( physical_size_unit: Unit string, default "µm". channels: List of channel dicts. Autogenerates one if None. planes: List of plane dicts. Empty if None. + chunks: Optional list of chunk dicts. If None and build_chunks is True, + chunks are derived from planes using chunk_shape. + chunk_shape: Chunk shape as (Z, Y, X). Defaults to (1, 512, 512). + chunk_order: Flattening order for chunk pixels (default "ZYX"). + build_chunks: If True, build chunked pixels from planes when chunks + is None. masks: Optional placeholder for future annotations. 
     Returns:
@@ -339,6 +448,31 @@ def to_ome_arrow(
     if planes is None:
         planes = [{"z": 0, "t": 0, "c": 0, "pixels": [0] * (size_x * size_y)}]
 
+    if chunks is None and build_chunks:
+        chunks = _build_chunks_from_planes(
+            planes=planes,
+            size_t=size_t,
+            size_c=size_c,
+            size_z=size_z,
+            size_y=size_y,
+            size_x=size_x,
+            chunk_shape=chunk_shape,
+            chunk_order=chunk_order,
+        )
+
+    chunk_grid = None
+    if chunks is not None:
+        cz, cy, cx = _normalize_chunk_shape(chunk_shape, size_z, size_y, size_x)
+        chunk_grid = {
+            "order": "TCZYX",
+            "chunk_t": 1,
+            "chunk_c": 1,
+            "chunk_z": cz,
+            "chunk_y": cy,
+            "chunk_x": cx,
+            "chunk_order": str(chunk_order),
+        }
+
     record = {
         "type": type_,
         "version": version,
@@ -362,6 +496,8 @@ def to_ome_arrow(
             "physical_size_z_unit": physical_size_unit,
             "channels": channels,
         },
+        "chunk_grid": chunk_grid,
+        "chunks": chunks,
         "planes": planes,
         "masks": masks,
     }
@@ -379,6 +515,9 @@ def from_numpy(
     channel_names: Optional[Sequence[str]] = None,
     acquisition_datetime: Optional[datetime] = None,
     clamp_to_uint16: bool = True,
+    chunk_shape: Optional[Tuple[int, int, int]] = (1, 512, 512),
+    chunk_order: str = "ZYX",
+    build_chunks: bool = True,
     # meta
     physical_size_x: float = 1.0,
     physical_size_y: float = 1.0,
@@ -386,44 +525,39 @@ def from_numpy(
     physical_size_unit: str = "µm",
     dtype_meta: Optional[str] = None,  # if None, inferred from output dtype
 ) -> pa.StructScalar:
-    """
-    Build an OME-Arrow StructScalar from a NumPy array.
-
-    Parameters
-    ----------
-    arr : np.ndarray
-        Image data with axes described by `dim_order`.
-    dim_order : str, default "TCZYX"
-        Axis labels for `arr`. Must include "Y" and "X".
-        Supported examples: "YX", "ZYX", "CYX", "CZYX", "TYX", "TCYX", "TCZYX".
-    image_id, name : Optional[str]
-        Identifiers to embed in the record.
-    image_type : Optional[str]
-        Open-ended image kind (e.g., "image", "label").
-    channel_names : Optional[Sequence[str]]
-        Names for channels; defaults to C0..C{n-1}.
-    acquisition_datetime : Optional[datetime]
-        Defaults to now (UTC) if None.
-    clamp_to_uint16 : bool, default True
-        If True, clamp/cast planes to uint16 before serialization.
-    physical_size_x/y/z : float
-        Spatial pixel sizes (µm), Z used if present.
-    physical_size_unit : str
-        Unit string for spatial axes (default "µm").
-    dtype_meta : Optional[str]
-        Pixel dtype string to place in metadata; if None, inferred from the
-        (possibly cast) array's dtype.
-
-    Returns
-    -------
-    pa.StructScalar
-        Typed OME-Arrow record (schema = OME_ARROW_STRUCT).
-
-    Notes
-    -----
-    - If Z is not in `dim_order`, `size_z` will be 1 and the meta
-      dimension_order becomes "XYCT"; otherwise "XYZCT".
-    - If T/C are absent in `dim_order`, they default to size 1.
+    """Build an OME-Arrow StructScalar from a NumPy array.
+
+    Args:
+        arr: Image data with axes described by `dim_order`.
+        dim_order: Axis labels for `arr`. Must include "Y" and "X".
+            Supported examples: "YX", "ZYX", "CYX", "CZYX", "TYX", "TCYX", "TCZYX".
+        image_id: Optional stable image identifier.
+        name: Optional human label.
+        image_type: Open-ended image kind (e.g., "image", "label").
+        channel_names: Names for channels; defaults to C0..C{n-1}.
+        acquisition_datetime: Defaults to now (UTC) if None.
+        clamp_to_uint16: If True, clamp/cast planes to uint16 before serialization.
+        chunk_shape: Chunk shape as (Z, Y, X). Defaults to (1, 512, 512).
+        chunk_order: Flattening order for chunk pixels (default "ZYX").
+        build_chunks: If True, build chunked pixels from planes.
+        physical_size_x: Spatial pixel size (µm) for X.
+        physical_size_y: Spatial pixel size (µm) for Y.
+        physical_size_z: Spatial pixel size (µm) for Z when present.
+        physical_size_unit: Unit string for spatial axes (default "µm").
+        dtype_meta: Pixel dtype string to place in metadata; if None, inferred
+            from the (possibly cast) array's dtype.
+
+    Returns:
+        pa.StructScalar: Typed OME-Arrow record (schema = OME_ARROW_STRUCT).
+
+    Raises:
+        TypeError: If `arr` is not a NumPy ndarray.
+        ValueError: If `dim_order` is invalid or dimensions are non-positive.
+
+    Notes:
+        - If Z is not in `dim_order`, `size_z` will be 1 and the meta
+          dimension_order becomes "XYCT"; otherwise "XYZCT".
+        - If T/C are absent in `dim_order`, they default to size 1.
     """
 
     if not isinstance(arr, np.ndarray):
@@ -526,6 +660,9 @@ def from_numpy(
         physical_size_unit=str(physical_size_unit),
         channels=channels,
         planes=planes,
+        chunk_shape=chunk_shape,
+        chunk_order=chunk_order,
+        build_chunks=build_chunks,
         masks=None,
     )
diff --git a/src/ome_arrow/meta.py b/src/ome_arrow/meta.py
index c4b5395..2eaf29e 100644
--- a/src/ome_arrow/meta.py
+++ b/src/ome_arrow/meta.py
@@ -15,6 +15,7 @@
 # - image_type: open-ended image kind (e.g., "image", "label").
 # - pixels_meta: pixels struct (sizes, units, channels).
 # - planes: list of planes struct entries, one per (t,c,z).
+# - chunk_grid/chunks: optional chunked pixels (TCZYX-aware), stored as Arrow lists.
 # - masks: reserved for future labels/ROIs (placeholder).
 OME_ARROW_STRUCT: pa.StructType = pa.struct(
     [
@@ -70,6 +71,44 @@
                 ]
             ),
         ),
+        # CHUNK GRID: optional chunking metadata for random access.
+        # - order: axis order for the full array, e.g., "TCZYX".
+        # - chunk_*: chunk sizes for each axis (defaults to 1 for T/C).
+        # - chunk_order: order used to flatten chunk pixels (default "ZYX").
+        pa.field(
+            "chunk_grid",
+            pa.struct(
+                [
+                    pa.field("order", pa.string()),
+                    pa.field("chunk_t", pa.int32()),
+                    pa.field("chunk_c", pa.int16()),
+                    pa.field("chunk_z", pa.int32()),
+                    pa.field("chunk_y", pa.int32()),
+                    pa.field("chunk_x", pa.int32()),
+                    pa.field("chunk_order", pa.string()),
+                ]
+            ),
+        ),
+        # CHUNKS: list of chunk entries (Arrow-native, no binary payloads).
+        # - pixels flattened in chunk_order (default "ZYX").
+        pa.field(
+            "chunks",
+            pa.list_(
+                pa.struct(
+                    [
+                        pa.field("t", pa.int32()),
+                        pa.field("c", pa.int16()),
+                        pa.field("z", pa.int32()),
+                        pa.field("y", pa.int32()),
+                        pa.field("x", pa.int32()),
+                        pa.field("shape_z", pa.int32()),
+                        pa.field("shape_y", pa.int32()),
+                        pa.field("shape_x", pa.int32()),
+                        pa.field("pixels", pa.list_(pa.uint16())),
+                    ]
+                )
+            ),
+        ),
         # PLANES: one 2D image plane for a specific (t, c, z).
         # - pixels: flattened numeric list (Y*X) for analysis-ready computation.
         pa.field(
diff --git a/src/ome_arrow/transform.py b/src/ome_arrow/transform.py
index 163cb5d..84dcf60 100644
--- a/src/ome_arrow/transform.py
+++ b/src/ome_arrow/transform.py
@@ -8,6 +8,7 @@
 import numpy as np
 import pyarrow as pa
 
+from ome_arrow.ingest import _build_chunks_from_planes, _normalize_chunk_shape
 from ome_arrow.meta import OME_ARROW_STRUCT
 
@@ -179,4 +180,37 @@ def _crop_pixels(flat: Iterable[int]) -> List[int]:
     rec_out["pixels_meta"] = pm_out
     rec_out["planes"] = planes_out
 
+    chunk_grid_in = row.get("chunk_grid") or {}
+    if chunk_grid_in or row.get("chunks"):
+        chunk_shape = (
+            int(chunk_grid_in.get("chunk_z", 1)),
+            int(chunk_grid_in.get("chunk_y", 512)),
+            int(chunk_grid_in.get("chunk_x", 512)),
+        )
+        chunk_order = str(chunk_grid_in.get("chunk_order") or "ZYX")
+        chunks_out = _build_chunks_from_planes(
+            planes=planes_out,
+            size_t=new_st,
+            size_c=new_sc,
+            size_z=new_sz,
+            size_y=new_sy,
+            size_x=new_sx,
+            chunk_shape=chunk_shape,
+            chunk_order=chunk_order,
+        )
+        cz, cy, cx = _normalize_chunk_shape(chunk_shape, new_sz, new_sy, new_sx)
+        rec_out["chunk_grid"] = {
+            "order": "TCZYX",
+            "chunk_t": 1,
+            "chunk_c": 1,
+            "chunk_z": cz,
+            "chunk_y": cy,
+            "chunk_x": cx,
+            "chunk_order": chunk_order,
+        }
+        rec_out["chunks"] = chunks_out
+    else:
+        rec_out["chunk_grid"] = row.get("chunk_grid")
+        rec_out["chunks"] = row.get("chunks")
+
     return pa.scalar(rec_out, type=OME_ARROW_STRUCT)
diff --git a/src/ome_arrow/view.py b/src/ome_arrow/view.py
index 303475a..71c9ab6 100644
--- a/src/ome_arrow/view.py
+++ b/src/ome_arrow/view.py
@@ -23,6 +23,8 @@
 if TYPE_CHECKING:
     import pyvista
 
+from ome_arrow.export import plane_from_chunks
+
 
 def view_matplotlib(
     data: dict[str, object] | pa.StructScalar,
@@ -50,29 +52,8 @@ def view_matplotlib(
     Raises:
         ValueError: If the requested plane is missing or pixel sizes mismatch.
     """
-    if isinstance(data, pa.StructScalar):
-        data = data.as_py()
-
-    pm = data["pixels_meta"]
-    sx, sy = int(pm["size_x"]), int(pm["size_y"])
     t, c, z = (int(x) for x in tcz)
-
-    plane = next(
-        (
-            p
-            for p in data["planes"]
-            if int(p["t"]) == t and int(p["c"]) == c and int(p["z"]) == z
-        ),
-        None,
-    )
-    if plane is None:
-        raise ValueError(f"plane (t={t}, c={c}, z={z}) not found")
-
-    pix = plane["pixels"]
-    if len(pix) != sx * sy:
-        raise ValueError(f"pixels len {len(pix)} != size_x*size_y ({sx * sy})")
-
-    img = np.asarray(pix, dtype=np.uint16).reshape(sy, sx).copy()
+    img = plane_from_chunks(data, t=t, c=c, z=z, dtype=np.uint16).copy()
 
     if (vmin is None or vmax is None) and autoscale:
         lo, hi = int(img.min()), int(img.max())
diff --git a/tests/conftest.py b/tests/conftest.py
index 518ca5d..6ee8121 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -52,6 +52,39 @@ def example_correct_data() -> dict:
                 },
             ],
         },
+        "chunk_grid": {
+            "order": "TCZYX",
+            "chunk_t": 1,
+            "chunk_c": 1,
+            "chunk_z": 1,
+            "chunk_y": 3,
+            "chunk_x": 4,
+            "chunk_order": "ZYX",
+        },
+        "chunks": [
+            {
+                "t": 0,
+                "c": 0,
+                "z": 0,
+                "y": 0,
+                "x": 0,
+                "shape_z": 1,
+                "shape_y": 3,
+                "shape_x": 4,
+                "pixels": [0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23],
+            },
+            {
+                "t": 0,
+                "c": 1,
+                "z": 0,
+                "y": 0,
+                "x": 0,
+                "shape_z": 1,
+                "shape_y": 3,
+                "shape_x": 4,
+                "pixels": [100, 101, 102, 103, 110, 111, 112, 113, 120, 121, 122, 123],
+            },
+        ],
         "planes": [
             {
                 "z": 0,
diff --git a/tests/test_chunks.py b/tests/test_chunks.py
new file mode 100644
index 0000000..76ebb88
--- /dev/null
+++ b/tests/test_chunks.py
@@ -0,0 +1,83 @@
+"""
+Tests for chunked pixel support.
+""" + +import numpy as np + +from ome_arrow.export import plane_from_chunks, to_numpy +from ome_arrow.ingest import to_ome_arrow + + +def test_to_numpy_from_chunks(example_correct_data: dict) -> None: + """Reconstruct dense arrays from chunked pixels.""" + data = dict(example_correct_data) + data["planes"] = [] + + arr = to_numpy(data) + + assert arr.shape == (1, 2, 1, 3, 4) + np.testing.assert_array_equal( + arr[0, 0, 0], + np.array([[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23]]), + ) + np.testing.assert_array_equal( + arr[0, 1, 0], + np.array([[100, 101, 102, 103], [110, 111, 112, 113], [120, 121, 122, 123]]), + ) + + +def test_to_ome_arrow_builds_chunks() -> None: + """Build chunked pixels from planes when requested.""" + planes = [ + { + "z": 0, + "t": 0, + "c": 0, + "pixels": [0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23], + } + ] + + scalar = to_ome_arrow( + image_id="img-0002", + name="Chunky", + image_type="image", + dimension_order="XYCT", + dtype="uint16", + size_x=4, + size_y=3, + size_z=1, + size_c=1, + size_t=1, + channels=[{"id": "ch-0", "name": "C0"}], + planes=planes, + chunk_shape=(1, 2, 2), + build_chunks=True, + ) + + record = scalar.as_py() + assert record["chunk_grid"]["chunk_y"] == 2 + assert record["chunk_grid"]["chunk_x"] == 2 + assert len(record["chunks"]) == 4 + + first_chunk = record["chunks"][0] + assert first_chunk["t"] == 0 + assert first_chunk["c"] == 0 + assert first_chunk["z"] == 0 + assert first_chunk["y"] == 0 + assert first_chunk["x"] == 0 + assert first_chunk["shape_y"] == 2 + assert first_chunk["shape_x"] == 2 + assert first_chunk["pixels"] == [0, 1, 10, 11] + + +def test_plane_from_chunks(example_correct_data: dict) -> None: + """Extract a 2D plane directly from chunked pixels.""" + data = dict(example_correct_data) + data["planes"] = [] + + plane = plane_from_chunks(data, t=0, c=1, z=0) + + np.testing.assert_array_equal( + plane, + np.array([[100, 101, 102, 103], [110, 111, 112, 113], [120, 121, 122, 123]]), + )