From 8027c0bc88c903895bde5b111044e37d3641e226 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 13:36:41 +0100 Subject: [PATCH 01/39] feature: Initial lazy tiff reader --- src/spatialdata_io/readers/generic.py | 186 ++++++++++++++++++++++++-- 1 file changed, 176 insertions(+), 10 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 24ee7071..30a16dd7 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -1,23 +1,34 @@ from __future__ import annotations import warnings -from collections.abc import Sequence +from collections.abc import Callable, Sequence from pathlib import Path +from typing import Any, Protocol, TypeVar +import dask.array as da import numpy as np -from dask_image.imread import imread +from dask import delayed from geopandas import GeoDataFrame +from numpy.typing import NDArray from spatialdata._docs import docstring_parameter from spatialdata.models import Image2DModel, ShapesModel from spatialdata.models._utils import DEFAULT_COORDINATE_SYSTEM from spatialdata.transformations import Identity +from tifffile import memmap as tiffmmemap from xarray import DataArray -VALID_IMAGE_TYPES = [".tif", ".tiff", ".png", ".jpg", ".jpeg"] +VALID_IMAGE_TYPES = [".tif", ".tiff"] VALID_SHAPE_TYPES = [".geojson"] +DEFAULT_CHUNKSIZE = (1000, 1000) __all__ = ["generic", "geojson", "image", "VALID_IMAGE_TYPES", "VALID_SHAPE_TYPES"] +T = TypeVar("T", bound=np.generic) # Restrict to NumPy scalar types + + +class DaskArray(Protocol[T]): + dtype: np.dtype[T] + @docstring_parameter( valid_image_types=", ".join(VALID_IMAGE_TYPES), @@ -68,11 +79,166 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) +def _compute_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: + """Calculate chunk sizes and positions for a given dimension and chunk size""" + # All chunks have the same size except for the last one + positions = np.arange(min_coord, size, chunk) + lengths = np.full_like(positions, chunk, dtype=int) + + if positions[-1] + chunk > size: + lengths[-1] = size - positions[-1] + + return positions, lengths + + +def _compute_chunks( + dimensions: tuple[int, int], + chunk_size: tuple[int, int], + min_coordinates: tuple[int, int] = (0, 0), +) -> NDArray[np.int_]: + """Create all chunk specs for a given image and chunk size. + + Creates specifications (x, y, width, height) with (x, y) being the upper left corner + of chunks of size chunk_size. Chunks at the edges correspond to the remainder of + chunk size and dimensions + + Parameters + ---------- + dimensions : tuple[int, int] + Size of the image in (width, height). + chunk_size : tuple[int, int] + Size of individual tiles in (width, height). + min_coordinates : tuple[int, int], optional + Minimum coordinates (x, y) in the image, defaults to (0, 0). + + Returns + ------- + np.ndarray + Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile + as (x, y, width, height). 
+ """ + x_positions, widths = _compute_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) + y_positions, heights = _compute_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) + + # Generate the tiles + tiles = np.array( + [ + [[x, y, w, h] for y, h in zip(y_positions, heights, strict=True)] + for x, w in zip(x_positions, widths, strict=True) + ], + dtype=int, + ) + return tiles + + +def _read_chunks( + func: Callable[..., NDArray[np.int_]], + slide: Any, + coords: NDArray[np.int_], + n_channel: int, + dtype: type, + **func_kwargs: Any, +) -> list[list[da.array]]: + """Abstract factory method to tile a large microscopy image. + + Parameters + ---------- + func + Function to retrieve a rectangular tile from the slide image. Must take the + arguments: + + - slide Full slide image + - x0: x (col) coordinate of upper left corner of chunk + - y0: y (row) coordinate of upper left corner of chunk + - width: Width of chunk + - height: Height of chunk + + and should return the chunk as numpy array of shape (c, y, x) + slide + Slide image in lazyly loaded format compatible with func + coords + Coordinates of the upper left corner of the image in format (n_row_x, n_row_y, 4) + where the last dimension defines the rectangular tile in format (x, y, width, height). + n_row_x represents the number of chunks in x dimension and n_row_y the number of chunks + in y dimension. + n_channel + Number of channels in array + dtype + Data type of image + func_kwargs + Additional keyword arguments passed to func + + Returns + ------- + list[list[da.array]] + List (length: n_row_x) of lists (length: n_row_y) of chunks. + Represents all chunks of the full image. + """ + func_kwargs = func_kwargs if func_kwargs else {} + + # Collect each delayed chunk as item in list of list + # Inner list becomes dim=-1 (cols/x) + # Outer list becomes dim=-2 (rows/y) + # see dask.array.block + chunks = [ + [ + da.from_delayed( + delayed(func)( + slide, + x0=coords[x, y, 0], + y0=coords[x, y, 1], + width=coords[x, y, 2], + height=coords[x, y, 3], + **func_kwargs, + ), + dtype=dtype, + shape=(n_channel, *coords[x, y, [2, 3]]), + ) + for y in range(coords.shape[0]) + ] + for x in range(coords.shape[1]) + ] + return chunks + + +def _tiff_to_chunks(input: Path) -> list[list[DaskArray[np.int_]]]: + """Chunkwise reader for tiff files. 
+ + Parameters + ---------- + input + Path to image + + Returns + ------- + list[list[DaskArray]] + """ + # Lazy file reader + slide = tiffmmemap(input) + + # Get dimensions in (y, x) + slide_dimensions = slide.shape[:-1] + + # Get number of channels (c) + n_channel = slide.shape[-1] + + # Compute chunk coords + chunk_coords = _compute_chunks(slide_dimensions, chunk_size=DEFAULT_CHUNKSIZE, min_coordinates=(0, 0)) + + # Define reader func + def _reader_func(slide: NDArray[np.int_], x0: int, y0: int, width: int, height: int) -> NDArray[np.int_]: + return np.array(slide[y0 : y0 + height, x0 : x0 + width, :]).T + + return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype) + + def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> DataArray: - """Reads an image file and returns a parsed Image2D spatial element""" - # this function is just a draft, the more general one will be available when - # https://github.com/scverse/spatialdata-io/pull/234 is merged - image = imread(input) - if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1: - image = np.squeeze(image, axis=0) - return Image2DModel.parse(image, dims=data_axes, transformations={coordinate_system: Identity()}) + """Reads an image file and returns a parsed Image2DModel""" + if input.suffix in [".tiff", ".tif"]: + chunks = _tiff_to_chunks(input) + else: + raise NotImplementedError(f"File format {input.suffix} not implemented") + + img = da.block(chunks, allow_unknown_chunksizes=True) + + return Image2DModel.parse(img, dims=data_axes, transformations={coordinate_system: Identity()}) From 7ff1d2340ef2b284067299b28de4848026517387 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 14:48:20 +0100 Subject: [PATCH 02/39] Updated comments --- src/spatialdata_io/readers/generic.py | 38 ++++++++++++++++----------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 30a16dd7..6f902b6a 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -123,8 +123,8 @@ def _compute_chunks( # Generate the tiles tiles = np.array( [ - [[x, y, w, h] for y, h in zip(y_positions, heights, strict=True)] - for x, w in zip(x_positions, widths, strict=True) + [[x, y, w, h] for x, w in zip(x_positions, widths, strict=True)] + for y, h in zip(y_positions, heights, strict=True) ], dtype=int, ) @@ -136,7 +136,7 @@ def _read_chunks( slide: Any, coords: NDArray[np.int_], n_channel: int, - dtype: type, + dtype: np.number, **func_kwargs: Any, ) -> list[list[da.array]]: """Abstract factory method to tile a large microscopy image. @@ -185,23 +185,23 @@ def _read_chunks( da.from_delayed( delayed(func)( slide, - x0=coords[x, y, 0], - y0=coords[x, y, 1], - width=coords[x, y, 2], - height=coords[x, y, 3], + x0=coords[y, x, 0], + y0=coords[y, x, 1], + width=coords[y, x, 2], + height=coords[y, x, 3], **func_kwargs, ), dtype=dtype, - shape=(n_channel, *coords[x, y, [2, 3]]), + shape=(n_channel, *coords[y, x, [3, 2]]), ) - for y in range(coords.shape[0]) + for x in range(coords.shape[1]) ] - for x in range(coords.shape[1]) + for y in range(coords.shape[0]) ] return chunks -def _tiff_to_chunks(input: Path) -> list[list[DaskArray[np.int_]]]: +def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.int_]]]: """Chunkwise reader for tiff files. 
Parameters @@ -216,26 +216,32 @@ def _tiff_to_chunks(input: Path) -> list[list[DaskArray[np.int_]]]: # Lazy file reader slide = tiffmmemap(input) - # Get dimensions in (y, x) - slide_dimensions = slide.shape[:-1] + # Transpose to cyx order + slide = np.transpose(slide, (axes_dim_mapping["c"], axes_dim_mapping["y"], axes_dim_mapping["x"])) + + # Get dimensions in (x, y) + slide_dimensions = slide.shape[2], slide.shape[1] # Get number of channels (c) - n_channel = slide.shape[-1] + n_channel = slide.shape[0] # Compute chunk coords chunk_coords = _compute_chunks(slide_dimensions, chunk_size=DEFAULT_CHUNKSIZE, min_coordinates=(0, 0)) # Define reader func def _reader_func(slide: NDArray[np.int_], x0: int, y0: int, width: int, height: int) -> NDArray[np.int_]: - return np.array(slide[y0 : y0 + height, x0 : x0 + width, :]).T + return np.array(slide[:, y0 : y0 + height, x0 : x0 + width]) return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype) def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> DataArray: """Reads an image file and returns a parsed Image2DModel""" + # Map passed data axes to position of dimension + axes_dim_mapping = {axes: ndim for ndim, axes in enumerate(data_axes)} + if input.suffix in [".tiff", ".tif"]: - chunks = _tiff_to_chunks(input) + chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) else: raise NotImplementedError(f"File format {input.suffix} not implemented") From 826133ab4cac7619cc8f46aab8eeb6dfe632595a Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 14:51:46 +0100 Subject: [PATCH 03/39] Updated comments --- src/spatialdata_io/readers/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 6f902b6a..3cac8876 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -79,7 +79,7 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) -def _compute_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: +def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: """Calculate chunk sizes and positions for a given dimension and chunk size""" # All chunks have the same size except for the last one positions = np.arange(min_coord, size, chunk) @@ -117,8 +117,8 @@ def _compute_chunks( Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile as (x, y, width, height). """ - x_positions, widths = _compute_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) - y_positions, heights = _compute_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) + x_positions, widths = _compute_chunk_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) + y_positions, heights = _compute_chunk_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) # Generate the tiles tiles = np.array( @@ -139,7 +139,7 @@ def _read_chunks( dtype: np.number, **func_kwargs: Any, ) -> list[list[da.array]]: - """Abstract factory method to tile a large microscopy image. + """Abstract method to tile a large microscopy image. 
Parameters ---------- From df258bc7f79811a0fca6e873fdadceca198faa38 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 15:03:04 +0100 Subject: [PATCH 04/39] Move utility functions to designated submodule readers._utils._image --- src/spatialdata_io/readers/_utils/_image.py | 129 +++++++++++++++++++ src/spatialdata_io/readers/generic.py | 131 +------------------- 2 files changed, 135 insertions(+), 125 deletions(-) create mode 100644 src/spatialdata_io/readers/_utils/_image.py diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py new file mode 100644 index 00000000..ab175512 --- /dev/null +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -0,0 +1,129 @@ +from collections.abc import Callable +from typing import Any + +import dask.array as da +import numpy as np +from dask import delayed +from numpy.typing import NDArray + + +def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: + """Calculate chunk sizes and positions for a given dimension and chunk size""" + # All chunks have the same size except for the last one + positions = np.arange(min_coord, size, chunk) + lengths = np.full_like(positions, chunk, dtype=int) + + if positions[-1] + chunk > size: + lengths[-1] = size - positions[-1] + + return positions, lengths + + +def _compute_chunks( + dimensions: tuple[int, int], + chunk_size: tuple[int, int], + min_coordinates: tuple[int, int] = (0, 0), +) -> NDArray[np.int_]: + """Create all chunk specs for a given image and chunk size. + + Creates specifications (x, y, width, height) with (x, y) being the upper left corner + of chunks of size chunk_size. Chunks at the edges correspond to the remainder of + chunk size and dimensions + + Parameters + ---------- + dimensions : tuple[int, int] + Size of the image in (width, height). + chunk_size : tuple[int, int] + Size of individual tiles in (width, height). + min_coordinates : tuple[int, int], optional + Minimum coordinates (x, y) in the image, defaults to (0, 0). + + Returns + ------- + np.ndarray + Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile + as (x, y, width, height). + """ + x_positions, widths = _compute_chunk_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) + y_positions, heights = _compute_chunk_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) + + # Generate the tiles + tiles = np.array( + [ + [[x, y, w, h] for x, w in zip(x_positions, widths, strict=True)] + for y, h in zip(y_positions, heights, strict=True) + ], + dtype=int, + ) + return tiles + + +def _read_chunks( + func: Callable[..., NDArray[np.int_]], + slide: Any, + coords: NDArray[np.int_], + n_channel: int, + dtype: np.number, + **func_kwargs: Any, +) -> list[list[da.array]]: + """Abstract method to tile a large microscopy image. + + Parameters + ---------- + func + Function to retrieve a rectangular tile from the slide image. Must take the + arguments: + + - slide Full slide image + - x0: x (col) coordinate of upper left corner of chunk + - y0: y (row) coordinate of upper left corner of chunk + - width: Width of chunk + - height: Height of chunk + + and should return the chunk as numpy array of shape (c, y, x) + slide + Slide image in lazyly loaded format compatible with func + coords + Coordinates of the upper left corner of the image in format (n_row_x, n_row_y, 4) + where the last dimension defines the rectangular tile in format (x, y, width, height). 
+ n_row_x represents the number of chunks in x dimension and n_row_y the number of chunks + in y dimension. + n_channel + Number of channels in array + dtype + Data type of image + func_kwargs + Additional keyword arguments passed to func + + Returns + ------- + list[list[da.array]] + List (length: n_row_x) of lists (length: n_row_y) of chunks. + Represents all chunks of the full image. + """ + func_kwargs = func_kwargs if func_kwargs else {} + + # Collect each delayed chunk as item in list of list + # Inner list becomes dim=-1 (cols/x) + # Outer list becomes dim=-2 (rows/y) + # see dask.array.block + chunks = [ + [ + da.from_delayed( + delayed(func)( + slide, + x0=coords[y, x, 0], + y0=coords[y, x, 1], + width=coords[y, x, 2], + height=coords[y, x, 3], + **func_kwargs, + ), + dtype=dtype, + shape=(n_channel, *coords[y, x, [3, 2]]), + ) + for x in range(coords.shape[1]) + ] + for y in range(coords.shape[0]) + ] + return chunks diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 3cac8876..65158413 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -1,13 +1,12 @@ from __future__ import annotations import warnings -from collections.abc import Callable, Sequence +from collections.abc import Sequence from pathlib import Path -from typing import Any, Protocol, TypeVar +from typing import Protocol, TypeVar import dask.array as da import numpy as np -from dask import delayed from geopandas import GeoDataFrame from numpy.typing import NDArray from spatialdata._docs import docstring_parameter @@ -17,6 +16,8 @@ from tifffile import memmap as tiffmmemap from xarray import DataArray +from ._utils._image import _compute_chunks, _read_chunks + VALID_IMAGE_TYPES = [".tif", ".tiff"] VALID_SHAPE_TYPES = [".geojson"] DEFAULT_CHUNKSIZE = (1000, 1000) @@ -79,128 +80,6 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) -def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: - """Calculate chunk sizes and positions for a given dimension and chunk size""" - # All chunks have the same size except for the last one - positions = np.arange(min_coord, size, chunk) - lengths = np.full_like(positions, chunk, dtype=int) - - if positions[-1] + chunk > size: - lengths[-1] = size - positions[-1] - - return positions, lengths - - -def _compute_chunks( - dimensions: tuple[int, int], - chunk_size: tuple[int, int], - min_coordinates: tuple[int, int] = (0, 0), -) -> NDArray[np.int_]: - """Create all chunk specs for a given image and chunk size. - - Creates specifications (x, y, width, height) with (x, y) being the upper left corner - of chunks of size chunk_size. Chunks at the edges correspond to the remainder of - chunk size and dimensions - - Parameters - ---------- - dimensions : tuple[int, int] - Size of the image in (width, height). - chunk_size : tuple[int, int] - Size of individual tiles in (width, height). - min_coordinates : tuple[int, int], optional - Minimum coordinates (x, y) in the image, defaults to (0, 0). - - Returns - ------- - np.ndarray - Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile - as (x, y, width, height). 
- """ - x_positions, widths = _compute_chunk_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) - y_positions, heights = _compute_chunk_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) - - # Generate the tiles - tiles = np.array( - [ - [[x, y, w, h] for x, w in zip(x_positions, widths, strict=True)] - for y, h in zip(y_positions, heights, strict=True) - ], - dtype=int, - ) - return tiles - - -def _read_chunks( - func: Callable[..., NDArray[np.int_]], - slide: Any, - coords: NDArray[np.int_], - n_channel: int, - dtype: np.number, - **func_kwargs: Any, -) -> list[list[da.array]]: - """Abstract method to tile a large microscopy image. - - Parameters - ---------- - func - Function to retrieve a rectangular tile from the slide image. Must take the - arguments: - - - slide Full slide image - - x0: x (col) coordinate of upper left corner of chunk - - y0: y (row) coordinate of upper left corner of chunk - - width: Width of chunk - - height: Height of chunk - - and should return the chunk as numpy array of shape (c, y, x) - slide - Slide image in lazyly loaded format compatible with func - coords - Coordinates of the upper left corner of the image in format (n_row_x, n_row_y, 4) - where the last dimension defines the rectangular tile in format (x, y, width, height). - n_row_x represents the number of chunks in x dimension and n_row_y the number of chunks - in y dimension. - n_channel - Number of channels in array - dtype - Data type of image - func_kwargs - Additional keyword arguments passed to func - - Returns - ------- - list[list[da.array]] - List (length: n_row_x) of lists (length: n_row_y) of chunks. - Represents all chunks of the full image. - """ - func_kwargs = func_kwargs if func_kwargs else {} - - # Collect each delayed chunk as item in list of list - # Inner list becomes dim=-1 (cols/x) - # Outer list becomes dim=-2 (rows/y) - # see dask.array.block - chunks = [ - [ - da.from_delayed( - delayed(func)( - slide, - x0=coords[y, x, 0], - y0=coords[y, x, 1], - width=coords[y, x, 2], - height=coords[y, x, 3], - **func_kwargs, - ), - dtype=dtype, - shape=(n_channel, *coords[y, x, [3, 2]]), - ) - for x in range(coords.shape[1]) - ] - for y in range(coords.shape[0]) - ] - return chunks - - def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.int_]]]: """Chunkwise reader for tiff files. 
@@ -208,6 +87,8 @@ def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[ ---------- input Path to image + axes_dim_mapping + Mapping between dimension name (x, y, c) and index Returns ------- From db0d78278e13d0f35e18cc645900e82ee3a97985 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 15:27:52 +0100 Subject: [PATCH 05/39] Initial tests utils --- tests/readers/test_utils_image.py | 59 +++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 tests/readers/test_utils_image.py diff --git a/tests/readers/test_utils_image.py b/tests/readers/test_utils_image.py new file mode 100644 index 00000000..d9f0c295 --- /dev/null +++ b/tests/readers/test_utils_image.py @@ -0,0 +1,59 @@ +import numpy as np +import pytest +from numpy.typing import NDArray + +from spatialdata_io.readers._utils._image import ( + _compute_chunk_sizes_positions, + _compute_chunks, +) + + +@pytest.mark.parametrize( + ("size", "chunk", "min_coordinate", "positions", "lengths"), + [ + (300, 100, 0, np.array([0, 100, 200]), np.array([100, 100, 100])), + (300, 200, 0, np.array([0, 200]), np.array([200, 100])), + (300, 100, -100, np.array([-100, 0, 100]), np.array([100, 100, 100])), + (300, 200, -100, np.array([-100, 100]), np.array([200, 100])), + ], +) +def test_compute_chunk_sizes_positions( + size: int, + chunk: int, + min_coordinate: int, + positions: NDArray[np.number], + lengths: NDArray[np.number], +) -> None: + computed_positions, computed_lengths = _compute_chunk_sizes_positions(size, chunk, min_coordinate) + assert (positions == computed_positions).all() + assert (lengths == computed_lengths).all() + + +@pytest.mark.parametrize( + ("dimensions", "chunk_size", "min_coordinates", "result"), + [ + # Regular grid 2x2 + ( + (2, 2), + (1, 1), + (0, 0), + np.array([[[0, 0, 1, 1], [1, 0, 1, 1]], [[0, 1, 1, 1], [1, 1, 1, 1]]]), + ), + # Different tile sizes + ( + (3, 3), + (2, 2), + (0, 0), + np.array([[[0, 0, 2, 2], [2, 0, 1, 2]], [[0, 2, 2, 1], [2, 2, 1, 1]]]), + ), + ], +) +def test_compute_chunks( + dimensions: tuple[int, int], + chunk_size: tuple[int, int], + min_coordinates: tuple[int, int], + result: NDArray[np.number], +) -> None: + tiles = _compute_chunks(dimensions, chunk_size, min_coordinates) + + assert (tiles == result).all(), tiles From c03932f0d1f932a2b6b55a91ace1aeaede555cdd Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sun, 16 Feb 2025 15:28:41 +0100 Subject: [PATCH 06/39] Fixes edge cases for min coordinate --- src/spatialdata_io/readers/_utils/_image.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index ab175512..28e6e992 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -10,11 +10,11 @@ def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: """Calculate chunk sizes and positions for a given dimension and chunk size""" # All chunks have the same size except for the last one - positions = np.arange(min_coord, size, chunk) + positions = np.arange(min_coord, min_coord + size, chunk) lengths = np.full_like(positions, chunk, dtype=int) - if positions[-1] + chunk > size: - lengths[-1] = size - positions[-1] + if positions[-1] + chunk > size + min_coord: + lengths[-1] = size + min_coord - positions[-1] return positions, lengths From 339cbd88aae511d723ddc1eebfe82496f1340cf1 Mon Sep 17 00:00:00 2001 From: 
Lucas Diedrich Date: Sun, 16 Feb 2025 19:33:07 +0100 Subject: [PATCH 07/39] Added test for negative coordinates --- tests/readers/test_utils_image.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/readers/test_utils_image.py b/tests/readers/test_utils_image.py index d9f0c295..f1c58fa1 100644 --- a/tests/readers/test_utils_image.py +++ b/tests/readers/test_utils_image.py @@ -46,6 +46,12 @@ def test_compute_chunk_sizes_positions( (0, 0), np.array([[[0, 0, 2, 2], [2, 0, 1, 2]], [[0, 2, 2, 1], [2, 2, 1, 1]]]), ), + ( + (2, 2), + (1, 1), + (-1, 0), + np.array([[[0, -1, 1, 1], [1, -1, 1, 1]], [[0, 0, 1, 1], [1, 0, 1, 1]]]), + ), ], ) def test_compute_chunks( @@ -56,4 +62,4 @@ def test_compute_chunks( ) -> None: tiles = _compute_chunks(dimensions, chunk_size, min_coordinates) - assert (tiles == result).all(), tiles + assert (tiles == result).all() From 24c6eece2dfdd07f2b63a5d3807be0d689ad7818 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Mon, 17 Feb 2025 16:46:03 +0100 Subject: [PATCH 08/39] Add support for png/jpg again --- src/spatialdata_io/readers/generic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 65158413..8aec6765 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -7,6 +7,7 @@ import dask.array as da import numpy as np +from dask_image.imread import imread from geopandas import GeoDataFrame from numpy.typing import NDArray from spatialdata._docs import docstring_parameter @@ -18,7 +19,7 @@ from ._utils._image import _compute_chunks, _read_chunks -VALID_IMAGE_TYPES = [".tif", ".tiff"] +VALID_IMAGE_TYPES = [".tif", ".tiff", ".png", ".jpg", ".jpeg"] VALID_SHAPE_TYPES = [".geojson"] DEFAULT_CHUNKSIZE = (1000, 1000) @@ -123,6 +124,10 @@ def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> Data if input.suffix in [".tiff", ".tif"]: chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) + elif input.suffix in [".png", "jpg", "jpeg"]: + image = imread(input) + if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1: + image = np.squeeze(image, axis=0) else: raise NotImplementedError(f"File format {input.suffix} not implemented") From dbdc7c7516823eb58d2547630dffe731a646ab13 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Mon, 17 Feb 2025 17:11:06 +0100 Subject: [PATCH 09/39] Add initial test --- tests/test_generic.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_generic.py b/tests/test_generic.py index d4dda1a7..e8414fa7 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -9,9 +9,12 @@ from PIL import Image from spatialdata import SpatialData from spatialdata.datasets import blobs +from tifffile import imread as tiffread +from tifffile import imwrite as tiffwrite from spatialdata_io.__main__ import read_generic_wrapper from spatialdata_io.converters.generic_to_zarr import generic_to_zarr +from spatialdata_io.readers.generic import image @contextmanager @@ -33,6 +36,32 @@ def save_temp_files() -> Generator[tuple[Path, Path, Path], None, None]: yield jpg_path, geojson_path, Path(tmpdir) +@pytest.fixture(scope="module", params=[("c", "y", "x"), ("x", "y", "c")]) +def save_tiff_files( + request: pytest.FixtureRequest, +) -> Generator[tuple[Path, tuple[str], Path], None, None]: + with tempfile.TemporaryDirectory() as tmpdir: + axes = request.param + sdata = blobs() + # save the image as tiff + x = 
sdata["blobs_image"].transpose(*axes).data.compute() + img = np.clip(x * 255, 0, 255).astype(np.uint8) + + tiff_path = Path(tmpdir) / "blobs_image.tiff" + tiffwrite(tiff_path, img) + + yield tiff_path, axes, Path(tmpdir) + + +def test_read_tiff(save_tiff_files: tuple[Path, tuple[str], Path]) -> None: + tiff_path, axes, _ = save_tiff_files + img = image(tiff_path, data_axes=axes, coordinate_system="global") + + reference = tiffread(tiff_path) + + assert (img.compute() == reference).all() + + @pytest.mark.parametrize("cli", [True, False]) @pytest.mark.parametrize("element_name", [None, "test_element"]) def test_read_generic_image(runner: CliRunner, cli: bool, element_name: str | None) -> None: From da98469859daebb57753e372ad2a53dbff08fa8d Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Mon, 17 Feb 2025 17:19:45 +0100 Subject: [PATCH 10/39] Fix: Fix jpeg and png reader, fix issues with local variable name --- src/spatialdata_io/readers/generic.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 8aec6765..fd8c17dd 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -124,13 +124,14 @@ def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> Data if input.suffix in [".tiff", ".tif"]: chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) - elif input.suffix in [".png", "jpg", "jpeg"]: + image = da.block(chunks, allow_unknown_chunksizes=True) + + elif input.suffix in [".png", ".jpg", ".jpeg"]: image = imread(input) if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1: image = np.squeeze(image, axis=0) + else: raise NotImplementedError(f"File format {input.suffix} not implemented") - img = da.block(chunks, allow_unknown_chunksizes=True) - - return Image2DModel.parse(img, dims=data_axes, transformations={coordinate_system: Identity()}) + return Image2DModel.parse(image, dims=data_axes, transformations={coordinate_system: Identity()}) From 2349be7a7d036a9c801c66f7d5b698b3d7de9ea1 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich <86159938+lucas-diedrich@users.noreply.github.com> Date: Fri, 2 May 2025 17:41:37 +0200 Subject: [PATCH 11/39] Update src/spatialdata_io/readers/_utils/_image.py Co-authored-by: Wouter-Michiel Vierdag --- src/spatialdata_io/readers/_utils/_image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 28e6e992..8896d78f 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -20,7 +20,7 @@ def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tup def _compute_chunks( - dimensions: tuple[int, int], + shape: tuple[int, int], chunk_size: tuple[int, int], min_coordinates: tuple[int, int] = (0, 0), ) -> NDArray[np.int_]: @@ -32,7 +32,7 @@ def _compute_chunks( Parameters ---------- - dimensions : tuple[int, int] + shape : tuple[int, int] Size of the image in (width, height). chunk_size : tuple[int, int] Size of individual tiles in (width, height). 
From 46ed3e5f6e12f792b7f61da396a967ee87a22e6d Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 2 May 2025 17:46:51 +0200 Subject: [PATCH 12/39] [Refactor|API] Rename dimensions to shape to stick to numpy convention --- src/spatialdata_io/readers/_utils/_image.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 8896d78f..d644f934 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -45,8 +45,8 @@ def _compute_chunks( Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile as (x, y, width, height). """ - x_positions, widths = _compute_chunk_sizes_positions(dimensions[1], chunk_size[1], min_coord=min_coordinates[1]) - y_positions, heights = _compute_chunk_sizes_positions(dimensions[0], chunk_size[0], min_coord=min_coordinates[0]) + x_positions, widths = _compute_chunk_sizes_positions(shape[1], chunk_size[1], min_coord=min_coordinates[1]) + y_positions, heights = _compute_chunk_sizes_positions(shape[0], chunk_size[0], min_coord=min_coordinates[0]) # Generate the tiles tiles = np.array( From 99731fedbc9502ddae055e9b26de7251ba5b52b7 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 2 May 2025 17:48:54 +0200 Subject: [PATCH 13/39] [Refactor] Make suggested simplification of code, suggested by @melonora --- src/spatialdata_io/readers/_utils/_image.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index d644f934..4621d5fb 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -11,10 +11,7 @@ def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tup """Calculate chunk sizes and positions for a given dimension and chunk size""" # All chunks have the same size except for the last one positions = np.arange(min_coord, min_coord + size, chunk) - lengths = np.full_like(positions, chunk, dtype=int) - - if positions[-1] + chunk > size + min_coord: - lengths[-1] = size + min_coord - positions[-1] + lengths = np.minimum(chunk, min_coord + size - positions) return positions, lengths From f03ca8e8d3f867424b1a526020521c97846d3a68 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 2 May 2025 17:52:39 +0200 Subject: [PATCH 14/39] [Test] Add test for compressed tiffs --- tests/test_generic.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index e8414fa7..3251e8fd 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -36,19 +36,29 @@ def save_temp_files() -> Generator[tuple[Path, Path, Path], None, None]: yield jpg_path, geojson_path, Path(tmpdir) -@pytest.fixture(scope="module", params=[("c", "y", "x"), ("x", "y", "c")]) +@pytest.fixture( + scope="module", + params=[ + {"axes": ("c", "y", "x"), "compression": None}, + {"axes": ("x", "y", "c"), "compression": None}, + {"axes": ("c", "y", "x"), "compression": "lzw"}, + {"axes": ("x", "y", "c"), "compression": "lzw"}, + ], +) def save_tiff_files( request: pytest.FixtureRequest, ) -> Generator[tuple[Path, tuple[str], Path], None, None]: with tempfile.TemporaryDirectory() as tmpdir: - axes = request.param + axes = request.param["axes"] + compression = request.param["compression"] + sdata = blobs() # save the image as tiff x = sdata["blobs_image"].transpose(*axes).data.compute() img = 
np.clip(x * 255, 0, 255).astype(np.uint8) tiff_path = Path(tmpdir) / "blobs_image.tiff" - tiffwrite(tiff_path, img) + tiffwrite(tiff_path, img, compression=compression) yield tiff_path, axes, Path(tmpdir) From 9e057de33bcdf5072bf3627220d05d6f7b57efde Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 2 May 2025 18:24:24 +0200 Subject: [PATCH 15/39] [Fix] Account for compressed images --- src/spatialdata_io/readers/generic.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index fd8c17dd..9eb799dd 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -7,6 +7,7 @@ import dask.array as da import numpy as np +import tifffile from dask_image.imread import imread from geopandas import GeoDataFrame from numpy.typing import NDArray @@ -14,7 +15,6 @@ from spatialdata.models import Image2DModel, ShapesModel from spatialdata.models._utils import DEFAULT_COORDINATE_SYSTEM from spatialdata.transformations import Identity -from tifffile import memmap as tiffmmemap from xarray import DataArray from ._utils._image import _compute_chunks, _read_chunks @@ -81,7 +81,9 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) -def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.int_]]]: +def _tiff_to_chunks( + input: Path, axes_dim_mapping: dict[str, int] +) -> DaskArray[np.int_] | list[list[DaskArray[np.int_]]]: """Chunkwise reader for tiff files. Parameters @@ -96,7 +98,7 @@ def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[ list[list[DaskArray]] """ # Lazy file reader - slide = tiffmmemap(input) + slide = tifffile.memmap(input) # Transpose to cyx order slide = np.transpose(slide, (axes_dim_mapping["c"], axes_dim_mapping["y"], axes_dim_mapping["x"])) @@ -123,8 +125,20 @@ def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> Data axes_dim_mapping = {axes: ndim for ndim, axes in enumerate(data_axes)} if input.suffix in [".tiff", ".tif"]: - chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) - image = da.block(chunks, allow_unknown_chunksizes=True) + try: + chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) + image = da.block(chunks, allow_unknown_chunksizes=True) + + # Edge case: Compressed images are not memory-mappable + except ValueError: + warnings.warn( + "Image data are not memory-mappable, potentially due to compression. 
Trying to load the image into memory at once", + stacklevel=2, + ) + image = imread(input) + if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1: + image = np.squeeze(image, axis=0) + image = image.transpose(axes_dim_mapping["c"], axes_dim_mapping["y"], axes_dim_mapping["x"]) elif input.suffix in [".png", ".jpg", ".jpeg"]: image = imread(input) From c05b71872151525f456399b8f56b55b006aba650 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 2 May 2025 18:31:59 +0200 Subject: [PATCH 16/39] [Refactor] Remove unnecessary type hint --- src/spatialdata_io/readers/generic.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 9eb799dd..b632eba7 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -81,9 +81,7 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) -def _tiff_to_chunks( - input: Path, axes_dim_mapping: dict[str, int] -) -> DaskArray[np.int_] | list[list[DaskArray[np.int_]]]: +def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.int_]]]: """Chunkwise reader for tiff files. Parameters From 9f3cc3ceba7f1dc1dadfaf258148d79da666a84f Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 12 Jan 2026 17:20:07 +0100 Subject: [PATCH 17/39] fix pre-commit --- src/spatialdata_io/readers/_utils/_image.py | 4 ++-- src/spatialdata_io/readers/generic.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 5993df30..9fab0dd0 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -61,9 +61,9 @@ def _read_chunks( slide: Any, coords: NDArray[np.int_], n_channel: int, - dtype: np.number, + dtype: np.dtype[Any], **func_kwargs: Any, -) -> list[list[da.array]]: +) -> list[list[da.Array]]: """Abstract method to tile a large microscopy image. 
Parameters diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 5aad2a5e..5f2affde 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -4,7 +4,6 @@ from pathlib import Path from typing import TYPE_CHECKING, Protocol, TypeVar -import dask.array import dask.array as da import numpy as np import tifffile @@ -123,7 +122,7 @@ def _reader_func(slide: NDArray[np.int_], x0: int, y0: int, width: int, height: return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype) -def _dask_image_imread(input: Path, data_axes: Sequence[str]) -> DaskArray[int | float]: +def _dask_image_imread(input: Path, data_axes: Sequence[str]) -> da.Array: image = imread(input) if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1: image = np.squeeze(image, axis=0) From 9d44f25e01c1f0c9be9f3ba9f17f9564186a7317 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 13 Jan 2026 14:17:30 +0100 Subject: [PATCH 18/39] fix transpose in image(); wip code review --- src/spatialdata_io/readers/_utils/_image.py | 12 +++++++++++- src/spatialdata_io/readers/generic.py | 2 ++ tests/readers/test_utils_image.py | 1 + tests/test_generic.py | 3 ++- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 9fab0dd0..12c76c0e 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -42,6 +42,7 @@ def _compute_chunks( Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile as (x, y, width, height). """ + # TODO: check x -> 1 and y -> 0? x_positions, widths = _compute_chunk_sizes_positions(shape[1], chunk_size[1], min_coord=min_coordinates[1]) y_positions, heights = _compute_chunk_sizes_positions(shape[0], chunk_size[0], min_coord=min_coordinates[0]) @@ -57,6 +58,8 @@ def _compute_chunks( def _read_chunks( + # TODO: expand type hints for ... + # TODO: really only np.int_? Not float? Consider using np.number func: Callable[..., NDArray[np.int_]], slide: Any, coords: NDArray[np.int_], @@ -80,7 +83,7 @@ def _read_chunks( and should return the chunk as numpy array of shape (c, y, x) slide - Slide image in lazyly loaded format compatible with func + Slide image in lazily loaded format compatible with func coords Coordinates of the upper left corner of the image in format (n_row_x, n_row_y, 4) where the last dimension defines the rectangular tile in format (x, y, width, height). @@ -102,6 +105,7 @@ def _read_chunks( func_kwargs = func_kwargs if func_kwargs else {} # Collect each delayed chunk as item in list of list + # TODO: check, wasn't it x, y and not y, x? 
# Inner list becomes dim=-1 (cols/x) # Outer list becomes dim=-2 (rows/y) # see dask.array.block @@ -117,10 +121,16 @@ def _read_chunks( **func_kwargs, ), dtype=dtype, + # TODO: double check the [3, 2] with debugger shape=(n_channel, *coords[y, x, [3, 2]]), ) + # TODO: seems inconsistent with coords docstring for x in range(coords.shape[1]) ] + # TODO: seems inconsistent with coords docstring for y in range(coords.shape[0]) ] return chunks + + +# TODO: do a asv debugging for peak mem diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 5f2affde..412c2923 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -138,9 +138,11 @@ def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> Data try: chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) image = da.block(chunks, allow_unknown_chunksizes=True) + data_axes = ["c", "y", "x"] # Edge case: Compressed images are not memory-mappable except ValueError as e: + # TODO: change to logger warning warnings.warn( f"Exception occurred: {str(e)}\nPossible troubleshooting: image data are not memory-mappable, potentially due to compression. Trying to load the image into memory at once", stacklevel=2, diff --git a/tests/readers/test_utils_image.py b/tests/readers/test_utils_image.py index f1c58fa1..5aaabd39 100644 --- a/tests/readers/test_utils_image.py +++ b/tests/readers/test_utils_image.py @@ -13,6 +13,7 @@ [ (300, 100, 0, np.array([0, 100, 200]), np.array([100, 100, 100])), (300, 200, 0, np.array([0, 200]), np.array([200, 100])), + # TODO: why negative coordinates if used only for 0, 0? (300, 100, -100, np.array([-100, 0, 100]), np.array([100, 100, 100])), (300, 200, -100, np.array([-100, 100]), np.array([200, 100])), ], diff --git a/tests/test_generic.py b/tests/test_generic.py index ff4c1d53..31e1c19f 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -68,8 +68,9 @@ def test_read_tiff(save_tiff_files: tuple[Path, tuple[str], Path]) -> None: img = image(tiff_path, data_axes=axes, coordinate_system="global") reference = tiffread(tiff_path) + reference_cyx = reference.transpose(*[axes.index(ax) for ax in ["c", "y", "x"]]) - assert (img.compute() == reference).all() + assert (img.compute() == reference_cyx).all() @pytest.mark.parametrize("cli", [True, False]) From c14cb2226b0b3d75144b780097d52d5d3b19cd1e Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 13 Jan 2026 15:18:10 +0100 Subject: [PATCH 19/39] add test for dask-image fallback for compressed tiffs --- src/spatialdata_io/readers/generic.py | 11 ++++++----- tests/test_generic.py | 17 +++++++++++++---- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 412c2923..08ca2bdb 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -1,6 +1,5 @@ from __future__ import annotations -import warnings from pathlib import Path from typing import TYPE_CHECKING, Protocol, TypeVar @@ -10,6 +9,7 @@ from dask_image.imread import imread from geopandas import GeoDataFrame from spatialdata._docs import docstring_parameter +from spatialdata._logging import logger from spatialdata.models import Image2DModel, ShapesModel from spatialdata.models._utils import DEFAULT_COORDINATE_SYSTEM from spatialdata.transformations import Identity @@ -71,7 +71,7 @@ def generic( coordinate_system = DEFAULT_COORDINATE_SYSTEM if input.suffix in VALID_SHAPE_TYPES: 
if data_axes is not None: - warnings.warn("data_axes is not used for geojson files", UserWarning, stacklevel=2) + logger.warning("data_axes is not used for geojson files") return geojson(input, coordinate_system=coordinate_system) elif input.suffix in VALID_IMAGE_TYPES: if data_axes is None: @@ -143,9 +143,10 @@ def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> Data # Edge case: Compressed images are not memory-mappable except ValueError as e: # TODO: change to logger warning - warnings.warn( - f"Exception occurred: {str(e)}\nPossible troubleshooting: image data are not memory-mappable, potentially due to compression. Trying to load the image into memory at once", - stacklevel=2, + logger.warning( + f"Exception occurred: {str(e)}\nPossible troubleshooting: image data " + "is not memory-mappable, potentially due to compression. Trying to " + "load the image into memory at once", ) image = _dask_image_imread(input=input, data_axes=data_axes) diff --git a/tests/test_generic.py b/tests/test_generic.py index 31e1c19f..445200dd 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -1,3 +1,4 @@ +import logging import tempfile from collections.abc import Generator from contextlib import contextmanager @@ -8,6 +9,7 @@ from click.testing import CliRunner from PIL import Image from spatialdata import SpatialData +from spatialdata._logging import logger from spatialdata.datasets import blobs from tifffile import imread as tiffread from tifffile import imwrite as tiffwrite @@ -60,12 +62,19 @@ def save_tiff_files( tiff_path = Path(tmpdir) / "blobs_image.tiff" tiffwrite(tiff_path, img, compression=compression) - yield tiff_path, axes, Path(tmpdir) + yield tiff_path, axes, compression -def test_read_tiff(save_tiff_files: tuple[Path, tuple[str], Path]) -> None: - tiff_path, axes, _ = save_tiff_files - img = image(tiff_path, data_axes=axes, coordinate_system="global") +def test_read_tiff(save_tiff_files: tuple[Path, tuple[str], str | None], caplog: pytest.LogCaptureFixture) -> None: + tiff_path, axes, compression = save_tiff_files + + logger.propagate = True + with caplog.at_level(logging.WARNING): + img = image(tiff_path, data_axes=axes, coordinate_system="global") + if compression is not None: + assert "image data is not memory-mappable, potentially due to compression" in caplog.text + + logger.propagate = False reference = tiffread(tiff_path) reference_cyx = reference.transpose(*[axes.index(ax) for ax in ["c", "y", "x"]]) From a7a2b923ebe5853a6f7f047285057abb9f50b757 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 13 Jan 2026 15:43:06 +0100 Subject: [PATCH 20/39] remove unused min_coordinate --- src/spatialdata_io/readers/_utils/_image.py | 13 +++++------ src/spatialdata_io/readers/generic.py | 2 +- tests/readers/test_utils_image.py | 25 +++++---------------- 3 files changed, 12 insertions(+), 28 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 12c76c0e..4bab4038 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -7,11 +7,11 @@ from numpy.typing import NDArray -def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: +def _compute_chunk_sizes_positions(size: int, chunk: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: """Calculate chunk sizes and positions for a given dimension and chunk size.""" # All chunks have the same size except for the last one - positions = 
np.arange(min_coord, min_coord + size, chunk) - lengths = np.minimum(chunk, min_coord + size - positions) + positions = np.arange(0, size, chunk) + lengths = np.minimum(chunk, size - positions) return positions, lengths @@ -19,7 +19,6 @@ def _compute_chunk_sizes_positions(size: int, chunk: int, min_coord: int) -> tup def _compute_chunks( shape: tuple[int, int], chunk_size: tuple[int, int], - min_coordinates: tuple[int, int] = (0, 0), ) -> NDArray[np.int_]: """Create all chunk specs for a given image and chunk size. @@ -33,8 +32,6 @@ def _compute_chunks( Size of the image in (width, height). chunk_size : tuple[int, int] Size of individual tiles in (width, height). - min_coordinates : tuple[int, int], optional - Minimum coordinates (x, y) in the image, defaults to (0, 0). Returns ------- @@ -43,8 +40,8 @@ def _compute_chunks( as (x, y, width, height). """ # TODO: check x -> 1 and y -> 0? - x_positions, widths = _compute_chunk_sizes_positions(shape[1], chunk_size[1], min_coord=min_coordinates[1]) - y_positions, heights = _compute_chunk_sizes_positions(shape[0], chunk_size[0], min_coord=min_coordinates[0]) + x_positions, widths = _compute_chunk_sizes_positions(shape[1], chunk_size[1]) + y_positions, heights = _compute_chunk_sizes_positions(shape[0], chunk_size[0]) # Generate the tiles tiles = np.array( diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 08ca2bdb..668cfc56 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -113,7 +113,7 @@ def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[ n_channel = slide.shape[0] # Compute chunk coords - chunk_coords = _compute_chunks(slide_dimensions, chunk_size=DEFAULT_CHUNKSIZE, min_coordinates=(0, 0)) + chunk_coords = _compute_chunks(slide_dimensions, chunk_size=DEFAULT_CHUNKSIZE) # Define reader func def _reader_func(slide: NDArray[np.int_], x0: int, y0: int, width: int, height: int) -> NDArray[np.int_]: diff --git a/tests/readers/test_utils_image.py b/tests/readers/test_utils_image.py index 5aaabd39..bdf5f411 100644 --- a/tests/readers/test_utils_image.py +++ b/tests/readers/test_utils_image.py @@ -9,58 +9,45 @@ @pytest.mark.parametrize( - ("size", "chunk", "min_coordinate", "positions", "lengths"), + ("size", "chunk", "positions", "lengths"), [ - (300, 100, 0, np.array([0, 100, 200]), np.array([100, 100, 100])), - (300, 200, 0, np.array([0, 200]), np.array([200, 100])), - # TODO: why negative coordinates if used only for 0, 0? 
- (300, 100, -100, np.array([-100, 0, 100]), np.array([100, 100, 100])), - (300, 200, -100, np.array([-100, 100]), np.array([200, 100])), + (300, 100, np.array([0, 100, 200]), np.array([100, 100, 100])), + (300, 200, np.array([0, 200]), np.array([200, 100])), ], ) def test_compute_chunk_sizes_positions( size: int, chunk: int, - min_coordinate: int, positions: NDArray[np.number], lengths: NDArray[np.number], ) -> None: - computed_positions, computed_lengths = _compute_chunk_sizes_positions(size, chunk, min_coordinate) + computed_positions, computed_lengths = _compute_chunk_sizes_positions(size, chunk) assert (positions == computed_positions).all() assert (lengths == computed_lengths).all() @pytest.mark.parametrize( - ("dimensions", "chunk_size", "min_coordinates", "result"), + ("dimensions", "chunk_size", "result"), [ # Regular grid 2x2 ( (2, 2), (1, 1), - (0, 0), np.array([[[0, 0, 1, 1], [1, 0, 1, 1]], [[0, 1, 1, 1], [1, 1, 1, 1]]]), ), # Different tile sizes ( (3, 3), (2, 2), - (0, 0), np.array([[[0, 0, 2, 2], [2, 0, 1, 2]], [[0, 2, 2, 1], [2, 2, 1, 1]]]), ), - ( - (2, 2), - (1, 1), - (-1, 0), - np.array([[[0, -1, 1, 1], [1, -1, 1, 1]], [[0, 0, 1, 1], [1, 0, 1, 1]]]), - ), ], ) def test_compute_chunks( dimensions: tuple[int, int], chunk_size: tuple[int, int], - min_coordinates: tuple[int, int], result: NDArray[np.number], ) -> None: - tiles = _compute_chunks(dimensions, chunk_size, min_coordinates) + tiles = _compute_chunks(dimensions, chunk_size) assert (tiles == result).all() From 80a931d036c4fe87059477f352e3ab0f0e88cd92 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 13 Jan 2026 16:03:12 +0100 Subject: [PATCH 21/39] fix wrong dimension _compute_chunks(); cover with test --- src/spatialdata_io/readers/_utils/_image.py | 9 +++---- tests/readers/test_utils_image.py | 29 ++++++++++++++------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 4bab4038..11d18e8e 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -39,15 +39,14 @@ def _compute_chunks( Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile as (x, y, width, height). """ - # TODO: check x -> 1 and y -> 0? 
- x_positions, widths = _compute_chunk_sizes_positions(shape[1], chunk_size[1]) - y_positions, heights = _compute_chunk_sizes_positions(shape[0], chunk_size[0]) + x_positions, widths = _compute_chunk_sizes_positions(shape[0], chunk_size[0]) + y_positions, heights = _compute_chunk_sizes_positions(shape[1], chunk_size[1]) # Generate the tiles tiles = np.array( [ - [[x, y, w, h] for x, w in zip(x_positions, widths, strict=True)] - for y, h in zip(y_positions, heights, strict=True) + [[x, y, w, h] for y, h in zip(y_positions, heights, strict=True)] + for x, w in zip(x_positions, widths, strict=True) ], dtype=int, ) diff --git a/tests/readers/test_utils_image.py b/tests/readers/test_utils_image.py index bdf5f411..b6648556 100644 --- a/tests/readers/test_utils_image.py +++ b/tests/readers/test_utils_image.py @@ -9,7 +9,7 @@ @pytest.mark.parametrize( - ("size", "chunk", "positions", "lengths"), + ("size", "chunk", "expected_positions", "expected_lengths"), [ (300, 100, np.array([0, 100, 200]), np.array([100, 100, 100])), (300, 200, np.array([0, 200]), np.array([200, 100])), @@ -18,12 +18,12 @@ def test_compute_chunk_sizes_positions( size: int, chunk: int, - positions: NDArray[np.number], - lengths: NDArray[np.number], + expected_positions: NDArray[np.number], + expected_lengths: NDArray[np.number], ) -> None: computed_positions, computed_lengths = _compute_chunk_sizes_positions(size, chunk) - assert (positions == computed_positions).all() - assert (lengths == computed_lengths).all() + assert (expected_positions == computed_positions).all() + assert (expected_lengths == computed_lengths).all() @pytest.mark.parametrize( @@ -33,13 +33,24 @@ def test_compute_chunk_sizes_positions( ( (2, 2), (1, 1), - np.array([[[0, 0, 1, 1], [1, 0, 1, 1]], [[0, 1, 1, 1], [1, 1, 1, 1]]]), + np.array( + [ + [[0, 0, 1, 1], [0, 1, 1, 1]], + [[1, 0, 1, 1], [1, 1, 1, 1]], + ] + ), ), # Different tile sizes ( - (3, 3), - (2, 2), - np.array([[[0, 0, 2, 2], [2, 0, 1, 2]], [[0, 2, 2, 1], [2, 2, 1, 1]]]), + (300, 300), + (100, 200), + np.array( + [ + [[0, 0, 100, 200], [0, 200, 100, 100]], + [[100, 0, 100, 200], [100, 200, 100, 100]], + [[200, 0, 100, 200], [200, 200, 100, 100]], + ] + ), ), ], ) From fc26342c25dbf407385e75f4437f3e86a10a67b5 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 13 Jan 2026 16:09:10 +0100 Subject: [PATCH 22/39] np._int -> np.number --- src/spatialdata_io/readers/_utils/_image.py | 3 +-- src/spatialdata_io/readers/generic.py | 4 ++-- tests/test_generic.py | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 11d18e8e..37676833 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -55,8 +55,7 @@ def _compute_chunks( def _read_chunks( # TODO: expand type hints for ... - # TODO: really only np.int_? Not float? 
Consider using np.number - func: Callable[..., NDArray[np.int_]], + func: Callable[..., NDArray[np.number]], slide: Any, coords: NDArray[np.int_], n_channel: int, diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 668cfc56..45fb2a02 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -86,7 +86,7 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) -def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.int_]]]: +def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.number]]]: """Chunkwise reader for tiff files. Parameters @@ -116,7 +116,7 @@ def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[ chunk_coords = _compute_chunks(slide_dimensions, chunk_size=DEFAULT_CHUNKSIZE) # Define reader func - def _reader_func(slide: NDArray[np.int_], x0: int, y0: int, width: int, height: int) -> NDArray[np.int_]: + def _reader_func(slide: NDArray[np.number], x0: int, y0: int, width: int, height: int) -> NDArray[np.number]: return np.array(slide[:, y0 : y0 + height, x0 : x0 + width]) return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype) diff --git a/tests/test_generic.py b/tests/test_generic.py index 445200dd..33108e98 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -57,10 +57,9 @@ def save_tiff_files( sdata = blobs() # save the image as tiff x = sdata["blobs_image"].transpose(*axes).data.compute() - img = np.clip(x * 255, 0, 255).astype(np.uint8) tiff_path = Path(tmpdir) / "blobs_image.tiff" - tiffwrite(tiff_path, img, compression=compression) + tiffwrite(tiff_path, x, compression=compression) yield tiff_path, axes, compression From 3b296da258afc49c53c01febea86dce31043148b Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 13 Jan 2026 17:14:56 +0100 Subject: [PATCH 23/39] fix indices in _read_chunks() --- src/spatialdata_io/readers/_utils/_image.py | 32 ++++++++++++--------- src/spatialdata_io/readers/generic.py | 3 +- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 37676833..56ee7f39 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -54,7 +54,6 @@ def _compute_chunks( def _read_chunks( - # TODO: expand type hints for ... func: Callable[..., NDArray[np.number]], slide: Any, coords: NDArray[np.int_], @@ -94,13 +93,23 @@ def _read_chunks( Returns ------- list[list[da.array]] - List (length: n_row_x) of lists (length: n_row_y) of chunks. - Represents all chunks of the full image. + (Outer) list (length: n_row_y) of (inner) lists (length: n_row_x) of chunks with axes + (c, y, x). Represents all chunks of the full image. + + Notes + ------- + As seen in _compute_chunks(), since coords are in format (x, y, width, height), the + inner list there (dim=-1) runs over the y values and the outer list (dim=-2) runs + over the x values. In _read_chunks() we have the more common (y, x) format, where + the inner list (dim=-1) runs over the x values and the outer list (dim=-2) runs over + the y values. + + The above can be confusing, and a way to address this is to define coords to be + in format (y, x, height, width) instead of (x, y, width, height). 
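+
+    For example, with a grid of 2 x-chunks and 3 y-chunks, `coords` has shape
+    (2, 3, 4) and `coords[i, j]` holds the (x, y, width, height) spec of
+    x-chunk i and y-chunk j (an illustration of the convention above).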
""" func_kwargs = func_kwargs if func_kwargs else {} # Collect each delayed chunk as item in list of list - # TODO: check, wasn't it x, y and not y, x? # Inner list becomes dim=-1 (cols/x) # Outer list becomes dim=-2 (rows/y) # see dask.array.block @@ -109,21 +118,18 @@ def _read_chunks( da.from_delayed( delayed(func)( slide, - x0=coords[y, x, 0], - y0=coords[y, x, 1], - width=coords[y, x, 2], - height=coords[y, x, 3], + x0=coords[x, y, 0], + y0=coords[x, y, 1], + width=coords[x, y, 2], + height=coords[x, y, 3], **func_kwargs, ), dtype=dtype, - # TODO: double check the [3, 2] with debugger shape=(n_channel, *coords[y, x, [3, 2]]), ) - # TODO: seems inconsistent with coords docstring - for x in range(coords.shape[1]) + for x in range(coords.shape[0]) ] - # TODO: seems inconsistent with coords docstring - for y in range(coords.shape[0]) + for y in range(coords.shape[1]) ] return chunks diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 45fb2a02..1fef307d 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -116,7 +116,7 @@ def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[ chunk_coords = _compute_chunks(slide_dimensions, chunk_size=DEFAULT_CHUNKSIZE) # Define reader func - def _reader_func(slide: NDArray[np.number], x0: int, y0: int, width: int, height: int) -> NDArray[np.number]: + def _reader_func(slide: np.memmap, x0: int, y0: int, width: int, height: int) -> NDArray[np.number]: return np.array(slide[:, y0 : y0 + height, x0 : x0 + width]) return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype) @@ -142,7 +142,6 @@ def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> Data # Edge case: Compressed images are not memory-mappable except ValueError as e: - # TODO: change to logger warning logger.warning( f"Exception occurred: {str(e)}\nPossible troubleshooting: image data " "is not memory-mappable, potentially due to compression. Trying to " From f7ad81aab9e6f4682fc5523d6038f7579ae26a2a Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 13 Jan 2026 17:22:02 +0100 Subject: [PATCH 24/39] better english --- src/spatialdata_io/readers/_utils/_image.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 56ee7f39..7909de63 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -98,14 +98,14 @@ def _read_chunks( Notes ------- - As seen in _compute_chunks(), since coords are in format (x, y, width, height), the - inner list there (dim=-1) runs over the y values and the outer list (dim=-2) runs - over the x values. In _read_chunks() we have the more common (y, x) format, where - the inner list (dim=-1) runs over the x values and the outer list (dim=-2) runs over - the y values. - - The above can be confusing, and a way to address this is to define coords to be - in format (y, x, height, width) instead of (x, y, width, height). + As shown in `_compute_chunks()`, `coords` are in the form `(x, y, + width, height)`. In that function, the inner list (dim = -1) iterates over `y` + values, and the outer list (dim = -2) iterates over `x` values. In `_read_chunks( + )`, we use the more common `(y, x)` ordering: the inner list (dim = -1) iterates + over `x` values, and the outer list (dim = -2) iterates over `y` values. 
+ + This mismatch can be confusing. A straightforward fix is to standardize `coords` + to `(y, x, height, width)` instead of `(x, y, width, height)`. """ func_kwargs = func_kwargs if func_kwargs else {} From 7f5b7ce9e3716d185b8909da9e28b21b4ed37b9f Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 13 Jan 2026 17:23:03 +0100 Subject: [PATCH 25/39] better docstring --- src/spatialdata_io/readers/_utils/_image.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 7909de63..07f4026e 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -104,9 +104,12 @@ def _read_chunks( )`, we use the more common `(y, x)` ordering: the inner list (dim = -1) iterates over `x` values, and the outer list (dim = -2) iterates over `y` values. - This mismatch can be confusing. A straightforward fix is to standardize `coords` - to `(y, x, height, width)` instead of `(x, y, width, height)`. + This mismatch can be confusing. A straightforward fix that one could perform would + be to standardize `coords` to `(y, x, height, width)` instead of + `(x, y, width, height)`. """ + # TODO: standardize `coords` as explained in the docstring above, then remove that + # part from the docstring func_kwargs = func_kwargs if func_kwargs else {} # Collect each delayed chunk as item in list of list From 0c482acf1bd331c2a84cd9b19dc47a98f35d18e6 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Tue, 13 Jan 2026 18:02:39 +0100 Subject: [PATCH 26/39] wip benchmark (bugs) --- .gitignore | 1 + asv.conf.json | 2 +- benchmarks/benchmark_image.py | 97 +++++++++++++++++++ .../{bench_xenium.py => benchmark_xenium.py} | 17 ++-- src/spatialdata_io/readers/generic.py | 66 ++++++++++--- 5 files changed, 159 insertions(+), 24 deletions(-) create mode 100644 benchmarks/benchmark_image.py rename benchmarks/{bench_xenium.py => benchmark_xenium.py} (86%) diff --git a/.gitignore b/.gitignore index 379f16be..19b9d658 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,4 @@ data data/ tests/data uv.lock +.asv/ diff --git a/asv.conf.json b/asv.conf.json index 516f0f1f..55f61b90 100644 --- a/asv.conf.json +++ b/asv.conf.json @@ -3,7 +3,7 @@ "project": "spatialdata-io", "project_url": "https://github.com/scverse/spatialdata-io", "repo": ".", - "branches": ["main", "xenium-labels-dask", "xenium-labels-dask-zipstore"], + "branches": ["image-reader-chunkwise"], "dvcs": "git", "environment_type": "virtualenv", "pythons": ["3.12"], diff --git a/benchmarks/benchmark_image.py b/benchmarks/benchmark_image.py new file mode 100644 index 00000000..5b83f524 --- /dev/null +++ b/benchmarks/benchmark_image.py @@ -0,0 +1,97 @@ +"""Benchmarks for SpatialData IO operations for large images. + +Instructions: + See benchmark_xenium.py for instructions. 
+""" + +import shutil +from pathlib import Path + +from spatialdata import SpatialData +from xarray import DataArray + +from spatialdata_io import image # type: ignore[attr-defined] + +# ============================================================================= +# CONFIGURATION - Edit these paths to match your setup +# ============================================================================= +SANDBOX_DIR = Path(__file__).parent.parent.parent / "spatialdata-sandbox" +DATASET = "xenium_2.0.0_io" +# ============================================================================= + + +def get_paths() -> tuple[Path, Path]: + """Get paths for benchmark data.""" + path = SANDBOX_DIR / DATASET + # TODO: this is not a good image because it's compressed (not memmappable) + path_read = path / "data" / "morphology.ome.tif" + path_write = path / "data_benchmark.zarr" + + if not path_read.exists(): + raise ValueError(f"Data directory not found: {path_read}") + + return path_read, path_write + + +class IOBenchmarkImage: + """Benchmark IO read operations with different parameter combinations.""" + + timeout = 3600 + repeat = 3 + number = 1 + warmup_time = 0 + processes = 1 + + # Parameter combinations: scale_factors, use_tiff_memmap, chunks + params = [ + [None, [2, 2, 2]], # scale_factors + [True, False], # use_tiff_memmap + [(5000, 5000), (1000, 1000)], # chunks + ] + param_names = ["scale_factors", "use_tiff_memmap", "chunks"] + + def setup(self, *_) -> None: + """Set up paths for benchmarking.""" + self.path_read, self.path_write = get_paths() + if self.path_write.exists(): + shutil.rmtree(self.path_write) + + def _convert_image(self, scale_factors, use_tiff_memmap, chunks) -> SpatialData: + """Read image data with specified parameters.""" + im = image( + input=self.path_read, + data_axes=("c", "y", "x"), + coordinate_system="global", + use_tiff_memmap=use_tiff_memmap, + chunks=chunks, + scale_factors=scale_factors, + ) + sdata = SpatialData.init_from_elements({"image": im}) + # sanity check + if scale_factors is None: + assert isinstance(sdata["image"], DataArray) + else: + assert len(sdata["image"].keys()) == len(scale_factors) + + if chunks is not None: + # TODO: bug here! 
+ assert sdata["image"].chunksizes["x"] == chunks[0] + assert sdata["image"].chunksizes["y"] == chunks[1] + return sdata + + def time_io(self, scale_factors, use_tiff_memmap, chunks) -> None: + """Walltime for data parsing.""" + sdata = self._convert_image(scale_factors, use_tiff_memmap, chunks) + sdata.write(self.path_write) + + def peakmem_io(self, scale_factors, use_tiff_memmap, chunks) -> None: + """Peak memory for data parsing.""" + sdata = self._convert_image(scale_factors, use_tiff_memmap, chunks) + sdata.write(self.path_write) + + +if __name__ == "__main__": + # Run a single test case for quick verification + bench = IOBenchmarkImage() + bench.setup(None, True, (1000, 1000)) + bench.time_io(None, True, (1000, 1000)) diff --git a/benchmarks/bench_xenium.py b/benchmarks/benchmark_xenium.py similarity index 86% rename from benchmarks/bench_xenium.py rename to benchmarks/benchmark_xenium.py index 685e5094..22d70888 100644 --- a/benchmarks/bench_xenium.py +++ b/benchmarks/benchmark_xenium.py @@ -11,18 +11,18 @@ cd /path/to/spatialdata-io # Quick benchmark (single run, for testing): - asv run --python=same -b IOBenchmark --quick --show-stderr -v + asv run --python=same -b IOBenchmarkXenium --quick --show-stderr -v # Full benchmark (multiple runs, for accurate results): - asv run --python=same -b IOBenchmark --show-stderr -v + asv run --python=same -b IOBenchmarkXenium --show-stderr -v Comparing branches: # Run on specific commits: - asv run main^! -b IOBenchmark --show-stderr -v - asv run xenium-labels-dask^! -b IOBenchmark --show-stderr -v + asv run main^! -b IOBenchmarkXenium --show-stderr -v + asv run xenium-labels-dask^! -b IOBenchmarkXenium --show-stderr -v # Or compare two branches directly: - asv continuous main xenium-labels-dask -b IOBenchmark --show-stderr -v + asv continuous main xenium-labels-dask -b IOBenchmarkXenium --show-stderr -v # View comparison: asv compare main xenium-labels-dask @@ -36,7 +36,6 @@ import inspect import shutil from pathlib import Path -from typing import TYPE_CHECKING from spatialdata import SpatialData @@ -62,9 +61,7 @@ def get_paths() -> tuple[Path, Path]: return path_read, path_write -class IOBenchmark: - """Benchmark IO read operations.""" - +class IOBenchmarkXenium: timeout = 3600 repeat = 3 number = 1 @@ -106,4 +103,4 @@ def peakmem_io(self) -> None: if __name__ == "__main__": - IOBenchmark().time_io() + IOBenchmarkXenium().time_io() diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 1fef307d..79e33fac 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -25,7 +25,7 @@ VALID_IMAGE_TYPES = [".tif", ".tiff", ".png", ".jpg", ".jpeg"] VALID_SHAPE_TYPES = [".geojson"] -DEFAULT_CHUNKSIZE = (1000, 1000) +DEFAULT_CHUNK_SIZE = (1000, 1000) __all__ = ["generic", "geojson", "image", "VALID_IMAGE_TYPES", "VALID_SHAPE_TYPES"] @@ -86,7 +86,11 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: return ShapesModel.parse(input, transformations={coordinate_system: Identity()}) -def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[DaskArray[np.number]]]: +def _tiff_to_chunks( + input: Path, + axes_dim_mapping: dict[str, int], + chunk_size: tuple[int, int] | None = None, +) -> list[list[DaskArray[np.number]]]: """Chunkwise reader for tiff files. 
Parameters @@ -95,11 +99,16 @@ def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[ Path to image axes_dim_mapping Mapping between dimension name (x, y, c) and index + chunk_size + Chunk size in (x, y) order. If None, defaults to DEFAULT_CHUNK_SIZE. Returns ------- list[list[DaskArray]] """ + if chunk_size is None: + chunk_size = DEFAULT_CHUNK_SIZE + # Lazy file reader slide = tifffile.memmap(input) @@ -113,7 +122,7 @@ def _tiff_to_chunks(input: Path, axes_dim_mapping: dict[str, int]) -> list[list[ n_channel = slide.shape[0] # Compute chunk coords - chunk_coords = _compute_chunks(slide_dimensions, chunk_size=DEFAULT_CHUNKSIZE) + chunk_coords = _compute_chunks(slide_dimensions, chunk_size=chunk_size) # Define reader func def _reader_func(slide: np.memmap, x0: int, y0: int, width: int, height: int) -> NDArray[np.number]: @@ -129,15 +138,43 @@ def _dask_image_imread(input: Path, data_axes: Sequence[str]) -> da.Array: return image -def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> DataArray: - """Reads an image file and returns a parsed Image2D spatial element.""" +def image( + input: Path, + data_axes: Sequence[str], + coordinate_system: str, + use_tiff_memmap: bool = True, + chunks: tuple[int, int] | None = None, + scale_factors: Sequence[int] | None = None, +) -> DataArray: + """Reads an image file and returns a parsed Image2D spatial element. + + Parameters + ---------- + input + Path to the image file. + data_axes + Axes of the data. + coordinate_system + Coordinate system of the spatial element. + use_tiff_memmap + Whether to use memory-mapped reading for TIFF files. + chunks + Chunk size in (x, y) order for TIFF files. If None, defaults to (1000, 1000). + scale_factors + Scale factors for building a multiscale image pyramid. Passed to Image2DModel.parse(). + + Returns + ------- + Parsed Image2D spatial element. + """ # Map passed data axes to position of dimension axes_dim_mapping = {axes: ndim for ndim, axes in enumerate(data_axes)} - if input.suffix in [".tiff", ".tif"]: + im = None + if input.suffix in [".tiff", ".tif"] and use_tiff_memmap: try: - chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping) - image = da.block(chunks, allow_unknown_chunksizes=True) + im_chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping, chunk_size=chunks) + im = da.block(im_chunks, allow_unknown_chunksizes=True) data_axes = ["c", "y", "x"] # Edge case: Compressed images are not memory-mappable @@ -147,11 +184,14 @@ def image(input: Path, data_axes: Sequence[str], coordinate_system: str) -> Data "is not memory-mappable, potentially due to compression. 
Trying to " "load the image into memory at once", ) - image = _dask_image_imread(input=input, data_axes=data_axes) + use_tiff_memmap = False - elif input.suffix in [".png", ".jpg", ".jpeg"]: - image = _dask_image_imread(input=input, data_axes=data_axes) - else: + if input.suffix in [".tiff", ".tif"] and not use_tiff_memmap or input.suffix in [".png", ".jpg", ".jpeg"]: + im = _dask_image_imread(input=input, data_axes=data_axes) + + if im is None: raise NotImplementedError(f"File format {input.suffix} not implemented") - return Image2DModel.parse(image, dims=data_axes, transformations={coordinate_system: Identity()}) + return Image2DModel.parse( + im, dims=data_axes, transformations={coordinate_system: Identity()}, scale_factors=scale_factors + ) From 14665967a82743225eb2d483f9917224f6345699 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 16 Jan 2026 09:27:57 +0100 Subject: [PATCH 27/39] [Test] Use small assymetric chunk sizes to capture any issues with the computation order of chunks/assembly --- tests/test_generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 33108e98..341e92d2 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -69,7 +69,7 @@ def test_read_tiff(save_tiff_files: tuple[Path, tuple[str], str | None], caplog: logger.propagate = True with caplog.at_level(logging.WARNING): - img = image(tiff_path, data_axes=axes, coordinate_system="global") + img = image(tiff_path, data_axes=axes, chunks=(29, 71), coordinate_system="global") if compression is not None: assert "image data is not memory-mappable, potentially due to compression" in caplog.text From a2930d51e5177d3d8f206ce691132edec175108c Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 16 Jan 2026 10:16:07 +0100 Subject: [PATCH 28/39] Add comment to clarify use of asymmetric chunk sizes in test_read_tiff --- tests/test_generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_generic.py b/tests/test_generic.py index 341e92d2..374d96e2 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -69,6 +69,7 @@ def test_read_tiff(save_tiff_files: tuple[Path, tuple[str], str | None], caplog: logger.propagate = True with caplog.at_level(logging.WARNING): + # Use asymmetric chunk sizes to catch errors with the ordering of chunk dimensions and the assembly of the individual chunks img = image(tiff_path, data_axes=axes, chunks=(29, 71), coordinate_system="global") if compression is not None: assert "image data is not memory-mappable, potentially due to compression" in caplog.text From a9c2a2bd0b038878835377674800cbee0015e43e Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 16 Jan 2026 10:23:53 +0100 Subject: [PATCH 29/39] Follow standard convention of image dimensions throughout reader function. Switch axes order everywhere from (x, y) to (y, x) --- src/spatialdata_io/readers/generic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 79e33fac..aa126464 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -98,9 +98,9 @@ def _tiff_to_chunks( input Path to image axes_dim_mapping - Mapping between dimension name (x, y, c) and index + Mapping between dimension name (c, y, x) and index chunk_size - Chunk size in (x, y) order. If None, defaults to DEFAULT_CHUNK_SIZE. + Chunk size in (y, x) order. If None, defaults to DEFAULT_CHUNK_SIZE. 
Returns ------- @@ -115,8 +115,8 @@ def _tiff_to_chunks( # Transpose to cyx order slide = np.transpose(slide, (axes_dim_mapping["c"], axes_dim_mapping["y"], axes_dim_mapping["x"])) - # Get dimensions in (x, y) - slide_dimensions = slide.shape[2], slide.shape[1] + # Get dimensions in (y, x) + slide_dimensions = slide.shape[1], slide.shape[2] # Get number of channels (c) n_channel = slide.shape[0] @@ -125,7 +125,7 @@ def _tiff_to_chunks( chunk_coords = _compute_chunks(slide_dimensions, chunk_size=chunk_size) # Define reader func - def _reader_func(slide: np.memmap, x0: int, y0: int, width: int, height: int) -> NDArray[np.number]: + def _reader_func(slide: np.memmap, y0: int, x0: int, height: int, width: int) -> NDArray[np.number]: return np.array(slide[:, y0 : y0 + height, x0 : x0 + width]) return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype) @@ -159,7 +159,7 @@ def image( use_tiff_memmap Whether to use memory-mapped reading for TIFF files. chunks - Chunk size in (x, y) order for TIFF files. If None, defaults to (1000, 1000). + Chunk size in (y, x) order for TIFF files. If None, defaults to (1000, 1000). scale_factors Scale factors for building a multiscale image pyramid. Passed to Image2DModel.parse(). From 2dabbf9f241bd93784a4e1553e93ac3031b188a4 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 16 Jan 2026 10:31:18 +0100 Subject: [PATCH 30/39] Refactor chunk specification to follow (y, x) order and update related documentation 1. Enforce standard dimension order convention (y, x). 2. Change local variable names to better distinguish between chunk-level coordinates and pixel-level coordinates - All chunk indices are indicated as such with the prefix. - The pixel-coordinates of individual chunks are now consistently named (y, x, height, width) --- src/spatialdata_io/readers/_utils/_image.py | 57 +++++++++++---------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 07f4026e..320ae69c 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -22,31 +22,34 @@ def _compute_chunks( ) -> NDArray[np.int_]: """Create all chunk specs for a given image and chunk size. - Creates specifications (x, y, width, height) with (x, y) being the upper left corner + For each chunk in the final image with the chunk coords (block row=chunk_y_position, block col=chunk_x_position), this function + creates specifications (y, x, height, width) with (y, x) being the upper left corner of chunks of size chunk_size. Chunks at the edges correspond to the remainder of chunk size and dimensions Parameters ---------- shape : tuple[int, int] - Size of the image in (width, height). + Size of the image in (image height, image width). chunk_size : tuple[int, int] - Size of individual tiles in (width, height). + Size of individual tiles in (height, width). Returns ------- np.ndarray - Array of shape (n_tiles_x, n_tiles_y, 4). Each entry defines a tile - as (x, y, width, height). + Array of shape (n_tiles_y, n_tiles_x, 4). Each entry defines a tile + as (y, x, height, width). 
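+
+    Examples
+    --------
+    Tiling a 3x5 (height, width) image with 2x2 chunks; edge tiles absorb the
+    remainder (illustrative values that follow from the definitions above):
+
+    >>> tiles = _compute_chunks((3, 5), (2, 2))
+    >>> tiles.shape  # 2 block rows (y), 3 block columns (x)
+    (2, 3, 4)
+    >>> tiles[1, 2]  # last tile: y=2, x=4, height=1, width=1
+    array([2, 4, 1, 1])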
""" - x_positions, widths = _compute_chunk_sizes_positions(shape[0], chunk_size[0]) - y_positions, heights = _compute_chunk_sizes_positions(shape[1], chunk_size[1]) + y_positions, heights = _compute_chunk_sizes_positions(shape[0], chunk_size[0]) + x_positions, widths = _compute_chunk_sizes_positions(shape[1], chunk_size[1]) # Generate the tiles + # Each entry defines the chunk dimensions for a tile + # The order of the chunk definitions (chunk_index_y=outer, chunk_index_x=inner) follows the dask.block convention tiles = np.array( [ - [[x, y, w, h] for y, h in zip(y_positions, heights, strict=True)] - for x, w in zip(x_positions, widths, strict=True) + [[y, x, h, w] for x, w in zip(x_positions, widths, strict=True)] + for y, h in zip(y_positions, heights, strict=True) ], dtype=int, ) @@ -66,23 +69,23 @@ def _read_chunks( Parameters ---------- func - Function to retrieve a rectangular tile from the slide image. Must take the + Function to retrieve a single rectangular tile from the slide image. Must take the arguments: - slide Full slide image - - x0: x (col) coordinate of upper left corner of chunk - y0: y (row) coordinate of upper left corner of chunk - - width: Width of chunk + - x0: x (col) coordinate of upper left corner of chunk - height: Height of chunk + - width: Width of chunk and should return the chunk as numpy array of shape (c, y, x) slide - Slide image in lazily loaded format compatible with func + Slide image in lazily loaded format compatible with `func` coords - Coordinates of the upper left corner of the image in format (n_row_x, n_row_y, 4) - where the last dimension defines the rectangular tile in format (x, y, width, height). - n_row_x represents the number of chunks in x dimension and n_row_y the number of chunks - in y dimension. + Coordinates of the upper left corner of the image in format (n_row_y, n_row_x, 4) + where the last dimension defines the rectangular tile in format (y, x, height, width). + n_row_y represents the number of chunks in y dimension (block rows) and n_row_x the number of chunks + in x dimension (block columns). n_channel Number of channels in array dtype @@ -112,27 +115,27 @@ def _read_chunks( # part from the docstring func_kwargs = func_kwargs if func_kwargs else {} - # Collect each delayed chunk as item in list of list - # Inner list becomes dim=-1 (cols/x) - # Outer list becomes dim=-2 (rows/y) + # Collect each delayed chunk (c, y, x) as item in list of list + # Inner list becomes dim=-1 (chunk columns/x) + # Outer list becomes dim=-2 (chunk rows/y) # see dask.array.block chunks = [ [ da.from_delayed( delayed(func)( slide, - x0=coords[x, y, 0], - y0=coords[x, y, 1], - width=coords[x, y, 2], - height=coords[x, y, 3], + x0=coords[chunk_y, chunk_x, 1], + y0=coords[chunk_y, chunk_x, 0], + width=coords[chunk_y, chunk_x, 3], + height=coords[chunk_y, chunk_x, 2], **func_kwargs, ), dtype=dtype, - shape=(n_channel, *coords[y, x, [3, 2]]), + shape=(n_channel, *coords[chunk_y, chunk_x, [3, 2]]), ) - for x in range(coords.shape[0]) + for chunk_x in range(coords.shape[1]) ] - for y in range(coords.shape[1]) + for chunk_y in range(coords.shape[0]) ] return chunks From 5d9ece2911097559f17ab55c1a9f06379c3f10f6 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 16 Jan 2026 10:42:29 +0100 Subject: [PATCH 31/39] [Fix] Shape dimensions were inversed. 
Fix shape specification and make it self-documenting --- src/spatialdata_io/readers/_utils/_image.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 320ae69c..e2b229de 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -6,6 +6,11 @@ from dask import delayed from numpy.typing import NDArray +_Y_IDX = 0 +_X_IDX = 1 +_HEIGHT_IDX = 2 +_WIDTH_IDX = 3 + def _compute_chunk_sizes_positions(size: int, chunk: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: """Calculate chunk sizes and positions for a given dimension and chunk size.""" @@ -124,14 +129,14 @@ def _read_chunks( da.from_delayed( delayed(func)( slide, - x0=coords[chunk_y, chunk_x, 1], - y0=coords[chunk_y, chunk_x, 0], - width=coords[chunk_y, chunk_x, 3], - height=coords[chunk_y, chunk_x, 2], + y0=coords[chunk_y, chunk_x, _Y_IDX], + x0=coords[chunk_y, chunk_x, _X_IDX], + height=coords[chunk_y, chunk_x, _HEIGHT_IDX], + width=coords[chunk_y, chunk_x, _WIDTH_IDX], **func_kwargs, ), dtype=dtype, - shape=(n_channel, *coords[chunk_y, chunk_x, [3, 2]]), + shape=(n_channel, coords[chunk_y, chunk_x, _HEIGHT_IDX], coords[chunk_y, chunk_x, _WIDTH_IDX]), ) for chunk_x in range(coords.shape[1]) ] From 21f8fc65e538f7bb4aba8c3caa32e040231e1608 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 16 Jan 2026 10:43:22 +0100 Subject: [PATCH 32/39] chore: Remove Note and TODO --- src/spatialdata_io/readers/_utils/_image.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index e2b229de..1857712a 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -103,21 +103,7 @@ def _read_chunks( list[list[da.array]] (Outer) list (length: n_row_y) of (inner) lists (length: n_row_x) of chunks with axes (c, y, x). Represents all chunks of the full image. - - Notes - ------- - As shown in `_compute_chunks()`, `coords` are in the form `(x, y, - width, height)`. In that function, the inner list (dim = -1) iterates over `y` - values, and the outer list (dim = -2) iterates over `x` values. In `_read_chunks( - )`, we use the more common `(y, x)` ordering: the inner list (dim = -1) iterates - over `x` values, and the outer list (dim = -2) iterates over `y` values. - - This mismatch can be confusing. A straightforward fix that one could perform would - be to standardize `coords` to `(y, x, height, width)` instead of - `(x, y, width, height)`. 
""" - # TODO: standardize `coords` as explained in the docstring above, then remove that - # part from the docstring func_kwargs = func_kwargs if func_kwargs else {} # Collect each delayed chunk (c, y, x) as item in list of list From 93554e7f43574be3a409645a70277d49b0e76d18 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Fri, 16 Jan 2026 10:55:30 +0100 Subject: [PATCH 33/39] Clarify documentation --- src/spatialdata_io/readers/_utils/_image.py | 23 ++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 1857712a..9603dc54 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -7,9 +7,16 @@ from numpy.typing import NDArray _Y_IDX = 0 +"""Index of y coordinate in in chunk coordinate array format: (y, x, height, width)""" + _X_IDX = 1 +"""Index of x coordinate in chunk coordinate array format: (y, x, height, width)""" + _HEIGHT_IDX = 2 +"""Index of height specification in chunk coordinate array format: (y, x, height, width)""" + _WIDTH_IDX = 3 +"""Index of width specification in chunk coordinate array format: (y, x, height, width)""" def _compute_chunk_sizes_positions(size: int, chunk: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: @@ -27,10 +34,11 @@ def _compute_chunks( ) -> NDArray[np.int_]: """Create all chunk specs for a given image and chunk size. - For each chunk in the final image with the chunk coords (block row=chunk_y_position, block col=chunk_x_position), this function - creates specifications (y, x, height, width) with (y, x) being the upper left corner - of chunks of size chunk_size. Chunks at the edges correspond to the remainder of - chunk size and dimensions + Creates chunk specifications for tiling an image. Returns an array where position [i, j] + contains the spec for the chunk at block row i, block column j. + Each chunk specification consists of (y, x, height, width) with (y, x) being the upper left + corner of chunks of size chunk_size. + Chunks at the edges correspond to the remainder of chunk size and dimensions Parameters ---------- @@ -77,7 +85,7 @@ def _read_chunks( Function to retrieve a single rectangular tile from the slide image. Must take the arguments: - - slide Full slide image + - slide: Full slide image - y0: y (row) coordinate of upper left corner of chunk - x0: x (col) coordinate of upper left corner of chunk - height: Height of chunk @@ -87,8 +95,9 @@ def _read_chunks( slide Slide image in lazily loaded format compatible with `func` coords - Coordinates of the upper left corner of the image in format (n_row_y, n_row_x, 4) - where the last dimension defines the rectangular tile in format (y, x, height, width). + Coordinates of the upper left corner of each chunk image in format (n_row_y, n_row_x, 4) + where the last dimension defines the rectangular tile in format (y, x, height, width), as returned + by :func:`_compute_chunks`. n_row_y represents the number of chunks in y dimension (block rows) and n_row_x the number of chunks in x dimension (block columns). 
n_channel From 5a20f413c36b3009c028229313c598b6b60e4109 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Fri, 16 Jan 2026 11:02:01 +0100 Subject: [PATCH 34/39] fix pre-commit benchmark_image --- benchmarks/benchmark_image.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/benchmarks/benchmark_image.py b/benchmarks/benchmark_image.py index 5b83f524..ea674ac6 100644 --- a/benchmarks/benchmark_image.py +++ b/benchmarks/benchmark_image.py @@ -6,11 +6,12 @@ import shutil from pathlib import Path +from typing import Any from spatialdata import SpatialData from xarray import DataArray -from spatialdata_io import image # type: ignore[attr-defined] +from spatialdata_io import image # ============================================================================= # CONFIGURATION - Edit these paths to match your setup @@ -50,13 +51,15 @@ class IOBenchmarkImage: ] param_names = ["scale_factors", "use_tiff_memmap", "chunks"] - def setup(self, *_) -> None: + def setup(self, *_: Any) -> None: """Set up paths for benchmarking.""" self.path_read, self.path_write = get_paths() if self.path_write.exists(): shutil.rmtree(self.path_write) - def _convert_image(self, scale_factors, use_tiff_memmap, chunks) -> SpatialData: + def _convert_image( + self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, int] + ) -> SpatialData: """Read image data with specified parameters.""" im = image( input=self.path_read, @@ -79,12 +82,12 @@ def _convert_image(self, scale_factors, use_tiff_memmap, chunks) -> SpatialData: assert sdata["image"].chunksizes["y"] == chunks[1] return sdata - def time_io(self, scale_factors, use_tiff_memmap, chunks) -> None: + def time_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, int]) -> None: """Walltime for data parsing.""" sdata = self._convert_image(scale_factors, use_tiff_memmap, chunks) sdata.write(self.path_write) - def peakmem_io(self, scale_factors, use_tiff_memmap, chunks) -> None: + def peakmem_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, int]) -> None: """Peak memory for data parsing.""" sdata = self._convert_image(scale_factors, use_tiff_memmap, chunks) sdata.write(self.path_write) From a890951e1cb5d16c591bff75b88f3985951a2da1 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Fri, 16 Jan 2026 12:50:39 +0100 Subject: [PATCH 35/39] wip fix chunks --- benchmarks/benchmark_image.py | 54 +++++++++++++++++++++++---- benchmarks/benchmark_xenium.py | 4 +- src/spatialdata_io/readers/generic.py | 16 +++++--- 3 files changed, 59 insertions(+), 15 deletions(-) diff --git a/benchmarks/benchmark_image.py b/benchmarks/benchmark_image.py index ea674ac6..5b9b8095 100644 --- a/benchmarks/benchmark_image.py +++ b/benchmarks/benchmark_image.py @@ -11,7 +11,7 @@ from spatialdata import SpatialData from xarray import DataArray -from spatialdata_io import image +from spatialdata_io import image # type: ignore[attr-defined] # ============================================================================= # CONFIGURATION - Edit these paths to match your setup @@ -73,13 +73,29 @@ def _convert_image( # sanity check if scale_factors is None: assert isinstance(sdata["image"], DataArray) + if chunks is not None: + assert ( + sdata["image"].chunksizes["x"][0] == chunks[0] + or sdata["image"].chunksizes["x"][0] == sdata["image"].shape[2] + ) + assert ( + sdata["image"].chunksizes["y"][0] == chunks[1] + or sdata["image"].chunksizes["y"][0] == sdata["image"].shape[1] + ) else: - assert 
len(sdata["image"].keys()) == len(scale_factors) + assert len(sdata["image"].keys()) == len(scale_factors) + 1 + if chunks is not None: + assert ( + sdata["image"]["scale0"]["image"].chunksizes["x"][0] == chunks[0] + or sdata["image"]["scale0"]["image"].chunksizes["x"][0] + == sdata["image"]["scale0"]["image"].shape[2] + ) + assert ( + sdata["image"]["scale0"]["image"].chunksizes["y"][0] == chunks[1] + or sdata["image"]["scale0"]["image"].chunksizes["y"][0] + == sdata["image"]["scale0"]["image"].shape[1] + ) - if chunks is not None: - # TODO: bug here! - assert sdata["image"].chunksizes["x"] == chunks[0] - assert sdata["image"].chunksizes["y"] == chunks[1] return sdata def time_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, int]) -> None: @@ -96,5 +112,27 @@ def peakmem_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chu if __name__ == "__main__": # Run a single test case for quick verification bench = IOBenchmarkImage() - bench.setup(None, True, (1000, 1000)) - bench.time_io(None, True, (1000, 1000)) + + # bench.setup() + # bench.time_io(None, True, (5000, 5000)) + + # bench.setup() + # bench.time_io(None, True, (1000, 1000)) + + # bench.setup() + # bench.time_io(None, False, (5000, 5000)) + + # bench.setup() + # bench.time_io(None, False, (1000, 1000)) + + # bench.setup() + # bench.time_io([2, 2, 2], True, (5000, 5000)) + + # bench.setup() + # bench.time_io([2, 2, 2], True, (1000, 1000)) + + bench.setup() + bench.time_io([2, 2, 2], False, (5000, 5000)) + + # bench.setup() + # bench.time_io([2, 2, 2], False, (1000, 1000)) diff --git a/benchmarks/benchmark_xenium.py b/benchmarks/benchmark_xenium.py index 22d70888..b1f09afe 100644 --- a/benchmarks/benchmark_xenium.py +++ b/benchmarks/benchmark_xenium.py @@ -103,4 +103,6 @@ def peakmem_io(self) -> None: if __name__ == "__main__": - IOBenchmarkXenium().time_io() + benchmark = IOBenchmarkXenium() + benchmark.setup() + benchmark.time_io() diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index aa126464..1d126192 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -131,11 +131,15 @@ def _reader_func(slide: np.memmap, y0: int, x0: int, height: int, width: int) -> return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype) -def _dask_image_imread(input: Path, data_axes: Sequence[str]) -> da.Array: +def _dask_image_imread(input: Path, data_axes: Sequence[str], chunks: tuple[int, int] | None = None) -> da.Array: + if set(data_axes) != {"c", "y", "x"}: + raise NotImplementedError(f"Only 'c', 'y', 'x' axes are supported, got {data_axes}") image = imread(input) - if len(image.shape) == len(data_axes) + 1 and image.shape[0] == 1: - image = np.squeeze(image, axis=0) - return image + if image.ndim != len(data_axes): + raise ValueError(f"Expected image with {len(data_axes)} dimensions, got {image.ndim}") + image = image.transpose(*[data_axes.index(ax) for ax in ["c", "y", "x"]]) + chunks = (1,) + chunks + return image.rechunk(chunks) def image( @@ -187,11 +191,11 @@ def image( use_tiff_memmap = False if input.suffix in [".tiff", ".tif"] and not use_tiff_memmap or input.suffix in [".png", ".jpg", ".jpeg"]: - im = _dask_image_imread(input=input, data_axes=data_axes) + im = _dask_image_imread(input=input, data_axes=data_axes, chunks=chunks) if im is None: raise NotImplementedError(f"File format {input.suffix} not implemented") return Image2DModel.parse( - im, dims=data_axes, 
transformations={coordinate_system: Identity()}, scale_factors=scale_factors + im, dims=data_axes, transformations={coordinate_system: Identity()}, scale_factors=scale_factors, chunks=chunks ) From 19df32530e463689af8ab108b9c1c84117e396bc Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Fri, 16 Jan 2026 16:59:20 +0100 Subject: [PATCH 36/39] improve chunks support --- src/spatialdata_io/readers/_utils/_image.py | 63 +++++++++++++- src/spatialdata_io/readers/generic.py | 91 +++++++++++++++------ tests/readers/test_utils_image.py | 39 +++++++++ tests/test_generic.py | 38 +++++++-- 4 files changed, 194 insertions(+), 37 deletions(-) diff --git a/src/spatialdata_io/readers/_utils/_image.py b/src/spatialdata_io/readers/_utils/_image.py index 9603dc54..3784ccca 100644 --- a/src/spatialdata_io/readers/_utils/_image.py +++ b/src/spatialdata_io/readers/_utils/_image.py @@ -1,10 +1,13 @@ -from collections.abc import Callable +from collections.abc import Callable, Mapping, Sequence from typing import Any import dask.array as da import numpy as np from dask import delayed from numpy.typing import NDArray +from spatialdata.models.models import Chunks_t + +__all__ = ["Chunks_t", "_compute_chunks", "_read_chunks", "normalize_chunks"] _Y_IDX = 0 """Index of y coordinate in in chunk coordinate array format: (y, x, height, width)""" @@ -18,6 +21,8 @@ _WIDTH_IDX = 3 """Index of width specification in chunk coordinate array format: (y, x, height, width)""" +DEFAULT_CHUNK_SIZE = 1000 + def _compute_chunk_sizes_positions(size: int, chunk: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]: """Calculate chunk sizes and positions for a given dimension and chunk size.""" @@ -140,4 +145,58 @@ def _read_chunks( return chunks -# TODO: do a asv debugging for peak mem +def normalize_chunks( + chunks: Chunks_t | None, + axes: Sequence[str], +) -> dict[str, int]: + """Normalize chunk specification to dict format. + + This function converts various chunk formats to a dict mapping dimension names + to chunk sizes. The dict format is preferred because it's explicit about which + dimension gets which chunk size and is compatible with spatialdata. + + Parameters + ---------- + chunks + Chunk specification. Can be: + - None: Uses DEFAULT_CHUNK_SIZE for all axes + - int: Applied to all axes + - tuple[int, ...]: Chunk sizes in order corresponding to axes + - dict: Mapping of axis names to chunk sizes (validated against axes) + axes + Tuple of axis names that defines the expected dimensions (e.g., ('c', 'y', 'x')). + + Returns + ------- + dict[str, int] + Dict mapping axis names to chunk sizes. + + Raises + ------ + ValueError + If chunks format is not supported or incompatible with axes. + """ + if chunks is None: + return dict.fromkeys(axes, DEFAULT_CHUNK_SIZE) + + if isinstance(chunks, int): + return dict.fromkeys(axes, chunks) + + if isinstance(chunks, Mapping): + chunks_dict = dict(chunks) + missing = set(axes) - set(chunks_dict.keys()) + if missing: + raise ValueError(f"chunks dict missing keys for axes {missing}, got: {list(chunks_dict.keys())}") + return {ax: chunks_dict[ax] for ax in axes} + + if isinstance(chunks, tuple): + if len(chunks) != len(axes): + raise ValueError(f"chunks tuple length {len(chunks)} doesn't match axes {axes} (length {len(axes)})") + if not all(isinstance(c, int) for c in chunks): + raise ValueError(f"All elements in chunks tuple must be int, got: {chunks}") + return dict(zip(axes, chunks, strict=True)) + + raise ValueError(f"Unsupported chunks type: {type(chunks)}. 
Expected int, tuple, dict, or None.") + + +## diff --git a/src/spatialdata_io/readers/generic.py b/src/spatialdata_io/readers/generic.py index 1d126192..462989e0 100644 --- a/src/spatialdata_io/readers/generic.py +++ b/src/spatialdata_io/readers/generic.py @@ -19,13 +19,18 @@ from geopandas import GeoDataFrame from numpy.typing import NDArray + from spatialdata.models.models import Chunks_t from xarray import DataArray -from ._utils._image import _compute_chunks, _read_chunks + +from spatialdata_io.readers._utils._image import ( + _compute_chunks, + _read_chunks, + normalize_chunks, +) VALID_IMAGE_TYPES = [".tif", ".tiff", ".png", ".jpg", ".jpeg"] VALID_SHAPE_TYPES = [".geojson"] -DEFAULT_CHUNK_SIZE = (1000, 1000) __all__ = ["generic", "geojson", "image", "VALID_IMAGE_TYPES", "VALID_SHAPE_TYPES"] @@ -89,26 +94,29 @@ def geojson(input: Path, coordinate_system: str) -> GeoDataFrame: def _tiff_to_chunks( input: Path, axes_dim_mapping: dict[str, int], - chunk_size: tuple[int, int] | None = None, + chunks_cyx: dict[str, int], ) -> list[list[DaskArray[np.number]]]: """Chunkwise reader for tiff files. + Creates spatial tiles from a TIFF file. Each tile contains all channels. + Channel chunking is handled downstream by Image2DModel.parse(). + Parameters ---------- input Path to image axes_dim_mapping Mapping between dimension name (c, y, x) and index - chunk_size - Chunk size in (y, x) order. If None, defaults to DEFAULT_CHUNK_SIZE. + chunks_cyx + Chunk size dict with 'c', 'y', and 'x' keys. The 'y' and 'x' values + are used for spatial tiling. The 'c' value is passed through for + downstream rechunking. Returns ------- list[list[DaskArray]] + 2D list of dask arrays representing spatial tiles, each with shape (n_channels, height, width). """ - if chunk_size is None: - chunk_size = DEFAULT_CHUNK_SIZE - # Lazy file reader slide = tifffile.memmap(input) @@ -118,28 +126,49 @@ def _tiff_to_chunks( # Get dimensions in (y, x) slide_dimensions = slide.shape[1], slide.shape[2] - # Get number of channels (c) + # Get number of channels (all channels are included in each spatial tile) n_channel = slide.shape[0] - # Compute chunk coords - chunk_coords = _compute_chunks(slide_dimensions, chunk_size=chunk_size) + # Compute chunk coords using (y, x) tuple + chunk_coords = _compute_chunks(slide_dimensions, chunk_size=(chunks_cyx["y"], chunks_cyx["x"])) - # Define reader func + # Define reader func - reads all channels for each spatial tile def _reader_func(slide: np.memmap, y0: int, x0: int, height: int, width: int) -> NDArray[np.number]: return np.array(slide[:, y0 : y0 + height, x0 : x0 + width]) return _read_chunks(_reader_func, slide, coords=chunk_coords, n_channel=n_channel, dtype=slide.dtype) -def _dask_image_imread(input: Path, data_axes: Sequence[str], chunks: tuple[int, int] | None = None) -> da.Array: +def _dask_image_imread(input: Path, data_axes: Sequence[str], chunks_cyx: dict[str, int]) -> da.Array: + """Read image using dask-image and rechunk. + + Parameters + ---------- + input + Path to image file. + data_axes + Axes of the input data. + chunks_cyx + Chunk size dict with 'c', 'y', 'x' keys. + + Returns + ------- + Dask array with (c, y, x) axes order. 
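+
+    Examples
+    --------
+    Illustrative sketch; ``img.png`` is a hypothetical file stored as (y, x, c):
+
+    >>> arr = _dask_image_imread(
+    ...     Path("img.png"), data_axes=("y", "x", "c"), chunks_cyx={"c": 1, "y": 256, "x": 256}
+    ... )
+    >>> arr.chunksize  # (c, y, x); assumes an image of at least 256x256 pixels
+    (1, 256, 256)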
+ """ if set(data_axes) != {"c", "y", "x"}: raise NotImplementedError(f"Only 'c', 'y', 'x' axes are supported, got {data_axes}") - image = imread(input) - if image.ndim != len(data_axes): - raise ValueError(f"Expected image with {len(data_axes)} dimensions, got {image.ndim}") - image = image.transpose(*[data_axes.index(ax) for ax in ["c", "y", "x"]]) - chunks = (1,) + chunks - return image.rechunk(chunks) + im = imread(input) + + # dask_image.imread may add an extra leading dimension for frames/pages + # If image has one extra dimension with size 1, squeeze it out + if im.ndim == len(data_axes) + 1 and im.shape[0] == 1: + im = im[0] + + if im.ndim != len(data_axes): + raise ValueError(f"Expected image with {len(data_axes)} dimensions, got {im.ndim}") + + im = im.transpose(*[data_axes.index(ax) for ax in ["c", "y", "x"]]) + return im.rechunk((chunks_cyx["c"], chunks_cyx["y"], chunks_cyx["x"])) def image( @@ -147,7 +176,7 @@ def image( data_axes: Sequence[str], coordinate_system: str, use_tiff_memmap: bool = True, - chunks: tuple[int, int] | None = None, + chunks: Chunks_t | None = None, scale_factors: Sequence[int] | None = None, ) -> DataArray: """Reads an image file and returns a parsed Image2D spatial element. @@ -157,13 +186,17 @@ def image( input Path to the image file. data_axes - Axes of the data. + Axes of the data (e.g., ('c', 'y', 'x') or ('y', 'x', 'c')). coordinate_system Coordinate system of the spatial element. use_tiff_memmap Whether to use memory-mapped reading for TIFF files. chunks - Chunk size in (y, x) order for TIFF files. If None, defaults to (1000, 1000). + Chunk size specification. Can be: + - int: Applied to all dimensions + - tuple: Chunk sizes matching the order of output axes (c, y, x) + - dict: Mapping of axis names to chunk sizes (e.g., {'c': 1, 'y': 1000, 'x': 1000}) + If None, uses a default (DEFAULT_CHUNK_SIZE) for all axes. scale_factors Scale factors for building a multiscale image pyramid. Passed to Image2DModel.parse(). 
@@ -174,12 +207,13 @@ def image( # Map passed data axes to position of dimension axes_dim_mapping = {axes: ndim for ndim, axes in enumerate(data_axes)} + chunks_dict = normalize_chunks(chunks, axes=data_axes) + im = None if input.suffix in [".tiff", ".tif"] and use_tiff_memmap: try: - im_chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping, chunk_size=chunks) + im_chunks = _tiff_to_chunks(input, axes_dim_mapping=axes_dim_mapping, chunks_cyx=chunks_dict) im = da.block(im_chunks, allow_unknown_chunksizes=True) - data_axes = ["c", "y", "x"] # Edge case: Compressed images are not memory-mappable except ValueError as e: @@ -191,11 +225,16 @@ def image( use_tiff_memmap = False if input.suffix in [".tiff", ".tif"] and not use_tiff_memmap or input.suffix in [".png", ".jpg", ".jpeg"]: - im = _dask_image_imread(input=input, data_axes=data_axes, chunks=chunks) + im = _dask_image_imread(input=input, data_axes=data_axes, chunks_cyx=chunks_dict) if im is None: raise NotImplementedError(f"File format {input.suffix} not implemented") + # the output axes are always cyx return Image2DModel.parse( - im, dims=data_axes, transformations={coordinate_system: Identity()}, scale_factors=scale_factors, chunks=chunks + im, + dims=("c", "y", "x"), + transformations={coordinate_system: Identity()}, + scale_factors=scale_factors, + chunks=chunks_dict, ) diff --git a/tests/readers/test_utils_image.py b/tests/readers/test_utils_image.py index b6648556..c2e08230 100644 --- a/tests/readers/test_utils_image.py +++ b/tests/readers/test_utils_image.py @@ -3,8 +3,11 @@ from numpy.typing import NDArray from spatialdata_io.readers._utils._image import ( + DEFAULT_CHUNK_SIZE, + Chunks_t, _compute_chunk_sizes_positions, _compute_chunks, + normalize_chunks, ) @@ -62,3 +65,39 @@ def test_compute_chunks( tiles = _compute_chunks(dimensions, chunk_size) assert (tiles == result).all() + + +@pytest.mark.parametrize( + "chunks, axes, expected", + [ + # 2D (y, x) + (None, ("y", "x"), {"y": DEFAULT_CHUNK_SIZE, "x": DEFAULT_CHUNK_SIZE}), + (256, ("y", "x"), {"y": 256, "x": 256}), + ((200, 100), ("x", "y"), {"y": 100, "x": 200}), + ({"y": 300, "x": 400}, ("x", "y"), {"y": 300, "x": 400}), + # 2D with channel (c, y, x) + (None, ("c", "y", "x"), {"c": DEFAULT_CHUNK_SIZE, "y": DEFAULT_CHUNK_SIZE, "x": DEFAULT_CHUNK_SIZE}), + (256, ("c", "y", "x"), {"c": 256, "y": 256, "x": 256}), + ((1, 100, 200), ("c", "y", "x"), {"c": 1, "y": 100, "x": 200}), + ({"c": 1, "y": 300, "x": 400}, ("c", "y", "x"), {"c": 1, "y": 300, "x": 400}), + # 3D (z, y, x) + ((10, 100, 200), ("z", "y", "x"), {"z": 10, "y": 100, "x": 200}), + ({"z": 10, "y": 300, "x": 400}, ("z", "y", "x"), {"z": 10, "y": 300, "x": 400}), + ], +) +def test_normalize_chunks_valid(chunks: Chunks_t, axes: tuple[str, ...], expected: dict[str, int]) -> None: + assert normalize_chunks(chunks, axes=axes) == expected + + +@pytest.mark.parametrize( + "chunks, axes, match", + [ + ({"y": 100}, ("y", "x"), "missing keys for axes"), + ((1, 2, 3), ("y", "x"), "doesn't match axes"), + ((1.5, 2), ("y", "x"), "must be int"), + ("invalid", ("y", "x"), "Unsupported chunks type"), + ], +) +def test_normalize_chunks_errors(chunks: Chunks_t, axes: tuple[str, ...], match: str) -> None: + with pytest.raises(ValueError, match=match): + normalize_chunks(chunks, axes=axes) diff --git a/tests/test_generic.py b/tests/test_generic.py index 374d96e2..dd46e15d 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -64,22 +64,42 @@ def save_tiff_files( yield tiff_path, axes, compression -def 
test_read_tiff(save_tiff_files: tuple[Path, tuple[str], str | None], caplog: pytest.LogCaptureFixture) -> None: +@pytest.mark.parametrize("scale_factors", [None, [2]]) +def test_read_tiff( + save_tiff_files: tuple[Path, tuple[str, ...], str | None], + caplog: pytest.LogCaptureFixture, + scale_factors: list[int] | None, +) -> None: tiff_path, axes, compression = save_tiff_files + # Use asymmetric chunk sizes to catch errors with the ordering of chunk dimensions and the assembly of the individual chunks + CHUNKS = {"c": 2, "y": 29, "x": 71} logger.propagate = True with caplog.at_level(logging.WARNING): - # Use asymmetric chunk sizes to catch errors with the ordering of chunk dimensions and the assembly of the individual chunks - img = image(tiff_path, data_axes=axes, chunks=(29, 71), coordinate_system="global") - if compression is not None: - assert "image data is not memory-mappable, potentially due to compression" in caplog.text - + obj = image( + tiff_path, + data_axes=axes, + coordinate_system="global", + chunks=CHUNKS, + scale_factors=scale_factors, + use_tiff_memmap=True, + ) logger.propagate = False + assert ("image data is not memory-mappable" in caplog.text) == (compression is not None) + + target = obj if scale_factors is None else obj["scale0"]["image"] + + # check chunks are correct + for i, ax in enumerate(("c", "y", "x")): + assert target.chunksizes[ax][0] in (CHUNKS[ax], target.shape[i]) - reference = tiffread(tiff_path) - reference_cyx = reference.transpose(*[axes.index(ax) for ax in ["c", "y", "x"]]) + # check multiscale is correct + if scale_factors is not None: + assert "scale0" in obj and len(obj.keys()) == len(scale_factors) + 1 - assert (img.compute() == reference_cyx).all() + # check pixel data + ref = tiffread(tiff_path).transpose(*[axes.index(ax) for ax in ("c", "y", "x")]) + assert (target.compute() == ref).all() @pytest.mark.parametrize("cli", [True, False]) From 163d8b040c24623ad0e11409fd58bdad7700af37 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Fri, 16 Jan 2026 18:38:05 +0100 Subject: [PATCH 37/39] benchmark for image() with synthetic data --- .gitignore | 1 + benchmarks/benchmark_image.py | 187 ++++++++++++++++++++++------------ 2 files changed, 125 insertions(+), 63 deletions(-) diff --git a/.gitignore b/.gitignore index 19b9d658..35db05af 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ temp/ # Compiled files __pycache__/ +.ipynb_checkpoints/ # Distribution / packaging /build/ diff --git a/benchmarks/benchmark_image.py b/benchmarks/benchmark_image.py index 5b9b8095..a872b5c2 100644 --- a/benchmarks/benchmark_image.py +++ b/benchmarks/benchmark_image.py @@ -4,36 +4,28 @@ See benchmark_xenium.py for instructions. 
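
For example, a quick single run (assuming the asv setup described there):

    asv run --python=same -b IOBenchmarkImage --quick --show-stderr -v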
""" -import shutil +import logging +import logging.handlers +import tempfile from pathlib import Path from typing import Any +import numpy as np +import tifffile from spatialdata import SpatialData +from spatialdata._logging import logger from xarray import DataArray from spatialdata_io import image # type: ignore[attr-defined] # ============================================================================= -# CONFIGURATION - Edit these paths to match your setup +# CONFIGURATION - Edit these values to match your setup # ============================================================================= -SANDBOX_DIR = Path(__file__).parent.parent.parent / "spatialdata-sandbox" -DATASET = "xenium_2.0.0_io" +# Image dimensions: (channels, height, width) +IMAGE_SHAPE = (3, 30000, 30000) # ============================================================================= -def get_paths() -> tuple[Path, Path]: - """Get paths for benchmark data.""" - path = SANDBOX_DIR / DATASET - # TODO: this is not a good image because it's compressed (not memmappable) - path_read = path / "data" / "morphology.ome.tif" - path_write = path / "data_benchmark.zarr" - - if not path_read.exists(): - raise ValueError(f"Data directory not found: {path_read}") - - return path_read, path_write - - class IOBenchmarkImage: """Benchmark IO read operations with different parameter combinations.""" @@ -45,37 +37,93 @@ class IOBenchmarkImage: # Parameter combinations: scale_factors, use_tiff_memmap, chunks params = [ - [None, [2, 2, 2]], # scale_factors + [None, [2, 2]], # scale_factors [True, False], # use_tiff_memmap - [(5000, 5000), (1000, 1000)], # chunks + [(1, 5000, 5000), (1, 1000, 1000)], # chunks ] param_names = ["scale_factors", "use_tiff_memmap", "chunks"] + # Class-level temp directory for image files (persists across all benchmarks) + _images_temp_dir: tempfile.TemporaryDirectory[str] | None = None + _path_read_uncompressed: Path | None = None + _path_read_compressed: Path | None = None + + @classmethod + def _setup_images(cls) -> None: + """Create fake image data once for all benchmarks.""" + if cls._images_temp_dir is not None: + return + + cls._images_temp_dir = tempfile.TemporaryDirectory() + images_dir = Path(cls._images_temp_dir.name) + cls._path_read_uncompressed = images_dir / "image_uncompressed.tif" + cls._path_read_compressed = images_dir / "image_compressed.tif" + + # Generate fake image data + rng = np.random.default_rng(42) + data = rng.integers(0, 255, size=IMAGE_SHAPE, dtype=np.uint8) + + # Write uncompressed TIFF (memmappable) + tifffile.imwrite(cls._path_read_uncompressed, data, compression=None) + # Write compressed TIFF (not memmappable) + tifffile.imwrite(cls._path_read_compressed, data, compression="zlib") + def setup(self, *_: Any) -> None: """Set up paths for benchmarking.""" - self.path_read, self.path_write = get_paths() - if self.path_write.exists(): - shutil.rmtree(self.path_write) + # Create images once (shared across all benchmark runs) + self._setup_images() + self.path_read_uncompressed = self._path_read_uncompressed + self.path_read_compressed = self._path_read_compressed + + # Create a separate temp directory for output (cleaned up after each run) + self._output_temp_dir = tempfile.TemporaryDirectory() + self.path_write = Path(self._output_temp_dir.name) / "data_benchmark.zarr" + + def teardown(self, *_: Any) -> None: + """Clean up output directory after each benchmark run.""" + if hasattr(self, "_output_temp_dir"): + self._output_temp_dir.cleanup() def _convert_image( - self, scale_factors: 
+        self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, ...]
     ) -> SpatialData:
         """Read image data with specified parameters."""
-        im = image(
-            input=self.path_read,
-            data_axes=("c", "y", "x"),
-            coordinate_system="global",
-            use_tiff_memmap=use_tiff_memmap,
-            chunks=chunks,
-            scale_factors=scale_factors,
-        )
+        # Use uncompressed (memmappable) for use_tiff_memmap=True, compressed otherwise
+        path_read = self.path_read_uncompressed if use_tiff_memmap else self.path_read_compressed
+
+        # Capture log messages to verify memmappable warning behavior
+        log_capture = logging.handlers.MemoryHandler(capacity=100)
+        log_capture.setLevel(logging.WARNING)
+        logger.addHandler(log_capture)
+        original_propagate = logger.propagate
+        logger.propagate = True
+
+        try:
+            im = image(
+                input=path_read,
+                data_axes=("c", "y", "x"),
+                coordinate_system="global",
+                use_tiff_memmap=use_tiff_memmap,
+                chunks=chunks,
+                scale_factors=scale_factors,
+            )
+        finally:
+            logger.removeHandler(log_capture)
+            logger.propagate = original_propagate
+
+        # Check warning behavior: when use_tiff_memmap=True, no compression warning should be raised
+        log_messages = [record.getMessage() for record in log_capture.buffer]
+        has_memmap_warning = any("image data is not memory-mappable" in msg for msg in log_messages)
+        if use_tiff_memmap:
+            assert not has_memmap_warning, "Uncompressed TIFF should not trigger memory-mappable warning"
+
         sdata = SpatialData.init_from_elements({"image": im})
-        # sanity check
+        # sanity check: chunks is (c, y, x)
        if scale_factors is None:
             assert isinstance(sdata["image"], DataArray)
             if chunks is not None:
                 assert (
-                    sdata["image"].chunksizes["x"][0] == chunks[0]
+                    sdata["image"].chunksizes["x"][0] == chunks[2]
                     or sdata["image"].chunksizes["x"][0] == sdata["image"].shape[2]
                 )
                 assert (
@@ -86,7 +134,7 @@ def _convert_image(
             assert len(sdata["image"].keys()) == len(scale_factors) + 1
             if chunks is not None:
                 assert (
-                    sdata["image"]["scale0"]["image"].chunksizes["x"][0] == chunks[0]
+                    sdata["image"]["scale0"]["image"].chunksizes["x"][0] == chunks[2]
                     or sdata["image"]["scale0"]["image"].chunksizes["x"][0]
                     == sdata["image"]["scale0"]["image"].shape[2]
                 )
@@ -98,41 +146,54 @@ def _convert_image(
 
         return sdata
 
-    def time_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, int]) -> None:
+    def time_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, ...]) -> None:
         """Walltime for data parsing."""
         sdata = self._convert_image(scale_factors, use_tiff_memmap, chunks)
         sdata.write(self.path_write)
 
-    def peakmem_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, int]) -> None:
+    def peakmem_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, ...]) -> None:
         """Peak memory for data parsing."""
         sdata = self._convert_image(scale_factors, use_tiff_memmap, chunks)
         sdata.write(self.path_write)
 
 
-if __name__ == "__main__":
-    # Run a single test case for quick verification
-    bench = IOBenchmarkImage()
-
-    # bench.setup()
-    # bench.time_io(None, True, (5000, 5000))
-
-    # bench.setup()
-    # bench.time_io(None, True, (1000, 1000))
-
-    # bench.setup()
-    # bench.time_io(None, False, (5000, 5000))
-
-    # bench.setup()
-    # bench.time_io(None, False, (1000, 1000))
-
-    # bench.setup()
-    # bench.time_io([2, 2, 2], True, (5000, 5000))
-
-    # bench.setup()
-    # bench.time_io([2, 2, 2], True, (1000, 1000))
-
-    bench.setup()
-    bench.time_io([2, 2, 2], False, (5000, 5000))
-
-    # bench.setup()
-    # bench.time_io([2, 2, 2], False, (1000, 1000))
+# if __name__ == "__main__":
+#     # Run a single test case for quick verification
+#     bench = IOBenchmarkImage()
+#
+#     bench.setup()
+#     bench.time_io(None, True, (1, 5000, 5000))
+#     bench.teardown()
+#
+#     bench.setup()
+#     bench.time_io(None, True, (1, 1000, 1000))
+#     bench.teardown()
+#
+#     bench.setup()
+#     bench.time_io(None, False, (1, 5000, 5000))
+#     bench.teardown()
+#
+#     bench.setup()
+#     bench.time_io(None, False, (1, 1000, 1000))
+#     bench.teardown()
+#
+#     bench.setup()
+#     bench.time_io([2, 2, 2], True, (1, 5000, 5000))
+#     bench.teardown()
+#
+#     bench.setup()
+#     bench.time_io([2, 2, 2], True, (1, 1000, 1000))
+#     bench.teardown()
+#
+#     bench.setup()
+#     bench.time_io([2, 2, 2], False, (1, 5000, 5000))
+#     bench.teardown()
+#
+#     bench.setup()
+#     bench.time_io([2, 2, 2], False, (1, 1000, 1000))
+#     bench.teardown()
+#
+#     # Clean up the shared images temp directory at the end
+#     if IOBenchmarkImage._images_temp_dir is not None:
+#         IOBenchmarkImage._images_temp_dir.cleanup()
+#         IOBenchmarkImage._images_temp_dir = None
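A note on the log-capture pattern that the patch above introduces in _convert_image: logging.handlers.MemoryHandler buffers records in its .buffer attribute, and flush() does nothing when no target handler is configured, so the buffered records remain available for inspection after the read. The following minimal sketch isolates that pattern; the run_reader callable and the "sketch" logger name are placeholders for illustration, not part of the patch:

import logging
import logging.handlers

logger = logging.getLogger("sketch")  # placeholder for spatialdata's logger


def capture_warning_messages(run_reader):
    """Invoke run_reader while buffering WARNING records; return their messages."""
    handler = logging.handlers.MemoryHandler(capacity=100)
    handler.setLevel(logging.WARNING)
    logger.addHandler(handler)
    try:
        run_reader()
    finally:
        logger.removeHandler(handler)
    # Without a flush target, MemoryHandler retains every buffered record
    return [record.getMessage() for record in handler.buffer]


messages = capture_warning_messages(lambda: logger.warning("image data is not memory-mappable"))
assert any("memory-mappable" in m for m in messages)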
From 4b4a00e716d257387c140e48c8424d8fe8fcc433 Mon Sep 17 00:00:00 2001
From: Luca Marconato
Date: Fri, 16 Jan 2026 19:03:59 +0100
Subject: [PATCH 38/39] fix pre-commit

---
 benchmarks/benchmark_image.py | 65 +++++++++++++----------------------
 1 file changed, 23 insertions(+), 42 deletions(-)

diff --git a/benchmarks/benchmark_image.py b/benchmarks/benchmark_image.py
index a872b5c2..ce46dc94 100644
--- a/benchmarks/benchmark_image.py
+++ b/benchmarks/benchmark_image.py
@@ -35,13 +35,14 @@ class IOBenchmarkImage:
     warmup_time = 0
     processes = 1
 
-    # Parameter combinations: scale_factors, use_tiff_memmap, chunks
+    # Parameter combinations: scale_factors, (use_tiff_memmap, compressed), chunks
+    # Combinations: (memmap=False, compressed=True), (memmap=False, compressed=False), (memmap=True, compressed=False)
     params = [
         [None, [2, 2]],  # scale_factors
-        [True, False],  # use_tiff_memmap
+        [(False, True), (False, False), (True, False)],  # (use_tiff_memmap, compressed)
         [(1, 5000, 5000), (1, 1000, 1000)],  # chunks
     ]
-    param_names = ["scale_factors", "use_tiff_memmap", "chunks"]
+    param_names = ["scale_factors", "memmap_compressed", "chunks"]
 
     # Class-level temp directory for image files (persists across all benchmarks)
     _images_temp_dir: tempfile.TemporaryDirectory[str] | None = None
@@ -85,11 +86,13 @@ def teardown(self, *_: Any) -> None:
             self._output_temp_dir.cleanup()
 
     def _convert_image(
-        self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, ...]
+        self, scale_factors: list[int] | None, memmap_compressed: tuple[bool, bool], chunks: tuple[int, ...]
     ) -> SpatialData:
         """Read image data with specified parameters."""
-        # Use uncompressed (memmappable) for use_tiff_memmap=True, compressed otherwise
-        path_read = self.path_read_uncompressed if use_tiff_memmap else self.path_read_compressed
+        use_tiff_memmap, compressed = memmap_compressed
+        # Select file based on compression setting
+        path_read = self.path_read_compressed if compressed else self.path_read_uncompressed
+        assert path_read is not None
 
         # Capture log messages to verify memmappable warning behavior
         log_capture = logging.handlers.MemoryHandler(capacity=100)
@@ -111,11 +114,13 @@ def _convert_image(
             logger.removeHandler(log_capture)
             logger.propagate = original_propagate
 
-        # Check warning behavior: when use_tiff_memmap=True, no compression warning should be raised
+        # Check warning behavior: when use_tiff_memmap=True with an uncompressed file, no warning should be raised
         log_messages = [record.getMessage() for record in log_capture.buffer]
         has_memmap_warning = any("image data is not memory-mappable" in msg for msg in log_messages)
-        if use_tiff_memmap:
-            assert not has_memmap_warning, "Uncompressed TIFF should not trigger memory-mappable warning"
+        if use_tiff_memmap and not compressed:
+            assert not has_memmap_warning, (
+                "Uncompressed TIFF with memmap=True should not trigger memory-mappable warning"
+            )
 
         sdata = SpatialData.init_from_elements({"image": im})
         # sanity check: chunks is (c, y, x)
@@ -146,14 +151,18 @@ def _convert_image(
 
         return sdata
 
-    def time_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, ...]) -> None:
+    def time_io(
+        self, scale_factors: list[int] | None, memmap_compressed: tuple[bool, bool], chunks: tuple[int, ...]
+    ) -> None:
         """Walltime for data parsing."""
-        sdata = self._convert_image(scale_factors, use_tiff_memmap, chunks)
+        sdata = self._convert_image(scale_factors, memmap_compressed, chunks)
         sdata.write(self.path_write)
 
-    def peakmem_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chunks: tuple[int, ...]) -> None:
+    def peakmem_io(
+        self, scale_factors: list[int] | None, memmap_compressed: tuple[bool, bool], chunks: tuple[int, ...]
+    ) -> None:
         """Peak memory for data parsing."""
-        sdata = self._convert_image(scale_factors, use_tiff_memmap, chunks)
+        sdata = self._convert_image(scale_factors, memmap_compressed, chunks)
         sdata.write(self.path_write)
 
 
@@ -162,35 +171,7 @@ def peakmem_io(self, scale_factors: list[int] | None, use_tiff_memmap: bool, chu
 # bench = IOBenchmarkImage()
 #
 # bench.setup()
-# bench.time_io(None, True, (1, 5000, 5000))
-# bench.teardown()
-#
-# bench.setup()
-# bench.time_io(None, True, (1, 1000, 1000))
-# bench.teardown()
-#
-# bench.setup()
-# bench.time_io(None, False, (1, 5000, 5000))
-# bench.teardown()
-#
-# bench.setup()
-# bench.time_io(None, False, (1, 1000, 1000))
-# bench.teardown()
-#
-# bench.setup()
-# bench.time_io([2, 2, 2], True, (1, 5000, 5000))
-# bench.teardown()
-#
-# bench.setup()
-# bench.time_io([2, 2, 2], True, (1, 1000, 1000))
-# bench.teardown()
-#
-# bench.setup()
-# bench.time_io([2, 2, 2], False, (1, 5000, 5000))
-# bench.teardown()
-#
-# bench.setup()
-# bench.time_io([2, 2, 2], False, (1, 1000, 1000))
+# bench.time_io(None, (True, False), (1, 5000, 5000))
 # bench.teardown()
 #
 # # Clean up the shared images temp directory at the end
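The parameter grid in this patch deliberately omits the (use_tiff_memmap=True, compressed=True) pair: tifffile can only memory-map image data that is stored uncompressed and contiguously, and tifffile.memmap raises ValueError otherwise. A minimal sketch of that constraint, assuming only that numpy and tifffile are installed (the file names are throwaway):

import tempfile
from pathlib import Path

import numpy as np
import tifffile

tmp = Path(tempfile.mkdtemp())
data = np.zeros((3, 64, 64), dtype=np.uint8)

# Uncompressed data is written contiguously and can be memory-mapped
tifffile.imwrite(tmp / "plain.tif", data, compression=None)
# Compressed data cannot be memory-mapped
tifffile.imwrite(tmp / "packed.tif", data, compression="zlib")

mm = tifffile.memmap(tmp / "plain.tif")  # zero-copy NumPy view backed by the file
assert mm.shape == (3, 64, 64)

try:
    tifffile.memmap(tmp / "packed.tif")
except ValueError:
    print("compressed TIFF data is not memory-mappable")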
From 355b695788d0edc3ad46e087da50308345710f9e Mon Sep 17 00:00:00 2001
From: Luca Marconato
Date: Sun, 18 Jan 2026 16:22:43 +0100
Subject: [PATCH 39/39] c=1 vs c=3 benchmark

---
 benchmarks/benchmark_image.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/benchmark_image.py b/benchmarks/benchmark_image.py
index ce46dc94..3096fee7 100644
--- a/benchmarks/benchmark_image.py
+++ b/benchmarks/benchmark_image.py
@@ -40,7 +40,7 @@ class IOBenchmarkImage:
     params = [
         [None, [2, 2]],  # scale_factors
         [(False, True), (False, False), (True, False)],  # (use_tiff_memmap, compressed)
-        [(1, 5000, 5000), (1, 1000, 1000)],  # chunks
+        [(1, 250, 250), (3, 250, 250)],  # chunks
     ]
     param_names = ["scale_factors", "memmap_compressed", "chunks"]
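The final chunk parameters compare per-channel chunking (c=1) against whole-channel chunks (c=3) at the same spatial tile size. Assuming the reader forwards the chunks tuple to dask unchanged, in the (c, y, x) order that the sanity checks in _convert_image imply, the difference is purely in how many blocks dask has to schedule:

import dask.array as da
import numpy as np

arr = np.zeros((3, 1000, 1000), dtype=np.uint8)

# c=1: every channel is a separate block -> 3 * 4 * 4 = 48 blocks
print(da.from_array(arr, chunks=(1, 250, 250)).numblocks)  # (3, 4, 4)

# c=3: channels share each spatial block -> 1 * 4 * 4 = 16 blocks
print(da.from_array(arr, chunks=(3, 250, 250)).numblocks)  # (1, 4, 4)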