Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
8027c0b
feature: Initial lazy tiff reader
lucas-diedrich Feb 16, 2025
7ff1d23
Updated comments
lucas-diedrich Feb 16, 2025
826133a
Updated comments
lucas-diedrich Feb 16, 2025
df258bc
Move utility functions to designated submodule readers._utils._image
lucas-diedrich Feb 16, 2025
db0d782
Initial tests utils
lucas-diedrich Feb 16, 2025
c03932f
Fixes edge cases for min coordinate
lucas-diedrich Feb 16, 2025
339cbd8
Added test for negative coordinates
lucas-diedrich Feb 16, 2025
24c6eec
Add support for png/jpg again
lucas-diedrich Feb 17, 2025
dbdc7c7
Add initial test
lucas-diedrich Feb 17, 2025
da98469
Fix: Fix jpeg and png reader, fix issues with local variable name
lucas-diedrich Feb 17, 2025
b7e5874
Merge branch 'main' of https://github.com/scverse/spatialdata-io into…
lucas-diedrich Feb 17, 2025
2349be7
Update src/spatialdata_io/readers/_utils/_image.py
lucas-diedrich May 2, 2025
46ed3e5
[Refactor|API] Rename dimensions to shape to stick to numpy convention
lucas-diedrich May 2, 2025
99731fe
[Refactor] Make suggested simplification of code, suggested by @melonora
lucas-diedrich May 2, 2025
f03ca8e
[Test] Add test for compressed tiffs
lucas-diedrich May 2, 2025
9e057de
[Fix] Account for compressed images
lucas-diedrich May 2, 2025
c05b718
[Refactor] Remove unnecessary type hint
lucas-diedrich May 2, 2025
5705515
Merge branch 'main' into image-reader-chunkwise
LucaMarconato Jan 12, 2026
9f3cc3c
fix pre-commit
LucaMarconato Jan 12, 2026
9d44f25
fix transpose in image(); wip code review
LucaMarconato Jan 13, 2026
c14cb22
add test for dask-image fallback for compressed tiffs
LucaMarconato Jan 13, 2026
a7a2b92
remove unused min_coordinate
LucaMarconato Jan 13, 2026
80a931d
fix wrong dimension _compute_chunks(); cover with test
LucaMarconato Jan 13, 2026
fc26342
np._int -> np.number
LucaMarconato Jan 13, 2026
3b296da
fix indices in _read_chunks()
LucaMarconato Jan 13, 2026
f7ad81a
better english
LucaMarconato Jan 13, 2026
7f5b7ce
better docstring
LucaMarconato Jan 13, 2026
0c482ac
wip benchmark (bugs)
LucaMarconato Jan 13, 2026
1466596
[Test] Use small asymmetric chunk sizes to capture any issues with th…
lucas-diedrich Jan 16, 2026
a2930d5
Add comment to clarify use of asymmetric chunk sizes in test_read_tiff
lucas-diedrich Jan 16, 2026
a9c2a2b
Follow standard convention of image dimensions throughout reader func…
lucas-diedrich Jan 16, 2026
2dabbf9
Refactor chunk specification to follow (y, x) order and update relate…
lucas-diedrich Jan 16, 2026
5d9ece2
[Fix] Shape dimensions were inverted. Fix shape specification and mak…
lucas-diedrich Jan 16, 2026
21f8fc6
chore: Remove Note and TODO
lucas-diedrich Jan 16, 2026
93554e7
Clarify documentation
lucas-diedrich Jan 16, 2026
5a20f41
fix pre-commit benchmark_image
LucaMarconato Jan 16, 2026
a890951
wip fix chunks
LucaMarconato Jan 16, 2026
19df325
improve chunks support
LucaMarconato Jan 16, 2026
163d8b0
benchmark for image() with synthetic data
LucaMarconato Jan 16, 2026
4b4a00e
fix pre-commit
LucaMarconato Jan 16, 2026
355b695
c=1 vs c=3 benchmark
LucaMarconato Jan 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ temp/

# Compiled files
__pycache__/
.ipynb_checkpoints/

# Distribution / packaging
/build/
Expand Down Expand Up @@ -42,3 +43,4 @@ data
data/
tests/data
uv.lock
.asv/
2 changes: 1 addition & 1 deletion asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"project": "spatialdata-io",
"project_url": "https://github.com/scverse/spatialdata-io",
"repo": ".",
"branches": ["main", "xenium-labels-dask", "xenium-labels-dask-zipstore"],
"branches": ["image-reader-chunkwise"],
"dvcs": "git",
"environment_type": "virtualenv",
"pythons": ["3.12"],
Expand Down
180 changes: 180 additions & 0 deletions benchmarks/benchmark_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
"""Benchmarks for SpatialData IO operations for large images.

Instructions:
See benchmark_xenium.py for instructions.
"""

import logging
import logging.handlers
import tempfile
from pathlib import Path
from typing import Any

import numpy as np
import tifffile
from spatialdata import SpatialData
from spatialdata._logging import logger
from xarray import DataArray

from spatialdata_io import image # type: ignore[attr-defined]

# =============================================================================
# CONFIGURATION - Edit these values to match your setup
# =============================================================================
# Image dimensions: (channels, height, width)
IMAGE_SHAPE = (3, 30000, 30000)
# =============================================================================


class IOBenchmarkImage:
    """Benchmark IO read operations with different parameter combinations.

    Airspeed-velocity (asv) benchmark class: ``time_io`` measures walltime and
    ``peakmem_io`` peak memory for reading a synthetic TIFF of shape
    ``IMAGE_SHAPE`` via ``spatialdata_io.image`` and writing the resulting
    ``SpatialData`` to a Zarr store, for every combination in ``params``.
    """

    # asv run settings (discovered by name on the class).
    timeout = 3600  # seconds before a single benchmark run is aborted
    repeat = 3  # number of timing samples per parameter combination
    number = 1  # one call per sample (each call does heavy IO)
    warmup_time = 0
    processes = 1

    # Parameter combinations: scale_factors, (use_tiff_memmap, compressed), chunks
    # Combinations: (memmap=False, compressed=True), (memmap=False, compressed=False), (memmap=True, compressed=False)
    params = [
        [None, [2, 2]],  # scale_factors
        [(False, True), (False, False), (True, False)],  # (use_tiff_memmap, compressed)
        [(1, 250, 250), (3, 250, 250)],  # chunks
    ]
    param_names = ["scale_factors", "memmap_compressed", "chunks"]

    # Class-level temp directory for image files (persists across all benchmarks)
    _images_temp_dir: tempfile.TemporaryDirectory[str] | None = None
    _path_read_uncompressed: Path | None = None
    _path_read_compressed: Path | None = None

    @classmethod
    def _setup_images(cls) -> None:
        """Create fake image data once for all benchmarks.

        Writes one uncompressed (memory-mappable) and one zlib-compressed
        (not memory-mappable) TIFF into a class-level temporary directory.
        Idempotent: returns immediately if the directory already exists.
        """
        if cls._images_temp_dir is not None:
            return

        cls._images_temp_dir = tempfile.TemporaryDirectory()
        images_dir = Path(cls._images_temp_dir.name)
        cls._path_read_uncompressed = images_dir / "image_uncompressed.tif"
        cls._path_read_compressed = images_dir / "image_compressed.tif"

        # Generate fake image data (fixed seed so every run benchmarks identical bytes)
        rng = np.random.default_rng(42)
        data = rng.integers(0, 255, size=IMAGE_SHAPE, dtype=np.uint8)

        # Write uncompressed TIFF (memmappable)
        tifffile.imwrite(cls._path_read_uncompressed, data, compression=None)
        # Write compressed TIFF (not memmappable)
        tifffile.imwrite(cls._path_read_compressed, data, compression="zlib")

    def setup(self, *_: Any) -> None:
        """Set up paths for benchmarking.

        Runs before every benchmark invocation; the positional ``*_`` swallows
        the parameter values asv passes in (unused here).
        """
        # Create images once (shared across all benchmark runs)
        self._setup_images()
        self.path_read_uncompressed = self._path_read_uncompressed
        self.path_read_compressed = self._path_read_compressed

        # Create a separate temp directory for output (cleaned up after each run)
        self._output_temp_dir = tempfile.TemporaryDirectory()
        self.path_write = Path(self._output_temp_dir.name) / "data_benchmark.zarr"

    def teardown(self, *_: Any) -> None:
        """Clean up output directory after each benchmark run."""
        # Guarded with hasattr so teardown is safe even if setup() failed
        # before the output directory was created.
        if hasattr(self, "_output_temp_dir"):
            self._output_temp_dir.cleanup()

    def _convert_image(
        self, scale_factors: list[int] | None, memmap_compressed: tuple[bool, bool], chunks: tuple[int, ...]
    ) -> SpatialData:
        """Read image data with specified parameters.

        Selects the compressed or uncompressed TIFF based on
        ``memmap_compressed``, reads it through ``spatialdata_io.image`` while
        capturing warnings from the spatialdata logger, then sanity-checks the
        warning behavior and the chunk sizes of the resulting element before
        wrapping it in a ``SpatialData`` object.
        """
        use_tiff_memmap, compressed = memmap_compressed
        # Select file based on compression setting
        path_read = self.path_read_compressed if compressed else self.path_read_uncompressed
        assert path_read is not None

        # Capture log messages to verify memmappable warning behavior.
        # MemoryHandler with no target never forwards records, so everything
        # at WARNING+ simply accumulates in `log_capture.buffer`.
        log_capture = logging.handlers.MemoryHandler(capacity=100)
        log_capture.setLevel(logging.WARNING)
        logger.addHandler(log_capture)
        original_propagate = logger.propagate
        # NOTE(review): propagation is not required for the directly-attached
        # handler to receive records — presumably enabled so warnings also
        # reach root handlers during benchmarking; confirm intent.
        logger.propagate = True

        try:
            im = image(
                input=path_read,
                data_axes=("c", "y", "x"),
                coordinate_system="global",
                use_tiff_memmap=use_tiff_memmap,
                chunks=chunks,
                scale_factors=scale_factors,
            )
        finally:
            # Always detach the capture handler and restore logger state,
            # even if the read raises.
            logger.removeHandler(log_capture)
            logger.propagate = original_propagate

        # Check warning behavior: when use_tiff_memmap=True with uncompressed file, no warning should be raised
        log_messages = [record.getMessage() for record in log_capture.buffer]
        has_memmap_warning = any("image data is not memory-mappable" in msg for msg in log_messages)
        if use_tiff_memmap and not compressed:
            assert not has_memmap_warning, (
                "Uncompressed TIFF with memmap=True should not trigger memory-mappable warning"
            )

        sdata = SpatialData.init_from_elements({"image": im})
        # sanity check: chunks is (c, y, x)
        if scale_factors is None:
            # Single-scale result: a plain DataArray. Each chunk size must be
            # either the requested size or the full axis length (last/partial chunk).
            assert isinstance(sdata["image"], DataArray)
            if chunks is not None:
                assert (
                    sdata["image"].chunksizes["x"][0] == chunks[2]
                    or sdata["image"].chunksizes["x"][0] == sdata["image"].shape[2]
                )
                assert (
                    sdata["image"].chunksizes["y"][0] == chunks[1]
                    or sdata["image"].chunksizes["y"][0] == sdata["image"].shape[1]
                )
        else:
            # Multiscale result: one pyramid level per scale factor plus the
            # original resolution; check chunking on the full-resolution level.
            assert len(sdata["image"].keys()) == len(scale_factors) + 1
            if chunks is not None:
                assert (
                    sdata["image"]["scale0"]["image"].chunksizes["x"][0] == chunks[2]
                    or sdata["image"]["scale0"]["image"].chunksizes["x"][0]
                    == sdata["image"]["scale0"]["image"].shape[2]
                )
                assert (
                    sdata["image"]["scale0"]["image"].chunksizes["y"][0] == chunks[1]
                    or sdata["image"]["scale0"]["image"].chunksizes["y"][0]
                    == sdata["image"]["scale0"]["image"].shape[1]
                )

        return sdata

    def time_io(
        self, scale_factors: list[int] | None, memmap_compressed: tuple[bool, bool], chunks: tuple[int, ...]
    ) -> None:
        """Walltime for data parsing."""
        sdata = self._convert_image(scale_factors, memmap_compressed, chunks)
        sdata.write(self.path_write)

    def peakmem_io(
        self, scale_factors: list[int] | None, memmap_compressed: tuple[bool, bool], chunks: tuple[int, ...]
    ) -> None:
        """Peak memory for data parsing."""
        # Same workload as time_io; asv's peakmem_ prefix makes it a memory benchmark.
        sdata = self._convert_image(scale_factors, memmap_compressed, chunks)
        sdata.write(self.path_write)


# if __name__ == "__main__":
# # Run a single test case for quick verification
# bench = IOBenchmarkImage()
#
# bench.setup()
# bench.time_io(None, (True, False), (1, 5000, 5000))
# bench.teardown()
#
# # Clean up the shared images temp directory at the end
# if IOBenchmarkImage._images_temp_dir is not None:
# IOBenchmarkImage._images_temp_dir.cleanup()
# IOBenchmarkImage._images_temp_dir = None
19 changes: 9 additions & 10 deletions benchmarks/bench_xenium.py → benchmarks/benchmark_xenium.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@
cd /path/to/spatialdata-io

# Quick benchmark (single run, for testing):
asv run --python=same -b IOBenchmark --quick --show-stderr -v
asv run --python=same -b IOBenchmarkXenium --quick --show-stderr -v

# Full benchmark (multiple runs, for accurate results):
asv run --python=same -b IOBenchmark --show-stderr -v
asv run --python=same -b IOBenchmarkXenium --show-stderr -v

Comparing branches:
# Run on specific commits:
asv run main^! -b IOBenchmark --show-stderr -v
asv run xenium-labels-dask^! -b IOBenchmark --show-stderr -v
asv run main^! -b IOBenchmarkXenium --show-stderr -v
asv run xenium-labels-dask^! -b IOBenchmarkXenium --show-stderr -v

# Or compare two branches directly:
asv continuous main xenium-labels-dask -b IOBenchmark --show-stderr -v
asv continuous main xenium-labels-dask -b IOBenchmarkXenium --show-stderr -v

# View comparison:
asv compare main xenium-labels-dask
Expand All @@ -36,7 +36,6 @@
import inspect
import shutil
from pathlib import Path
from typing import TYPE_CHECKING

from spatialdata import SpatialData

Expand All @@ -62,9 +61,7 @@ def get_paths() -> tuple[Path, Path]:
return path_read, path_write


class IOBenchmark:
"""Benchmark IO read operations."""

class IOBenchmarkXenium:
timeout = 3600
repeat = 3
number = 1
Expand Down Expand Up @@ -106,4 +103,6 @@ def peakmem_io(self) -> None:


if __name__ == "__main__":
IOBenchmark().time_io()
benchmark = IOBenchmarkXenium()
benchmark.setup()
benchmark.time_io()
Loading
Loading