Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def _read_geo_info(source, *, overview_level: int | None = None):
"""
from ._dtypes import tiff_dtype_to_numpy
from ._geotags import extract_geo_info
from ._header import parse_all_ifds, parse_header
from ._header import parse_all_ifds, parse_header, select_overview_ifd
from ._reader import _coerce_path, _is_file_like

source = _coerce_path(source)
Expand Down Expand Up @@ -226,10 +226,9 @@ def _read_geo_info(source, *, overview_level: int | None = None):
try:
header = parse_header(data)
ifds = parse_all_ifds(data, header)
ifd_idx = 0
if overview_level is not None:
ifd_idx = min(overview_level, len(ifds) - 1)
ifd = ifds[ifd_idx]
if not ifds:
raise ValueError("No IFDs found in TIFF file")
ifd = select_overview_ifd(ifds, overview_level)
geo_info = extract_geo_info(ifd, data, header.byte_order)
bps = ifd.bits_per_sample
if isinstance(bps, tuple):
Expand Down Expand Up @@ -1444,7 +1443,9 @@ def read_geotiff_gpu(source: str, *,
from ._reader import (
_FileSource, _check_dimensions, MAX_PIXELS_DEFAULT, _coerce_path,
)
from ._header import parse_header, parse_all_ifds, validate_tile_layout
from ._header import (
parse_header, parse_all_ifds, select_overview_ifd, validate_tile_layout,
)
from ._dtypes import tiff_dtype_to_numpy
from ._geotags import extract_geo_info
from ._gpu_decode import gpu_decode_tiles
Expand All @@ -1465,10 +1466,8 @@ def read_geotiff_gpu(source: str, *,
if len(ifds) == 0:
raise ValueError("No IFDs found in TIFF file")

ifd_idx = 0
if overview_level is not None:
ifd_idx = min(overview_level, len(ifds) - 1)
ifd = ifds[ifd_idx]
# Select the pyramid IFD for the requested level, skipping mask and page-only IFDs
ifd = select_overview_ifd(ifds, overview_level)

bps = ifd.bits_per_sample
if isinstance(bps, tuple):
Expand Down
95 changes: 95 additions & 0 deletions xrspatial/geotiff/_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,25 @@ def get_values(self, tag: int) -> tuple | None:
return (v,)

# Convenience properties
@property
def subfile_type(self) -> int:
    """Return the NewSubfileType (tag 254) bit field, or 0 when absent.

    Bit meanings (TIFF 6.0 spec):
        bit 0 (& 1) - reduced-resolution overview
        bit 1 (& 2) - page of multi-page document
        bit 2 (& 4) - transparency mask
    """
    raw = self.get_value(TAG_NEW_SUBFILE_TYPE, 0)
    if not isinstance(raw, tuple):
        return int(raw)
    # A tuple value: use its first element; an empty tuple counts as 0.
    return int(raw[0]) if raw else 0

@property
def is_mask(self) -> bool:
    """Whether NewSubfileType flags this IFD as a transparency mask (bit 2)."""
    mask_bit = 4
    return (self.subfile_type & mask_bit) != 0

@property
def width(self) -> int:
    """Return the value stored under ``TAG_IMAGE_WIDTH`` (lookup default: 0)."""
    image_width = self.get_value(TAG_IMAGE_WIDTH, 0)
    return image_width
Expand Down Expand Up @@ -426,6 +445,82 @@ def parse_ifd(data: bytes | memoryview, offset: int,
return IFD(entries=entries, next_ifd_offset=next_ifd)


def _is_overview_or_full_res(ifd: IFD) -> bool:
    """Return True if *ifd* is the full-resolution image or an overview.

    NewSubfileType (tag 254) is a bit field per TIFF 6.0:

    * bit 0 (value 1) -- reduced-resolution version of another image (overview)
    * bit 1 (value 2) -- single page of a multi-page document
    * bit 2 (value 4) -- transparency mask

    Acceptance rules, as implemented:

    * ``NewSubfileType == 0`` (the full-resolution IFD) is accepted.
    * Any value with the overview bit set and the mask bit clear is
      accepted (this includes value 3, overview + page).
    * Any value with the mask bit set is rejected, including
      overview-of-mask (value 5).
    * Any other non-zero value without the overview bit (e.g. a plain
      page, value 2) is rejected, so ``overview_level`` indexes the
      pyramid only.
    """
    st = ifd.subfile_type
    if st & 4:
        return False  # transparency mask (or overview-of-mask, st=5)
    return st == 0 or (st & 1) != 0


def select_overview_ifd(ifds: list[IFD], overview_level: int | None) -> IFD:
    """Pick the IFD for a requested overview level, skipping non-pyramid IFDs.

    Some COG variants (notably GDAL with internal masks) interleave
    transparency-mask IFDs (NewSubfileType bit 2 set) with overview IFDs.
    Multi-page TIFFs additionally carry page IFDs (bit 1 set). Indexing the
    raw IFD list by ``overview_level`` returns the wrong layer in either
    case. This helper builds a filtered list of full-resolution and
    overview IFDs only (file order preserved), and indexes into that.

    ``overview_level=0`` (or ``None``) returns the first pyramid IFD
    (normally the full-resolution image); ``overview_level=1`` returns the
    next one (the first overview), and so on.

    Parameters
    ----------
    ifds : list[IFD]
        All IFDs as parsed from the file.
    overview_level : int or None
        Which overview to return. ``None`` is treated as ``0``.

    Returns
    -------
    IFD

    Raises
    ------
    ValueError
        If ``ifds`` is empty, if no IFD qualifies as full-resolution or
        overview, if ``overview_level`` is negative, or if it exceeds the
        number of pyramid IFDs in the file.
    """
    if not ifds:
        raise ValueError("No IFDs found in TIFF file")

    pyramid = [ifd for ifd in ifds if _is_overview_or_full_res(ifd)]
    if not pyramid:
        raise ValueError(
            "TIFF file contains no full-resolution or overview IFDs "
            "(every IFD is a mask, page, or other non-pyramid layer)")

    level = 0 if overview_level is None else overview_level
    if level < 0:
        raise ValueError(f"overview_level must be >= 0, got {level}")

    n_pyramid = len(pyramid)
    if level >= n_pyramid:
        # Assemble the message from named parts so each noun is pluralised
        # next to its own count (a lone pyramid IFD reads "1 pyramid IFD").
        n_overviews = n_pyramid - 1
        n_skipped = len(ifds) - n_pyramid
        skipped_note = ""
        if n_skipped:
            skipped_note = (
                f", plus {n_skipped} non-pyramid "
                f"IFD{'s' if n_skipped != 1 else ''}")
        raise ValueError(
            f"overview_level={level} is out of range: TIFF has "
            f"{n_pyramid} pyramid IFD{'s' if n_pyramid != 1 else ''} "
            f"(1 full-resolution + {n_overviews} "
            f"overview{'s' if n_overviews != 1 else ''}{skipped_note}). "
            f"Valid overview_level values are 0..{n_pyramid - 1}.")

    return pyramid[level]
Comment thread
brendancol marked this conversation as resolved.


def parse_all_ifds(data: bytes | memoryview,
header: TIFFHeader) -> list[IFD]:
"""Parse all IFDs in a TIFF file.
Expand Down
23 changes: 12 additions & 11 deletions xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@
)
from ._dtypes import SUB_BYTE_BPS, tiff_dtype_to_numpy
from ._geotags import GeoInfo, GeoTransform, extract_geo_info
from ._header import IFD, TIFFHeader, parse_all_ifds, parse_header, validate_tile_layout
from ._header import (
IFD,
TIFFHeader,
parse_all_ifds,
parse_header,
select_overview_ifd,
validate_tile_layout,
)

# ---------------------------------------------------------------------------
# Allocation guard: reject TIFF dimensions that would exhaust memory
Expand Down Expand Up @@ -972,11 +979,8 @@ def _read_cog_http(url: str, overview_level: int | None = None,
if len(ifds) == 0:
raise ValueError("No IFDs found in COG")

# Select IFD based on overview level
ifd_idx = 0
if overview_level is not None:
ifd_idx = min(overview_level, len(ifds) - 1)
ifd = ifds[ifd_idx]
# Select IFD based on overview level, skipping mask and page-only IFDs
ifd = select_overview_ifd(ifds, overview_level)

bps = ifd.bits_per_sample
if isinstance(bps, tuple):
Expand Down Expand Up @@ -1131,11 +1135,8 @@ def read_to_array(source, *, window=None, overview_level: int | None = None,
if len(ifds) == 0:
raise ValueError("No IFDs found in TIFF file")

# Select IFD
ifd_idx = 0
if overview_level is not None:
ifd_idx = min(overview_level, len(ifds) - 1)
ifd = ifds[ifd_idx]
# Select IFD for the requested overview level, skipping mask and page-only IFDs
ifd = select_overview_ifd(ifds, overview_level)

bps = ifd.bits_per_sample
if isinstance(bps, tuple):
Expand Down
Loading
Loading