diff --git a/xrspatial/geotiff/_gpu_decode.py b/xrspatial/geotiff/_gpu_decode.py index 4cb5fe75..2afe393d 100644 --- a/xrspatial/geotiff/_gpu_decode.py +++ b/xrspatial/geotiff/_gpu_decode.py @@ -56,6 +56,28 @@ def _check_gpu_memory(required_bytes: int, what: str = "tile buffer") -> None: "with cupy.get_default_memory_pool().free_all_blocks()." ) +def _xp_byteswap(arr): + """Return *arr* with each element's bytes physically reversed. + + Equivalent to ``numpy.ndarray.byteswap()``: the dtype is preserved + (still native-endian on output), and the bytes that make up each + element are flipped end-for-end. Works on both numpy and cupy. + + The earlier ``arr.view(arr.dtype.newbyteorder()).copy()`` shortcut + looked equivalent but produced an array whose dtype was tagged with + the opposite byte order (e.g. ``>u2`` instead of `` 1: # See gpu_decode_tiles for why BE samples need a final byteswap. - out = out.byteswap() + # cupy.ndarray has no .byteswap(), so use the dtype-view helper. + out = _xp_byteswap(out) return out @@ -1814,7 +1837,8 @@ def gpu_decode_tiles( # so big-endian samples that are wider than a byte must be swapped # back to native before the values mean anything. if byte_order == '>' and dtype.itemsize > 1: - out = out.byteswap() + # cupy.ndarray has no .byteswap(), so use the dtype-view helper. + out = _xp_byteswap(out) return out diff --git a/xrspatial/geotiff/tests/test_gpu_byteswap_1508.py b/xrspatial/geotiff/tests/test_gpu_byteswap_1508.py new file mode 100644 index 00000000..4cde5cc4 --- /dev/null +++ b/xrspatial/geotiff/tests/test_gpu_byteswap_1508.py @@ -0,0 +1,144 @@ +"""Regression test for issue #1508. + +Big-endian multi-byte TIFFs read via ``read_geotiff_gpu`` used to crash +inside the GPU decode pipeline with:: + + AttributeError: 'ndarray' object has no attribute 'byteswap' + +because ``cupy.ndarray`` (as of cupy 13.x) does not expose ``byteswap()``. +The dispatcher in ``read_geotiff_gpu`` caught the error and silently fell +back to CPU, so results stayed correct but the GPU fast path was lost. + +These tests confirm the GPU path now decodes BE multi-byte data directly +(result is a CuPy array, not a NumPy fallback) and matches the CPU read. +""" +from __future__ import annotations + +import importlib.util + +import numpy as np +import pytest + + +def _gpu_available() -> bool: + """True if cupy is importable and CUDA is initialised.""" + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_HAS_TIFFFILE = importlib.util.find_spec("tifffile") is not None +_gpu_only = pytest.mark.skipif( + not (_HAS_GPU and _HAS_TIFFFILE), + reason="cupy + CUDA + tifffile required", +) + + +@_gpu_only +@pytest.mark.parametrize("dtype", [np.uint16, np.int16, np.uint32, np.int32]) +def test_read_geotiff_gpu_big_endian_multibyte(tmp_path, dtype): + """GPU path decodes BE multi-byte tiles and stays on GPU.""" + import cupy + import tifffile + + from xrspatial.geotiff import read_geotiff_gpu + from xrspatial.geotiff._reader import read_to_array + + rng = np.random.RandomState(20260507) + info = np.iinfo(dtype) + arr = rng.randint( + info.min, info.max, size=(32, 48), dtype=np.int64 + ).astype(dtype) + + path = tmp_path / f"be_{np.dtype(dtype).name}.tif" + tifffile.imwrite( + str(path), arr, byteorder=">", compression="deflate", + tile=(16, 16), + ) + + cpu, _ = read_to_array(str(path)) + np.testing.assert_array_equal(cpu, arr) + assert cpu.dtype == np.dtype(dtype), ( + f"CPU baseline drifted from native dtype: got {cpu.dtype}" + ) + + gpu_da = read_geotiff_gpu(str(path)) + + # The GPU path was actually exercised (no silent CPU fallback masking + # a crash inside gpu_decode_tiles_from_file). + assert isinstance(gpu_da.data, cupy.ndarray), ( + "expected cupy-backed DataArray, got " + f"{type(gpu_da.data).__name__} -- the GPU path likely fell back " + "to CPU again" + ) + + # The fix must preserve the native dtype contract. An earlier version + # used ``arr.view(arr.dtype.newbyteorder()).copy()`` which produced an + # array tagged with non-native byteorder (``>u2`` instead of ``