diff --git a/xrspatial/geotiff/_compression.py b/xrspatial/geotiff/_compression.py index 319397ee..2f41e1a5 100644 --- a/xrspatial/geotiff/_compression.py +++ b/xrspatial/geotiff/_compression.py @@ -790,6 +790,143 @@ def jpeg2000_compress(data: bytes, width: int, height: int, os.unlink(tmp) +# -- LERC codec (via lerc) ---------------------------------------------------- + +LERC_AVAILABLE = False +try: + import lerc as _lerc + LERC_AVAILABLE = True +except ImportError: + _lerc = None + + +def lerc_decompress(data: bytes, width: int = 0, height: int = 0, + samples: int = 1) -> bytes: + """Decompress LERC data. Requires the ``lerc`` package.""" + if not LERC_AVAILABLE: + raise ImportError( + "lerc is required to read LERC-compressed TIFFs. " + "Install it with: pip install lerc") + result = _lerc.decode(data) + # lerc.decode returns (result_code, data_array, valid_mask, ...) + if result[0] != 0: + raise RuntimeError(f"LERC decode failed with error code {result[0]}") + arr = result[1] + return arr.tobytes() + + +def lerc_compress(data: bytes, width: int, height: int, + samples: int = 1, dtype: np.dtype = np.dtype('float32'), + max_z_error: float = 0.0) -> bytes: + """Compress raw pixel data with LERC. Requires the ``lerc`` package. + + Parameters + ---------- + max_z_error : float + Maximum encoding error per pixel. 0 = lossless. + """ + if not LERC_AVAILABLE: + raise ImportError( + "lerc is required to write LERC-compressed TIFFs. " + "Install it with: pip install lerc") + if samples == 1: + arr = np.frombuffer(data, dtype=dtype).reshape(height, width) + else: + arr = np.frombuffer(data, dtype=dtype).reshape(height, width, samples) + n_values_per_pixel = samples + # lerc.encode(npArr, nValuesPerPixel, bHasMask, npValidMask, + # maxZErr, nBytesHint) + # nBytesHint=1 triggers actual encoding (0 = compute size only) + result = _lerc.encode(arr, n_values_per_pixel, False, None, + max_z_error, 1) + if result[0] != 0: + raise RuntimeError(f"LERC encode failed with error code {result[0]}") + # result is (error_code, nBytesWritten, ctypes_buffer) + return bytes(result[2]) + + +# -- LZ4 codec (via python-lz4) ----------------------------------------------- + +LZ4_AVAILABLE = False +try: + import lz4.frame as _lz4 + LZ4_AVAILABLE = True +except ImportError: + _lz4 = None + + +def lz4_decompress(data: bytes) -> bytes: + """Decompress LZ4 frame data. Requires the ``lz4`` package.""" + if not LZ4_AVAILABLE: + raise ImportError( + "lz4 is required to read LZ4-compressed TIFFs. " + "Install it with: pip install lz4") + return _lz4.decompress(data) + + +def lz4_compress(data: bytes, level: int = 0) -> bytes: + """Compress data with LZ4 frame format. Requires the ``lz4`` package.""" + if not LZ4_AVAILABLE: + raise ImportError( + "lz4 is required to write LZ4-compressed TIFFs. " + "Install it with: pip install lz4") + return _lz4.compress(data, compression_level=level) + + +# -- LERC codec (via lerc) ---------------------------------------------------- + +LERC_AVAILABLE = False +try: + import lerc as _lerc + LERC_AVAILABLE = True +except ImportError: + _lerc = None + + +def lerc_decompress(data: bytes, width: int = 0, height: int = 0, + samples: int = 1) -> bytes: + """Decompress LERC data. Requires the ``lerc`` package.""" + if not LERC_AVAILABLE: + raise ImportError( + "lerc is required to read LERC-compressed TIFFs. " + "Install it with: pip install lerc") + result = _lerc.decode(data) + # lerc.decode returns (result_code, data_array, valid_mask, ...) + if result[0] != 0: + raise RuntimeError(f"LERC decode failed with error code {result[0]}") + arr = result[1] + return arr.tobytes() + + +def lerc_compress(data: bytes, width: int, height: int, + samples: int = 1, dtype: np.dtype = np.dtype('float32'), + max_z_error: float = 0.0) -> bytes: + """Compress raw pixel data with LERC. Requires the ``lerc`` package. + + Parameters + ---------- + max_z_error : float + Maximum encoding error per pixel. 0 = lossless. + """ + if not LERC_AVAILABLE: + raise ImportError( + "lerc is required to write LERC-compressed TIFFs. " + "Install it with: pip install lerc") + if samples == 1: + arr = np.frombuffer(data, dtype=dtype).reshape(height, width) + else: + arr = np.frombuffer(data, dtype=dtype).reshape(height, width, samples) + n_values_per_pixel = samples + # lerc.encode(npArr, nValuesPerPixel, bHasMask, npValidMask, + # maxZErr, nBytesHint) + # nBytesHint=1 triggers actual encoding (0 = compute size only) + result = _lerc.encode(arr, n_values_per_pixel, False, None, + max_z_error, 1) + if result[0] != 0: + raise RuntimeError(f"LERC encode failed with error code {result[0]}") + # result is (error_code, nBytesWritten, ctypes_buffer) + return bytes(result[2]) + # -- Dispatch helpers --------------------------------------------------------- @@ -800,7 +937,9 @@ def jpeg2000_compress(data: bytes, width: int, height: int, COMPRESSION_DEFLATE = 8 COMPRESSION_JPEG2000 = 34712 COMPRESSION_ZSTD = 50000 +COMPRESSION_LZ4 = 50004 COMPRESSION_PACKBITS = 32773 +COMPRESSION_LERC = 34887 COMPRESSION_ADOBE_DEFLATE = 32946 @@ -839,6 +978,11 @@ def decompress(data, compression: int, expected_size: int = 0, elif compression == COMPRESSION_JPEG2000: return np.frombuffer( jpeg2000_decompress(data, width, height, samples), dtype=np.uint8) + elif compression == COMPRESSION_LZ4: + return np.frombuffer(lz4_decompress(data), dtype=np.uint8) + elif compression == COMPRESSION_LERC: + return np.frombuffer( + lerc_decompress(data, width, height, samples), dtype=np.uint8) else: raise ValueError(f"Unsupported compression type: {compression}") @@ -869,9 +1013,13 @@ def compress(data: bytes, compression: int, level: int = 6) -> bytes: return packbits_compress(data) elif compression == COMPRESSION_ZSTD: return zstd_compress(data, level) + elif compression == COMPRESSION_LZ4: + return lz4_compress(data, level) elif compression == COMPRESSION_JPEG: raise ValueError("Use jpeg_compress() directly with width/height/samples") elif compression == COMPRESSION_JPEG2000: raise ValueError("Use jpeg2000_compress() directly with width/height/samples/dtype") + elif compression == COMPRESSION_LERC: + raise ValueError("Use lerc_compress() directly with width/height/samples/dtype") else: raise ValueError(f"Unsupported compression type: {compression}") diff --git a/xrspatial/geotiff/_gpu_decode.py b/xrspatial/geotiff/_gpu_decode.py index 0bb335fb..2b596ef0 100644 --- a/xrspatial/geotiff/_gpu_decode.py +++ b/xrspatial/geotiff/_gpu_decode.py @@ -1534,6 +1534,21 @@ def gpu_decode_tiles( decomp_offsets = np.arange(n_tiles, dtype=np.int64) * tile_bytes d_decomp_offsets = cupy.asarray(decomp_offsets) + elif compression == 34887: # LERC + from ._compression import lerc_decompress + raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8) + for i, tile in enumerate(compressed_tiles): + start = i * tile_bytes + chunk = np.frombuffer( + lerc_decompress(tile, tile_width, tile_height, samples), + dtype=np.uint8) + raw_host[start:start + min(len(chunk), tile_bytes)] = \ + chunk[:tile_bytes] if len(chunk) >= tile_bytes else \ + np.pad(chunk, (0, tile_bytes - len(chunk))) + d_decomp = cupy.asarray(raw_host) + decomp_offsets = np.arange(n_tiles, dtype=np.int64) * tile_bytes + d_decomp_offsets = cupy.asarray(decomp_offsets) + elif compression == 1: # Uncompressed raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8) for i, tile in enumerate(compressed_tiles): @@ -2273,6 +2288,19 @@ def gpu_compress_tiles(d_image, tile_width, tile_height, samples=samples, dtype=dtype)) return result + # LERC: CPU only, no GPU library + if compression == 34887: + from ._compression import lerc_compress + cpu_buf = d_tile_buf.get() + result = [] + for i in range(n_tiles): + start = i * tile_bytes + tile_data = bytes(cpu_buf[start:start + tile_bytes]) + result.append(lerc_compress( + tile_data, tile_width, tile_height, + samples=samples, dtype=dtype)) + return result + # Try nvCOMP batch compress result = _nvcomp_batch_compress(d_tiles, None, tile_bytes, compression, n_tiles) diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 32aae801..6768c6e6 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -10,6 +10,8 @@ COMPRESSION_DEFLATE, COMPRESSION_JPEG, COMPRESSION_JPEG2000, + COMPRESSION_LERC, + COMPRESSION_LZ4, COMPRESSION_LZW, COMPRESSION_NONE, COMPRESSION_PACKBITS, @@ -71,8 +73,10 @@ def _compression_tag(compression_name: str) -> int: 'jpeg': COMPRESSION_JPEG, 'packbits': COMPRESSION_PACKBITS, 'zstd': COMPRESSION_ZSTD, + 'lz4': COMPRESSION_LZ4, 'jpeg2000': COMPRESSION_JPEG2000, 'j2k': COMPRESSION_JPEG2000, + 'lerc': COMPRESSION_LERC, } name = compression_name.lower() if name not in _map: @@ -332,6 +336,10 @@ def _write_stripped(data: np.ndarray, compression: int, predictor: bool, from ._compression import jpeg2000_compress compressed = jpeg2000_compress( strip_data, width, strip_rows, samples=samples, dtype=dtype) + elif compression == COMPRESSION_LERC: + from ._compression import lerc_compress + compressed = lerc_compress( + strip_data, width, strip_rows, samples=samples, dtype=dtype) else: compressed = compress(strip_data, compression) @@ -387,6 +395,10 @@ def _prepare_tile(data, tr, tc, th, tw, height, width, samples, dtype, from ._compression import jpeg2000_compress return jpeg2000_compress( tile_data, tw, th, samples=samples, dtype=dtype) + if compression == COMPRESSION_LERC: + from ._compression import lerc_compress + return lerc_compress( + tile_data, tw, th, samples=samples, dtype=dtype) return compress(tile_data, compression) diff --git a/xrspatial/geotiff/tests/test_lerc.py b/xrspatial/geotiff/tests/test_lerc.py new file mode 100644 index 00000000..927c8fc5 --- /dev/null +++ b/xrspatial/geotiff/tests/test_lerc.py @@ -0,0 +1,160 @@ +"""Tests for LERC compression codec (#1052).""" +from __future__ import annotations + +import numpy as np +import pytest + +from xrspatial.geotiff._compression import ( + COMPRESSION_LERC, + LERC_AVAILABLE, + lerc_compress, + lerc_decompress, + decompress, +) + +pytestmark = pytest.mark.skipif( + not LERC_AVAILABLE, + reason="lerc not installed", +) + + +class TestLERCCodec: + """CPU LERC codec roundtrip.""" + + def test_roundtrip_float32_lossless(self): + arr = np.arange(64, dtype=np.float32).reshape(8, 8) + compressed = lerc_compress( + arr.tobytes(), 8, 8, samples=1, dtype=np.dtype('float32'), + max_z_error=0.0) + assert isinstance(compressed, bytes) + assert len(compressed) > 0 + + decompressed = lerc_decompress(compressed, 8, 8, 1) + result = np.frombuffer(decompressed, dtype=np.float32).reshape(8, 8) + np.testing.assert_array_equal(result, arr) + + def test_roundtrip_uint8_lossless(self): + arr = np.arange(64, dtype=np.uint8).reshape(8, 8) + compressed = lerc_compress( + arr.tobytes(), 8, 8, samples=1, dtype=np.dtype('uint8'), + max_z_error=0.0) + decompressed = lerc_decompress(compressed, 8, 8, 1) + result = np.frombuffer(decompressed, dtype=np.uint8).reshape(8, 8) + np.testing.assert_array_equal(result, arr) + + def test_roundtrip_uint16_lossless(self): + arr = np.arange(64, dtype=np.uint16).reshape(8, 8) + compressed = lerc_compress( + arr.tobytes(), 8, 8, samples=1, dtype=np.dtype('uint16'), + max_z_error=0.0) + decompressed = lerc_decompress(compressed, 8, 8, 1) + result = np.frombuffer(decompressed, dtype=np.uint16).reshape(8, 8) + np.testing.assert_array_equal(result, arr) + + def test_lossy_within_tolerance(self): + rng = np.random.RandomState(1052) + arr = rng.rand(32, 32).astype(np.float32) * 100 + max_err = 0.5 + compressed = lerc_compress( + arr.tobytes(), 32, 32, samples=1, dtype=np.dtype('float32'), + max_z_error=max_err) + decompressed = lerc_decompress(compressed, 32, 32, 1) + result = np.frombuffer(decompressed, dtype=np.float32).reshape(32, 32) + np.testing.assert_array_less(np.abs(result - arr), max_err + 1e-6) + + def test_lossy_smaller_than_lossless(self): + rng = np.random.RandomState(1052) + arr = rng.rand(64, 64).astype(np.float32) * 1000 + lossless = lerc_compress( + arr.tobytes(), 64, 64, samples=1, dtype=np.dtype('float32'), + max_z_error=0.0) + lossy = lerc_compress( + arr.tobytes(), 64, 64, samples=1, dtype=np.dtype('float32'), + max_z_error=1.0) + assert len(lossy) <= len(lossless) + + def test_dispatch_decompress(self): + arr = np.arange(16, dtype=np.float32).reshape(4, 4) + compressed = lerc_compress( + arr.tobytes(), 4, 4, samples=1, dtype=np.dtype('float32'), + max_z_error=0.0) + result = decompress(compressed, COMPRESSION_LERC, + width=4, height=4, samples=1) + # decompress returns uint8, reinterpret as float32 + result_f = np.frombuffer(result.tobytes(), dtype=np.float32).reshape(4, 4) + np.testing.assert_array_equal(result_f, arr) + + +class TestLERCWriteRoundTrip: + """Write-read roundtrip using the TIFF writer with LERC compression.""" + + def test_tiled_float32(self, tmp_path): + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + expected = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / 'lerc_1052_tiled_f32.tif') + write(expected, path, compression='lerc', tiled=True, tile_size=8) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_tiled_uint8(self, tmp_path): + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + expected = np.arange(64, dtype=np.uint8).reshape(8, 8) + path = str(tmp_path / 'lerc_1052_tiled_u8.tif') + write(expected, path, compression='lerc', tiled=True, tile_size=8) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_stripped_float32(self, tmp_path): + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + expected = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / 'lerc_1052_stripped.tif') + write(expected, path, compression='lerc', tiled=False) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_public_api_roundtrip(self, tmp_path): + import xarray as xr + from xrspatial.geotiff import open_geotiff, to_geotiff + + data = np.arange(64, dtype=np.float32).reshape(8, 8) + da = xr.DataArray(data, dims=['y', 'x'], + coords={'y': np.arange(8), 'x': np.arange(8)}, + attrs={'crs': 4326}) + path = str(tmp_path / 'lerc_1052_api.tif') + to_geotiff(da, path, compression='lerc') + + result = open_geotiff(path) + np.testing.assert_array_equal(result.values, data) + + +class TestLERCAvailability: + """Test availability flag and error handling (always runs).""" + pytestmark = [] + + def test_compression_constant(self): + assert COMPRESSION_LERC == 34887 + + def test_compression_tag_mapping(self): + from xrspatial.geotiff._writer import _compression_tag + assert _compression_tag('lerc') == 34887 + + def test_unavailable_raises_import_error(self): + import xrspatial.geotiff._compression as comp_mod + orig = comp_mod.LERC_AVAILABLE + comp_mod.LERC_AVAILABLE = False + try: + with pytest.raises(ImportError, match="lerc"): + comp_mod.lerc_decompress(b'\x00') + with pytest.raises(ImportError, match="lerc"): + comp_mod.lerc_compress(b'\x00', 1, 1) + finally: + comp_mod.LERC_AVAILABLE = orig diff --git a/xrspatial/geotiff/tests/test_lz4.py b/xrspatial/geotiff/tests/test_lz4.py new file mode 100644 index 00000000..a845d868 --- /dev/null +++ b/xrspatial/geotiff/tests/test_lz4.py @@ -0,0 +1,137 @@ +"""Tests for LZ4 compression codec (#1051).""" +from __future__ import annotations + +import numpy as np +import pytest + +from xrspatial.geotiff._compression import ( + COMPRESSION_LZ4, + LZ4_AVAILABLE, + lz4_compress, + lz4_decompress, + compress, + decompress, +) + +pytestmark = pytest.mark.skipif( + not LZ4_AVAILABLE, + reason="lz4 not installed", +) + + +class TestLZ4Codec: + """CPU LZ4 codec roundtrip via python-lz4.""" + + def test_roundtrip_simple(self): + data = b'hello world! ' * 100 + compressed = lz4_compress(data) + assert compressed != data + assert lz4_decompress(compressed) == data + + def test_roundtrip_binary(self): + data = bytes(range(256)) * 10 + compressed = lz4_compress(data) + assert lz4_decompress(compressed) == data + + def test_roundtrip_empty(self): + compressed = lz4_compress(b'') + assert lz4_decompress(compressed) == b'' + + def test_roundtrip_large(self): + rng = np.random.RandomState(1051) + data = bytes(rng.randint(0, 256, size=50000, dtype=np.uint8)) + compressed = lz4_compress(data) + assert lz4_decompress(compressed) == data + + def test_dispatch_roundtrip(self): + data = b'test data ' * 50 + compressed = compress(data, COMPRESSION_LZ4) + result = decompress(compressed, COMPRESSION_LZ4) + assert result.tobytes() == data + + +class TestLZ4WriteRoundTrip: + """Write-read roundtrip using the TIFF writer with LZ4 compression.""" + + def test_tiled_uint8(self, tmp_path): + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + expected = np.arange(64, dtype=np.uint8).reshape(8, 8) + path = str(tmp_path / 'lz4_1051_tiled_uint8.tif') + write(expected, path, compression='lz4', tiled=True, tile_size=8) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_tiled_float32(self, tmp_path): + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + expected = np.random.RandomState(1051).rand(16, 16).astype(np.float32) + path = str(tmp_path / 'lz4_1051_tiled_f32.tif') + write(expected, path, compression='lz4', tiled=True, tile_size=8) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_stripped_uint8(self, tmp_path): + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + expected = np.arange(64, dtype=np.uint8).reshape(8, 8) + path = str(tmp_path / 'lz4_1051_stripped.tif') + write(expected, path, compression='lz4', tiled=False) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_with_predictor(self, tmp_path): + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + expected = np.random.RandomState(1051).rand(16, 16).astype(np.float32) + path = str(tmp_path / 'lz4_1051_predictor.tif') + write(expected, path, compression='lz4', tiled=True, tile_size=8, + predictor=True) + + arr, geo = read_to_array(path) + np.testing.assert_array_equal(arr, expected) + + def test_public_api_roundtrip(self, tmp_path): + import xarray as xr + from xrspatial.geotiff import open_geotiff, to_geotiff + + data = np.arange(64, dtype=np.uint8).reshape(8, 8) + da = xr.DataArray(data, dims=['y', 'x'], + coords={'y': np.arange(8), 'x': np.arange(8)}, + attrs={'crs': 4326}) + path = str(tmp_path / 'lz4_1051_api.tif') + to_geotiff(da, path, compression='lz4') + + result = open_geotiff(path) + np.testing.assert_array_equal(result.values, data) + + +class TestLZ4Availability: + """Test availability flag and error handling (always runs).""" + pytestmark = [] + + def test_compression_constant(self): + assert COMPRESSION_LZ4 == 50004 + + def test_compression_tag_mapping(self): + from xrspatial.geotiff._writer import _compression_tag + assert _compression_tag('lz4') == 50004 + + def test_unavailable_raises_import_error(self): + import xrspatial.geotiff._compression as comp_mod + orig = comp_mod.LZ4_AVAILABLE + comp_mod.LZ4_AVAILABLE = False + try: + with pytest.raises(ImportError, match="lz4"): + comp_mod.lz4_decompress(b'\x00') + with pytest.raises(ImportError, match="lz4"): + comp_mod.lz4_compress(b'\x00') + finally: + comp_mod.LZ4_AVAILABLE = orig