diff --git a/benchmarks/RASTERIZER_BENCHMARKS.md b/benchmarks/RASTERIZER_BENCHMARKS.md new file mode 100644 index 00000000..eb4b3dbb --- /dev/null +++ b/benchmarks/RASTERIZER_BENCHMARKS.md @@ -0,0 +1,600 @@ +# Rasterizer Benchmarks + +Generated: 2026-03-14 04:21 UTC + +Compares xarray-spatial (numpy and cupy backends) against datashader, geocube, and rasterio across 10 geometry types, 3 feature counts (50/200/1000), and 5 output resolutions (100-4000 px wide). + +- **Polygon types** (circles, irregular, rectangles, stars, donuts, multipolygons): all 5 rasterizers compared +- **Line types** (lines, multilines): xrspatial, geocube, rasterio (datashader uses a different API for lines) +- **Point types** (points, multipoints): xrspatial, geocube, rasterio (datashader uses a different API for points) + +## Circles (64 vertices) + +### Timings + +| n | size | xrs-numpy | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 0.59 ms | 1.64 ms | 0.42 ms | 3.62 ms | 1.16 ms | +| 50 | 500x250 | 0.83 ms | 1.57 ms | 1.30 ms | 4.67 ms | 1.50 ms | +| 50 | 1000x500 | 1.97 ms | 1.73 ms | 3.83 ms | 7.10 ms | 3.10 ms | +| 50 | 2000x1000 | 7.35 ms | 2.07 ms | 13.7 ms | 18.0 ms | 10.1 ms | +| 50 | 4000x2000 | 49.8 ms | 2.86 ms | 65.5 ms | 94.2 ms | 59.9 ms | +| 200 | 100x50 | 0.98 ms | 1.95 ms | 0.60 ms | 7.18 ms | 4.15 ms | +| 200 | 500x250 | 1.47 ms | 2.61 ms | 3.79 ms | 7.69 ms | 4.49 ms | +| 200 | 1000x500 | 2.49 ms | 2.91 ms | 12.9 ms | 10.2 ms | 6.12 ms | +| 200 | 2000x1000 | 8.65 ms | 3.80 ms | 48.9 ms | 19.5 ms | 13.9 ms | +| 200 | 4000x2000 | 53.0 ms | 33.3 ms | 200 ms | 82.0 ms | 65.4 ms | +| 1000 | 100x50 | 4.61 ms | 9.58 ms | 1.93 ms | 32.9 ms | 21.5 ms | +| 1000 | 500x250 | 5.62 ms | 16.5 ms | 18.0 ms | 28.2 ms | 23.9 ms | +| 1000 | 1000x500 | 8.84 ms | 29.7 ms | 61.0 ms | 31.4 ms | 26.6 ms | +| 1000 | 2000x1000 | 17.9 ms | 54.3 ms | 230 ms | 45.5 ms | 37.8 ms | +| 1000 | 4000x2000 | 79.9 ms | 39.6 ms | 912 ms | 142 ms | 106 
ms | + +### Relative to xrs-numpy + +Values below 1.0 mean the competitor is faster than xrs-numpy. + +| n | size | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 2.76x | 0.70x | 6.10x | 1.94x | +| 50 | 500x250 | 1.89x | 1.56x | 5.63x | 1.80x | +| 50 | 1000x500 | 0.88x | 1.94x | 3.60x | 1.57x | +| 50 | 2000x1000 | 0.28x | 1.87x | 2.45x | 1.37x | +| 50 | 4000x2000 | 0.06x | 1.32x | 1.89x | 1.20x | +| 200 | 100x50 | 2.00x | 0.62x | 7.36x | 4.25x | +| 200 | 500x250 | 1.78x | 2.58x | 5.23x | 3.05x | +| 200 | 1000x500 | 1.17x | 5.19x | 4.08x | 2.45x | +| 200 | 2000x1000 | 0.44x | 5.65x | 2.25x | 1.60x | +| 200 | 4000x2000 | 0.63x | 3.78x | 1.55x | 1.24x | +| 1000 | 100x50 | 2.08x | 0.42x | 7.14x | 4.67x | +| 1000 | 500x250 | 2.94x | 3.21x | 5.02x | 4.25x | +| 1000 | 1000x500 | 3.36x | 6.90x | 3.55x | 3.00x | +| 1000 | 2000x1000 | 3.04x | 12.85x | 2.54x | 2.11x | +| 1000 | 4000x2000 | 0.49x | 11.41x | 1.78x | 1.32x | + +### Consistency + +| pair | IoU min | IoU max | RMSE range | +|:-----|--------:|--------:|-----------:| +| xrs-numpy vs datashader | 0.1463 | 0.9166 | 34.9 - 44.4 | +| xrs-numpy vs geocube | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| datashader vs geocube | 0.1463 | 0.9166 | 34.9 - 44.4 | +| datashader vs rasterio | 0.1463 | 0.9166 | 34.9 - 44.4 | +| datashader vs xrs-cupy | 0.1463 | 0.9166 | 34.9 - 44.4 | +| geocube vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| geocube vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| rasterio vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | + +## Irregular (128 vertices) + +### Timings + +| n | size | xrs-numpy | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 0.74 ms | 1.72 ms | 0.51 ms | 4.08 ms | 1.64 ms | +| 50 | 500x250 | 1.19 ms | 2.19 ms | 3.13 ms | 4.48 ms | 2.03 ms | +| 50 | 1000x500 | 1.97 ms | 2.27 ms | 10.1 ms | 
5.82 ms | 2.72 ms | +| 50 | 2000x1000 | 4.60 ms | 2.60 ms | 35.6 ms | 10.9 ms | 6.85 ms | +| 50 | 4000x2000 | 49.2 ms | 23.1 ms | 150 ms | 89.9 ms | 61.7 ms | +| 200 | 100x50 | 2.37 ms | 9.49 ms | 1.39 ms | 9.06 ms | 6.17 ms | +| 200 | 500x250 | 3.61 ms | 18.9 ms | 12.7 ms | 10.2 ms | 7.35 ms | +| 200 | 1000x500 | 5.83 ms | 18.5 ms | 43.8 ms | 13.8 ms | 9.82 ms | +| 200 | 2000x1000 | 13.4 ms | 20.8 ms | 158 ms | 27.7 ms | 18.0 ms | +| 200 | 4000x2000 | 68.5 ms | 47.1 ms | 640 ms | 111 ms | 77.6 ms | +| 1000 | 100x50 | 9.53 ms | 15.5 ms | 5.77 ms | 37.7 ms | 34.2 ms | +| 1000 | 500x250 | 18.2 ms | 51.1 ms | 65.3 ms | 42.5 ms | 38.0 ms | +| 1000 | 1000x500 | 30.4 ms | 54.3 ms | 223 ms | 52.0 ms | 47.4 ms | +| 1000 | 2000x1000 | 60.7 ms | 45.3 ms | 813 ms | 79.6 ms | 63.5 ms | +| 1000 | 4000x2000 | 141 ms | 55.6 ms | 3095 ms | 169 ms | 140 ms | + +### Relative to xrs-numpy + +Values below 1.0 mean the competitor is faster than xrs-numpy. + +| n | size | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 2.32x | 0.69x | 5.50x | 2.22x | +| 50 | 500x250 | 1.84x | 2.63x | 3.77x | 1.71x | +| 50 | 1000x500 | 1.15x | 5.12x | 2.95x | 1.38x | +| 50 | 2000x1000 | 0.57x | 7.73x | 2.36x | 1.49x | +| 50 | 4000x2000 | 0.47x | 3.05x | 1.83x | 1.25x | +| 200 | 100x50 | 4.01x | 0.59x | 3.83x | 2.61x | +| 200 | 500x250 | 5.23x | 3.52x | 2.83x | 2.03x | +| 200 | 1000x500 | 3.17x | 7.51x | 2.37x | 1.69x | +| 200 | 2000x1000 | 1.55x | 11.84x | 2.07x | 1.35x | +| 200 | 4000x2000 | 0.69x | 9.34x | 1.62x | 1.13x | +| 1000 | 100x50 | 1.63x | 0.61x | 3.96x | 3.59x | +| 1000 | 500x250 | 2.80x | 3.58x | 2.33x | 2.08x | +| 1000 | 1000x500 | 1.78x | 7.33x | 1.71x | 1.56x | +| 1000 | 2000x1000 | 0.75x | 13.40x | 1.31x | 1.05x | +| 1000 | 4000x2000 | 0.39x | 21.95x | 1.20x | 1.00x | + +### Consistency + +| pair | IoU min | IoU max | RMSE range | +|:-----|--------:|--------:|-----------:| +| xrs-numpy vs datashader | 0.2268 | 0.9321 | 34.7 - 42.3 | +| 
xrs-numpy vs geocube | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| datashader vs geocube | 0.2268 | 0.9321 | 34.7 - 42.3 | +| datashader vs rasterio | 0.2268 | 0.9321 | 34.7 - 42.3 | +| datashader vs xrs-cupy | 0.2268 | 0.9321 | 34.7 - 42.3 | +| geocube vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| geocube vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| rasterio vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | + +## Rectangles + +### Timings + +| n | size | xrs-numpy | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 0.41 ms | 1.34 ms | 0.27 ms | 3.22 ms | 0.73 ms | +| 50 | 500x250 | 0.57 ms | 1.56 ms | 0.39 ms | 3.45 ms | 0.88 ms | +| 50 | 1000x500 | 0.99 ms | 1.40 ms | 0.68 ms | 4.19 ms | 1.34 ms | +| 50 | 2000x1000 | 2.97 ms | 1.78 ms | 2.24 ms | 9.05 ms | 5.04 ms | +| 50 | 4000x2000 | 46.5 ms | 12.5 ms | 25.7 ms | 91.5 ms | 59.2 ms | +| 200 | 100x50 | 0.70 ms | 3.50 ms | 0.41 ms | 5.16 ms | 2.56 ms | +| 200 | 500x250 | 1.02 ms | 5.62 ms | 0.67 ms | 5.38 ms | 2.71 ms | +| 200 | 1000x500 | 1.76 ms | 8.05 ms | 1.66 ms | 6.38 ms | 3.33 ms | +| 200 | 2000x1000 | 6.02 ms | 21.7 ms | 6.15 ms | 17.5 ms | 8.44 ms | +| 200 | 4000x2000 | 54.0 ms | 47.0 ms | 39.4 ms | 98.0 ms | 64.3 ms | +| 1000 | 100x50 | 1.38 ms | 3.75 ms | 0.58 ms | 14.6 ms | 11.2 ms | +| 1000 | 500x250 | 3.34 ms | 7.17 ms | 2.19 ms | 15.4 ms | 18.6 ms | +| 1000 | 1000x500 | 12.8 ms | 8.03 ms | 6.47 ms | 17.3 ms | 13.4 ms | +| 1000 | 2000x1000 | 15.7 ms | 28.9 ms | 24.2 ms | 25.4 ms | 21.3 ms | +| 1000 | 4000x2000 | 79.5 ms | 36.1 ms | 107 ms | 116 ms | 90.2 ms | + +### Relative to xrs-numpy + +Values below 1.0 mean the competitor is faster than xrs-numpy. 
+ +| n | size | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 3.24x | 0.66x | 7.78x | 1.77x | +| 50 | 500x250 | 2.76x | 0.69x | 6.08x | 1.56x | +| 50 | 1000x500 | 1.42x | 0.69x | 4.25x | 1.36x | +| 50 | 2000x1000 | 0.60x | 0.75x | 3.05x | 1.70x | +| 50 | 4000x2000 | 0.27x | 0.55x | 1.97x | 1.27x | +| 200 | 100x50 | 5.00x | 0.59x | 7.37x | 3.66x | +| 200 | 500x250 | 5.49x | 0.65x | 5.25x | 2.65x | +| 200 | 1000x500 | 4.58x | 0.94x | 3.62x | 1.89x | +| 200 | 2000x1000 | 3.60x | 1.02x | 2.91x | 1.40x | +| 200 | 4000x2000 | 0.87x | 0.73x | 1.82x | 1.19x | +| 1000 | 100x50 | 2.72x | 0.42x | 10.55x | 8.13x | +| 1000 | 500x250 | 2.14x | 0.65x | 4.62x | 5.57x | +| 1000 | 1000x500 | 0.63x | 0.51x | 1.35x | 1.05x | +| 1000 | 2000x1000 | 1.84x | 1.53x | 1.61x | 1.35x | +| 1000 | 4000x2000 | 0.45x | 1.34x | 1.46x | 1.13x | + +### Consistency + +| pair | IoU min | IoU max | RMSE range | +|:-----|--------:|--------:|-----------:| +| xrs-numpy vs datashader | 0.1280 | 0.9433 | 32.5 - 42.9 | +| xrs-numpy vs geocube | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| datashader vs geocube | 0.1280 | 0.9433 | 32.5 - 42.9 | +| datashader vs rasterio | 0.1280 | 0.9433 | 32.5 - 42.9 | +| datashader vs xrs-cupy | 0.1280 | 0.9433 | 32.5 - 42.9 | +| geocube vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| geocube vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| rasterio vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | + +## Stars (5-point, concave) + +### Timings + +| n | size | xrs-numpy | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 0.45 ms | 1.39 ms | 0.29 ms | 3.07 ms | 0.74 ms | +| 50 | 500x250 | 0.63 ms | 1.42 ms | 0.66 ms | 3.33 ms | 0.92 ms | +| 50 | 1000x500 | 1.18 ms | 1.59 ms | 1.52 ms | 4.52 ms | 1.43 ms | +| 50 | 2000x1000 | 3.01 ms | 1.71 ms | 4.95 ms | 11.9 ms | 6.22 ms | +| 50 | 
4000x2000 | 50.7 ms | 9.68 ms | 34.9 ms | 94.3 ms | 59.8 ms | +| 200 | 100x50 | 0.78 ms | 6.69 ms | 0.47 ms | 5.33 ms | 2.58 ms | +| 200 | 500x250 | 1.49 ms | 7.82 ms | 1.99 ms | 5.60 ms | 3.15 ms | +| 200 | 1000x500 | 2.28 ms | 8.98 ms | 5.49 ms | 6.95 ms | 3.70 ms | +| 200 | 2000x1000 | 11.0 ms | 16.5 ms | 18.6 ms | 20.4 ms | 9.60 ms | +| 200 | 4000x2000 | 54.5 ms | 35.5 ms | 85.6 ms | 119 ms | 66.0 ms | +| 1000 | 100x50 | 1.89 ms | 13.3 ms | 1.16 ms | 15.9 ms | 24.4 ms | +| 1000 | 500x250 | 5.40 ms | 28.0 ms | 8.12 ms | 17.0 ms | 12.6 ms | +| 1000 | 1000x500 | 10.0 ms | 21.6 ms | 25.2 ms | 20.6 ms | 15.3 ms | +| 1000 | 2000x1000 | 23.2 ms | 20.7 ms | 84.7 ms | 31.1 ms | 24.1 ms | +| 1000 | 4000x2000 | 93.4 ms | 27.9 ms | 329 ms | 124 ms | 84.0 ms | + +### Relative to xrs-numpy + +Values below 1.0 mean the competitor is faster than xrs-numpy. + +| n | size | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 3.10x | 0.64x | 6.85x | 1.65x | +| 50 | 500x250 | 2.26x | 1.06x | 5.30x | 1.46x | +| 50 | 1000x500 | 1.35x | 1.29x | 3.84x | 1.22x | +| 50 | 2000x1000 | 0.57x | 1.65x | 3.95x | 2.07x | +| 50 | 4000x2000 | 0.19x | 0.69x | 1.86x | 1.18x | +| 200 | 100x50 | 8.52x | 0.60x | 6.79x | 3.29x | +| 200 | 500x250 | 5.26x | 1.34x | 3.77x | 2.12x | +| 200 | 1000x500 | 3.95x | 2.41x | 3.05x | 1.62x | +| 200 | 2000x1000 | 1.50x | 1.69x | 1.85x | 0.87x | +| 200 | 4000x2000 | 0.65x | 1.57x | 2.18x | 1.21x | +| 1000 | 100x50 | 7.04x | 0.61x | 8.38x | 12.88x | +| 1000 | 500x250 | 5.19x | 1.50x | 3.16x | 2.33x | +| 1000 | 1000x500 | 2.15x | 2.51x | 2.06x | 1.53x | +| 1000 | 2000x1000 | 0.89x | 3.64x | 1.34x | 1.03x | +| 1000 | 4000x2000 | 0.30x | 3.52x | 1.33x | 0.90x | + +### Consistency + +| pair | IoU min | IoU max | RMSE range | +|:-----|--------:|--------:|-----------:| +| xrs-numpy vs datashader | 0.1084 | 0.9001 | 25.3 - 41.2 | +| xrs-numpy vs geocube | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs rasterio | 1.0000 | 1.0000 | 
0.0 - 0.0 | +| xrs-numpy vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| datashader vs geocube | 0.1084 | 0.9001 | 25.3 - 41.2 | +| datashader vs rasterio | 0.1084 | 0.9001 | 25.3 - 41.2 | +| datashader vs xrs-cupy | 0.1084 | 0.9001 | 25.3 - 41.2 | +| geocube vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| geocube vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| rasterio vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | + +## Donuts (polygon + hole) + +### Timings + +| n | size | xrs-numpy | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 0.70 ms | 1.65 ms | 0.52 ms | 4.51 ms | 1.94 ms | +| 50 | 500x250 | 1.04 ms | 1.98 ms | 3.77 ms | 4.99 ms | 2.32 ms | +| 50 | 1000x500 | 1.74 ms | 2.10 ms | 13.7 ms | 6.27 ms | 3.08 ms | +| 50 | 2000x1000 | 3.97 ms | 2.72 ms | 51.3 ms | 11.7 ms | 8.71 ms | +| 50 | 4000x2000 | 50.4 ms | 36.3 ms | 209 ms | 91.9 ms | 62.8 ms | +| 200 | 100x50 | 1.62 ms | 2.96 ms | 1.31 ms | 9.92 ms | 7.79 ms | +| 200 | 500x250 | 2.64 ms | 3.96 ms | 14.2 ms | 11.7 ms | 8.29 ms | +| 200 | 1000x500 | 3.82 ms | 4.70 ms | 50.6 ms | 13.9 ms | 11.3 ms | +| 200 | 2000x1000 | 10.0 ms | 21.7 ms | 192 ms | 27.8 ms | 18.1 ms | +| 200 | 4000x2000 | 60.9 ms | 36.4 ms | 762 ms | 117 ms | 78.3 ms | +| 1000 | 100x50 | 7.91 ms | 11.7 ms | 5.38 ms | 44.9 ms | 40.6 ms | +| 1000 | 500x250 | 13.2 ms | 34.4 ms | 72.9 ms | 48.9 ms | 43.3 ms | +| 1000 | 1000x500 | 19.6 ms | 47.2 ms | 261 ms | 56.1 ms | 53.3 ms | +| 1000 | 2000x1000 | 39.7 ms | 39.1 ms | 991 ms | 78.9 ms | 68.6 ms | +| 1000 | 4000x2000 | 97.8 ms | 49.1 ms | 3861 ms | 178 ms | 149 ms | + +### Relative to xrs-numpy + +Values below 1.0 mean the competitor is faster than xrs-numpy. 
+ +| n | size | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 2.34x | 0.74x | 6.40x | 2.76x | +| 50 | 500x250 | 1.91x | 3.64x | 4.82x | 2.24x | +| 50 | 1000x500 | 1.21x | 7.89x | 3.60x | 1.77x | +| 50 | 2000x1000 | 0.69x | 12.93x | 2.96x | 2.20x | +| 50 | 4000x2000 | 0.72x | 4.14x | 1.82x | 1.24x | +| 200 | 100x50 | 1.82x | 0.80x | 6.10x | 4.79x | +| 200 | 500x250 | 1.50x | 5.36x | 4.43x | 3.14x | +| 200 | 1000x500 | 1.23x | 13.24x | 3.63x | 2.96x | +| 200 | 2000x1000 | 2.17x | 19.18x | 2.78x | 1.81x | +| 200 | 4000x2000 | 0.60x | 12.52x | 1.92x | 1.29x | +| 1000 | 100x50 | 1.48x | 0.68x | 5.68x | 5.13x | +| 1000 | 500x250 | 2.61x | 5.54x | 3.71x | 3.29x | +| 1000 | 1000x500 | 2.41x | 13.33x | 2.86x | 2.72x | +| 1000 | 2000x1000 | 0.99x | 25.00x | 1.99x | 1.73x | +| 1000 | 4000x2000 | 0.50x | 39.47x | 1.82x | 1.52x | + +### Consistency + +| pair | IoU min | IoU max | RMSE range | +|:-----|--------:|--------:|-----------:| +| xrs-numpy vs datashader | 0.2476 | 0.9283 | 39.4 - 43.9 | +| xrs-numpy vs geocube | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| datashader vs geocube | 0.2476 | 0.9283 | 39.4 - 43.9 | +| datashader vs rasterio | 0.2476 | 0.9283 | 39.4 - 43.9 | +| datashader vs xrs-cupy | 0.2476 | 0.9283 | 39.4 - 43.9 | +| geocube vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| geocube vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| rasterio vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | + +## MultiPolygons (2-4 parts) + +### Timings + +| n | size | xrs-numpy | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 0.64 ms | 1.61 ms | 0.44 ms | 4.66 ms | 2.19 ms | +| 50 | 500x250 | 0.95 ms | 1.84 ms | 1.00 ms | 5.82 ms | 2.49 ms | +| 50 | 1000x500 | 1.44 ms | 1.87 ms | 2.36 ms | 6.22 ms | 3.07 ms | +| 50 | 2000x1000 | 4.10 ms | 4.39 ms | 7.39 ms | 12.2 ms | 7.27 ms | +| 50 
| 4000x2000 | 46.9 ms | 11.3 ms | 43.1 ms | 95.3 ms | 61.7 ms | +| 200 | 100x50 | 1.38 ms | 5.43 ms | 0.69 ms | 11.1 ms | 8.37 ms | +| 200 | 500x250 | 2.14 ms | 9.03 ms | 2.50 ms | 12.6 ms | 17.8 ms | +| 200 | 1000x500 | 3.37 ms | 12.9 ms | 7.18 ms | 13.2 ms | 10.5 ms | +| 200 | 2000x1000 | 7.24 ms | 20.7 ms | 22.9 ms | 22.1 ms | 22.2 ms | +| 200 | 4000x2000 | 29.0 ms | 31.9 ms | 101 ms | 85.9 ms | 61.9 ms | +| 1000 | 100x50 | 6.15 ms | 16.6 ms | 2.29 ms | 49.7 ms | 44.7 ms | +| 1000 | 500x250 | 11.9 ms | 34.9 ms | 11.6 ms | 59.3 ms | 47.0 ms | +| 1000 | 1000x500 | 14.2 ms | 33.5 ms | 33.7 ms | 57.2 ms | 57.4 ms | +| 1000 | 2000x1000 | 25.5 ms | 40.2 ms | 112 ms | 64.7 ms | 60.3 ms | +| 1000 | 4000x2000 | 64.4 ms | 53.5 ms | 428 ms | 143 ms | 120 ms | + +### Relative to xrs-numpy + +Values below 1.0 mean the competitor is faster than xrs-numpy. + +| n | size | xrs-cupy | datashader | geocube | rasterio | +|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 2.52x | 0.70x | 7.28x | 3.43x | +| 50 | 500x250 | 1.93x | 1.05x | 6.12x | 2.62x | +| 50 | 1000x500 | 1.30x | 1.63x | 4.31x | 2.13x | +| 50 | 2000x1000 | 1.07x | 1.80x | 2.99x | 1.77x | +| 50 | 4000x2000 | 0.24x | 0.92x | 2.03x | 1.31x | +| 200 | 100x50 | 3.94x | 0.50x | 8.07x | 6.08x | +| 200 | 500x250 | 4.22x | 1.17x | 5.88x | 8.31x | +| 200 | 1000x500 | 3.84x | 2.13x | 3.93x | 3.12x | +| 200 | 2000x1000 | 2.86x | 3.16x | 3.05x | 3.06x | +| 200 | 4000x2000 | 1.10x | 3.47x | 2.96x | 2.13x | +| 1000 | 100x50 | 2.69x | 0.37x | 8.08x | 7.26x | +| 1000 | 500x250 | 2.92x | 0.98x | 4.97x | 3.93x | +| 1000 | 1000x500 | 2.35x | 2.37x | 4.02x | 4.03x | +| 1000 | 2000x1000 | 1.57x | 4.38x | 2.54x | 2.36x | +| 1000 | 4000x2000 | 0.83x | 6.64x | 2.22x | 1.87x | + +### Consistency + +| pair | IoU min | IoU max | RMSE range | +|:-----|--------:|--------:|-----------:| +| xrs-numpy vs datashader | 0.1543 | 0.9513 | 34.9 - 40.9 | +| xrs-numpy vs geocube | 0.9961 | 1.0000 | 0.0 - 1.2 | +| xrs-numpy vs rasterio | 0.9961 | 1.0000 | 
0.0 - 1.2 | +| xrs-numpy vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 0.0 | +| datashader vs geocube | 0.1547 | 0.9518 | 35.0 - 40.9 | +| datashader vs rasterio | 0.1547 | 0.9518 | 35.0 - 40.9 | +| datashader vs xrs-cupy | 0.1543 | 0.9513 | 34.9 - 40.9 | +| geocube vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| geocube vs xrs-cupy | 0.9961 | 1.0000 | 0.0 - 1.2 | +| rasterio vs xrs-cupy | 0.9961 | 1.0000 | 0.0 - 1.2 | + +## LineStrings + +### Timings + +| n | size | xrs-numpy | xrs-cupy | geocube | rasterio | +|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 0.53 ms | 1.51 ms | 2.94 ms | 0.33 ms | +| 50 | 500x250 | 0.56 ms | 1.43 ms | 3.05 ms | 0.45 ms | +| 50 | 1000x500 | 0.81 ms | 1.49 ms | 3.95 ms | 0.92 ms | +| 50 | 2000x1000 | 3.97 ms | 1.59 ms | 14.2 ms | 5.24 ms | +| 50 | 4000x2000 | 45.7 ms | 7.66 ms | 91.1 ms | 59.2 ms | +| 200 | 100x50 | 0.57 ms | 1.67 ms | 3.46 ms | 1.00 ms | +| 200 | 500x250 | 0.71 ms | 1.95 ms | 3.65 ms | 1.11 ms | +| 200 | 1000x500 | 1.02 ms | 2.62 ms | 4.60 ms | 1.67 ms | +| 200 | 2000x1000 | 2.65 ms | 3.68 ms | 14.9 ms | 6.45 ms | +| 200 | 4000x2000 | 46.8 ms | 12.2 ms | 93.8 ms | 61.1 ms | +| 1000 | 100x50 | 1.27 ms | 2.61 ms | 8.00 ms | 4.26 ms | +| 1000 | 500x250 | 1.46 ms | 2.67 ms | 17.4 ms | 4.42 ms | +| 1000 | 1000x500 | 1.90 ms | 3.67 ms | 9.71 ms | 12.9 ms | +| 1000 | 2000x1000 | 4.13 ms | 6.53 ms | 16.3 ms | 11.1 ms | +| 1000 | 4000x2000 | 48.5 ms | 12.8 ms | 89.5 ms | 64.5 ms | + +### Relative to xrs-numpy + +Values below 1.0 mean the competitor is faster than xrs-numpy. 
+ +| n | size | xrs-cupy | geocube | rasterio | +|---:|---:|---:|---:|---:| +| 50 | 100x50 | 2.83x | 5.52x | 0.62x | +| 50 | 500x250 | 2.54x | 5.41x | 0.79x | +| 50 | 1000x500 | 1.84x | 4.88x | 1.13x | +| 50 | 2000x1000 | 0.40x | 3.58x | 1.32x | +| 50 | 4000x2000 | 0.17x | 1.99x | 1.29x | +| 200 | 100x50 | 2.93x | 6.08x | 1.75x | +| 200 | 500x250 | 2.73x | 5.10x | 1.55x | +| 200 | 1000x500 | 2.56x | 4.51x | 1.64x | +| 200 | 2000x1000 | 1.39x | 5.62x | 2.43x | +| 200 | 4000x2000 | 0.26x | 2.00x | 1.31x | +| 1000 | 100x50 | 2.06x | 6.32x | 3.37x | +| 1000 | 500x250 | 1.83x | 11.92x | 3.03x | +| 1000 | 1000x500 | 1.93x | 5.11x | 6.76x | +| 1000 | 2000x1000 | 1.58x | 3.96x | 2.69x | +| 1000 | 4000x2000 | 0.26x | 1.85x | 1.33x | + +### Consistency + +| pair | IoU min | IoU max | RMSE range | +|:-----|--------:|--------:|-----------:| +| xrs-numpy vs geocube | 0.3514 | 0.9255 | 2.6 - 23.7 | +| xrs-numpy vs rasterio | 0.3514 | 0.9255 | 2.6 - 23.7 | +| xrs-numpy vs xrs-cupy | 1.0000 | 1.0000 | 1.4 - 32.4 | +| geocube vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| geocube vs xrs-cupy | 0.3514 | 0.9255 | 2.7 - 34.7 | +| rasterio vs xrs-cupy | 0.3514 | 0.9255 | 2.7 - 34.7 | + +## MultiLineStrings + +### Timings + +| n | size | xrs-numpy | xrs-cupy | geocube | rasterio | +|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 0.49 ms | 2.75 ms | 3.34 ms | 0.90 ms | +| 50 | 500x250 | 0.64 ms | 1.80 ms | 3.50 ms | 1.06 ms | +| 50 | 1000x500 | 0.90 ms | 1.79 ms | 4.30 ms | 1.52 ms | +| 50 | 2000x1000 | 3.00 ms | 4.34 ms | 15.4 ms | 5.90 ms | +| 50 | 4000x2000 | 45.1 ms | 11.3 ms | 86.7 ms | 58.0 ms | +| 200 | 100x50 | 0.70 ms | 1.82 ms | 6.10 ms | 3.26 ms | +| 200 | 500x250 | 0.84 ms | 2.51 ms | 6.08 ms | 3.58 ms | +| 200 | 1000x500 | 1.20 ms | 2.14 ms | 7.59 ms | 4.21 ms | +| 200 | 2000x1000 | 3.04 ms | 4.89 ms | 18.1 ms | 8.61 ms | +| 200 | 4000x2000 | 45.8 ms | 12.9 ms | 97.8 ms | 80.2 ms | +| 1000 | 100x50 | 2.26 ms | 3.04 ms | 26.1 ms | 15.2 ms | +| 1000 | 500x250 | 2.09 ms | 3.27 
ms | 19.9 ms | 15.6 ms | +| 1000 | 1000x500 | 10.1 ms | 4.33 ms | 32.0 ms | 17.5 ms | +| 1000 | 2000x1000 | 6.61 ms | 5.32 ms | 27.9 ms | 23.3 ms | +| 1000 | 4000x2000 | 61.9 ms | 13.9 ms | 142 ms | 82.3 ms | + +### Relative to xrs-numpy + +Values below 1.0 mean the competitor is faster than xrs-numpy. + +| n | size | xrs-cupy | geocube | rasterio | +|---:|---:|---:|---:|---:| +| 50 | 100x50 | 5.64x | 6.84x | 1.85x | +| 50 | 500x250 | 2.81x | 5.47x | 1.66x | +| 50 | 1000x500 | 1.99x | 4.78x | 1.69x | +| 50 | 2000x1000 | 1.45x | 5.14x | 1.97x | +| 50 | 4000x2000 | 0.25x | 1.92x | 1.29x | +| 200 | 100x50 | 2.60x | 8.68x | 4.64x | +| 200 | 500x250 | 2.98x | 7.23x | 4.25x | +| 200 | 1000x500 | 1.79x | 6.34x | 3.52x | +| 200 | 2000x1000 | 1.61x | 5.97x | 2.84x | +| 200 | 4000x2000 | 0.28x | 2.13x | 1.75x | +| 1000 | 100x50 | 1.34x | 11.53x | 6.70x | +| 1000 | 500x250 | 1.57x | 9.54x | 7.49x | +| 1000 | 1000x500 | 0.43x | 3.17x | 1.73x | +| 1000 | 2000x1000 | 0.81x | 4.22x | 3.53x | +| 1000 | 4000x2000 | 0.22x | 2.29x | 1.33x | + +### Consistency + +| pair | IoU min | IoU max | RMSE range | +|:-----|--------:|--------:|-----------:| +| xrs-numpy vs geocube | 0.3541 | 0.9586 | 3.4 - 23.9 | +| xrs-numpy vs rasterio | 0.3541 | 0.9586 | 3.4 - 23.9 | +| xrs-numpy vs xrs-cupy | 1.0000 | 1.0000 | 2.0 - 33.5 | +| geocube vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| geocube vs xrs-cupy | 0.3541 | 0.9586 | 3.8 - 35.1 | +| rasterio vs xrs-cupy | 0.3541 | 0.9586 | 3.8 - 35.1 | + +## Points + +### Timings + +| n | size | xrs-numpy | xrs-cupy | geocube | rasterio | +|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 0.51 ms | 1.62 ms | 2.69 ms | 0.28 ms | +| 50 | 500x250 | 0.43 ms | 1.70 ms | 2.90 ms | 0.40 ms | +| 50 | 1000x500 | 0.75 ms | 2.41 ms | 3.91 ms | 0.85 ms | +| 50 | 2000x1000 | 2.87 ms | 4.29 ms | 7.83 ms | 3.72 ms | +| 50 | 4000x2000 | 45.4 ms | 12.9 ms | 85.2 ms | 55.5 ms | +| 200 | 100x50 | 0.39 ms | 1.43 ms | 3.15 ms | 0.66 ms | +| 200 | 500x250 | 0.49 ms | 1.99 ms | 
3.74 ms | 0.78 ms | +| 200 | 1000x500 | 0.84 ms | 1.63 ms | 4.47 ms | 1.34 ms | +| 200 | 2000x1000 | 3.03 ms | 3.40 ms | 9.55 ms | 4.47 ms | +| 200 | 4000x2000 | 45.2 ms | 11.3 ms | 91.2 ms | 61.6 ms | +| 1000 | 100x50 | 0.66 ms | 1.55 ms | 5.96 ms | 2.90 ms | +| 1000 | 500x250 | 0.75 ms | 1.58 ms | 5.95 ms | 3.25 ms | +| 1000 | 1000x500 | 1.06 ms | 2.00 ms | 6.95 ms | 3.76 ms | +| 1000 | 2000x1000 | 3.30 ms | 4.52 ms | 12.5 ms | 8.73 ms | +| 1000 | 4000x2000 | 45.3 ms | 12.9 ms | 93.8 ms | 63.3 ms | + +### Relative to xrs-numpy + +Values below 1.0 mean the competitor is faster than xrs-numpy. + +| n | size | xrs-cupy | geocube | rasterio | +|---:|---:|---:|---:|---:| +| 50 | 100x50 | 3.19x | 5.30x | 0.56x | +| 50 | 500x250 | 3.94x | 6.72x | 0.92x | +| 50 | 1000x500 | 3.20x | 5.18x | 1.12x | +| 50 | 2000x1000 | 1.49x | 2.73x | 1.29x | +| 50 | 4000x2000 | 0.28x | 1.87x | 1.22x | +| 200 | 100x50 | 3.70x | 8.17x | 1.71x | +| 200 | 500x250 | 4.09x | 7.70x | 1.62x | +| 200 | 1000x500 | 1.94x | 5.33x | 1.60x | +| 200 | 2000x1000 | 1.12x | 3.15x | 1.48x | +| 200 | 4000x2000 | 0.25x | 2.02x | 1.36x | +| 1000 | 100x50 | 2.34x | 8.98x | 4.38x | +| 1000 | 500x250 | 2.11x | 7.96x | 4.35x | +| 1000 | 1000x500 | 1.89x | 6.57x | 3.55x | +| 1000 | 2000x1000 | 1.37x | 3.78x | 2.64x | +| 1000 | 4000x2000 | 0.28x | 2.07x | 1.40x | + +### Consistency + +| pair | IoU min | IoU max | RMSE range | +|:-----|--------:|--------:|-----------:| +| xrs-numpy vs geocube | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 11.3 | +| geocube vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| geocube vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 11.3 | +| rasterio vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 11.3 | + +## MultiPoints (3-8 pts) + +### Timings + +| n | size | xrs-numpy | xrs-cupy | geocube | rasterio | +|---:|---:|---:|---:|---:|---:| +| 50 | 100x50 | 0.35 ms | 1.24 ms | 3.57 ms | 1.14 ms | +| 50 | 500x250 | 0.48 ms 
| 1.65 ms | 3.84 ms | 1.26 ms | +| 50 | 1000x500 | 0.74 ms | 1.67 ms | 4.60 ms | 1.69 ms | +| 50 | 2000x1000 | 2.52 ms | 4.16 ms | 13.5 ms | 5.92 ms | +| 50 | 4000x2000 | 44.6 ms | 9.95 ms | 93.4 ms | 60.2 ms | +| 200 | 100x50 | 0.45 ms | 3.51 ms | 7.00 ms | 4.67 ms | +| 200 | 500x250 | 0.59 ms | 1.37 ms | 7.36 ms | 4.74 ms | +| 200 | 1000x500 | 0.86 ms | 1.54 ms | 8.31 ms | 5.10 ms | +| 200 | 2000x1000 | 2.73 ms | 3.52 ms | 18.4 ms | 9.61 ms | +| 200 | 4000x2000 | 44.4 ms | 10.1 ms | 98.8 ms | 63.7 ms | +| 1000 | 100x50 | 1.00 ms | 2.50 ms | 24.7 ms | 21.0 ms | +| 1000 | 500x250 | 1.12 ms | 2.27 ms | 24.8 ms | 21.6 ms | +| 1000 | 1000x500 | 7.26 ms | 9.20 ms | 26.4 ms | 22.7 ms | +| 1000 | 2000x1000 | 3.44 ms | 6.28 ms | 36.1 ms | 28.6 ms | +| 1000 | 4000x2000 | 46.9 ms | 13.2 ms | 106 ms | 78.6 ms | + +### Relative to xrs-numpy + +Values below 1.0 mean the competitor is faster than xrs-numpy. + +| n | size | xrs-cupy | geocube | rasterio | +|---:|---:|---:|---:|---:| +| 50 | 100x50 | 3.57x | 10.29x | 3.28x | +| 50 | 500x250 | 3.44x | 8.01x | 2.64x | +| 50 | 1000x500 | 2.25x | 6.21x | 2.28x | +| 50 | 2000x1000 | 1.65x | 5.34x | 2.34x | +| 50 | 4000x2000 | 0.22x | 2.09x | 1.35x | +| 200 | 100x50 | 7.80x | 15.53x | 10.36x | +| 200 | 500x250 | 2.31x | 12.39x | 7.98x | +| 200 | 1000x500 | 1.77x | 9.60x | 5.89x | +| 200 | 2000x1000 | 1.29x | 6.74x | 3.51x | +| 200 | 4000x2000 | 0.23x | 2.22x | 1.43x | +| 1000 | 100x50 | 2.50x | 24.63x | 20.99x | +| 1000 | 500x250 | 2.03x | 22.14x | 19.34x | +| 1000 | 1000x500 | 1.27x | 3.63x | 3.12x | +| 1000 | 2000x1000 | 1.83x | 10.49x | 8.33x | +| 1000 | 4000x2000 | 0.28x | 2.27x | 1.68x | + +### Consistency + +| pair | IoU min | IoU max | RMSE range | +|:-----|--------:|--------:|-----------:| +| xrs-numpy vs geocube | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| xrs-numpy vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 23.4 | +| geocube vs rasterio | 1.0000 | 1.0000 | 0.0 - 0.0 | +| geocube vs 
xrs-cupy | 1.0000 | 1.0000 | 0.0 - 23.4 | +| rasterio vs xrs-cupy | 1.0000 | 1.0000 | 0.0 - 23.4 | + +## Where xrs-numpy is slower + +Top 10 configurations where another rasterizer beats xrs-numpy. + +| # | geometry | n | size | faster lib | xrs-numpy | other | xrs slower by | +|--:|:---------|--:|-----:|:-----------|----------:|------:|--------------:| +| 1 | multipolygons | 1000 | 100x50 | datashader | 6.2 ms | 2.3 ms | 2.7x | +| 2 | circles_64v | 1000 | 100x50 | datashader | 4.6 ms | 1.9 ms | 2.4x | +| 3 | rectangles | 1000 | 100x50 | datashader | 1.4 ms | 0.6 ms | 2.4x | +| 4 | multipolygons | 200 | 100x50 | datashader | 1.4 ms | 0.7 ms | 2.0x | +| 5 | rectangles | 1000 | 1000x500 | datashader | 12.8 ms | 6.5 ms | 2.0x | +| 6 | rectangles | 50 | 4000x2000 | datashader | 46.5 ms | 25.7 ms | 1.8x | +| 7 | points | 50 | 100x50 | rasterio | 0.5 ms | 0.3 ms | 1.8x | +| 8 | irregular_128v | 200 | 100x50 | datashader | 2.4 ms | 1.4 ms | 1.7x | +| 9 | rectangles | 200 | 100x50 | datashader | 0.7 ms | 0.4 ms | 1.7x | +| 10 | stars_5pt | 200 | 100x50 | datashader | 0.8 ms | 0.5 ms | 1.7x | diff --git a/benchmarks/rasterizer_benchmarks.py b/benchmarks/rasterizer_benchmarks.py new file mode 100644 index 00000000..7f122169 --- /dev/null +++ b/benchmarks/rasterizer_benchmarks.py @@ -0,0 +1,720 @@ +""" +Benchmark: xarray-spatial vs datashader vs geocube vs rasterio rasterization. + +Compares wall-clock time and output consistency across a range of geometry +types, output resolutions, and feature counts. All libraries rasterize +the same geometries into the same pixel grid. 
+ +Usage: + python benchmarks/rasterizer_benchmarks.py + +Outputs: + - Console: timing tables, consistency checks, top-10 regressions + - benchmarks/RASTERIZER_BENCHMARKS.md: formatted markdown report +""" + +import json +import os +import time +from collections import defaultdict +from datetime import datetime, timezone + +import numpy as np +import geopandas as gpd +import spatialpandas +from rasterio.features import rasterize as rio_rasterize +from rasterio.transform import from_bounds +from shapely.geometry import ( + Point, Polygon, MultiPolygon, LineString, MultiLineString, MultiPoint, + box, +) + +import datashader as ds +from geocube.api.core import make_geocube +from xrspatial.rasterize import rasterize as xrs_rasterize + + +# --------------------------------------------------------------------------- +# Geometry generators +# --------------------------------------------------------------------------- + +def make_circles(n, rng): + """Random buffered points (64-vertex circles).""" + cx = rng.uniform(-150, 150, n) + cy = rng.uniform(-70, 70, n) + radii = rng.uniform(2, 15, n) + geoms = [Point(x, y).buffer(r, resolution=16) + for x, y, r in zip(cx, cy, radii)] + vals = rng.uniform(1, 100, n) + return geoms, vals + + +def make_irregular(n, rng, n_verts=128): + """Irregular star-like polygons with many vertices.""" + geoms = [] + vals = rng.uniform(1, 100, n) + for _ in range(n): + cx = rng.uniform(-150, 150) + cy = rng.uniform(-70, 70) + r = rng.uniform(5, 20) + angles = np.sort(rng.uniform(0, 2 * np.pi, n_verts)) + radii = r * (0.7 + 0.3 * rng.uniform(size=n_verts)) + xs = cx + radii * np.cos(angles) + ys = cy + radii * np.sin(angles) + coords = list(zip(xs, ys)) + coords.append(coords[0]) + geoms.append(Polygon(coords)) + return geoms, vals + + +def make_rectangles(n, rng): + """Axis-aligned rectangles.""" + geoms = [] + vals = rng.uniform(1, 100, n) + for _ in range(n): + cx = rng.uniform(-150, 150) + cy = rng.uniform(-70, 70) + hw = rng.uniform(2, 20) + 
hh = rng.uniform(2, 15) + geoms.append(box(cx - hw, cy - hh, cx + hw, cy + hh)) + return geoms, vals + + +def make_stars(n, rng, n_points=5): + """Concave star polygons with alternating inner/outer vertices.""" + geoms = [] + vals = rng.uniform(1, 100, n) + for _ in range(n): + cx = rng.uniform(-150, 150) + cy = rng.uniform(-70, 70) + r_outer = rng.uniform(5, 20) + r_inner = r_outer * rng.uniform(0.3, 0.5) + angles = np.linspace(0, 2 * np.pi, 2 * n_points, endpoint=False) + angles -= np.pi / 2 + coords = [] + for i, a in enumerate(angles): + r = r_outer if i % 2 == 0 else r_inner + coords.append((cx + r * np.cos(a), cy + r * np.sin(a))) + coords.append(coords[0]) + geoms.append(Polygon(coords)) + return geoms, vals + + +def make_donuts(n, rng): + """Polygons with holes (annuli).""" + geoms = [] + vals = rng.uniform(1, 100, n) + for _ in range(n): + cx = rng.uniform(-150, 150) + cy = rng.uniform(-70, 70) + r_outer = rng.uniform(8, 20) + r_inner = r_outer * rng.uniform(0.3, 0.6) + shell = Point(cx, cy).buffer(r_outer, resolution=16) + hole = Point(cx, cy).buffer(r_inner, resolution=16) + geoms.append(shell.difference(hole)) + return geoms, vals + + +def make_multipolygons(n, rng): + """MultiPolygon collections (2-4 parts each).""" + geoms = [] + vals = rng.uniform(1, 100, n) + for _ in range(n): + n_parts = rng.integers(2, 5) + parts = [] + for _ in range(n_parts): + cx = rng.uniform(-150, 150) + cy = rng.uniform(-70, 70) + r = rng.uniform(2, 8) + parts.append(Point(cx, cy).buffer(r, resolution=8)) + geoms.append(MultiPolygon(parts)) + return geoms, vals + + +def make_lines(n, rng): + """Random multi-segment LineStrings.""" + geoms = [] + vals = rng.uniform(1, 100, n) + for _ in range(n): + n_pts = rng.integers(3, 10) + xs = np.cumsum(rng.uniform(-20, 20, n_pts)) + rng.uniform(-140, 140) + ys = np.cumsum(rng.uniform(-10, 10, n_pts)) + rng.uniform(-60, 60) + xs = np.clip(xs, -179, 179) + ys = np.clip(ys, -89, 89) + geoms.append(LineString(zip(xs, ys))) + return geoms, 
vals + + +def make_multilines(n, rng): + """MultiLineString collections (2-4 parts each).""" + geoms = [] + vals = rng.uniform(1, 100, n) + for _ in range(n): + n_parts = rng.integers(2, 5) + parts = [] + for _ in range(n_parts): + n_pts = rng.integers(2, 6) + xs = np.cumsum(rng.uniform(-15, 15, n_pts)) + rng.uniform(-140, 140) + ys = np.cumsum(rng.uniform(-8, 8, n_pts)) + rng.uniform(-60, 60) + xs = np.clip(xs, -179, 179) + ys = np.clip(ys, -89, 89) + parts.append(LineString(zip(xs, ys))) + geoms.append(MultiLineString(parts)) + return geoms, vals + + +def make_points(n, rng): + """Random points.""" + xs = rng.uniform(-170, 170, n) + ys = rng.uniform(-80, 80, n) + geoms = [Point(x, y) for x, y in zip(xs, ys)] + vals = rng.uniform(1, 100, n) + return geoms, vals + + +def make_multipoints(n, rng): + """MultiPoint collections (3-8 points each).""" + geoms = [] + vals = rng.uniform(1, 100, n) + for _ in range(n): + n_pts = rng.integers(3, 9) + xs = rng.uniform(-170, 170, n_pts) + ys = rng.uniform(-80, 80, n_pts) + geoms.append(MultiPoint(list(zip(xs, ys)))) + return geoms, vals + + +# --------------------------------------------------------------------------- +# Runners +# --------------------------------------------------------------------------- + +BOUNDS = (-180, -90, 180, 90) + +GEOM_JSON = json.dumps({ + "type": "Polygon", + "coordinates": [[ + [BOUNDS[0], BOUNDS[1]], + [BOUNDS[2], BOUNDS[1]], + [BOUNDS[2], BOUNDS[3]], + [BOUNDS[0], BOUNDS[3]], + [BOUNDS[0], BOUNDS[1]], + ]], +}) + + +def run_xrspatial(pairs, width, height): + return xrs_rasterize( + pairs, width=width, height=height, + bounds=BOUNDS, fill=0.0, + ) + + +def run_datashader(spd_gdf, width, height): + cvs = ds.Canvas( + plot_width=width, plot_height=height, + x_range=(BOUNDS[0], BOUNDS[2]), + y_range=(BOUNDS[1], BOUNDS[3]), + ) + return cvs.polygons(spd_gdf, geometry="geometry", agg=ds.last("value")) + + +def run_geocube(gdf, width, height): + x_res = (BOUNDS[2] - BOUNDS[0]) / width + y_res = 
(BOUNDS[3] - BOUNDS[1]) / height + return make_geocube( + gdf, + measurements=["value"], + resolution=(-y_res, x_res), + geom=GEOM_JSON, + fill=0.0, + ) + + +def run_rasterio(shapes, width, height): + transform = from_bounds(*BOUNDS, width, height) + return rio_rasterize( + shapes, + out_shape=(height, width), + transform=transform, + fill=0.0, + dtype=np.float64, + ) + + +def run_xrspatial_cupy(pairs, width, height): + import cupy + result = xrs_rasterize( + pairs, width=width, height=height, + bounds=BOUNDS, fill=0.0, use_cuda=True, + ) + cupy.cuda.Device().synchronize() + return result + + +# --------------------------------------------------------------------------- +# Timing helper +# --------------------------------------------------------------------------- + +def bench(fn, *args, warmup=1, repeats=5): + """Return (result, mean_seconds, std_seconds) over *repeats* after *warmup*.""" + for _ in range(warmup): + fn(*args) + result = None + times = [] + for _ in range(repeats): + t0 = time.perf_counter() + result = fn(*args) + t1 = time.perf_counter() + times.append(t1 - t0) + arr = np.array(times) + return result, arr.mean(), arr.std() + + +# --------------------------------------------------------------------------- +# Output consistency helpers +# --------------------------------------------------------------------------- + +def _to_numpy(result, label): + """Extract a 2-D float64 numpy array from any rasterizer output.""" + if isinstance(result, np.ndarray): + return result.astype(np.float64) + try: + import xarray as xr + if isinstance(result, xr.Dataset): + return result["value"].values.astype(np.float64) + except Exception: + pass + data = getattr(result, "data", result) + try: + data = data.get() # cupy + except AttributeError: + pass + return np.asarray(data, dtype=np.float64) + + +def _coverage_mask(arr): + """Boolean mask of covered (non-zero, non-NaN) pixels.""" + return np.isfinite(arr) & (arr != 0.0) + + +def check_consistency(results, width, 
height): + """Compare outputs pairwise. Returns list of dicts with results.""" + arrays = {} + for label, res in results.items(): + arr = _to_numpy(res, label) + if arr.shape != (height, width): + arr = arr[:height, :width] + arrays[label] = arr + + labels = list(arrays.keys()) + checks = [] + + for i in range(len(labels)): + for j in range(i + 1, len(labels)): + a_lbl, b_lbl = labels[i], labels[j] + a, b = arrays[a_lbl], arrays[b_lbl] + + mask_a = _coverage_mask(a) + mask_b = _coverage_mask(b) + + intersection = np.sum(mask_a & mask_b) + union = np.sum(mask_a | mask_b) + iou = intersection / union if union > 0 else 1.0 + + overlap = mask_a & mask_b + if np.any(overlap): + rmse = np.sqrt(np.mean((a[overlap] - b[overlap]) ** 2)) + else: + rmse = float("nan") + + checks.append({ + "a": a_lbl, "b": b_lbl, + "iou": iou, "rmse": rmse, + "ok": iou > 0.85, + }) + + return checks + + +# --------------------------------------------------------------------------- +# Geometry type registry +# --------------------------------------------------------------------------- + +GEN_LABELS = { + "circles_64v": ("Circles (64 vertices)", "polygon"), + "irregular_128v": ("Irregular (128 vertices)", "polygon"), + "rectangles": ("Rectangles", "polygon"), + "stars_5pt": ("Stars (5-point, concave)", "polygon"), + "donuts": ("Donuts (polygon + hole)", "polygon"), + "multipolygons": ("MultiPolygons (2-4 parts)","polygon"), + "lines": ("LineStrings", "line"), + "multilines": ("MultiLineStrings", "line"), + "points": ("Points", "point"), + "multipoints": ("MultiPoints (3-8 pts)", "point"), +} + +GENERATORS = { + "circles_64v": { + "fn": lambda n, rng: make_circles(n, rng), + "kind": "polygon", + }, + "irregular_128v": { + "fn": lambda n, rng: make_irregular(n, rng, n_verts=128), + "kind": "polygon", + }, + "rectangles": { + "fn": lambda n, rng: make_rectangles(n, rng), + "kind": "polygon", + }, + "stars_5pt": { + "fn": lambda n, rng: make_stars(n, rng, n_points=5), + "kind": "polygon", + }, + 
"donuts": { + "fn": lambda n, rng: make_donuts(n, rng), + "kind": "polygon", + }, + "multipolygons": { + "fn": lambda n, rng: make_multipolygons(n, rng), + "kind": "polygon", + }, + "lines": { + "fn": lambda n, rng: make_lines(n, rng), + "kind": "line", + }, + "multilines": { + "fn": lambda n, rng: make_multilines(n, rng), + "kind": "line", + }, + "points": { + "fn": lambda n, rng: make_points(n, rng), + "kind": "point", + }, + "multipoints": { + "fn": lambda n, rng: make_multipoints(n, rng), + "kind": "point", + }, +} + +GEN_ORDER = [ + "circles_64v", "irregular_128v", "rectangles", "stars_5pt", + "donuts", "multipolygons", "lines", "multilines", "points", "multipoints", +] + + +# --------------------------------------------------------------------------- +# Markdown report generation +# --------------------------------------------------------------------------- + +def _fmt_ms(seconds): + ms = seconds * 1000 + if ms >= 100: + return f"{ms:.0f} ms" + if ms >= 10: + return f"{ms:.1f} ms" + return f"{ms:.2f} ms" + + +def generate_markdown(all_results, slower_cases, has_cupy): + """Build the full markdown report string from collected results.""" + lines = [] + w = lines.append + + w("# Rasterizer Benchmarks") + w("") + now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC") + w(f"Generated: {now}") + w("") + w("Compares xarray-spatial (numpy and cupy backends) against datashader, " + "geocube, and rasterio across 10 geometry types, 3 feature counts " + "(50/200/1000), and 5 output resolutions (100-4000 px wide).") + w("") + w("- **Polygon types** (circles, irregular, rectangles, stars, donuts, " + "multipolygons): all 5 rasterizers compared") + w("- **Line types** (lines, multilines): xrspatial, geocube, rasterio " + "(datashader uses a different API for lines)") + w("- **Point types** (points, multipoints): xrspatial, geocube, rasterio " + "(datashader uses a different API for points)") + w("") + + # Per-geometry sections + for gen_key in GEN_ORDER: + if 
gen_key not in all_results: + continue + label, kind = GEN_LABELS[gen_key] + entries = all_results[gen_key] + is_poly = kind == "polygon" + + w(f"## {label}") + w("") + + # Timing table + w("### Timings") + w("") + if is_poly: + h = "| n | size | xrs-numpy | xrs-cupy | datashader | geocube | rasterio |" + s = "|---:|---:|---:|---:|---:|---:|---:|" + else: + h = "| n | size | xrs-numpy | xrs-cupy | geocube | rasterio |" + s = "|---:|---:|---:|---:|---:|---:|" + w(h) + w(s) + + for e in entries: + size = f"{e['w']}x{e['h']}" + xnp = _fmt_ms(e["xrs_mean"]) + xcu = _fmt_ms(e["cupy_mean"]) if e["cupy_mean"] is not None else "--" + gc = _fmt_ms(e["gc_mean"]) + rio = _fmt_ms(e["rio_mean"]) + if is_poly: + dsh = _fmt_ms(e["ds_mean"]) if e["ds_mean"] is not None else "--" + w(f"| {e['n']} | {size} | {xnp} | {xcu} | {dsh} | {gc} | {rio} |") + else: + w(f"| {e['n']} | {size} | {xnp} | {xcu} | {gc} | {rio} |") + + w("") + + # Ratio table + w("### Relative to xrs-numpy") + w("") + w("Values below 1.0 mean the competitor is faster than xrs-numpy.") + w("") + if is_poly: + h = "| n | size | xrs-cupy | datashader | geocube | rasterio |" + s = "|---:|---:|---:|---:|---:|---:|" + else: + h = "| n | size | xrs-cupy | geocube | rasterio |" + s = "|---:|---:|---:|---:|---:|" + w(h) + w(s) + + for e in entries: + size = f"{e['w']}x{e['h']}" + xrs = e["xrs_mean"] + cu_r = f"{e['cupy_mean']/xrs:.2f}x" if e["cupy_mean"] is not None and xrs > 0 else "--" + gc_r = f"{e['gc_mean']/xrs:.2f}x" if xrs > 0 else "--" + rio_r = f"{e['rio_mean']/xrs:.2f}x" if xrs > 0 else "--" + if is_poly: + ds_r = f"{e['ds_mean']/xrs:.2f}x" if e["ds_mean"] is not None and xrs > 0 else "--" + w(f"| {e['n']} | {size} | {cu_r} | {ds_r} | {gc_r} | {rio_r} |") + else: + w(f"| {e['n']} | {size} | {cu_r} | {gc_r} | {rio_r} |") + + w("") + + # Consistency summary + w("### Consistency") + w("") + pair_data = defaultdict(lambda: {"ious": [], "rmses": []}) + for e in entries: + for c in e["consistency"]: + key = f"{c['a']} 
vs {c['b']}" + pair_data[key]["ious"].append(c["iou"]) + pair_data[key]["rmses"].append(c["rmse"]) + + w("| pair | IoU min | IoU max | RMSE range |") + w("|:-----|--------:|--------:|-----------:|") + for pair, d in pair_data.items(): + imin = min(d["ious"]) + imax = max(d["ious"]) + rmin = min(r for r in d["rmses"] if not np.isnan(r)) if any(not np.isnan(r) for r in d["rmses"]) else float("nan") + rmax = max(r for r in d["rmses"] if not np.isnan(r)) if any(not np.isnan(r) for r in d["rmses"]) else float("nan") + w(f"| {pair} | {imin:.4f} | {imax:.4f} | {rmin:.1f} - {rmax:.1f} |") + + w("") + + # Top 10 slower cases + if slower_cases: + slower_cases.sort(key=lambda x: -x[1]) + top = slower_cases[:10] + + w("## Where xrs-numpy is slower") + w("") + w("Top 10 configurations where another rasterizer beats xrs-numpy.") + w("") + w("| # | geometry | n | size | faster lib | xrs-numpy | other | xrs slower by |") + w("|--:|:---------|--:|-----:|:-----------|----------:|------:|--------------:|") + for i, (comp, ratio, gen, n, ww, hh, xrs_ms, comp_ms) in enumerate(top, 1): + size = f"{ww}x{hh}" + w(f"| {i} | {gen} | {n} | {size} | {comp} | {xrs_ms:.1f} ms | {comp_ms:.1f} ms | {ratio:.1f}x |") + w("") + + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(): + rng = np.random.default_rng(42) + + resolutions = [100, 500, 1000, 2000, 4000] + feature_counts = [50, 200, 1000] + + try: + import cupy # noqa: F401 + has_cupy = True + except ImportError: + has_cupy = False + + # Structured results: gen_key -> list of row dicts + all_results = defaultdict(list) + slower_cases = [] + + header = ( + f"{'generator':<18} {'n':>5} {'width':>6} {'height':>6}" + f" {'xrs-numpy (s)':>14}" + f" {'xrs-cupy (s)':>14}" + f" {'datashader (s)':>14}" + f" {'geocube (s)':>14}" + f" {'rasterio (s)':>14}" + f" {'ds/xrs':>8}" + f" {'gc/xrs':>8}" + f" 
{'rio/xrs':>8}" + f" {'cupy/xrs':>8}" + ) + print(header) + print("-" * len(header)) + + for gen_name, gen_cfg in GENERATORS.items(): + gen_fn = gen_cfg["fn"] + kind = gen_cfg["kind"] + use_datashader = kind == "polygon" + + for n in feature_counts: + geoms, vals = gen_fn(n, rng) + pairs = list(zip(geoms, vals)) + + gdf = gpd.GeoDataFrame( + {"value": vals}, geometry=geoms, crs="EPSG:4326") + rio_shapes = [(g, v) for g, v in zip(geoms, vals)] + + if use_datashader: + spd_gdf = spatialpandas.GeoDataFrame(gdf) + + for width in resolutions: + height = width // 2 + + xrs_res, xrs_mean, xrs_std = bench( + run_xrspatial, pairs, width, height) + gc_res, gc_mean, gc_std = bench( + run_geocube, gdf, width, height) + rio_res, rio_mean, rio_std = bench( + run_rasterio, rio_shapes, width, height) + + ds_mean = ds_std = ds_res = None + if use_datashader: + ds_res, ds_mean, ds_std = bench( + run_datashader, spd_gdf, width, height) + ds_ratio = ds_mean / xrs_mean if xrs_mean > 0 else float("inf") + ds_col = f" {ds_mean:>8.4f}+-{ds_std:.4f}" + ds_rat = f" {ds_ratio:>7.2f}x" + else: + ds_col = f" {'--':>14}" + ds_rat = f" {'--':>8}" + + cupy_mean = cupy_std = cupy_res = None + if has_cupy: + cupy_res, cupy_mean, cupy_std = bench( + run_xrspatial_cupy, pairs, width, height) + cupy_ratio = (cupy_mean / xrs_mean + if xrs_mean > 0 else float("inf")) + cupy_col = f" {cupy_mean:>8.4f}+-{cupy_std:.4f}" + cupy_rat = f" {cupy_ratio:>7.2f}x" + else: + cupy_col = f" {'n/a':>14}" + cupy_rat = f" {'n/a':>8}" + + gc_ratio = gc_mean / xrs_mean if xrs_mean > 0 else float("inf") + rio_ratio = rio_mean / xrs_mean if xrs_mean > 0 else float("inf") + + # Track slower-than cases + if use_datashader and ds_mean is not None: + ds_r = ds_mean / xrs_mean if xrs_mean > 0 else float("inf") + if ds_r < 1.0: + slower_cases.append(( + "datashader", 1.0 / ds_r, gen_name, n, + width, height, xrs_mean * 1000, ds_mean * 1000)) + if gc_ratio < 1.0: + slower_cases.append(( + "geocube", 1.0 / gc_ratio, gen_name, n, + 
width, height, xrs_mean * 1000, gc_mean * 1000)) + if rio_ratio < 1.0: + slower_cases.append(( + "rasterio", 1.0 / rio_ratio, gen_name, n, + width, height, xrs_mean * 1000, rio_mean * 1000)) + + # Console output + print( + f"{gen_name:<18} {n:>5} {width:>6} {height:>6}" + f" {xrs_mean:>8.4f}+-{xrs_std:.4f}" + f"{cupy_col}" + f"{ds_col}" + f" {gc_mean:>8.4f}+-{gc_std:.4f}" + f" {rio_mean:>8.4f}+-{rio_std:.4f}" + f"{ds_rat}" + f" {gc_ratio:>7.2f}x" + f" {rio_ratio:>7.2f}x" + f"{cupy_rat}" + ) + + # Consistency checks + consistency_inputs = {"xrs-numpy": xrs_res} + if ds_res is not None: + consistency_inputs["datashader"] = ds_res + consistency_inputs["geocube"] = gc_res + consistency_inputs["rasterio"] = rio_res + if cupy_res is not None: + consistency_inputs["xrs-cupy"] = cupy_res + checks = check_consistency(consistency_inputs, width, height) + + for c in checks: + status = "OK" if c["ok"] else "WARN" + print(f" {c['a']} vs {c['b']}: " + f"IoU={c['iou']:.4f} RMSE={c['rmse']:.4f} " + f"[{status}]") + + # Store structured result + all_results[gen_name].append({ + "n": n, "w": width, "h": height, + "xrs_mean": xrs_mean, "xrs_std": xrs_std, + "cupy_mean": cupy_mean, "cupy_std": cupy_std, + "ds_mean": ds_mean, "ds_std": ds_std, + "gc_mean": gc_mean, "gc_std": gc_std, + "rio_mean": rio_mean, "rio_std": rio_std, + "consistency": checks, + }) + + # Console: top 10 slower cases + if slower_cases: + slower_cases.sort(key=lambda x: -x[1]) + top = slower_cases[:10] + print() + print("=" * 95) + print(" Top 10 cases where xrs-numpy is SLOWER than a competitor") + print("=" * 95) + print() + print(f" {'#':>2} {'geometry':<18} {'n':>5} {'size':>10}" + f" {'faster lib':<12} {'xrs-np':>9} {'other':>9}" + f" {'xrs slower by':>13}") + print(f" {'':->2} {'':->18} {'':->5} {'':->10}" + f" {'':->12} {'':->9} {'':->9} {'':->13}") + for i, (comp, ratio, gen, n, w, h, xrs_ms, comp_ms) in enumerate( + top, 1): + size = f"{w}x{h}" + print(f" {i:>2} {gen:<18} {n:>5} {size:>10}" + f" 
{comp:<12} {xrs_ms:>7.1f}ms {comp_ms:>7.1f}ms" + f" {ratio:>11.1f}x") + print() + + # Write markdown report + md = generate_markdown(all_results, slower_cases, has_cupy) + md_path = os.path.join(os.path.dirname(__file__), "RASTERIZER_BENCHMARKS.md") + with open(md_path, "w") as f: + f.write(md) + print(f"Wrote {md_path}") + + +if __name__ == "__main__": + main() diff --git a/docs/source/reference/hydrology.rst b/docs/source/reference/hydrology.rst index f7a7689f..4e5216ef 100644 --- a/docs/source/reference/hydrology.rst +++ b/docs/source/reference/hydrology.rst @@ -32,6 +32,13 @@ Flow Accumulation xrspatial.flow_accumulation.flow_accumulation +Flow Accumulation (D-infinity) +=============================== +.. autosummary:: + :toctree: _autosummary + + xrspatial.flow_accumulation_dinf.flow_accumulation_dinf + Flow Accumulation (MFD) ======================= .. autosummary:: diff --git a/xrspatial/__init__.py b/xrspatial/__init__.py index 1c964840..a0fb4a28 100644 --- a/xrspatial/__init__.py +++ b/xrspatial/__init__.py @@ -39,6 +39,7 @@ from xrspatial.flood import inundation # noqa from xrspatial.flood import travel_time # noqa from xrspatial.flow_accumulation import flow_accumulation # noqa +from xrspatial.flow_accumulation_dinf import flow_accumulation_dinf # noqa from xrspatial.flow_accumulation_mfd import flow_accumulation_mfd # noqa from xrspatial.flow_direction import flow_direction # noqa from xrspatial.flow_direction_dinf import flow_direction_dinf # noqa diff --git a/xrspatial/basin.py b/xrspatial/basin.py index f80f5c63..d7a404c4 100644 --- a/xrspatial/basin.py +++ b/xrspatial/basin.py @@ -364,7 +364,10 @@ def basin(flow_dir: xr.DataArray, fd = data.astype(np.float64) h, w = fd.shape labels = _basins_init_labels(fd, h, w, h, w, 0, 0) - out = _watershed_cpu(fd, labels, h, w) + # Build state array: 0=nodata(NaN), 1=unresolved(-1), 3=resolved + state = np.where(np.isnan(labels), 0, + np.where(labels == -1.0, 1, 3)).astype(np.int8) + out = _watershed_cpu(fd, 
labels, state, h, w) elif has_cuda_and_cupy() and is_cupy_array(data): out = _basins_cupy(data) diff --git a/xrspatial/flow_accumulation.py b/xrspatial/flow_accumulation.py index 3e75ebf9..0593fe49 100644 --- a/xrspatial/flow_accumulation.py +++ b/xrspatial/flow_accumulation.py @@ -1,15 +1,13 @@ """Flow accumulation: count of upstream cells draining through each cell. -Supports both D8 (integer codes) and D-infinity (continuous angles) -flow direction grids, auto-detected from input values. +Supports D8 (integer codes) flow direction grids. For D-infinity +(continuous angles), see ``flow_accumulation_dinf``. For D8 input, each cell drains to exactly one downstream neighbor. -For D-infinity, flow is split proportionally between two neighbors -following Tarboton (1997). Algorithm --------- -CPU : Kahn's BFS topological sort — O(N). +CPU : Kahn's BFS topological sort -- O(N). GPU : iterative frontier peeling with pull-based kernels. Dask: iterative tile sweep with boundary propagation (one tile in RAM at a time), following the ``cost_distance.py`` pattern. @@ -90,7 +88,7 @@ def _code_to_offset_py(code): # ===================================================================== -# Dinf helpers +# Flow type detection (kept for backward compat) # ===================================================================== @ngjit @@ -142,52 +140,6 @@ def _detect_flow_type(data): return "dinf" if result == 1 else "d8" -@ngjit -def _angle_to_neighbors(angle): - """Decompose Dinf angle into two neighbors and weights. - - Returns (dy1, dx1, w1, dy2, dx2, w2). - Pit (angle < 0) or NaN returns all zeros. 
- """ - if angle < 0.0 or angle != angle: # pit or NaN - return 0, 0, 0.0, 0, 0, 0.0 - - pi_over_4 = 0.7853981633974483 # np.pi / 4 - k = int(angle / pi_over_4) - if k > 7: - k = 7 - alpha = angle - k * pi_over_4 - w1 = 1.0 - alpha / pi_over_4 - w2 = alpha / pi_over_4 - - if k == 0: - dy1, dx1 = 0, 1 # E - dy2, dx2 = -1, 1 # NE - elif k == 1: - dy1, dx1 = -1, 1 # NE - dy2, dx2 = -1, 0 # N - elif k == 2: - dy1, dx1 = -1, 0 # N - dy2, dx2 = -1, -1 # NW - elif k == 3: - dy1, dx1 = -1, -1 # NW - dy2, dx2 = 0, -1 # W - elif k == 4: - dy1, dx1 = 0, -1 # W - dy2, dx2 = 1, -1 # SW - elif k == 5: - dy1, dx1 = 1, -1 # SW - dy2, dx2 = 1, 0 # S - elif k == 6: - dy1, dx1 = 1, 0 # S - dy2, dx2 = 1, 1 # SE - else: # k == 7 - dy1, dx1 = 1, 1 # SE - dy2, dx2 = 0, 1 # E - - return dy1, dx1, w1, dy2, dx2, w2 - - # ===================================================================== # CPU kernel # ===================================================================== @@ -256,78 +208,6 @@ def _flow_accum_cpu(flow_dir, height, width): return accum -@ngjit -def _flow_accum_dinf_cpu(flow_dir, height, width): - """Kahn's BFS topological sort for Dinf flow accumulation.""" - accum = np.empty((height, width), dtype=np.float64) - in_degree = np.zeros((height, width), dtype=np.int32) - valid = np.zeros((height, width), dtype=np.int8) - - for r in range(height): - for c in range(width): - v = flow_dir[r, c] - if v == v: # not NaN - valid[r, c] = 1 - accum[r, c] = 1.0 - else: - accum[r, c] = np.nan - - for r in range(height): - for c in range(width): - if valid[r, c] == 0: - continue - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(flow_dir[r, c]) - if w1 > 0.0: - nr, nc = r + dy1, c + dx1 - if 0 <= nr < height and 0 <= nc < width and valid[nr, nc] == 1: - in_degree[nr, nc] += 1 - if w2 > 0.0: - nr, nc = r + dy2, c + dx2 - if 0 <= nr < height and 0 <= nc < width and valid[nr, nc] == 1: - in_degree[nr, nc] += 1 - - queue_r = np.empty(height * width, dtype=np.int64) - queue_c = np.empty(height * 
width, dtype=np.int64) - head = np.int64(0) - tail = np.int64(0) - - for r in range(height): - for c in range(width): - if valid[r, c] == 1 and in_degree[r, c] == 0: - queue_r[tail] = r - queue_c[tail] = c - tail += 1 - - while head < tail: - r = queue_r[head] - c = queue_c[head] - head += 1 - - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(flow_dir[r, c]) - - if w1 > 0.0: - nr, nc = r + dy1, c + dx1 - if 0 <= nr < height and 0 <= nc < width and valid[nr, nc] == 1: - accum[nr, nc] += accum[r, c] * w1 - in_degree[nr, nc] -= 1 - if in_degree[nr, nc] == 0: - queue_r[tail] = nr - queue_c[tail] = nc - tail += 1 - - if w2 > 0.0: - nr, nc = r + dy2, c + dx2 - if 0 <= nr < height and 0 <= nc < width and valid[nr, nc] == 1: - accum[nr, nc] += accum[r, c] * w2 - in_degree[nr, nc] -= 1 - if in_degree[nr, nc] == 0: - queue_r[tail] = nr - queue_c[tail] = nc - tail += 1 - - return accum - - # ===================================================================== # GPU kernels # ===================================================================== @@ -348,7 +228,7 @@ def _init_accum_indegree(flow_dir, accum, in_degree, state, H, W): state[i, j] = 1 accum[i, j] = 1.0 - # Decode direction (inline — can't call @ngjit from @cuda.jit) + # Decode direction (inline -- can't call @ngjit from @cuda.jit) code = int(v) dy = 0 dx = 0 @@ -380,7 +260,7 @@ def _init_accum_indegree(flow_dir, accum, in_degree, state, H, W): @cuda.jit def _find_ready_and_finalize(in_degree, state, changed, H, W): - """Finalize previous frontier (2→3), mark new frontier (1→2).""" + """Finalize previous frontier (2->3), mark new frontier (1->2).""" i, j = cuda.grid(2) if i >= H or j >= W: return @@ -543,232 +423,6 @@ def _flow_accum_tile_cupy(flow_dir_data, return accum -# ===================================================================== -# Dinf GPU kernels -# ===================================================================== - -@cuda.jit -def _init_accum_indegree_dinf(flow_dir, accum, in_degree, state, H, 
W): - """Initialise accum/in_degree/state for Dinf on GPU.""" - i, j = cuda.grid(2) - if i >= H or j >= W: - return - - v = flow_dir[i, j] - if v != v: # NaN - state[i, j] = 0 - accum[i, j] = 0.0 - return - - state[i, j] = 1 - accum[i, j] = 1.0 - - if v < 0.0: # pit - return - - pi_over_4 = 0.7853981633974483 - k = int(v / pi_over_4) - if k > 7: - k = 7 - alpha = v - k * pi_over_4 - w1 = 1.0 - alpha / pi_over_4 - w2 = alpha / pi_over_4 - - # Facet offsets (inline) - if k == 0: - dy1, dx1 = 0, 1 - dy2, dx2 = -1, 1 - elif k == 1: - dy1, dx1 = -1, 1 - dy2, dx2 = -1, 0 - elif k == 2: - dy1, dx1 = -1, 0 - dy2, dx2 = -1, -1 - elif k == 3: - dy1, dx1 = -1, -1 - dy2, dx2 = 0, -1 - elif k == 4: - dy1, dx1 = 0, -1 - dy2, dx2 = 1, -1 - elif k == 5: - dy1, dx1 = 1, -1 - dy2, dx2 = 1, 0 - elif k == 6: - dy1, dx1 = 1, 0 - dy2, dx2 = 1, 1 - else: - dy1, dx1 = 1, 1 - dy2, dx2 = 0, 1 - - if w1 > 0.0: - ni, nj = i + dy1, j + dx1 - if 0 <= ni < H and 0 <= nj < W: - cuda.atomic.add(in_degree, (ni, nj), 1) - if w2 > 0.0: - ni, nj = i + dy2, j + dx2 - if 0 <= ni < H and 0 <= nj < W: - cuda.atomic.add(in_degree, (ni, nj), 1) - - -@cuda.jit -def _pull_from_frontier_dinf(flow_dir, accum, in_degree, state, H, W): - """Active Dinf cells pull accumulation from frontier neighbours.""" - i, j = cuda.grid(2) - if i >= H or j >= W: - return - - if state[i, j] != 1: - return - - pi_over_4 = 0.7853981633974483 - - for nbr in range(8): - if nbr == 0: - dy, dx = 0, 1 - elif nbr == 1: - dy, dx = 1, 1 - elif nbr == 2: - dy, dx = 1, 0 - elif nbr == 3: - dy, dx = 1, -1 - elif nbr == 4: - dy, dx = 0, -1 - elif nbr == 5: - dy, dx = -1, -1 - elif nbr == 6: - dy, dx = -1, 0 - else: - dy, dx = -1, 1 - - ni = i + dy - nj = j + dx - if ni < 0 or ni >= H or nj < 0 or nj >= W: - continue - if state[ni, nj] != 2: - continue - - nv = flow_dir[ni, nj] - if nv < 0.0 or nv != nv: - continue - - nk = int(nv / pi_over_4) - if nk > 7: - nk = 7 - nalpha = nv - nk * pi_over_4 - nw1 = 1.0 - nalpha / pi_over_4 - nw2 = nalpha 
/ pi_over_4 - - # Inline facet offsets for neighbor's direction - if nk == 0: - ndy1, ndx1 = 0, 1 - ndy2, ndx2 = -1, 1 - elif nk == 1: - ndy1, ndx1 = -1, 1 - ndy2, ndx2 = -1, 0 - elif nk == 2: - ndy1, ndx1 = -1, 0 - ndy2, ndx2 = -1, -1 - elif nk == 3: - ndy1, ndx1 = -1, -1 - ndy2, ndx2 = 0, -1 - elif nk == 4: - ndy1, ndx1 = 0, -1 - ndy2, ndx2 = 1, -1 - elif nk == 5: - ndy1, ndx1 = 1, -1 - ndy2, ndx2 = 1, 0 - elif nk == 6: - ndy1, ndx1 = 1, 0 - ndy2, ndx2 = 1, 1 - else: - ndy1, ndx1 = 1, 1 - ndy2, ndx2 = 0, 1 - - if nw1 > 0.0 and ni + ndy1 == i and nj + ndx1 == j: - accum[i, j] += accum[ni, nj] * nw1 - in_degree[i, j] -= 1 - if nw2 > 0.0 and ni + ndy2 == i and nj + ndx2 == j: - accum[i, j] += accum[ni, nj] * nw2 - in_degree[i, j] -= 1 - - -def _flow_accum_dinf_cupy(flow_dir_data): - """GPU driver: iterative frontier peeling for Dinf.""" - import cupy as cp - - H, W = flow_dir_data.shape - flow_dir_f64 = flow_dir_data.astype(cp.float64) - - accum = cp.zeros((H, W), dtype=cp.float64) - in_degree = cp.zeros((H, W), dtype=cp.int32) - state = cp.zeros((H, W), dtype=cp.int32) - changed = cp.zeros(1, dtype=cp.int32) - - griddim, blockdim = cuda_args((H, W)) - - _init_accum_indegree_dinf[griddim, blockdim]( - flow_dir_f64, accum, in_degree, state, H, W) - - max_iter = H * W - for _ in range(max_iter): - changed[0] = 0 - _find_ready_and_finalize[griddim, blockdim]( - in_degree, state, changed, H, W) - - if int(changed[0]) == 0: - break - - _pull_from_frontier_dinf[griddim, blockdim]( - flow_dir_f64, accum, in_degree, state, H, W) - - accum = cp.where(state == 0, cp.nan, accum) - return accum - - -def _flow_accum_dinf_tile_cupy(flow_dir_data, - seed_top, seed_bottom, seed_left, seed_right, - seed_tl, seed_tr, seed_bl, seed_br): - """GPU seeded Dinf flow accumulation for a single tile.""" - import cupy as cp - - H, W = flow_dir_data.shape - flow_dir_f64 = flow_dir_data.astype(cp.float64) - - accum = cp.zeros((H, W), dtype=cp.float64) - in_degree = cp.zeros((H, W), 
dtype=cp.int32) - state = cp.zeros((H, W), dtype=cp.int32) - changed = cp.zeros(1, dtype=cp.int32) - - griddim, blockdim = cuda_args((H, W)) - - _init_accum_indegree_dinf[griddim, blockdim]( - flow_dir_f64, accum, in_degree, state, H, W) - - accum[0, :] += cp.asarray(seed_top) - accum[H - 1, :] += cp.asarray(seed_bottom) - accum[:, 0] += cp.asarray(seed_left) - accum[:, W - 1] += cp.asarray(seed_right) - accum[0, 0] += seed_tl - accum[0, W - 1] += seed_tr - accum[H - 1, 0] += seed_bl - accum[H - 1, W - 1] += seed_br - - max_iter = H * W - for _ in range(max_iter): - changed[0] = 0 - _find_ready_and_finalize[griddim, blockdim]( - in_degree, state, changed, H, W) - - if int(changed[0]) == 0: - break - - _pull_from_frontier_dinf[griddim, blockdim]( - flow_dir_f64, accum, in_degree, state, H, W) - - accum = cp.where(state == 0, cp.nan, accum) - return accum - - # ===================================================================== # Tile kernel for dask iterative path # ===================================================================== @@ -915,14 +569,14 @@ def _compute_seeds(iy, ix, boundaries, flow_bdry, nb_fdir = flow_bdry.get('bottom', iy - 1, ix) nb_accum = boundaries.get('bottom', iy - 1, ix) w = len(nb_fdir) - # Cardinal S (code 4): nb cell j → our (0, j) + # Cardinal S (code 4): nb cell j -> our (0, j) mask = (nb_fdir == 4) seed_top += np.where(mask, nb_accum, 0.0) if w > 1: - # Diagonal SE (code 2): nb cell j → our (0, j+1) + # Diagonal SE (code 2): nb cell j -> our (0, j+1) mask = (nb_fdir[:-1] == 2) seed_top[1:] += np.where(mask, nb_accum[:-1], 0.0) - # Diagonal SW (code 8): nb cell j → our (0, j-1) + # Diagonal SW (code 8): nb cell j -> our (0, j-1) mask = (nb_fdir[1:] == 8) seed_top[:-1] += np.where(mask, nb_accum[1:], 0.0) @@ -935,10 +589,10 @@ def _compute_seeds(iy, ix, boundaries, flow_bdry, mask = (nb_fdir == 64) seed_bottom += np.where(mask, nb_accum, 0.0) if w > 1: - # Diagonal NE (code 128): nb cell j → our (h-1, j+1) + # Diagonal NE (code 128): 
nb cell j -> our (h-1, j+1) mask = (nb_fdir[:-1] == 128) seed_bottom[1:] += np.where(mask, nb_accum[:-1], 0.0) - # Diagonal NW (code 32): nb cell j → our (h-1, j-1) + # Diagonal NW (code 32): nb cell j -> our (h-1, j-1) mask = (nb_fdir[1:] == 32) seed_bottom[:-1] += np.where(mask, nb_accum[1:], 0.0) @@ -951,10 +605,10 @@ def _compute_seeds(iy, ix, boundaries, flow_bdry, mask = (nb_fdir == 1) seed_left += np.where(mask, nb_accum, 0.0) if h > 1: - # Diagonal SE (code 2): nb cell i → our (i+1, 0) + # Diagonal SE (code 2): nb cell i -> our (i+1, 0) mask = (nb_fdir[:-1] == 2) seed_left[1:] += np.where(mask, nb_accum[:-1], 0.0) - # Diagonal NE (code 128): nb cell i → our (i-1, 0) + # Diagonal NE (code 128): nb cell i -> our (i-1, 0) mask = (nb_fdir[1:] == 128) seed_left[:-1] += np.where(mask, nb_accum[1:], 0.0) @@ -967,10 +621,10 @@ def _compute_seeds(iy, ix, boundaries, flow_bdry, mask = (nb_fdir == 16) seed_right += np.where(mask, nb_accum, 0.0) if h > 1: - # Diagonal SW (code 8): nb cell i → our (i+1, w-1) + # Diagonal SW (code 8): nb cell i -> our (i+1, w-1) mask = (nb_fdir[:-1] == 8) seed_right[1:] += np.where(mask, nb_accum[:-1], 0.0) - # Diagonal NW (code 32): nb cell i → our (i-1, w-1) + # Diagonal NW (code 32): nb cell i -> our (i-1, w-1) mask = (nb_fdir[1:] == 32) seed_right[:-1] += np.where(mask, nb_accum[1:], 0.0) @@ -1222,437 +876,6 @@ def _flow_accum_dask_cupy(flow_dir_da): chunks_y, chunks_x, n_tile_y, n_tile_x) -# ===================================================================== -# Dinf tile kernel for dask -# ===================================================================== - -@ngjit -def _flow_accum_dinf_tile_kernel(flow_dir, h, w, - seed_top, seed_bottom, - seed_left, seed_right, - seed_tl, seed_tr, seed_bl, seed_br): - """Seeded BFS Dinf flow accumulation for a single tile.""" - accum = np.empty((h, w), dtype=np.float64) - in_degree = np.zeros((h, w), dtype=np.int32) - valid = np.zeros((h, w), dtype=np.int8) - - for r in range(h): - for c in 
range(w): - v = flow_dir[r, c] - if v == v: - valid[r, c] = 1 - accum[r, c] = 1.0 - else: - accum[r, c] = np.nan - - # Add external seeds - for c in range(w): - if valid[0, c] == 1: - accum[0, c] += seed_top[c] - if valid[h - 1, c] == 1: - accum[h - 1, c] += seed_bottom[c] - for r in range(h): - if valid[r, 0] == 1: - accum[r, 0] += seed_left[r] - if valid[r, w - 1] == 1: - accum[r, w - 1] += seed_right[r] - if valid[0, 0] == 1: - accum[0, 0] += seed_tl - if valid[0, w - 1] == 1: - accum[0, w - 1] += seed_tr - if valid[h - 1, 0] == 1: - accum[h - 1, 0] += seed_bl - if valid[h - 1, w - 1] == 1: - accum[h - 1, w - 1] += seed_br - - # In-degrees via angle decomposition - for r in range(h): - for c in range(w): - if valid[r, c] == 0: - continue - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(flow_dir[r, c]) - if w1 > 0.0: - nr, nc = r + dy1, c + dx1 - if 0 <= nr < h and 0 <= nc < w and valid[nr, nc] == 1: - in_degree[nr, nc] += 1 - if w2 > 0.0: - nr, nc = r + dy2, c + dx2 - if 0 <= nr < h and 0 <= nc < w and valid[nr, nc] == 1: - in_degree[nr, nc] += 1 - - queue_r = np.empty(h * w, dtype=np.int64) - queue_c = np.empty(h * w, dtype=np.int64) - head = np.int64(0) - tail = np.int64(0) - - for r in range(h): - for c in range(w): - if valid[r, c] == 1 and in_degree[r, c] == 0: - queue_r[tail] = r - queue_c[tail] = c - tail += 1 - - while head < tail: - r = queue_r[head] - c = queue_c[head] - head += 1 - - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(flow_dir[r, c]) - - if w1 > 0.0: - nr, nc = r + dy1, c + dx1 - if 0 <= nr < h and 0 <= nc < w and valid[nr, nc] == 1: - accum[nr, nc] += accum[r, c] * w1 - in_degree[nr, nc] -= 1 - if in_degree[nr, nc] == 0: - queue_r[tail] = nr - queue_c[tail] = nc - tail += 1 - - if w2 > 0.0: - nr, nc = r + dy2, c + dx2 - if 0 <= nr < h and 0 <= nc < w and valid[nr, nc] == 1: - accum[nr, nc] += accum[r, c] * w2 - in_degree[nr, nc] -= 1 - if in_degree[nr, nc] == 0: - queue_r[tail] = nr - queue_c[tail] = nc - tail += 1 - - return accum - 
- -# ===================================================================== -# Dinf dask iterative tile sweep -# ===================================================================== - -def _compute_seeds_dinf(iy, ix, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x): - """Compute seed arrays for tile (iy, ix) using Dinf angle decomposition.""" - tile_h = chunks_y[iy] - tile_w = chunks_x[ix] - - seed_top = np.zeros(tile_w, dtype=np.float64) - seed_bottom = np.zeros(tile_w, dtype=np.float64) - seed_left = np.zeros(tile_h, dtype=np.float64) - seed_right = np.zeros(tile_h, dtype=np.float64) - seed_tl = 0.0 - seed_tr = 0.0 - seed_bl = 0.0 - seed_br = 0.0 - - # --- Top edge: bottom row of tile above flows south (dy=1) --- - if iy > 0: - nb_fdir = flow_bdry.get('bottom', iy - 1, ix) - nb_accum = boundaries.get('bottom', iy - 1, ix) - for c in range(len(nb_fdir)): - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(nb_fdir[c]) - if w1 > 0.0 and dy1 == 1: - tc = c + dx1 - if 0 <= tc < tile_w: - seed_top[tc] += nb_accum[c] * w1 - if w2 > 0.0 and dy2 == 1: - tc = c + dx2 - if 0 <= tc < tile_w: - seed_top[tc] += nb_accum[c] * w2 - - # --- Bottom edge: top row of tile below flows north (dy=-1) --- - if iy < n_tile_y - 1: - nb_fdir = flow_bdry.get('top', iy + 1, ix) - nb_accum = boundaries.get('top', iy + 1, ix) - for c in range(len(nb_fdir)): - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(nb_fdir[c]) - if w1 > 0.0 and dy1 == -1: - tc = c + dx1 - if 0 <= tc < tile_w: - seed_bottom[tc] += nb_accum[c] * w1 - if w2 > 0.0 and dy2 == -1: - tc = c + dx2 - if 0 <= tc < tile_w: - seed_bottom[tc] += nb_accum[c] * w2 - - # --- Left edge: right column of tile to the left flows east (dx=1) --- - if ix > 0: - nb_fdir = flow_bdry.get('right', iy, ix - 1) - nb_accum = boundaries.get('right', iy, ix - 1) - for r in range(len(nb_fdir)): - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(nb_fdir[r]) - if w1 > 0.0 and dx1 == 1: - tr = r + dy1 - if 0 <= tr < tile_h: - 
seed_left[tr] += nb_accum[r] * w1 - if w2 > 0.0 and dx2 == 1: - tr = r + dy2 - if 0 <= tr < tile_h: - seed_left[tr] += nb_accum[r] * w2 - - # --- Right edge: left column of tile to the right flows west (dx=-1) --- - if ix < n_tile_x - 1: - nb_fdir = flow_bdry.get('left', iy, ix + 1) - nb_accum = boundaries.get('left', iy, ix + 1) - for r in range(len(nb_fdir)): - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(nb_fdir[r]) - if w1 > 0.0 and dx1 == -1: - tr = r + dy1 - if 0 <= tr < tile_h: - seed_right[tr] += nb_accum[r] * w1 - if w2 > 0.0 and dx2 == -1: - tr = r + dy2 - if 0 <= tr < tile_h: - seed_right[tr] += nb_accum[r] * w2 - - # --- Diagonal corner seeds --- - # TL: bottom-right of (iy-1, ix-1) flows SE (dy=1, dx=1) - if iy > 0 and ix > 0: - fv = flow_bdry.get('bottom', iy - 1, ix - 1)[-1] - av = float(boundaries.get('bottom', iy - 1, ix - 1)[-1]) - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(fv) - if w1 > 0.0 and dy1 == 1 and dx1 == 1: - seed_tl += av * w1 - if w2 > 0.0 and dy2 == 1 and dx2 == 1: - seed_tl += av * w2 - - # TR: bottom-left of (iy-1, ix+1) flows SW (dy=1, dx=-1) - if iy > 0 and ix < n_tile_x - 1: - fv = flow_bdry.get('bottom', iy - 1, ix + 1)[0] - av = float(boundaries.get('bottom', iy - 1, ix + 1)[0]) - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(fv) - if w1 > 0.0 and dy1 == 1 and dx1 == -1: - seed_tr += av * w1 - if w2 > 0.0 and dy2 == 1 and dx2 == -1: - seed_tr += av * w2 - - # BL: top-right of (iy+1, ix-1) flows NE (dy=-1, dx=1) - if iy < n_tile_y - 1 and ix > 0: - fv = flow_bdry.get('top', iy + 1, ix - 1)[-1] - av = float(boundaries.get('top', iy + 1, ix - 1)[-1]) - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(fv) - if w1 > 0.0 and dy1 == -1 and dx1 == 1: - seed_bl += av * w1 - if w2 > 0.0 and dy2 == -1 and dx2 == 1: - seed_bl += av * w2 - - # BR: top-left of (iy+1, ix+1) flows NW (dy=-1, dx=-1) - if iy < n_tile_y - 1 and ix < n_tile_x - 1: - fv = flow_bdry.get('top', iy + 1, ix + 1)[0] - av = float(boundaries.get('top', iy + 
1, ix + 1)[0]) - dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(fv) - if w1 > 0.0 and dy1 == -1 and dx1 == -1: - seed_br += av * w1 - if w2 > 0.0 and dy2 == -1 and dx2 == -1: - seed_br += av * w2 - - return (seed_top, seed_bottom, seed_left, seed_right, - seed_tl, seed_tr, seed_bl, seed_br) - - -def _process_tile_dinf(iy, ix, flow_dir_da, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x): - """Run seeded Dinf BFS on one tile; update boundaries in-place.""" - chunk = np.asarray( - flow_dir_da.blocks[iy, ix].compute(), dtype=np.float64) - h, w = chunk.shape - - seeds = _compute_seeds_dinf( - iy, ix, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x) - - accum = _flow_accum_dinf_tile_kernel(chunk, h, w, *seeds) - - new_top = accum[0, :].copy() - new_bottom = accum[-1, :].copy() - new_left = accum[:, 0].copy() - new_right = accum[:, -1].copy() - - change = 0.0 - for side, new in (('top', new_top), ('bottom', new_bottom), - ('left', new_left), ('right', new_right)): - old = boundaries.get(side, iy, ix) - with np.errstate(invalid='ignore'): - diff = np.abs(new - old) - diff = np.where(np.isnan(diff), 0.0, diff) - m = float(np.max(diff)) - if m > change: - change = m - - boundaries.set('top', iy, ix, new_top) - boundaries.set('bottom', iy, ix, new_bottom) - boundaries.set('left', iy, ix, new_left) - boundaries.set('right', iy, ix, new_right) - - return change - - -def _flow_accum_dinf_dask_iterative(flow_dir_da): - """Iterative boundary-propagation for Dinf dask arrays.""" - chunks_y = flow_dir_da.chunks[0] - chunks_x = flow_dir_da.chunks[1] - n_tile_y = len(chunks_y) - n_tile_x = len(chunks_x) - - flow_bdry = _preprocess_tiles(flow_dir_da, chunks_y, chunks_x) - flow_bdry = flow_bdry.snapshot() - - boundaries = BoundaryStore(chunks_y, chunks_x, fill_value=0.0) - - max_iterations = max(n_tile_y, n_tile_x) + 10 - - for _iteration in range(max_iterations): - max_change = 0.0 - - for iy in range(n_tile_y): - for ix in range(n_tile_x): - c = 
_process_tile_dinf(iy, ix, flow_dir_da, boundaries, - flow_bdry, chunks_y, chunks_x, - n_tile_y, n_tile_x) - if c > max_change: - max_change = c - - for iy in reversed(range(n_tile_y)): - for ix in reversed(range(n_tile_x)): - c = _process_tile_dinf(iy, ix, flow_dir_da, boundaries, - flow_bdry, chunks_y, chunks_x, - n_tile_y, n_tile_x) - if c > max_change: - max_change = c - - if max_change == 0.0: - break - - boundaries = boundaries.snapshot() - - return _assemble_result_dinf(flow_dir_da, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x) - - -def _assemble_result_dinf(flow_dir_da, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x): - """Build lazy dask array by re-running each Dinf tile with converged seeds.""" - - def _tile_fn(flow_dir_block, block_info=None): - if block_info is None or 0 not in block_info: - return np.full(flow_dir_block.shape, np.nan, dtype=np.float64) - iy, ix = block_info[0]['chunk-location'] - h, w = flow_dir_block.shape - seeds = _compute_seeds_dinf( - iy, ix, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x) - return _flow_accum_dinf_tile_kernel( - np.asarray(flow_dir_block, dtype=np.float64), h, w, *seeds) - - return da.map_blocks( - _tile_fn, - flow_dir_da, - dtype=np.float64, - meta=np.array((), dtype=np.float64), - ) - - -def _process_tile_dinf_cupy(iy, ix, flow_dir_da, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x): - """Run seeded GPU Dinf flow accumulation on one tile.""" - import cupy as cp - - chunk = cp.asarray( - flow_dir_da.blocks[iy, ix].compute(), dtype=cp.float64) - - seeds = _compute_seeds_dinf( - iy, ix, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x) - - accum = _flow_accum_dinf_tile_cupy(chunk, *seeds) - - new_top = accum[0, :].get().copy() - new_bottom = accum[-1, :].get().copy() - new_left = accum[:, 0].get().copy() - new_right = accum[:, -1].get().copy() - - change = 0.0 - for side, new in (('top', new_top), ('bottom', new_bottom), - 
('left', new_left), ('right', new_right)): - old = boundaries.get(side, iy, ix) - with np.errstate(invalid='ignore'): - diff = np.abs(new - old) - diff = np.where(np.isnan(diff), 0.0, diff) - m = float(np.max(diff)) - if m > change: - change = m - - boundaries.set('top', iy, ix, new_top) - boundaries.set('bottom', iy, ix, new_bottom) - boundaries.set('left', iy, ix, new_left) - boundaries.set('right', iy, ix, new_right) - - return change - - -def _assemble_result_dinf_cupy(flow_dir_da, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x): - """Build lazy dask+cupy array using GPU Dinf tile kernel.""" - import cupy as cp - - def _tile_fn(flow_dir_block, block_info=None): - if block_info is None or 0 not in block_info: - return cp.full(flow_dir_block.shape, cp.nan, dtype=cp.float64) - iy, ix = block_info[0]['chunk-location'] - seeds = _compute_seeds_dinf( - iy, ix, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x) - return _flow_accum_dinf_tile_cupy( - cp.asarray(flow_dir_block, dtype=cp.float64), *seeds) - - return da.map_blocks( - _tile_fn, - flow_dir_da, - dtype=np.float64, - meta=cp.array((), dtype=cp.float64), - ) - - -def _flow_accum_dinf_dask_cupy(flow_dir_da): - """Dask+CuPy Dinf: native GPU processing per tile.""" - chunks_y = flow_dir_da.chunks[0] - chunks_x = flow_dir_da.chunks[1] - n_tile_y = len(chunks_y) - n_tile_x = len(chunks_x) - - flow_bdry = _preprocess_tiles(flow_dir_da, chunks_y, chunks_x) - flow_bdry = flow_bdry.snapshot() - - boundaries = BoundaryStore(chunks_y, chunks_x, fill_value=0.0) - - max_iterations = max(n_tile_y, n_tile_x) + 10 - - for _iteration in range(max_iterations): - max_change = 0.0 - - for iy in range(n_tile_y): - for ix in range(n_tile_x): - c = _process_tile_dinf_cupy( - iy, ix, flow_dir_da, boundaries, - flow_bdry, chunks_y, chunks_x, - n_tile_y, n_tile_x) - if c > max_change: - max_change = c - - for iy in reversed(range(n_tile_y)): - for ix in reversed(range(n_tile_x)): - c = 
_process_tile_dinf_cupy( - iy, ix, flow_dir_da, boundaries, - flow_bdry, chunks_y, chunks_x, - n_tile_y, n_tile_x) - if c > max_change: - max_change = c - - if max_change == 0.0: - break - - boundaries = boundaries.snapshot() - - return _assemble_result_dinf_cupy(flow_dir_da, boundaries, flow_bdry, - chunks_y, chunks_x, n_tile_y, n_tile_x) - - # ===================================================================== # Public API # ===================================================================== @@ -1660,26 +883,17 @@ def _flow_accum_dinf_dask_cupy(flow_dir_da): @supports_dataset def flow_accumulation(flow_dir: xr.DataArray, name: str = 'flow_accumulation') -> xr.DataArray: - """Compute flow accumulation from a flow direction grid. + """Compute flow accumulation from a D8 flow direction grid. - Supports both D8 (integer codes) and D-infinity (continuous angles) - flow direction grids, auto-detected from input values. - - For D8 input, each cell drains to exactly one downstream neighbor. - For D-infinity, flow is split proportionally between two neighbors - following Tarboton (1997). + Each cell drains to exactly one downstream neighbor based on + integer D8 direction codes. For D-infinity (continuous angle) + grids, use ``flow_accumulation_dinf`` instead. Parameters ---------- flow_dir : xarray.DataArray or xr.Dataset - 2D flow direction grid. Accepts: - - - D8 codes: 0/1/2/4/8/16/32/64/128 (NaN for nodata) - - D-infinity angles: continuous radians in ``[0, 2*pi)``, - ``-1.0`` for pits, NaN for nodata (as produced by - ``flow_direction_dinf``) - - The type is auto-detected from values. + 2D flow direction grid with D8 codes: + 0/1/2/4/8/16/32/64/128 (NaN for nodata). Supported backends: NumPy, CuPy, NumPy-backed Dask, CuPy-backed Dask. If a Dataset is passed, the operation is applied to each @@ -1700,38 +914,21 @@ def flow_accumulation(flow_dir: xr.DataArray, Structure from Digital Elevation Data for Geographic Information System Analysis. 
Photogrammetric Engineering and Remote Sensing, 54(11), 1593-1600. - - Tarboton, D.G. (1997). A new method for the determination of flow - directions and upslope areas in grid digital elevation models. - Water Resources Research, 33(2), 309-319. """ _validate_raster(flow_dir, func_name='flow_accumulation', name='flow_dir') data = flow_dir.data - flow_type = _detect_flow_type(data) - - if flow_type == "dinf": - if isinstance(data, np.ndarray): - out = _flow_accum_dinf_cpu(data.astype(np.float64), *data.shape) - elif has_cuda_and_cupy() and is_cupy_array(data): - out = _flow_accum_dinf_cupy(data) - elif has_cuda_and_cupy() and is_dask_cupy(flow_dir): - out = _flow_accum_dinf_dask_cupy(data) - elif da is not None and isinstance(data, da.Array): - out = _flow_accum_dinf_dask_iterative(data) - else: - raise TypeError(f"Unsupported array type: {type(data)}") + + if isinstance(data, np.ndarray): + out = _flow_accum_cpu(data.astype(np.float64), *data.shape) + elif has_cuda_and_cupy() and is_cupy_array(data): + out = _flow_accum_cupy(data) + elif has_cuda_and_cupy() and is_dask_cupy(flow_dir): + out = _flow_accum_dask_cupy(data) + elif da is not None and isinstance(data, da.Array): + out = _flow_accum_dask_iterative(data) else: - if isinstance(data, np.ndarray): - out = _flow_accum_cpu(data.astype(np.float64), *data.shape) - elif has_cuda_and_cupy() and is_cupy_array(data): - out = _flow_accum_cupy(data) - elif has_cuda_and_cupy() and is_dask_cupy(flow_dir): - out = _flow_accum_dask_cupy(data) - elif da is not None and isinstance(data, da.Array): - out = _flow_accum_dask_iterative(data) - else: - raise TypeError(f"Unsupported array type: {type(data)}") + raise TypeError(f"Unsupported array type: {type(data)}") return xr.DataArray(out, name=name, diff --git a/xrspatial/flow_accumulation_dinf.py b/xrspatial/flow_accumulation_dinf.py new file mode 100644 index 00000000..9b45dcc1 --- /dev/null +++ b/xrspatial/flow_accumulation_dinf.py @@ -0,0 +1,902 @@ +"""Flow accumulation for 
D-infinity (continuous angle) flow direction grids. + +Takes the continuous-angle output from ``flow_direction_dinf`` and +accumulates upstream area, splitting flow proportionally between two +neighbors following Tarboton (1997). + +Algorithm +--------- +CPU : Kahn's BFS topological sort -- O(N). +GPU : iterative frontier peeling with pull-based kernels. +Dask: iterative tile sweep with boundary propagation (one tile in + RAM at a time), following the ``flow_accumulation.py`` pattern. +""" + +from __future__ import annotations + +import numpy as np +import xarray as xr +from numba import cuda + +try: + import cupy +except ImportError: + class cupy: # type: ignore[no-redef] + ndarray = False + +try: + import dask.array as da +except ImportError: + da = None + +from xrspatial.utils import ( + _validate_raster, + cuda_args, + has_cuda_and_cupy, + is_cupy_array, + is_dask_cupy, + ngjit, +) +from xrspatial._boundary_store import BoundaryStore +from xrspatial.dataset_support import supports_dataset +from xrspatial.flow_accumulation import ( + _find_ready_and_finalize, + _preprocess_tiles, +) + + +def _to_numpy_f64(arr): + """Convert *arr* to a contiguous numpy float64 array. + + Handles CuPy arrays transparently via ``.get()``. + """ + if hasattr(arr, 'get'): + arr = arr.get() + return np.asarray(arr, dtype=np.float64) + + +# ===================================================================== +# Dinf helpers +# ===================================================================== + +@ngjit +def _angle_to_neighbors(angle): + """Decompose Dinf angle into two neighbors and weights. + + Returns (dy1, dx1, w1, dy2, dx2, w2). + Pit (angle < 0) or NaN returns all zeros. 
+ """ + if angle < 0.0 or angle != angle: # pit or NaN + return 0, 0, 0.0, 0, 0, 0.0 + + pi_over_4 = 0.7853981633974483 # np.pi / 4 + k = int(angle / pi_over_4) + if k > 7: + k = 7 + alpha = angle - k * pi_over_4 + w1 = 1.0 - alpha / pi_over_4 + w2 = alpha / pi_over_4 + + if k == 0: + dy1, dx1 = 0, 1 # E + dy2, dx2 = -1, 1 # NE + elif k == 1: + dy1, dx1 = -1, 1 # NE + dy2, dx2 = -1, 0 # N + elif k == 2: + dy1, dx1 = -1, 0 # N + dy2, dx2 = -1, -1 # NW + elif k == 3: + dy1, dx1 = -1, -1 # NW + dy2, dx2 = 0, -1 # W + elif k == 4: + dy1, dx1 = 0, -1 # W + dy2, dx2 = 1, -1 # SW + elif k == 5: + dy1, dx1 = 1, -1 # SW + dy2, dx2 = 1, 0 # S + elif k == 6: + dy1, dx1 = 1, 0 # S + dy2, dx2 = 1, 1 # SE + else: # k == 7 + dy1, dx1 = 1, 1 # SE + dy2, dx2 = 0, 1 # E + + return dy1, dx1, w1, dy2, dx2, w2 + + +# ===================================================================== +# CPU kernel +# ===================================================================== + +@ngjit +def _flow_accum_dinf_cpu(flow_dir, height, width): + """Kahn's BFS topological sort for Dinf flow accumulation.""" + accum = np.empty((height, width), dtype=np.float64) + in_degree = np.zeros((height, width), dtype=np.int32) + valid = np.zeros((height, width), dtype=np.int8) + + for r in range(height): + for c in range(width): + v = flow_dir[r, c] + if v == v: # not NaN + valid[r, c] = 1 + accum[r, c] = 1.0 + else: + accum[r, c] = np.nan + + for r in range(height): + for c in range(width): + if valid[r, c] == 0: + continue + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(flow_dir[r, c]) + if w1 > 0.0: + nr, nc = r + dy1, c + dx1 + if 0 <= nr < height and 0 <= nc < width and valid[nr, nc] == 1: + in_degree[nr, nc] += 1 + if w2 > 0.0: + nr, nc = r + dy2, c + dx2 + if 0 <= nr < height and 0 <= nc < width and valid[nr, nc] == 1: + in_degree[nr, nc] += 1 + + queue_r = np.empty(height * width, dtype=np.int64) + queue_c = np.empty(height * width, dtype=np.int64) + head = np.int64(0) + tail = np.int64(0) + + for r 
in range(height): + for c in range(width): + if valid[r, c] == 1 and in_degree[r, c] == 0: + queue_r[tail] = r + queue_c[tail] = c + tail += 1 + + while head < tail: + r = queue_r[head] + c = queue_c[head] + head += 1 + + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(flow_dir[r, c]) + + if w1 > 0.0: + nr, nc = r + dy1, c + dx1 + if 0 <= nr < height and 0 <= nc < width and valid[nr, nc] == 1: + accum[nr, nc] += accum[r, c] * w1 + in_degree[nr, nc] -= 1 + if in_degree[nr, nc] == 0: + queue_r[tail] = nr + queue_c[tail] = nc + tail += 1 + + if w2 > 0.0: + nr, nc = r + dy2, c + dx2 + if 0 <= nr < height and 0 <= nc < width and valid[nr, nc] == 1: + accum[nr, nc] += accum[r, c] * w2 + in_degree[nr, nc] -= 1 + if in_degree[nr, nc] == 0: + queue_r[tail] = nr + queue_c[tail] = nc + tail += 1 + + return accum + + +# ===================================================================== +# GPU kernels +# ===================================================================== + +@cuda.jit +def _init_accum_indegree_dinf(flow_dir, accum, in_degree, state, H, W): + """Initialise accum/in_degree/state for Dinf on GPU.""" + i, j = cuda.grid(2) + if i >= H or j >= W: + return + + v = flow_dir[i, j] + if v != v: # NaN + state[i, j] = 0 + accum[i, j] = 0.0 + return + + state[i, j] = 1 + accum[i, j] = 1.0 + + if v < 0.0: # pit + return + + pi_over_4 = 0.7853981633974483 + k = int(v / pi_over_4) + if k > 7: + k = 7 + alpha = v - k * pi_over_4 + w1 = 1.0 - alpha / pi_over_4 + w2 = alpha / pi_over_4 + + # Facet offsets (inline) + if k == 0: + dy1, dx1 = 0, 1 + dy2, dx2 = -1, 1 + elif k == 1: + dy1, dx1 = -1, 1 + dy2, dx2 = -1, 0 + elif k == 2: + dy1, dx1 = -1, 0 + dy2, dx2 = -1, -1 + elif k == 3: + dy1, dx1 = -1, -1 + dy2, dx2 = 0, -1 + elif k == 4: + dy1, dx1 = 0, -1 + dy2, dx2 = 1, -1 + elif k == 5: + dy1, dx1 = 1, -1 + dy2, dx2 = 1, 0 + elif k == 6: + dy1, dx1 = 1, 0 + dy2, dx2 = 1, 1 + else: + dy1, dx1 = 1, 1 + dy2, dx2 = 0, 1 + + if w1 > 0.0: + ni, nj = i + dy1, j + dx1 + if 0 <= ni 
< H and 0 <= nj < W: + cuda.atomic.add(in_degree, (ni, nj), 1) + if w2 > 0.0: + ni, nj = i + dy2, j + dx2 + if 0 <= ni < H and 0 <= nj < W: + cuda.atomic.add(in_degree, (ni, nj), 1) + + +@cuda.jit +def _pull_from_frontier_dinf(flow_dir, accum, in_degree, state, H, W): + """Active Dinf cells pull accumulation from frontier neighbours.""" + i, j = cuda.grid(2) + if i >= H or j >= W: + return + + if state[i, j] != 1: + return + + pi_over_4 = 0.7853981633974483 + + for nbr in range(8): + if nbr == 0: + dy, dx = 0, 1 + elif nbr == 1: + dy, dx = 1, 1 + elif nbr == 2: + dy, dx = 1, 0 + elif nbr == 3: + dy, dx = 1, -1 + elif nbr == 4: + dy, dx = 0, -1 + elif nbr == 5: + dy, dx = -1, -1 + elif nbr == 6: + dy, dx = -1, 0 + else: + dy, dx = -1, 1 + + ni = i + dy + nj = j + dx + if ni < 0 or ni >= H or nj < 0 or nj >= W: + continue + if state[ni, nj] != 2: + continue + + nv = flow_dir[ni, nj] + if nv < 0.0 or nv != nv: + continue + + nk = int(nv / pi_over_4) + if nk > 7: + nk = 7 + nalpha = nv - nk * pi_over_4 + nw1 = 1.0 - nalpha / pi_over_4 + nw2 = nalpha / pi_over_4 + + # Inline facet offsets for neighbor's direction + if nk == 0: + ndy1, ndx1 = 0, 1 + ndy2, ndx2 = -1, 1 + elif nk == 1: + ndy1, ndx1 = -1, 1 + ndy2, ndx2 = -1, 0 + elif nk == 2: + ndy1, ndx1 = -1, 0 + ndy2, ndx2 = -1, -1 + elif nk == 3: + ndy1, ndx1 = -1, -1 + ndy2, ndx2 = 0, -1 + elif nk == 4: + ndy1, ndx1 = 0, -1 + ndy2, ndx2 = 1, -1 + elif nk == 5: + ndy1, ndx1 = 1, -1 + ndy2, ndx2 = 1, 0 + elif nk == 6: + ndy1, ndx1 = 1, 0 + ndy2, ndx2 = 1, 1 + else: + ndy1, ndx1 = 1, 1 + ndy2, ndx2 = 0, 1 + + if nw1 > 0.0 and ni + ndy1 == i and nj + ndx1 == j: + accum[i, j] += accum[ni, nj] * nw1 + in_degree[i, j] -= 1 + if nw2 > 0.0 and ni + ndy2 == i and nj + ndx2 == j: + accum[i, j] += accum[ni, nj] * nw2 + in_degree[i, j] -= 1 + + +def _flow_accum_dinf_cupy(flow_dir_data): + """GPU driver: iterative frontier peeling for Dinf.""" + import cupy as cp + + H, W = flow_dir_data.shape + flow_dir_f64 = 
flow_dir_data.astype(cp.float64) + + accum = cp.zeros((H, W), dtype=cp.float64) + in_degree = cp.zeros((H, W), dtype=cp.int32) + state = cp.zeros((H, W), dtype=cp.int32) + changed = cp.zeros(1, dtype=cp.int32) + + griddim, blockdim = cuda_args((H, W)) + + _init_accum_indegree_dinf[griddim, blockdim]( + flow_dir_f64, accum, in_degree, state, H, W) + + max_iter = H * W + for _ in range(max_iter): + changed[0] = 0 + _find_ready_and_finalize[griddim, blockdim]( + in_degree, state, changed, H, W) + + if int(changed[0]) == 0: + break + + _pull_from_frontier_dinf[griddim, blockdim]( + flow_dir_f64, accum, in_degree, state, H, W) + + accum = cp.where(state == 0, cp.nan, accum) + return accum + + +def _flow_accum_dinf_tile_cupy(flow_dir_data, + seed_top, seed_bottom, seed_left, seed_right, + seed_tl, seed_tr, seed_bl, seed_br): + """GPU seeded Dinf flow accumulation for a single tile.""" + import cupy as cp + + H, W = flow_dir_data.shape + flow_dir_f64 = flow_dir_data.astype(cp.float64) + + accum = cp.zeros((H, W), dtype=cp.float64) + in_degree = cp.zeros((H, W), dtype=cp.int32) + state = cp.zeros((H, W), dtype=cp.int32) + changed = cp.zeros(1, dtype=cp.int32) + + griddim, blockdim = cuda_args((H, W)) + + _init_accum_indegree_dinf[griddim, blockdim]( + flow_dir_f64, accum, in_degree, state, H, W) + + accum[0, :] += cp.asarray(seed_top) + accum[H - 1, :] += cp.asarray(seed_bottom) + accum[:, 0] += cp.asarray(seed_left) + accum[:, W - 1] += cp.asarray(seed_right) + accum[0, 0] += seed_tl + accum[0, W - 1] += seed_tr + accum[H - 1, 0] += seed_bl + accum[H - 1, W - 1] += seed_br + + max_iter = H * W + for _ in range(max_iter): + changed[0] = 0 + _find_ready_and_finalize[griddim, blockdim]( + in_degree, state, changed, H, W) + + if int(changed[0]) == 0: + break + + _pull_from_frontier_dinf[griddim, blockdim]( + flow_dir_f64, accum, in_degree, state, H, W) + + accum = cp.where(state == 0, cp.nan, accum) + return accum + + +# 
===================================================================== +# Dinf tile kernel for dask +# ===================================================================== + +@ngjit +def _flow_accum_dinf_tile_kernel(flow_dir, h, w, + seed_top, seed_bottom, + seed_left, seed_right, + seed_tl, seed_tr, seed_bl, seed_br): + """Seeded BFS Dinf flow accumulation for a single tile.""" + accum = np.empty((h, w), dtype=np.float64) + in_degree = np.zeros((h, w), dtype=np.int32) + valid = np.zeros((h, w), dtype=np.int8) + + for r in range(h): + for c in range(w): + v = flow_dir[r, c] + if v == v: + valid[r, c] = 1 + accum[r, c] = 1.0 + else: + accum[r, c] = np.nan + + # Add external seeds + for c in range(w): + if valid[0, c] == 1: + accum[0, c] += seed_top[c] + if valid[h - 1, c] == 1: + accum[h - 1, c] += seed_bottom[c] + for r in range(h): + if valid[r, 0] == 1: + accum[r, 0] += seed_left[r] + if valid[r, w - 1] == 1: + accum[r, w - 1] += seed_right[r] + if valid[0, 0] == 1: + accum[0, 0] += seed_tl + if valid[0, w - 1] == 1: + accum[0, w - 1] += seed_tr + if valid[h - 1, 0] == 1: + accum[h - 1, 0] += seed_bl + if valid[h - 1, w - 1] == 1: + accum[h - 1, w - 1] += seed_br + + # In-degrees via angle decomposition + for r in range(h): + for c in range(w): + if valid[r, c] == 0: + continue + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(flow_dir[r, c]) + if w1 > 0.0: + nr, nc = r + dy1, c + dx1 + if 0 <= nr < h and 0 <= nc < w and valid[nr, nc] == 1: + in_degree[nr, nc] += 1 + if w2 > 0.0: + nr, nc = r + dy2, c + dx2 + if 0 <= nr < h and 0 <= nc < w and valid[nr, nc] == 1: + in_degree[nr, nc] += 1 + + queue_r = np.empty(h * w, dtype=np.int64) + queue_c = np.empty(h * w, dtype=np.int64) + head = np.int64(0) + tail = np.int64(0) + + for r in range(h): + for c in range(w): + if valid[r, c] == 1 and in_degree[r, c] == 0: + queue_r[tail] = r + queue_c[tail] = c + tail += 1 + + while head < tail: + r = queue_r[head] + c = queue_c[head] + head += 1 + + dy1, dx1, w1, dy2, dx2, w2 
= _angle_to_neighbors(flow_dir[r, c]) + + if w1 > 0.0: + nr, nc = r + dy1, c + dx1 + if 0 <= nr < h and 0 <= nc < w and valid[nr, nc] == 1: + accum[nr, nc] += accum[r, c] * w1 + in_degree[nr, nc] -= 1 + if in_degree[nr, nc] == 0: + queue_r[tail] = nr + queue_c[tail] = nc + tail += 1 + + if w2 > 0.0: + nr, nc = r + dy2, c + dx2 + if 0 <= nr < h and 0 <= nc < w and valid[nr, nc] == 1: + accum[nr, nc] += accum[r, c] * w2 + in_degree[nr, nc] -= 1 + if in_degree[nr, nc] == 0: + queue_r[tail] = nr + queue_c[tail] = nc + tail += 1 + + return accum + + +# ===================================================================== +# Dinf dask iterative tile sweep +# ===================================================================== + +def _compute_seeds_dinf(iy, ix, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x): + """Compute seed arrays for tile (iy, ix) using Dinf angle decomposition.""" + tile_h = chunks_y[iy] + tile_w = chunks_x[ix] + + seed_top = np.zeros(tile_w, dtype=np.float64) + seed_bottom = np.zeros(tile_w, dtype=np.float64) + seed_left = np.zeros(tile_h, dtype=np.float64) + seed_right = np.zeros(tile_h, dtype=np.float64) + seed_tl = 0.0 + seed_tr = 0.0 + seed_bl = 0.0 + seed_br = 0.0 + + # --- Top edge: bottom row of tile above flows south (dy=1) --- + if iy > 0: + nb_fdir = flow_bdry.get('bottom', iy - 1, ix) + nb_accum = boundaries.get('bottom', iy - 1, ix) + for c in range(len(nb_fdir)): + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(nb_fdir[c]) + if w1 > 0.0 and dy1 == 1: + tc = c + dx1 + if 0 <= tc < tile_w: + seed_top[tc] += nb_accum[c] * w1 + if w2 > 0.0 and dy2 == 1: + tc = c + dx2 + if 0 <= tc < tile_w: + seed_top[tc] += nb_accum[c] * w2 + + # --- Bottom edge: top row of tile below flows north (dy=-1) --- + if iy < n_tile_y - 1: + nb_fdir = flow_bdry.get('top', iy + 1, ix) + nb_accum = boundaries.get('top', iy + 1, ix) + for c in range(len(nb_fdir)): + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(nb_fdir[c]) + if w1 > 0.0 and dy1 
== -1: + tc = c + dx1 + if 0 <= tc < tile_w: + seed_bottom[tc] += nb_accum[c] * w1 + if w2 > 0.0 and dy2 == -1: + tc = c + dx2 + if 0 <= tc < tile_w: + seed_bottom[tc] += nb_accum[c] * w2 + + # --- Left edge: right column of tile to the left flows east (dx=1) --- + if ix > 0: + nb_fdir = flow_bdry.get('right', iy, ix - 1) + nb_accum = boundaries.get('right', iy, ix - 1) + for r in range(len(nb_fdir)): + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(nb_fdir[r]) + if w1 > 0.0 and dx1 == 1: + tr = r + dy1 + if 0 <= tr < tile_h: + seed_left[tr] += nb_accum[r] * w1 + if w2 > 0.0 and dx2 == 1: + tr = r + dy2 + if 0 <= tr < tile_h: + seed_left[tr] += nb_accum[r] * w2 + + # --- Right edge: left column of tile to the right flows west (dx=-1) --- + if ix < n_tile_x - 1: + nb_fdir = flow_bdry.get('left', iy, ix + 1) + nb_accum = boundaries.get('left', iy, ix + 1) + for r in range(len(nb_fdir)): + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(nb_fdir[r]) + if w1 > 0.0 and dx1 == -1: + tr = r + dy1 + if 0 <= tr < tile_h: + seed_right[tr] += nb_accum[r] * w1 + if w2 > 0.0 and dx2 == -1: + tr = r + dy2 + if 0 <= tr < tile_h: + seed_right[tr] += nb_accum[r] * w2 + + # --- Diagonal corner seeds --- + # TL: bottom-right of (iy-1, ix-1) flows SE (dy=1, dx=1) + if iy > 0 and ix > 0: + fv = flow_bdry.get('bottom', iy - 1, ix - 1)[-1] + av = float(boundaries.get('bottom', iy - 1, ix - 1)[-1]) + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(fv) + if w1 > 0.0 and dy1 == 1 and dx1 == 1: + seed_tl += av * w1 + if w2 > 0.0 and dy2 == 1 and dx2 == 1: + seed_tl += av * w2 + + # TR: bottom-left of (iy-1, ix+1) flows SW (dy=1, dx=-1) + if iy > 0 and ix < n_tile_x - 1: + fv = flow_bdry.get('bottom', iy - 1, ix + 1)[0] + av = float(boundaries.get('bottom', iy - 1, ix + 1)[0]) + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(fv) + if w1 > 0.0 and dy1 == 1 and dx1 == -1: + seed_tr += av * w1 + if w2 > 0.0 and dy2 == 1 and dx2 == -1: + seed_tr += av * w2 + + # BL: top-right of (iy+1, ix-1) 
flows NE (dy=-1, dx=1) + if iy < n_tile_y - 1 and ix > 0: + fv = flow_bdry.get('top', iy + 1, ix - 1)[-1] + av = float(boundaries.get('top', iy + 1, ix - 1)[-1]) + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(fv) + if w1 > 0.0 and dy1 == -1 and dx1 == 1: + seed_bl += av * w1 + if w2 > 0.0 and dy2 == -1 and dx2 == 1: + seed_bl += av * w2 + + # BR: top-left of (iy+1, ix+1) flows NW (dy=-1, dx=-1) + if iy < n_tile_y - 1 and ix < n_tile_x - 1: + fv = flow_bdry.get('top', iy + 1, ix + 1)[0] + av = float(boundaries.get('top', iy + 1, ix + 1)[0]) + dy1, dx1, w1, dy2, dx2, w2 = _angle_to_neighbors(fv) + if w1 > 0.0 and dy1 == -1 and dx1 == -1: + seed_br += av * w1 + if w2 > 0.0 and dy2 == -1 and dx2 == -1: + seed_br += av * w2 + + return (seed_top, seed_bottom, seed_left, seed_right, + seed_tl, seed_tr, seed_bl, seed_br) + + +def _process_tile_dinf(iy, ix, flow_dir_da, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x): + """Run seeded Dinf BFS on one tile; update boundaries in-place.""" + chunk = np.asarray( + flow_dir_da.blocks[iy, ix].compute(), dtype=np.float64) + h, w = chunk.shape + + seeds = _compute_seeds_dinf( + iy, ix, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x) + + accum = _flow_accum_dinf_tile_kernel(chunk, h, w, *seeds) + + new_top = accum[0, :].copy() + new_bottom = accum[-1, :].copy() + new_left = accum[:, 0].copy() + new_right = accum[:, -1].copy() + + change = 0.0 + for side, new in (('top', new_top), ('bottom', new_bottom), + ('left', new_left), ('right', new_right)): + old = boundaries.get(side, iy, ix) + with np.errstate(invalid='ignore'): + diff = np.abs(new - old) + diff = np.where(np.isnan(diff), 0.0, diff) + m = float(np.max(diff)) + if m > change: + change = m + + boundaries.set('top', iy, ix, new_top) + boundaries.set('bottom', iy, ix, new_bottom) + boundaries.set('left', iy, ix, new_left) + boundaries.set('right', iy, ix, new_right) + + return change + + +def _flow_accum_dinf_dask_iterative(flow_dir_da): + 
"""Iterative boundary-propagation for Dinf dask arrays.""" + chunks_y = flow_dir_da.chunks[0] + chunks_x = flow_dir_da.chunks[1] + n_tile_y = len(chunks_y) + n_tile_x = len(chunks_x) + + flow_bdry = _preprocess_tiles(flow_dir_da, chunks_y, chunks_x) + flow_bdry = flow_bdry.snapshot() + + boundaries = BoundaryStore(chunks_y, chunks_x, fill_value=0.0) + + max_iterations = max(n_tile_y, n_tile_x) + 10 + + for _iteration in range(max_iterations): + max_change = 0.0 + + for iy in range(n_tile_y): + for ix in range(n_tile_x): + c = _process_tile_dinf(iy, ix, flow_dir_da, boundaries, + flow_bdry, chunks_y, chunks_x, + n_tile_y, n_tile_x) + if c > max_change: + max_change = c + + for iy in reversed(range(n_tile_y)): + for ix in reversed(range(n_tile_x)): + c = _process_tile_dinf(iy, ix, flow_dir_da, boundaries, + flow_bdry, chunks_y, chunks_x, + n_tile_y, n_tile_x) + if c > max_change: + max_change = c + + if max_change == 0.0: + break + + boundaries = boundaries.snapshot() + + return _assemble_result_dinf(flow_dir_da, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x) + + +def _assemble_result_dinf(flow_dir_da, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x): + """Build lazy dask array by re-running each Dinf tile with converged seeds.""" + + def _tile_fn(flow_dir_block, block_info=None): + if block_info is None or 0 not in block_info: + return np.full(flow_dir_block.shape, np.nan, dtype=np.float64) + iy, ix = block_info[0]['chunk-location'] + h, w = flow_dir_block.shape + seeds = _compute_seeds_dinf( + iy, ix, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x) + return _flow_accum_dinf_tile_kernel( + np.asarray(flow_dir_block, dtype=np.float64), h, w, *seeds) + + return da.map_blocks( + _tile_fn, + flow_dir_da, + dtype=np.float64, + meta=np.array((), dtype=np.float64), + ) + + +def _process_tile_dinf_cupy(iy, ix, flow_dir_da, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x): + """Run seeded GPU Dinf flow 
accumulation on one tile.""" + import cupy as cp + + chunk = cp.asarray( + flow_dir_da.blocks[iy, ix].compute(), dtype=cp.float64) + + seeds = _compute_seeds_dinf( + iy, ix, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x) + + accum = _flow_accum_dinf_tile_cupy(chunk, *seeds) + + new_top = accum[0, :].get().copy() + new_bottom = accum[-1, :].get().copy() + new_left = accum[:, 0].get().copy() + new_right = accum[:, -1].get().copy() + + change = 0.0 + for side, new in (('top', new_top), ('bottom', new_bottom), + ('left', new_left), ('right', new_right)): + old = boundaries.get(side, iy, ix) + with np.errstate(invalid='ignore'): + diff = np.abs(new - old) + diff = np.where(np.isnan(diff), 0.0, diff) + m = float(np.max(diff)) + if m > change: + change = m + + boundaries.set('top', iy, ix, new_top) + boundaries.set('bottom', iy, ix, new_bottom) + boundaries.set('left', iy, ix, new_left) + boundaries.set('right', iy, ix, new_right) + + return change + + +def _assemble_result_dinf_cupy(flow_dir_da, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x): + """Build lazy dask+cupy array using GPU Dinf tile kernel.""" + import cupy as cp + + def _tile_fn(flow_dir_block, block_info=None): + if block_info is None or 0 not in block_info: + return cp.full(flow_dir_block.shape, cp.nan, dtype=cp.float64) + iy, ix = block_info[0]['chunk-location'] + seeds = _compute_seeds_dinf( + iy, ix, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x) + return _flow_accum_dinf_tile_cupy( + cp.asarray(flow_dir_block, dtype=cp.float64), *seeds) + + return da.map_blocks( + _tile_fn, + flow_dir_da, + dtype=np.float64, + meta=cp.array((), dtype=cp.float64), + ) + + +def _flow_accum_dinf_dask_cupy(flow_dir_da): + """Dask+CuPy Dinf: native GPU processing per tile.""" + chunks_y = flow_dir_da.chunks[0] + chunks_x = flow_dir_da.chunks[1] + n_tile_y = len(chunks_y) + n_tile_x = len(chunks_x) + + flow_bdry = _preprocess_tiles(flow_dir_da, chunks_y, chunks_x) + 
flow_bdry = flow_bdry.snapshot() + + boundaries = BoundaryStore(chunks_y, chunks_x, fill_value=0.0) + + max_iterations = max(n_tile_y, n_tile_x) + 10 + + for _iteration in range(max_iterations): + max_change = 0.0 + + for iy in range(n_tile_y): + for ix in range(n_tile_x): + c = _process_tile_dinf_cupy( + iy, ix, flow_dir_da, boundaries, + flow_bdry, chunks_y, chunks_x, + n_tile_y, n_tile_x) + if c > max_change: + max_change = c + + for iy in reversed(range(n_tile_y)): + for ix in reversed(range(n_tile_x)): + c = _process_tile_dinf_cupy( + iy, ix, flow_dir_da, boundaries, + flow_bdry, chunks_y, chunks_x, + n_tile_y, n_tile_x) + if c > max_change: + max_change = c + + if max_change == 0.0: + break + + boundaries = boundaries.snapshot() + + return _assemble_result_dinf_cupy(flow_dir_da, boundaries, flow_bdry, + chunks_y, chunks_x, n_tile_y, n_tile_x) + + +# ===================================================================== +# Public API +# ===================================================================== + +@supports_dataset +def flow_accumulation_dinf(flow_dir: xr.DataArray, + name: str = 'flow_accumulation') -> xr.DataArray: + """Compute flow accumulation from a D-infinity flow direction grid. + + Takes a continuous-angle flow direction grid (as produced by + ``flow_direction_dinf``) and accumulates upstream contributing + area. Flow is split proportionally between two neighbors + following Tarboton (1997). + + Parameters + ---------- + flow_dir : xarray.DataArray or xr.Dataset + 2-D D-infinity flow direction grid. Values are continuous + angles in radians ``[0, 2*pi)``, with ``-1.0`` for pits and + NaN for nodata (as produced by ``flow_direction_dinf``). + Supported backends: NumPy, CuPy, NumPy-backed Dask, + CuPy-backed Dask. + If a Dataset is passed, the operation is applied to each + data variable independently. + name : str, default='flow_accumulation' + Name of output DataArray. 
+ + Returns + ------- + xarray.DataArray or xr.Dataset + 2-D float64 array of flow accumulation values. Each cell + holds the total upstream contributing area (including itself) + that drains through it, weighted by D-inf proportional + splitting. NaN where the input has NaN. + + References + ---------- + Tarboton, D.G. (1997). A new method for the determination of flow + directions and upslope areas in grid digital elevation models. + Water Resources Research, 33(2), 309-319. + """ + _validate_raster(flow_dir, func_name='flow_accumulation_dinf', + name='flow_dir') + + data = flow_dir.data + + if isinstance(data, np.ndarray): + out = _flow_accum_dinf_cpu(data.astype(np.float64), *data.shape) + elif has_cuda_and_cupy() and is_cupy_array(data): + out = _flow_accum_dinf_cupy(data) + elif has_cuda_and_cupy() and is_dask_cupy(flow_dir): + out = _flow_accum_dinf_dask_cupy(data) + elif da is not None and isinstance(data, da.Array): + out = _flow_accum_dinf_dask_iterative(data) + else: + raise TypeError(f"Unsupported array type: {type(data)}") + + return xr.DataArray(out, + name=name, + coords=flow_dir.coords, + dims=flow_dir.dims, + attrs=flow_dir.attrs) diff --git a/xrspatial/flow_direction_dinf.py b/xrspatial/flow_direction_dinf.py index 34987aa4..a1e69757 100644 --- a/xrspatial/flow_direction_dinf.py +++ b/xrspatial/flow_direction_dinf.py @@ -51,10 +51,14 @@ def _cpu(data, cellsize_x, cellsize_y): nb_dy = np.array([0, -1, -1, -1, 0, 1, 1, 1]) nb_dx = np.array([1, 1, 0, -1, -1, -1, 0, 1]) - # d1 for each facet (distance from center to e1) - d1 = np.array([cx, diag, cy, diag, cx, diag, cy, diag]) - # d2 for each facet (distance from e1 to e2) + # Tarboton facet decomposition: e1=cardinal, e2=diagonal + e1_idx = np.array([0, 2, 2, 4, 4, 6, 6, 0]) # cardinal neighbor + e2_idx = np.array([1, 1, 3, 3, 5, 5, 7, 7]) # diagonal neighbor + # d1: center->cardinal, d2: cardinal->diagonal (perpendicular) + d1 = np.array([cx, cy, cy, cx, cx, cy, cy, cx]) d2 = np.array([cy, cx, cx, 
cy, cy, cx, cx, cy]) + ac = np.array([0, 2, 2, 4, 4, 6, 6, 8]) # angle base (pi/4 units) + af = np.array([1, -1, 1, -1, 1, -1, 1, -1]) # angle sign for y in range(1, rows - 1): for x in range(1, cols - 1): @@ -76,9 +80,8 @@ def _cpu(data, cellsize_x, cellsize_y): best_angle = -1.0 for k in range(8): - k2 = (k + 1) % 8 - e1 = data[y + nb_dy[k], x + nb_dx[k]] - e2 = data[y + nb_dy[k2], x + nb_dx[k2]] + e1 = data[y + nb_dy[e1_idx[k]], x + nb_dx[e1_idx[k]]] + e2 = data[y + nb_dy[e2_idx[k]], x + nb_dx[e2_idx[k]]] s1 = (center - e1) / d1[k] s2 = (e1 - e2) / d2[k] @@ -89,13 +92,13 @@ def _cpu(data, cellsize_x, cellsize_y): s = s1 elif r > pi_over_4: r = pi_over_4 - s = (center - e2) / math.sqrt(d1[k] * d1[k] + d2[k] * d2[k]) + s = (center - e2) / diag else: s = math.sqrt(s1 * s1 + s2 * s2) if s > max_slope: max_slope = s - best_angle = k * pi_over_4 + r + best_angle = ac[k] * pi_over_4 + af[k] * r if max_slope <= 0.0: out[y, x] = -1.0 @@ -158,21 +161,21 @@ def _gpu(arr, cellsize_x, cellsize_y): max_slope = slope_val best_angle = r # 0 + r - # Facet 1: e1=NE, e2=N, d1=diag, d2=cx, start=pi/4 - s1 = (center - ne) / diag - s2 = (ne - n) / cx + # Facet 1: e1=N, e2=NE, d1=cy, d2=cx, angle=pi/2-r + s1 = (center - n) / cy + s2 = (n - ne) / cx r = math.atan2(s2, s1) if r < 0.0: r = 0.0 slope_val = s1 elif r > pi_over_4: r = pi_over_4 - slope_val = (center - n) / (diag * diag + cx * cx) ** 0.5 + slope_val = (center - ne) / diag else: slope_val = (s1 * s1 + s2 * s2) ** 0.5 if slope_val > max_slope: max_slope = slope_val - best_angle = pi_over_4 + r + best_angle = 2.0 * pi_over_4 - r # Facet 2: e1=N, e2=NW, d1=cy, d2=cx, start=pi/2 s1 = (center - n) / cy @@ -190,21 +193,21 @@ def _gpu(arr, cellsize_x, cellsize_y): max_slope = slope_val best_angle = 2.0 * pi_over_4 + r - # Facet 3: e1=NW, e2=W, d1=diag, d2=cy, start=3*pi/4 - s1 = (center - nw) / diag - s2 = (nw - w) / cy + # Facet 3: e1=W, e2=NW, d1=cx, d2=cy, angle=pi-r + s1 = (center - w) / cx + s2 = (w - nw) / cy r = 
math.atan2(s2, s1) if r < 0.0: r = 0.0 slope_val = s1 elif r > pi_over_4: r = pi_over_4 - slope_val = (center - w) / (diag * diag + cy * cy) ** 0.5 + slope_val = (center - nw) / diag else: slope_val = (s1 * s1 + s2 * s2) ** 0.5 if slope_val > max_slope: max_slope = slope_val - best_angle = 3.0 * pi_over_4 + r + best_angle = 4.0 * pi_over_4 - r # Facet 4: e1=W, e2=SW, d1=cx, d2=cy, start=pi s1 = (center - w) / cx @@ -222,21 +225,21 @@ def _gpu(arr, cellsize_x, cellsize_y): max_slope = slope_val best_angle = 4.0 * pi_over_4 + r - # Facet 5: e1=SW, e2=S, d1=diag, d2=cx, start=5*pi/4 - s1 = (center - sw) / diag - s2 = (sw - s) / cx + # Facet 5: e1=S, e2=SW, d1=cy, d2=cx, angle=3*pi/2-r + s1 = (center - s) / cy + s2 = (s - sw) / cx r = math.atan2(s2, s1) if r < 0.0: r = 0.0 slope_val = s1 elif r > pi_over_4: r = pi_over_4 - slope_val = (center - s) / (diag * diag + cx * cx) ** 0.5 + slope_val = (center - sw) / diag else: slope_val = (s1 * s1 + s2 * s2) ** 0.5 if slope_val > max_slope: max_slope = slope_val - best_angle = 5.0 * pi_over_4 + r + best_angle = 6.0 * pi_over_4 - r # Facet 6: e1=S, e2=SE, d1=cy, d2=cx, start=3*pi/2 s1 = (center - s) / cy @@ -254,21 +257,21 @@ def _gpu(arr, cellsize_x, cellsize_y): max_slope = slope_val best_angle = 6.0 * pi_over_4 + r - # Facet 7: e1=SE, e2=E, d1=diag, d2=cy, start=7*pi/4 - s1 = (center - se) / diag - s2 = (se - e) / cy + # Facet 7: e1=E, e2=SE, d1=cx, d2=cy, angle=2*pi-r + s1 = (center - e) / cx + s2 = (e - se) / cy r = math.atan2(s2, s1) if r < 0.0: r = 0.0 slope_val = s1 elif r > pi_over_4: r = pi_over_4 - slope_val = (center - e) / (diag * diag + cy * cy) ** 0.5 + slope_val = (center - se) / diag else: slope_val = (s1 * s1 + s2 * s2) ** 0.5 if slope_val > max_slope: max_slope = slope_val - best_angle = 7.0 * pi_over_4 + r + best_angle = 8.0 * pi_over_4 - r if max_slope <= 0.0: return -1.0 diff --git a/xrspatial/rasterize.py b/xrspatial/rasterize.py index 164b8216..d8f4519d 100644 --- a/xrspatial/rasterize.py +++ 
b/xrspatial/rasterize.py @@ -11,6 +11,7 @@ """ from __future__ import annotations +import warnings from typing import Optional, Tuple, Union import numpy as np @@ -136,13 +137,74 @@ def _classify_geometries(geometries, props_array): Where poly_props is (N_poly, P), line_props is (N_line, P), point_props is (N_point, P) float64 arrays. """ + if _HAS_SHAPELY2: + return _classify_geometries_vectorized(geometries, props_array) + return _classify_geometries_loop(geometries, props_array) + + +def _classify_geometries_vectorized(geometries, props_array): + """Vectorized classification using shapely 2.0 array ops.""" + import shapely + + n_props = props_array.shape[1] if props_array.ndim == 2 else 1 + geom_arr = np.array(geometries, dtype=object) + n = len(geom_arr) + + if n == 0: + empty_props = np.empty((0, n_props), dtype=np.float64) + return (([], empty_props, []), + ([], empty_props.copy()), + ([], empty_props.copy())) + + type_ids = shapely.get_type_id(geom_arr) + empty = shapely.is_empty(geom_arr) + valid = ~empty + + # Type ID mapping: + # 0=Point, 1=LineString, 2=LinearRing, 3=Polygon, + # 4=MultiPoint, 5=MultiLineString, 6=MultiPolygon, + # 7=GeometryCollection + poly_mask = valid & ((type_ids == 3) | (type_ids == 6)) + line_mask = valid & ((type_ids == 1) | (type_ids == 5)) + point_mask = valid & ((type_ids == 0) | (type_ids == 4)) + gc_mask = valid & (type_ids == 7) + + has_gc = np.any(gc_mask) + + # Fast path: no GeometryCollections (common case) + if not has_gc: + poly_idx = np.where(poly_mask)[0] + line_idx = np.where(line_mask)[0] + point_idx = np.where(point_mask)[0] + + poly_geoms = [geometries[i] for i in poly_idx] + poly_ids = list(range(len(poly_idx))) + poly_props = (props_array[poly_idx] if len(poly_idx) > 0 + else np.empty((0, n_props), dtype=np.float64)) + + line_geoms = [geometries[i] for i in line_idx] + line_props = (props_array[line_idx] if len(line_idx) > 0 + else np.empty((0, n_props), dtype=np.float64)) + + point_geoms = [geometries[i] for i 
in point_idx] + point_props = (props_array[point_idx] if len(point_idx) > 0 + else np.empty((0, n_props), dtype=np.float64)) + + return ((poly_geoms, poly_props, poly_ids), + (line_geoms, line_props), + (point_geoms, point_props)) + + # Slow path: unpack GeometryCollections, then classify + return _classify_geometries_loop(geometries, props_array) + + +def _classify_geometries_loop(geometries, props_array): + """Per-object classification fallback (handles GeometryCollections).""" n_props = props_array.shape[1] if props_array.ndim == 2 else 1 poly_geoms, poly_prop_rows, poly_ids = [], [], [] line_geoms, line_prop_rows = [], [] point_geoms, point_prop_rows = [], [] - # poly_counter tracks per-type indices that both preserve input order - # and serve as valid indices into the poly_props table. poly_counter = [0] def _classify_one(geom, prop_row, global_idx): @@ -1083,12 +1145,12 @@ def _scanline_fill_gpu(out, written, edge_y_min, edge_x_at_ymin, if count == 0: return - MAX_ISECT = 512 + MAX_ISECT = 2048 if count > MAX_ISECT: count = MAX_ISECT - xs = cuda.local.array(512, dtype=np.float64) - gs = cuda.local.array(512, dtype=np.int32) + xs = cuda.local.array(2048, dtype=np.float64) + gs = cuda.local.array(2048, dtype=np.int32) actual = 0 for k in range(start, end): @@ -1287,6 +1349,18 @@ def _run_cupy(geometries, props_array, bounds, height, width, fill, dtype, row_ptr, col_idx = _build_row_csr_numba( edge_y_min, edge_y_max, height) + # Check for rows exceeding the GPU kernel's local array limit + _GPU_MAX_ISECT = 2048 + max_edges_per_row = int(np.diff(row_ptr).max()) + if max_edges_per_row > _GPU_MAX_ISECT: + warnings.warn( + f"rasterize CUDA kernel: {max_edges_per_row} active edges " + f"in a single row exceeds the limit of {_GPU_MAX_ISECT}. " + f"Excess edges will be silently dropped, causing incorrect " + f"results. 
Reduce polygon density or use the numpy backend.", + stacklevel=3, + ) + d_y_min = cupy.asarray(edge_y_min) d_x_at_ymin = cupy.asarray(edge_x_at_ymin) d_inv_slope = cupy.asarray(edge_inv_slope) @@ -1814,13 +1888,9 @@ def _parse_input(geometries, column=None, columns=None): props_array = np.array(value_list, dtype=np.float64).reshape(-1, 1) - # Compute per-geometry bboxes (reused by dask tile filtering later) - geom_bboxes = _geometry_bboxes(geom_list) - if len(geom_bboxes) == 0: - return geom_list, props_array, None - total_bounds = (geom_bboxes[:, 0].min(), geom_bboxes[:, 1].min(), - geom_bboxes[:, 2].max(), geom_bboxes[:, 3].max()) - return geom_list, props_array, total_bounds + # Bounds computation is deferred: return None here and let the + # caller compute bboxes only when bounds are actually needed. + return geom_list, props_array, None def _extract_grid_from_like(like): @@ -2023,6 +2093,14 @@ def rasterize( final_bounds = like_bounds if final_bounds is None: final_bounds = inferred_bounds + if final_bounds is None and geom_list: + # Compute bounds lazily only when not supplied by the caller + geom_bboxes = _geometry_bboxes(geom_list) + if len(geom_bboxes) > 0: + final_bounds = (geom_bboxes[:, 0].min(), + geom_bboxes[:, 1].min(), + geom_bboxes[:, 2].max(), + geom_bboxes[:, 3].max()) if final_bounds is None: raise ValueError( "bounds must be provided when geometries are empty or have " diff --git a/xrspatial/stream_order.py b/xrspatial/stream_order.py index aa731202..c7122dad 100644 --- a/xrspatial/stream_order.py +++ b/xrspatial/stream_order.py @@ -694,6 +694,7 @@ def _preprocess_stream_tiles(flow_dir_da, accum_da, threshold, accum_da.blocks[iy, ix].compute()) sm = np.where(ac_chunk >= threshold, 1.0, 0.0) sm = np.where(np.isnan(ac_chunk), 0.0, sm) + sm = np.where(np.isnan(fd_chunk), 0.0, sm) for side, row_data_fd, row_data_sm in [ ('top', fd_chunk[0, :], sm[0, :]), @@ -986,6 +987,7 @@ def _process_strahler_tile(iy, ix, flow_dir_da, accum_da, threshold, 
accum_da.blocks[iy, ix].compute(), dtype=np.float64) sm = np.where(ac_chunk >= threshold, 1, 0).astype(np.int8) sm = np.where(np.isnan(ac_chunk), 0, sm).astype(np.int8) + sm = np.where(np.isnan(fd_chunk), 0, sm).astype(np.int8) h, w = fd_chunk.shape seeds = _compute_strahler_seeds( @@ -1031,6 +1033,7 @@ def _process_shreve_tile(iy, ix, flow_dir_da, accum_da, threshold, accum_da.blocks[iy, ix].compute(), dtype=np.float64) sm = np.where(ac_chunk >= threshold, 1, 0).astype(np.int8) sm = np.where(np.isnan(ac_chunk), 0, sm).astype(np.int8) + sm = np.where(np.isnan(fd_chunk), 0, sm).astype(np.int8) h, w = fd_chunk.shape seeds = _compute_shreve_seeds( @@ -1111,6 +1114,7 @@ def _tile_fn(block, accum_block, block_info=None): ac = np.asarray(accum_block, dtype=np.float64) sm = np.where(ac >= _threshold, 1, 0).astype(np.int8) sm = np.where(np.isnan(ac), 0, sm).astype(np.int8) + sm = np.where(np.isnan(fd), 0, sm).astype(np.int8) h, w = fd.shape seeds = _compute_strahler_seeds( iy, ix, _bdry_max, _bdry_cnt, _flow_bdry, _mask_bdry, @@ -1173,6 +1177,7 @@ def _tile_fn(block, accum_block, block_info=None): ac = np.asarray(accum_block, dtype=np.float64) sm = np.where(ac >= _threshold, 1, 0).astype(np.int8) sm = np.where(np.isnan(ac), 0, sm).astype(np.int8) + sm = np.where(np.isnan(fd), 0, sm).astype(np.int8) h, w = fd.shape seeds = _compute_shreve_seeds( iy, ix, _boundaries, _flow_bdry, _mask_bdry, diff --git a/xrspatial/stream_order_mfd.py b/xrspatial/stream_order_mfd.py index 5c09d8cf..cd216b24 100644 --- a/xrspatial/stream_order_mfd.py +++ b/xrspatial/stream_order_mfd.py @@ -590,7 +590,15 @@ def _strahler_mfd_tile_kernel(fractions, stream_mask, h, w, queue_c[tail] = nc tail += 1 - return order + # Fix headwater cells: represent (order=1, no inputs) as (max=1, cnt=1) + # so that the boundary (max, cnt) reconstruction works correctly. 
+ for r in range(h): + for c in range(w): + if stream_mask[r, c] == 1 and max_in[r, c] == 0.0: + max_in[r, c] = order[r, c] + cnt_max[r, c] = 1 + + return order, max_in, cnt_max @ngjit @@ -1092,17 +1100,22 @@ def _process_strahler_tile_mfd(iy, ix, fractions_da, accum_da, threshold, iy, ix, bdry_max, bdry_cnt, frac_bdry, mask_bdry, chunks_y, chunks_x, n_tile_y, n_tile_x) - order = _strahler_mfd_tile_kernel(frac_chunk, sm, h, w, *seeds) + order, ki_max_in, ki_cnt_max = _strahler_mfd_tile_kernel( + frac_chunk, sm, h, w, *seeds) - # Extract boundary order values and recompute max/cnt for boundaries + # Extract boundary max_in/cnt_max values (not final order) so that + # the seed reconstruction (cnt>=2 -> order+1) works at tile borders. change = 0.0 - for side, strip in [('top', order[0, :]), - ('bottom', order[-1, :]), - ('left', order[:, 0]), - ('right', order[:, -1])]: - new_vals = strip.copy() - new_max = np.where(np.isnan(new_vals), 0.0, new_vals) - new_cnt = np.where(new_max > 0, 1.0, 0.0) + bdry_slices = [ + ('top', order[0, :], ki_max_in[0, :], ki_cnt_max[0, :]), + ('bottom', order[-1, :], ki_max_in[-1, :], ki_cnt_max[-1, :]), + ('left', order[:, 0], ki_max_in[:, 0], ki_cnt_max[:, 0]), + ('right', order[:, -1], ki_max_in[:, -1], ki_cnt_max[:, -1]), + ] + for side, order_strip, mi_strip, cm_strip in bdry_slices: + is_nan = np.isnan(order_strip) + new_max = np.where(is_nan, 0.0, mi_strip.astype(np.float64)) + new_cnt = np.where(is_nan, 0.0, cm_strip.astype(np.float64)) old_max = bdry_max.get(side, iy, ix).copy() with np.errstate(invalid='ignore'): @@ -1224,7 +1237,7 @@ def _stream_order_mfd_dask_strahler(fractions_da, accum_da, threshold): iy, ix, _bdry_max, _bdry_cnt, _frac_bdry, _mask_bdry, chunks_y, chunks_x, n_tile_y, n_tile_x) - tile_order = _strahler_mfd_tile_kernel( + tile_order, _, _ = _strahler_mfd_tile_kernel( frac_chunk, sm, h, w, *seeds) row.append(da.from_array(tile_order, chunks=tile_order.shape)) rows.append(row) diff --git 
a/xrspatial/tests/test_flow_accumulation.py b/xrspatial/tests/test_flow_accumulation.py index 9c2565b6..925e2db0 100644 --- a/xrspatial/tests/test_flow_accumulation.py +++ b/xrspatial/tests/test_flow_accumulation.py @@ -15,7 +15,7 @@ # --------------------------------------------------------------------------- def test_known_bowl(): - """5×5 bowl: hand-computed flow_dir and expected accum. + """5x5 bowl: hand-computed flow_dir and expected accum. Bowl surface: 9 9 9 9 9 @@ -72,7 +72,7 @@ def test_linear_chain_east(): # --------------------------------------------------------------------------- def test_pit_center(): - """3×3 grid, all 8 neighbours flow to center → center accum = 9.""" + """3x3 grid, all 8 neighbours flow to center -> center accum = 9.""" flow_dir = np.array([ [2.0, 4.0, 8.0], [1.0, 0.0, 16.0], @@ -92,7 +92,7 @@ def test_pit_center(): # --------------------------------------------------------------------------- def test_all_pits(): - """Every cell code = 0 → every cell accum = 1.""" + """Every cell code = 0 -> every cell accum = 1.""" flow_dir = np.zeros((4, 5), dtype=np.float64) agg = create_test_raster(flow_dir) result = flow_accumulation(agg) @@ -104,7 +104,7 @@ def test_all_pits(): # --------------------------------------------------------------------------- def test_nan_handling(): - """NaN flow_dir → NaN accum; valid neighbours still correct.""" + """NaN flow_dir -> NaN accum; valid neighbours still correct.""" flow_dir = np.array([ [1.0, 1.0, 0.0], [np.nan, 1.0, 0.0], @@ -126,14 +126,14 @@ def test_nan_handling(): # --------------------------------------------------------------------------- def test_code_0_pit(): - """Pit cells (code 0) count themselves → accum = 1.""" + """Pit cells (code 0) count themselves -> accum = 1.""" flow_dir = np.array([ [1.0, 0.0], [64.0, 0.0], ], dtype=np.float64) agg = create_test_raster(flow_dir) result = flow_accumulation(agg) - # (1,0) → N → (0,0) → E → (0,1) pit + # (1,0) -> N -> (0,0) -> E -> (0,1) pit 
assert result.data[1, 0] == 1.0 # leaf, just itself assert result.data[0, 0] == 2.0 # itself + (1,0) assert result.data[0, 1] == 3.0 # itself + (0,0) chain @@ -179,7 +179,7 @@ def test_output_dtype(): "dtype", [np.int32, np.int64, np.float32, np.float64]) def test_dtype_acceptance(dtype): """Function accepts int32/int64/float32/float64 flow_dir inputs.""" - # All zeros (pits) — valid for any dtype + # All zeros (pits) -- valid for any dtype flow_dir = np.zeros((3, 4), dtype=dtype) agg = create_test_raster(flow_dir) result = flow_accumulation(agg) @@ -210,7 +210,7 @@ def test_dataset_support(): # --------------------------------------------------------------------------- def _make_cross_backend_flow_dir(): - """6×6 flow-east grid for cross-backend comparison.""" + """6x6 flow-east grid for cross-backend comparison.""" flow_dir = np.full((6, 6), 1.0, dtype=np.float64) flow_dir[:, -1] = 0.0 # last column = pits return flow_dir @@ -369,204 +369,3 @@ def test_dask_cupy_chunk_configs(chunks): np.testing.assert_allclose( np_result.data, dcp_result.data.compute().get(), equal_nan=True, ), f"Mismatch with chunks={chunks}" - - -# =========================================================================== -# Dinf flow accumulation tests -# =========================================================================== - -from xrspatial.flow_accumulation import _detect_flow_type - - -def test_dinf_detection_d8(): - """D8 codes detected as 'd8'.""" - flow_dir = np.array([[1.0, 2.0], [4.0, 0.0]], dtype=np.float64) - assert _detect_flow_type(flow_dir) == "d8" - - -def test_dinf_detection_dinf(): - """Dinf angles detected as 'dinf'.""" - flow_dir = np.array([[0.5, 1.2], [-1.0, np.nan]], dtype=np.float64) - assert _detect_flow_type(flow_dir) == "dinf" - - -def test_dinf_detection_all_nan(): - """All-NaN grid detected as 'd8' (default).""" - flow_dir = np.full((3, 3), np.nan, dtype=np.float64) - assert _detect_flow_type(flow_dir) == "d8" - - -def test_dinf_detection_pit_minus_one(): - 
"""-1.0 (Dinf pit) triggers 'dinf' detection.""" - flow_dir = np.array([[0.0, -1.0], [0.0, 0.0]], dtype=np.float64) - assert _detect_flow_type(flow_dir) == "dinf" - - -def test_dinf_cardinal_east_chain(): - """Pure east angles (0.0) chain: matches D8 code=1 result.""" - N = 6 - # Dinf: angle=0 means east, pit=-1 - flow_dir = np.full((1, N), 0.0, dtype=np.float64) - flow_dir[0, -1] = -1.0 # last cell is pit - agg = create_test_raster(flow_dir) - result = flow_accumulation(agg) - expected = np.arange(1, N + 1, dtype=np.float64).reshape(1, N) - np.testing.assert_allclose(result.data, expected) - - -def test_dinf_pit_center(): - """8 neighbours with exact cardinal/diagonal angles point to center.""" - pi = np.pi - flow_dir = np.array([ - [7 * pi / 4, 3 * pi / 2, 5 * pi / 4], - [0.0, -1.0, pi], - [pi / 4, pi / 2, 3 * pi / 4], - ], dtype=np.float64) - agg = create_test_raster(flow_dir) - result = flow_accumulation(agg) - assert result.data[1, 1] == 9.0 - for r, c in [(0, 0), (0, 1), (0, 2), (1, 0), (1, 2), - (2, 0), (2, 1), (2, 2)]: - assert result.data[r, c] == 1.0, f"Cell ({r},{c}) = {result.data[r,c]}" - - -def test_dinf_proportional_split(): - """angle=pi/8 splits flow 50/50 to E and NE.""" - pi = np.pi - flow_dir = np.array([ - [-1.0, -1.0, -1.0], - [-1.0, pi / 8, -1.0], - [-1.0, -1.0, -1.0], - ], dtype=np.float64) - agg = create_test_raster(flow_dir) - result = flow_accumulation(agg) - # Center splits 50/50 to E (1,2) and NE (0,2) - np.testing.assert_allclose(result.data[1, 2], 1.5) - np.testing.assert_allclose(result.data[0, 2], 1.5) - assert result.data[1, 1] == 1.0 - - -def test_dinf_chain_with_split(): - """Multi-cell chain with fractional flow, verify cascading sums.""" - pi = np.pi - flow_dir = np.array([ - [-1.0, -1.0, -1.0], - [pi / 8, pi / 8, -1.0], - [-1.0, -1.0, -1.0], - ], dtype=np.float64) - agg = create_test_raster(flow_dir) - result = flow_accumulation(agg) - # (1,0) sends 0.5 to E(1,1) and 0.5 to NE(0,1) - # (1,1) has accum=1.5, sends 0.75 to E(1,2) 
and 0.75 to NE(0,2) - np.testing.assert_allclose(result.data[1, 0], 1.0) - np.testing.assert_allclose(result.data[0, 1], 1.5) - np.testing.assert_allclose(result.data[1, 1], 1.5) - np.testing.assert_allclose(result.data[1, 2], 1.75) - np.testing.assert_allclose(result.data[0, 2], 1.75) - - -def test_dinf_end_to_end(): - """flow_direction_dinf -> flow_accumulation produces valid results.""" - from xrspatial import flow_direction_dinf - - rng = np.random.default_rng(42) - elev = create_test_raster(rng.random((20, 20)) * 100) - fdir = flow_direction_dinf(elev) - acc = flow_accumulation(fdir) - data = acc.data - valid = data[~np.isnan(data)] - assert len(valid) > 0 - assert np.all(valid >= 1.0) - assert np.max(valid) <= data.size - - -def test_dinf_nan_handling(): - """NaN flow dir produces NaN accumulation.""" - pi = np.pi - flow_dir = np.array([ - [np.nan, pi / 2], - [0.0, -1.0], - ], dtype=np.float64) - agg = create_test_raster(flow_dir) - result = flow_accumulation(agg) - assert np.isnan(result.data[0, 0]) - np.testing.assert_allclose(result.data[0, 1], 1.0) - np.testing.assert_allclose(result.data[1, 0], 1.0) - np.testing.assert_allclose(result.data[1, 1], 2.0) - - -@dask_array_available -@pytest.mark.parametrize("chunks", [ - (3, 3), (5, 5), (2, 6), (6, 2), (1, 1), (6, 6), -]) -def test_dinf_dask_matches_numpy(chunks): - """Dinf dask matches numpy across chunk sizes.""" - from xrspatial import flow_direction_dinf - - rng = np.random.default_rng(123) - elev = create_test_raster(rng.random((6, 6)) * 100) - fdir_np = flow_direction_dinf(elev).data - - numpy_agg = create_test_raster(fdir_np, backend='numpy') - dask_agg = create_test_raster(fdir_np, backend='dask', chunks=chunks) - np_result = flow_accumulation(numpy_agg) - da_result = flow_accumulation(dask_agg) - np.testing.assert_allclose( - np_result.data, da_result.data.compute(), equal_nan=True, - ), f"Mismatch with chunks={chunks}" - - -@dask_array_available -def test_dinf_dask_larger_grid(): - """Dinf dask on 
a larger grid with cross-tile flow.""" - from xrspatial import flow_direction_dinf - - rng = np.random.default_rng(99) - elev = create_test_raster(rng.random((12, 12)) * 100) - fdir_np = flow_direction_dinf(elev).data - - numpy_agg = create_test_raster(fdir_np, backend='numpy') - np_result = flow_accumulation(numpy_agg) - - for chunks in [(3, 3), (4, 6), (6, 4), (2, 2)]: - dask_agg = create_test_raster(fdir_np, backend='dask', chunks=chunks) - da_result = flow_accumulation(dask_agg) - np.testing.assert_allclose( - np_result.data, da_result.data.compute(), equal_nan=True, - ), f"Mismatch with chunks={chunks}" - - -@cuda_and_cupy_available -def test_dinf_cupy_matches_numpy(): - """Dinf CuPy matches NumPy.""" - from xrspatial import flow_direction_dinf - - rng = np.random.default_rng(42) - elev = create_test_raster(rng.random((10, 10)) * 100) - fdir_np = flow_direction_dinf(elev).data - - numpy_agg = create_test_raster(fdir_np, backend='numpy') - cupy_agg = create_test_raster(fdir_np, backend='cupy') - np_result = flow_accumulation(numpy_agg) - cp_result = flow_accumulation(cupy_agg) - np.testing.assert_allclose( - np_result.data, cp_result.data.get(), equal_nan=True) - - -@dask_array_available -@cuda_and_cupy_available -def test_dinf_dask_cupy_matches_numpy(): - """Dinf Dask+CuPy matches NumPy.""" - from xrspatial import flow_direction_dinf - - rng = np.random.default_rng(42) - elev = create_test_raster(rng.random((8, 8)) * 100) - fdir_np = flow_direction_dinf(elev).data - - numpy_agg = create_test_raster(fdir_np, backend='numpy') - dask_cupy_agg = create_test_raster(fdir_np, backend='dask+cupy', - chunks=(4, 4)) - np_result = flow_accumulation(numpy_agg) - dcp_result = flow_accumulation(dask_cupy_agg) - np.testing.assert_allclose( - np_result.data, dcp_result.data.compute().get(), equal_nan=True) diff --git a/xrspatial/tests/test_flow_accumulation_dinf.py b/xrspatial/tests/test_flow_accumulation_dinf.py new file mode 100644 index 00000000..8665d05a --- /dev/null +++ 
b/xrspatial/tests/test_flow_accumulation_dinf.py @@ -0,0 +1,216 @@ +import numpy as np +import pytest + +from xrspatial.flow_accumulation_dinf import flow_accumulation_dinf +from xrspatial.flow_accumulation import _detect_flow_type +from xrspatial.tests.general_checks import ( + create_test_raster, + cuda_and_cupy_available, + dask_array_available, +) + + +# =========================================================================== +# Detection tests +# =========================================================================== + +def test_dinf_detection_d8(): + """D8 codes detected as 'd8'.""" + flow_dir = np.array([[1.0, 2.0], [4.0, 0.0]], dtype=np.float64) + assert _detect_flow_type(flow_dir) == "d8" + + +def test_dinf_detection_dinf(): + """Dinf angles detected as 'dinf'.""" + flow_dir = np.array([[0.5, 1.2], [-1.0, np.nan]], dtype=np.float64) + assert _detect_flow_type(flow_dir) == "dinf" + + +def test_dinf_detection_all_nan(): + """All-NaN grid detected as 'd8' (default).""" + flow_dir = np.full((3, 3), np.nan, dtype=np.float64) + assert _detect_flow_type(flow_dir) == "d8" + + +def test_dinf_detection_pit_minus_one(): + """-1.0 (Dinf pit) triggers 'dinf' detection.""" + flow_dir = np.array([[0.0, -1.0], [0.0, 0.0]], dtype=np.float64) + assert _detect_flow_type(flow_dir) == "dinf" + + +# =========================================================================== +# Core functionality tests +# =========================================================================== + +def test_dinf_cardinal_east_chain(): + """Pure east angles (0.0) chain: matches D8 code=1 result.""" + N = 6 + # Dinf: angle=0 means east, pit=-1 + flow_dir = np.full((1, N), 0.0, dtype=np.float64) + flow_dir[0, -1] = -1.0 # last cell is pit + agg = create_test_raster(flow_dir) + result = flow_accumulation_dinf(agg) + expected = np.arange(1, N + 1, dtype=np.float64).reshape(1, N) + np.testing.assert_allclose(result.data, expected) + + +def test_dinf_pit_center(): + """8 neighbours with exact 
cardinal/diagonal angles point to center.""" + pi = np.pi + flow_dir = np.array([ + [7 * pi / 4, 3 * pi / 2, 5 * pi / 4], + [0.0, -1.0, pi], + [pi / 4, pi / 2, 3 * pi / 4], + ], dtype=np.float64) + agg = create_test_raster(flow_dir) + result = flow_accumulation_dinf(agg) + assert result.data[1, 1] == 9.0 + for r, c in [(0, 0), (0, 1), (0, 2), (1, 0), (1, 2), + (2, 0), (2, 1), (2, 2)]: + assert result.data[r, c] == 1.0, f"Cell ({r},{c}) = {result.data[r,c]}" + + +def test_dinf_proportional_split(): + """angle=pi/8 splits flow 50/50 to E and NE.""" + pi = np.pi + flow_dir = np.array([ + [-1.0, -1.0, -1.0], + [-1.0, pi / 8, -1.0], + [-1.0, -1.0, -1.0], + ], dtype=np.float64) + agg = create_test_raster(flow_dir) + result = flow_accumulation_dinf(agg) + # Center splits 50/50 to E (1,2) and NE (0,2) + np.testing.assert_allclose(result.data[1, 2], 1.5) + np.testing.assert_allclose(result.data[0, 2], 1.5) + assert result.data[1, 1] == 1.0 + + +def test_dinf_chain_with_split(): + """Multi-cell chain with fractional flow, verify cascading sums.""" + pi = np.pi + flow_dir = np.array([ + [-1.0, -1.0, -1.0], + [pi / 8, pi / 8, -1.0], + [-1.0, -1.0, -1.0], + ], dtype=np.float64) + agg = create_test_raster(flow_dir) + result = flow_accumulation_dinf(agg) + # (1,0) sends 0.5 to E(1,1) and 0.5 to NE(0,1) + # (1,1) has accum=1.5, sends 0.75 to E(1,2) and 0.75 to NE(0,2) + np.testing.assert_allclose(result.data[1, 0], 1.0) + np.testing.assert_allclose(result.data[0, 1], 1.5) + np.testing.assert_allclose(result.data[1, 1], 1.5) + np.testing.assert_allclose(result.data[1, 2], 1.75) + np.testing.assert_allclose(result.data[0, 2], 1.75) + + +def test_dinf_end_to_end(): + """flow_direction_dinf -> flow_accumulation_dinf produces valid results.""" + from xrspatial import flow_direction_dinf + + rng = np.random.default_rng(42) + elev = create_test_raster(rng.random((20, 20)) * 100) + fdir = flow_direction_dinf(elev) + acc = flow_accumulation_dinf(fdir) + data = acc.data + valid = 
data[~np.isnan(data)] + assert len(valid) > 0 + assert np.all(valid >= 1.0) + assert np.max(valid) <= data.size + + +def test_dinf_nan_handling(): + """NaN flow dir produces NaN accumulation.""" + pi = np.pi + flow_dir = np.array([ + [np.nan, pi / 2], + [0.0, -1.0], + ], dtype=np.float64) + agg = create_test_raster(flow_dir) + result = flow_accumulation_dinf(agg) + assert np.isnan(result.data[0, 0]) + np.testing.assert_allclose(result.data[0, 1], 1.0) + np.testing.assert_allclose(result.data[1, 0], 1.0) + np.testing.assert_allclose(result.data[1, 1], 2.0) + + +# =========================================================================== +# Cross-backend tests +# =========================================================================== + +@dask_array_available +@pytest.mark.parametrize("chunks", [ + (3, 3), (5, 5), (2, 6), (6, 2), (1, 1), (6, 6), +]) +def test_dinf_dask_matches_numpy(chunks): + """Dinf dask matches numpy across chunk sizes.""" + from xrspatial import flow_direction_dinf + + rng = np.random.default_rng(123) + elev = create_test_raster(rng.random((6, 6)) * 100) + fdir_np = flow_direction_dinf(elev).data + + numpy_agg = create_test_raster(fdir_np, backend='numpy') + dask_agg = create_test_raster(fdir_np, backend='dask', chunks=chunks) + np_result = flow_accumulation_dinf(numpy_agg) + da_result = flow_accumulation_dinf(dask_agg) + np.testing.assert_allclose( + np_result.data, da_result.data.compute(), equal_nan=True, + err_msg=f"Mismatch with chunks={chunks}") + + +@dask_array_available +def test_dinf_dask_larger_grid(): + """Dinf dask on a larger grid with cross-tile flow.""" + from xrspatial import flow_direction_dinf + + rng = np.random.default_rng(99) + elev = create_test_raster(rng.random((12, 12)) * 100) + fdir_np = flow_direction_dinf(elev).data + + numpy_agg = create_test_raster(fdir_np, backend='numpy') + np_result = flow_accumulation_dinf(numpy_agg) + + for chunks in [(3, 3), (4, 6), (6, 4), (2, 2)]: + dask_agg = create_test_raster(fdir_np, 
backend='dask', chunks=chunks) + da_result = flow_accumulation_dinf(dask_agg) + np.testing.assert_allclose( + np_result.data, da_result.data.compute(), equal_nan=True, + err_msg=f"Mismatch with chunks={chunks}") + + +@cuda_and_cupy_available +def test_dinf_cupy_matches_numpy(): + """Dinf CuPy matches NumPy.""" + from xrspatial import flow_direction_dinf + + rng = np.random.default_rng(42) + elev = create_test_raster(rng.random((10, 10)) * 100) + fdir_np = flow_direction_dinf(elev).data + + numpy_agg = create_test_raster(fdir_np, backend='numpy') + cupy_agg = create_test_raster(fdir_np, backend='cupy') + np_result = flow_accumulation_dinf(numpy_agg) + cp_result = flow_accumulation_dinf(cupy_agg) + np.testing.assert_allclose( + np_result.data, cp_result.data.get(), equal_nan=True) + + +@dask_array_available +@cuda_and_cupy_available +def test_dinf_dask_cupy_matches_numpy(): + """Dinf Dask+CuPy matches NumPy.""" + from xrspatial import flow_direction_dinf + + rng = np.random.default_rng(42) + elev = create_test_raster(rng.random((8, 8)) * 100) + fdir_np = flow_direction_dinf(elev).data + + numpy_agg = create_test_raster(fdir_np, backend='numpy') + dask_cupy_agg = create_test_raster(fdir_np, backend='dask+cupy', + chunks=(4, 4)) + np_result = flow_accumulation_dinf(numpy_agg) + dcp_result = flow_accumulation_dinf(dask_cupy_agg) + np.testing.assert_allclose( + np_result.data, dcp_result.data.compute().get(), equal_nan=True) diff --git a/xrspatial/tests/test_flow_direction_dinf.py b/xrspatial/tests/test_flow_direction_dinf.py index 48c4baec..e53bf2e8 100644 --- a/xrspatial/tests/test_flow_direction_dinf.py +++ b/xrspatial/tests/test_flow_direction_dinf.py @@ -120,6 +120,128 @@ def test_interior_facet_angle(): f"Center = {result.data[1,1]}, expected {expected}") + +# --------------------------------------------------------------------------- +# Tarboton odd-facet tests +# --------------------------------------------------------------------------- + +def 
test_odd_facet_1_tarboton(): + """Facet 1 (N, NE) wins with interior r; angle = pi/2 - r.""" + # Center=10, N=7, NE=6; all others equal to center. + # Facet 1: s1=(10-7)/1=3, s2=(7-6)/1=1, r=atan2(1,3), s=sqrt(10) + # Facet 0: s1=(10-10)/1=0, s2=(10-6)/1=4, r clamped pi/4, s=(10-6)/sqrt(2)~2.83 + # Facet 1 wins (sqrt(10)~3.16 > 2.83). + data = np.array([ + [10, 7, 6], + [10, 10, 10], + [10, 10, 10], + ], dtype=np.float64) + agg = _make_raster(data) + result = flow_direction_dinf(agg) + r = math.atan2(1, 3) + expected = math.pi / 2.0 - r + assert abs(result.data[1, 1] - expected) < 1e-10, ( + f"Center = {result.data[1,1]}, expected {expected}") + + +def test_odd_facet_7_tarboton(): + """Facet 7 (E, SE) wins with interior r; angle = 2*pi - r.""" + # Center=10, E=7, SE=6; all others equal to center. + # Facet 7: s1=(10-7)/1=3, s2=(7-6)/1=1, r=atan2(1,3), s=sqrt(10) + data = np.array([ + [10, 10, 10], + [10, 10, 7], + [10, 10, 6], + ], dtype=np.float64) + agg = _make_raster(data) + result = flow_direction_dinf(agg) + r = math.atan2(1, 3) + expected = 2.0 * math.pi - r + assert abs(result.data[1, 1] - expected) < 1e-10, ( + f"Center = {result.data[1,1]}, expected {expected}") + + +def _reference_tarboton(data, cx, cy): + """Pure-Python reference Tarboton (1997) D-inf implementation.""" + rows, cols = data.shape + out = np.full(data.shape, np.nan) + + nb_dy = [0, -1, -1, -1, 0, 1, 1, 1] + nb_dx = [1, 1, 0, -1, -1, -1, 0, 1] + e1_idx = [0, 2, 2, 4, 4, 6, 6, 0] + e2_idx = [1, 1, 3, 3, 5, 5, 7, 7] + d1_arr = [cx, cy, cy, cx, cx, cy, cy, cx] + d2_arr = [cy, cx, cx, cy, cy, cx, cx, cy] + ac = [0, 2, 2, 4, 4, 6, 6, 8] + af = [1, -1, 1, -1, 1, -1, 1, -1] + + diag = math.sqrt(cx * cx + cy * cy) + pi4 = math.pi / 4.0 + + for y in range(1, rows - 1): + for x in range(1, cols - 1): + center = data[y, x] + if np.isnan(center): + continue + has_nan = False + nbs = [] + for k in range(8): + v = data[y + nb_dy[k], x + nb_dx[k]] + if np.isnan(v): + has_nan = True + break + nbs.append(v) 
+ if has_nan: + continue + + max_slope = -1e308 + best_angle = -1.0 + for k in range(8): + e1 = nbs[e1_idx[k]] + e2 = nbs[e2_idx[k]] + s1 = (center - e1) / d1_arr[k] + s2 = (e1 - e2) / d2_arr[k] + r = math.atan2(s2, s1) + if r < 0.0: + r = 0.0 + s = s1 + elif r > pi4: + r = pi4 + s = (center - e2) / diag + else: + s = math.sqrt(s1 * s1 + s2 * s2) + if s > max_slope: + max_slope = s + best_angle = ac[k] * pi4 + af[k] * r + + if max_slope <= 0.0: + out[y, x] = -1.0 + else: + if best_angle >= 2.0 * math.pi: + best_angle = 0.0 + out[y, x] = best_angle + return out + + +def test_reference_tarboton_agreement(): + """Cone DEM: xrspatial matches reference Tarboton on all interior cells.""" + n = 51 + cx, cy = 1.0, 1.0 + yy, xx = np.mgrid[0:n, 0:n] + center = (n - 1) / 2.0 + data = np.sqrt((yy - center) ** 2 + (xx - center) ** 2).astype(np.float64) + + agg = _make_raster(data, res=(cy, cx)) + result = flow_direction_dinf(agg) + ref = _reference_tarboton(data, cx, cy) + + # Compare interior cells (skip edges which are NaN) + interior = ~np.isnan(ref) + assert interior.sum() > 0 + np.testing.assert_allclose( + result.data[interior], ref[interior], atol=1e-12, + err_msg="xrspatial dinf does not match reference Tarboton") + + # --------------------------------------------------------------------------- # Pit / flat / NaN tests # --------------------------------------------------------------------------- diff --git a/xrspatial/watershed.py b/xrspatial/watershed.py index c11f50f8..439177d4 100644 --- a/xrspatial/watershed.py +++ b/xrspatial/watershed.py @@ -93,40 +93,45 @@ def _code_to_offset_py(code): # ===================================================================== @ngjit -def _watershed_cpu(flow_dir, labels, h, w): +def _watershed_cpu(flow_dir, labels, state, h, w): """Downstream tracing with path compression for watershed. - ``labels`` is pre-initialised: pour points >= 0, NaN for nodata, - -1 for unresolved. 
On return every reachable cell has the label - of its pour point, unreachable cells are NaN. + Uses a separate ``state`` array to track cell status, so that + pour-point labels can be any float value (including negative). + + State values: 0=nodata, 1=unresolved, 2=in-trace, 3=resolved. + On return every reachable cell has state 3 and the label of its + pour point; unreachable cells have state 0 and NaN. """ path_r = np.empty(h * w, dtype=np.int64) path_c = np.empty(h * w, dtype=np.int64) for r in range(h): for c in range(w): - if labels[r, c] != -1.0: - continue # already resolved or NaN + if state[r, c] != 1: + continue # already resolved, nodata, or in-trace # Trace downstream, collecting path path_len = 0 cr, cc = r, c found_label = np.nan + found = False while True: - lbl = labels[cr, cc] - if lbl >= 0.0: - # Hit a labeled cell (pour point or previously resolved) - found_label = lbl + s = state[cr, cc] + if s == 3: + # Hit a resolved cell (pour point or previously resolved) + found_label = labels[cr, cc] + found = True break - if lbl != -1.0: - # NaN or in-trace marker (-2) → cycle or dead end + if s != 1: + # nodata (0) or in-trace (2) → cycle or dead end break path_r[path_len] = cr path_c[path_len] = cc path_len += 1 - labels[cr, cc] = -2.0 # in-trace marker + state[cr, cc] = 2 # in-trace marker v = flow_dir[cr, cc] if v != v: # NaN @@ -141,7 +146,12 @@ def _watershed_cpu(flow_dir, labels, h, w): # Assign label to entire traced path for i in range(path_len): - labels[path_r[i], path_c[i]] = found_label + if found: + labels[path_r[i], path_c[i]] = found_label + state[path_r[i], path_c[i]] = 3 + else: + labels[path_r[i], path_c[i]] = np.nan + state[path_r[i], path_c[i]] = 0 return labels @@ -278,61 +288,72 @@ def _watershed_tile_kernel(flow_dir, h, w, pour_points, exit_tl, exit_tr, exit_bl, exit_br): """Seeded downstream tracing for a single tile. 
- Labels are initialised from pour_points and exit labels (resolved - labels of the destination cell in adjacent tiles). Downstream - tracing with path compression resolves as many cells as possible - within the tile. + Uses a separate state array so pour-point labels can be any float + value (including negative). State: 0=nodata, 1=unresolved, + 2=in-trace, 3=resolved. """ labels = np.empty((h, w), dtype=np.float64) + state = np.empty((h, w), dtype=np.int8) - # Initialise labels + # Initialise labels and state for r in range(h): for c in range(w): v = flow_dir[r, c] if v != v: # NaN labels[r, c] = np.nan + state[r, c] = 0 continue pp = pour_points[r, c] if pp == pp: # not NaN → pour point labels[r, c] = pp + state[r, c] = 3 continue - labels[r, c] = -1.0 # unresolved + labels[r, c] = np.nan + state[r, c] = 1 # unresolved # Apply exit labels to boundary cells that flow OUT of tile # Top row: cells flowing north for c in range(w): - if labels[0, c] == -1.0: + if state[0, c] == 1: el = exit_top[c] - if el == el and el >= 0.0: # not NaN and resolved + if el == el: # not NaN → resolved labels[0, c] = el + state[0, c] = 3 # Bottom row for c in range(w): - if labels[h - 1, c] == -1.0: + if state[h - 1, c] == 1: el = exit_bottom[c] - if el == el and el >= 0.0: + if el == el: labels[h - 1, c] = el + state[h - 1, c] = 3 # Left column for r in range(h): - if labels[r, 0] == -1.0: + if state[r, 0] == 1: el = exit_left[r] - if el == el and el >= 0.0: + if el == el: labels[r, 0] = el + state[r, 0] = 3 # Right column for r in range(h): - if labels[r, w - 1] == -1.0: + if state[r, w - 1] == 1: el = exit_right[r] - if el == el and el >= 0.0: + if el == el: labels[r, w - 1] = el + state[r, w - 1] = 3 # Corners - if labels[0, 0] == -1.0 and exit_tl == exit_tl and exit_tl >= 0.0: + if state[0, 0] == 1 and exit_tl == exit_tl: labels[0, 0] = exit_tl - if labels[0, w - 1] == -1.0 and exit_tr == exit_tr and exit_tr >= 0.0: + state[0, 0] = 3 + if state[0, w - 1] == 1 and exit_tr == 
exit_tr: labels[0, w - 1] = exit_tr - if labels[h - 1, 0] == -1.0 and exit_bl == exit_bl and exit_bl >= 0.0: + state[0, w - 1] = 3 + if state[h - 1, 0] == 1 and exit_bl == exit_bl: labels[h - 1, 0] = exit_bl - if labels[h - 1, w - 1] == -1.0 and exit_br == exit_br and exit_br >= 0.0: + state[h - 1, 0] = 3 + if state[h - 1, w - 1] == 1 and exit_br == exit_br: labels[h - 1, w - 1] = exit_br + state[h - 1, w - 1] = 3 # Downstream tracing with path compression path_r = np.empty(h * w, dtype=np.int64) @@ -340,25 +361,28 @@ def _watershed_tile_kernel(flow_dir, h, w, pour_points, for r in range(h): for c in range(w): - if labels[r, c] != -1.0: + if state[r, c] != 1: continue path_len = 0 cr, cc = r, c found_label = np.nan + found = False + exit_tile = False while True: - lbl = labels[cr, cc] - if lbl >= 0.0: - found_label = lbl + s = state[cr, cc] + if s == 3: + found_label = labels[cr, cc] + found = True break - if lbl != -1.0: + if s != 1: break path_r[path_len] = cr path_c[path_len] = cc path_len += 1 - labels[cr, cc] = -2.0 + state[cr, cc] = 2 v = flow_dir[cr, cc] if v != v: @@ -368,13 +392,20 @@ def _watershed_tile_kernel(flow_dir, h, w, pour_points, break nr, nc = cr + dy, cc + dx if nr < 0 or nr >= h or nc < 0 or nc >= w: - # Exits tile — leave as unresolved (-1) - found_label = -1.0 + # Exits tile — leave as unresolved + exit_tile = True break cr, cc = nr, nc for i in range(path_len): - labels[path_r[i], path_c[i]] = found_label + if found: + labels[path_r[i], path_c[i]] = found_label + state[path_r[i], path_c[i]] = 3 + elif exit_tile: + state[path_r[i], path_c[i]] = 1 # still unresolved + else: + labels[path_r[i], path_c[i]] = np.nan + state[path_r[i], path_c[i]] = 0 # dead end return labels @@ -701,9 +732,6 @@ def _tile_fn(flow_dir_block, pp_block, block_info=None): h, w, np.asarray(pp_block, dtype=np.float64), *exits) - # After convergence, any remaining unresolved cells → NaN - result = np.where((result == -1.0) | (result == -2.0), - np.nan, result) return 
result return da.map_blocks( @@ -742,29 +770,29 @@ def _watershed_tile_cupy(flow_dir_data, pour_points_data, # Inject exit labels at boundary cells where active (state==1) # and exit label is resolved (not NaN, >= 0). exit_top_cp = cp.asarray(exit_top) - m = (state[0, :] == 1) & ~cp.isnan(exit_top_cp) & (exit_top_cp >= 0) + m = (state[0, :] == 1) & ~cp.isnan(exit_top_cp) labels[0, :] = cp.where(m, exit_top_cp, labels[0, :]) state[0, :] = cp.where(m, 2, state[0, :]) exit_bot_cp = cp.asarray(exit_bottom) - m = (state[H - 1, :] == 1) & ~cp.isnan(exit_bot_cp) & (exit_bot_cp >= 0) + m = (state[H - 1, :] == 1) & ~cp.isnan(exit_bot_cp) labels[H - 1, :] = cp.where(m, exit_bot_cp, labels[H - 1, :]) state[H - 1, :] = cp.where(m, 2, state[H - 1, :]) exit_left_cp = cp.asarray(exit_left) - m = (state[:, 0] == 1) & ~cp.isnan(exit_left_cp) & (exit_left_cp >= 0) + m = (state[:, 0] == 1) & ~cp.isnan(exit_left_cp) labels[:, 0] = cp.where(m, exit_left_cp, labels[:, 0]) state[:, 0] = cp.where(m, 2, state[:, 0]) exit_right_cp = cp.asarray(exit_right) - m = (state[:, W - 1] == 1) & ~cp.isnan(exit_right_cp) & (exit_right_cp >= 0) + m = (state[:, W - 1] == 1) & ~cp.isnan(exit_right_cp) labels[:, W - 1] = cp.where(m, exit_right_cp, labels[:, W - 1]) state[:, W - 1] = cp.where(m, 2, state[:, W - 1]) # Corner exit labels for r, c, val in [(0, 0, exit_tl), (0, W - 1, exit_tr), (H - 1, 0, exit_bl), (H - 1, W - 1, exit_br)]: - if val == val and val >= 0 and int(state[r, c]) == 1: + if val == val and int(state[r, c]) == 1: labels[r, c] = val state[r, c] = 2 @@ -938,16 +966,20 @@ def watershed(flow_dir: xr.DataArray, fd = data.astype(np.float64) pp = np.asarray(pp_data, dtype=np.float64) h, w = fd.shape - # Init labels: pour points get their value, NaN flow_dir → NaN, - # others → -1 - labels = np.full((h, w), -1.0, dtype=np.float64) + # Init labels and state: pour points → resolved (state 3), + # NaN flow_dir → nodata (state 0), others → unresolved (state 1) + labels = np.full((h, w), np.nan, 
dtype=np.float64) + state = np.zeros((h, w), dtype=np.int8) for r in range(h): for c in range(w): if fd[r, c] != fd[r, c]: # NaN - labels[r, c] = np.nan + pass # state 0, label NaN elif pp[r, c] == pp[r, c]: # not NaN → pour point labels[r, c] = pp[r, c] - out = _watershed_cpu(fd, labels, h, w) + state[r, c] = 3 + else: + state[r, c] = 1 # unresolved + out = _watershed_cpu(fd, labels, state, h, w) elif has_cuda_and_cupy() and is_cupy_array(data): out = _watershed_cupy(data, pp_data)