@@ -36,6 +36,78 @@ def _chunked(items: Sequence[Slide], size: int) -> Iterable[Sequence[Slide]]:
3636 yield items [i : i + size ]
3737
3838
def build_existing_extraction_result(slide: Slide, h5_path: Path) -> ExtractionResult | None:
    """Build a minimal ``ExtractionResult`` describing an already-written patch H5.

    The patch count is taken from the ``num_patches`` file attribute when it is
    present, otherwise from the first dimension of the ``coords`` dataset.
    ``patch_size_level0`` is read from the attribute of the same name and stays
    ``None`` when the attribute is absent.

    Returns ``None`` when the file cannot be read (a warning is logged) or when
    no positive patch count can be determined; otherwise an ``ExtractionResult``
    with empty ``metadata``.
    """
    patch_count: int | None = None
    level0_size: int | None = None
    try:
        with h5py.File(h5_path, "r") as h5_file:
            attrs = h5_file.attrs

            stored_count = attrs.get("num_patches")
            if stored_count is not None:
                patch_count = int(stored_count)
            elif "coords" in h5_file:
                # No explicit count stored: fall back to the length of the coords dataset.
                patch_count = int(h5_file["coords"].shape[0])

            stored_size = attrs.get("patch_size_level0")
            if stored_size is not None:
                level0_size = int(stored_size)
    except Exception as exc:  # noqa: BLE001
        # Best-effort read: any failure means the caller should reprocess the slide.
        logger.warning(
            "Failed to read existing output for %s; will reprocess. Error: %s",
            slide.path.name,
            exc,
        )
        return None

    if patch_count is None or patch_count <= 0:
        return None

    return ExtractionResult(
        slide=slide,
        h5_path=h5_path,
        num_patches=patch_count,
        patch_size_level0=level0_size,
        metadata={},
    )
73+
74+
def classify_existing_slide_output(
    config: AppConfig,
    slide: Slide,
) -> tuple[str | None, ExtractionResult | None]:
    """Return how an existing patch H5 should be treated for this run.

    Returns one of:
    - (None, None): no reusable output; the slide needs full processing
    - ("skip", None): output is fully complete for this run
    - ("reuse", ExtractionResult): reuse patches/H5 and continue with downstream features

    An info message is logged when an existing patch file is found but no
    usable result can be built from it, since that case is otherwise
    indistinguishable from "no existing output" for the caller.
    """
    if not config.output.skip_existing:
        return None, None

    existing_path = find_existing_patch(slide, config.output, config.extraction)
    if existing_path is None:
        return None, None

    feat_cfg = config.features
    if feat_cfg is None or not feat_cfg.extractors:
        # No feature extractors configured: the existing patch file is the whole output.
        return "skip", None

    existing_result = build_existing_extraction_result(slide, existing_path)
    if existing_result is None:
        # The file exists but could not be turned into a result; say so here,
        # because callers only see (None, None) and cannot report the reason.
        logger.info("Existing output invalid for %s; reprocessing.", slide.path.name)
        return None, None

    missing = missing_features(
        existing_path,
        feat_cfg.extractors,
        expected_total=existing_result.num_patches,
    )
    if not missing:
        return "skip", None
    return "reuse", existing_result
109+
110+
39111class ProcessingRunner :
40112 """High-level orchestration of WSI segmentation, patch extraction, and visualization."""
41113
@@ -68,41 +140,6 @@ def discover_slides(self) -> list[Slide]:
68140 slides .append (slide )
69141 return slides
70142
71- def _build_existing_result (self , slide : Slide , h5_path : Path ) -> ExtractionResult | None :
72- """Create a lightweight ExtractionResult from an existing H5 (no re-segmentation)."""
73- metadata : dict [str , Any ] = {}
74- num_patches : int | None = None
75- patch_size_level0 : int | None = None
76- try :
77- with h5py .File (h5_path , "r" ) as f :
78- num_attr = f .attrs .get ("num_patches" )
79- if num_attr is not None :
80- num_patches = int (num_attr )
81- elif "coords" in f :
82- num_patches = int (f ["coords" ].shape [0 ])
83-
84- ps_level0_attr = f .attrs .get ("patch_size_level0" )
85- if ps_level0_attr is not None :
86- patch_size_level0 = int (ps_level0_attr )
87- except Exception as e : # noqa: BLE001
88- logger .warning (
89- "Failed to read existing output for %s; will reprocess. Error: %s" ,
90- slide .path .name ,
91- e ,
92- )
93- return None
94-
95- if num_patches is None or num_patches <= 0 :
96- return None
97-
98- return ExtractionResult (
99- slide = slide ,
100- h5_path = h5_path ,
101- num_patches = int (num_patches ),
102- patch_size_level0 = patch_size_level0 ,
103- metadata = metadata ,
104- )
105-
106143 def _handle_existing_slide (
107144 self ,
108145 slide : Slide ,
@@ -113,35 +150,22 @@ def _handle_existing_slide(
113150
114151 Returns True when the slide is fully handled (skip or reuse), False to continue processing.
115152 """
116- if not self .config .output .skip_existing :
117- return False
118-
119- existing_path = find_existing_patch (slide , self .config .output , self .config .extraction )
120- if existing_path is None :
153+ decision , existing_result = classify_existing_slide_output (self .config , slide )
154+ if decision is None :
121155 return False
122-
123- feat_cfg = self .config .features
124- if feat_cfg is None or not feat_cfg .extractors :
156+ if decision == "skip" :
125157 logger .info ("Skipping %s (already processed)." , slide .path .name )
126158 if progress :
127159 progress .update (1 )
128160 return True
129-
130- existing_result = self ._build_existing_result (slide , existing_path )
131161 if existing_result is None :
132- logger .info ("Existing output invalid for %s; reprocessing." , slide .path .name )
133162 return False
134-
163+ results . append ( existing_result )
135164 missing = missing_features (
136- existing_path , feat_cfg .extractors , expected_total = existing_result .num_patches
165+ existing_result .h5_path ,
166+ self .config .features .extractors ,
167+ expected_total = existing_result .num_patches ,
137168 )
138- if not missing :
139- logger .info ("Skipping %s (features complete)." , slide .path .name )
140- if progress :
141- progress .update (1 )
142- return True
143-
144- results .append (existing_result )
145169 logger .info (
146170 "Reusing existing patches for %s; missing features: %s" ,
147171 slide .path .name ,
0 commit comments