@@ -251,37 +251,56 @@ def discover_comparison_pairs(
251251
252252
253253def _extract_files (data : Dict [str , Any ]) -> List [Dict ]:
254- """Extract file list from various oracle/ground_truth formats."""
255- # oracle_answer.json format
256- if "files" in data and isinstance (data ["files" ], list ):
257- return data ["files" ]
254+ """Extract file list from various oracle/ground_truth formats.
255+
256+ Handles:
257+ - oracle_answer.json: {"files": [{"repo": ..., "path": ...}]}
258+ - ground_truth.json (fix/feature/refactor): {"files": ["path", ...], "repo": "..."}
259+ - ground_truth.json (debug/design/understand): {"file_references": [...]}
260+ - ground_truth.json (dependency_chain): {"dependency_chain": ["path", ...]}
261+ - ground_truth_agent.json: same as oracle_answer.json format
262+ """
263+ result = []
258264
259- # ground_truth.json format (ccb_fix/feature/refactor)
265+ # "files" key — could be dicts or plain strings
260266 if "files" in data and isinstance (data ["files" ], list ):
261- # Files might be plain strings
262- result = []
263267 repo = data .get ("repo" , "" )
264268 for f in data ["files" ]:
265- if isinstance (f , str ):
266- result .append ({"repo" : repo , "path" : f })
267- elif isinstance (f , dict ):
269+ if isinstance (f , dict ):
268270 result .append (f )
269- return result
271+ elif isinstance (f , str ):
272+ result .append ({"repo" : repo , "path" : f })
273+ if result :
274+ return result
270275
271- # ground_truth.json format (ccb_debug/design/understand)
276+ # "file_references" key (ccb_debug/design/understand)
272277 if "file_references" in data :
273278 refs = data ["file_references" ]
274- result = []
275279 for ref in refs :
276280 if isinstance (ref , dict ):
277281 path = ref .get ("file" , ref .get ("path" , "" ))
278282 if path :
279283 result .append ({"repo" : "" , "path" : path })
280284 elif isinstance (ref , str ):
281285 result .append ({"repo" : "" , "path" : ref })
282- return result
283-
284- return []
286+ if result :
287+ return result
288+
289+ # "root_cause_files" as additional files (ccb_fix)
290+ for key in ("root_cause_files" , "dependency_chain" ):
291+ items = data .get (key , [])
292+ if isinstance (items , list ):
293+ repo = data .get ("repo" , "" )
294+ for f in items :
295+ if isinstance (f , str ):
296+ entry = {"repo" : repo , "path" : f }
297+ if entry not in result :
298+ result .append (entry )
299+ elif isinstance (f , dict ):
300+ if f not in result :
301+ result .append (f )
302+
303+ return result
285304
286305
287306# ---------------------------------------------------------------------------
0 commit comments