test: align metrics imports with ccb_metrics -> csb_metrics rename

sjarmak · sjarmak · commit a19f83ebcb6a · 2026-03-04T02:32:38.000Z
diff --git a/tests/test_extract_task_metrics.py b/tests/test_extract_task_metrics.py
@@ -7,7 +7,7 @@
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 
 from scripts.extract_task_metrics import process_task_dir
-from scripts.ccb_metrics.models import TaskMetrics
+from scripts.csb_metrics.models import TaskMetrics
 
 
 class ExtractTaskMetricsEmitterTests(unittest.TestCase):
diff --git a/tests/test_judge_engine.py b/tests/test_judge_engine.py
@@ -16,13 +16,13 @@
 # Make scripts/ importable
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
 
-from ccb_metrics.judge import (
+from csb_metrics.judge import (
     JudgeInput,
     JudgeResult,
     LLMJudge,
     normalize_score,
 )
-from ccb_metrics.judge.backends import AnthropicBackend, JudgeBackendError
+from csb_metrics.judge.backends import AnthropicBackend, JudgeBackendError, _parse_json
 
 # ---------------------------------------------------------------------------
 # Shared fixtures / helpers
@@ -70,7 +70,7 @@ def _make_judge() -> LLMJudge:
 
 
 class TestNormalizeScore:
-    """Tests for ccb_metrics.judge.models.normalize_score."""
+    """Tests for csb_metrics.judge.models.normalize_score."""
 
     def test_string_pass(self):
         assert normalize_score("pass") == 1.0
@@ -292,7 +292,7 @@ def flaky_raw_call(system_prompt: str, user_prompt: str) -> str:
             return valid_json_str
 
         with patch.object(AnthropicBackend, "_raw_call", side_effect=flaky_raw_call):
-            with patch("ccb_metrics.judge.backends.time.sleep"):  # skip actual sleep
+            with patch("csb_metrics.judge.backends.time.sleep"):  # skip actual sleep
                 result = backend.call("system", "user")
 
         assert call_count == 2
@@ -314,5 +314,5 @@ def bad_raw_call(system_prompt: str, user_prompt: str) -> str:
     def test_parse_json_from_markdown_code_block(self):
         """_parse_json handles responses wrapped in ```json``` code blocks."""
         wrapped = f"```json\n{json.dumps(_MOCK_BACKEND_RESPONSE)}\n```"
-        result = AnthropicBackend._parse_json(wrapped)
+        result = _parse_json(wrapped)
         assert result["reasoning"] == _MOCK_BACKEND_RESPONSE["reasoning"]
diff --git a/tests/test_oracle_checks_tiered.py b/tests/test_oracle_checks_tiered.py
@@ -1,4 +1,4 @@
-"""Unit tests for tiered scoring in scripts/ccb_metrics/oracle_checks.py.
+"""Unit tests for tiered scoring in scripts/csb_metrics/oracle_checks.py.
 
 Covers the two-tier weighted scoring added to check_file_set_match and the
 _get_primary_score preference for weighted_f1 over plain f1.
@@ -9,7 +9,7 @@
 
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 
-from scripts.ccb_metrics.oracle_checks import check_file_set_match, _get_primary_score
+from scripts.csb_metrics.oracle_checks import check_file_set_match, _get_primary_score
 
 
 def _file(repo, path, tier=None):