1616# Make scripts/ importable
1717sys .path .insert (0 , str (Path (__file__ ).resolve ().parent .parent / "scripts" ))
1818
19- from ccb_metrics .judge import (
19+ from csb_metrics .judge import (
2020 JudgeInput ,
2121 JudgeResult ,
2222 LLMJudge ,
2323 normalize_score ,
2424)
25- from ccb_metrics .judge .backends import AnthropicBackend , JudgeBackendError
25+ from csb_metrics .judge .backends import AnthropicBackend , JudgeBackendError , _parse_json
2626
2727# ---------------------------------------------------------------------------
2828# Shared fixtures / helpers
@@ -70,7 +70,7 @@ def _make_judge() -> LLMJudge:
7070
7171
7272class TestNormalizeScore :
73- """Tests for ccb_metrics .judge.models.normalize_score."""
73+ """Tests for csb_metrics .judge.models.normalize_score."""
7474
7575 def test_string_pass (self ):
7676 assert normalize_score ("pass" ) == 1.0
@@ -292,7 +292,7 @@ def flaky_raw_call(system_prompt: str, user_prompt: str) -> str:
292292 return valid_json_str
293293
294294 with patch .object (AnthropicBackend , "_raw_call" , side_effect = flaky_raw_call ):
295- with patch ("ccb_metrics .judge.backends.time.sleep" ): # skip actual sleep
295+ with patch ("csb_metrics .judge.backends.time.sleep" ): # skip actual sleep
296296 result = backend .call ("system" , "user" )
297297
298298 assert call_count == 2
@@ -314,5 +314,5 @@ def bad_raw_call(system_prompt: str, user_prompt: str) -> str:
314314 def test_parse_json_from_markdown_code_block (self ):
315315 """_parse_json handles responses wrapped in ```json``` code blocks."""
316316 wrapped = f"```json\n { json .dumps (_MOCK_BACKEND_RESPONSE )} \n ```"
317- result = AnthropicBackend . _parse_json (wrapped )
317+ result = _parse_json (wrapped )
318318 assert result ["reasoning" ] == _MOCK_BACKEND_RESPONSE ["reasoning" ]
0 commit comments