fix: update hardcoded paths for CodeContextBench → CodeScaleBench directory rename

sjarmak · claude · sjarmak · commit 10b2a5e8ea99 · 2026-03-02T21:09:33.000Z
Update absolute paths in scripts, agents, and configs that reference
/home/stephanie_jarmak/CodeContextBench/ to /home/stephanie_jarmak/CodeScaleBench/
in preparation for directory rename. Also update paper title in README.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # CodeScaleBench
 
-Benchmark suite for evaluating how AI coding agents leverage external context tools on software engineering tasks across the SDLC. Developed as the reproducibility artifact for the paper *"CodeScaleBench: A Systematic Evaluation Framework for Assessing the Impact of Enhanced Code Intelligence on AI Coding Agent Performance."*
+Benchmark suite for evaluating how AI coding agents leverage external context tools on software engineering tasks across the SDLC. Developed as the reproducibility artifact for the paper *"CodeScaleBench: Evaluating Coding Agents on Real-Scale Software Engineering Tasks Across the Development Lifecycle."*
 
 This repository contains **benchmark task definitions**, **evaluation configs**, and a **metrics extraction pipeline**. Tasks are executed via the [Harbor](https://github.com/laude-institute/harbor/tree/main) runner with the Claude Code agent harness.
 
diff --git a/agents/claude_baseline_agent.py b/agents/claude_baseline_agent.py
@@ -28,7 +28,7 @@
 logger = logging.getLogger(__name__)
 
 # Path to CLAUDE.md template for Deep Search tasks
-LOCOBENCH_CLAUDE_MD_TEMPLATE = Path("/home/stephanie_jarmak/CodeContextBench/benchmarks/locobench_agent/templates/CLAUDE.md")
+LOCOBENCH_CLAUDE_MD_TEMPLATE = Path("/home/stephanie_jarmak/CodeScaleBench/benchmarks/locobench_agent/templates/CLAUDE.md")
 
 # System prompt for evaluation context - delivered via --append-system-prompt for ALL modes
 # This is the single authoritative source of test-first instructions (US-003)
diff --git a/agents/harnesses/base.py b/agents/harnesses/base.py
@@ -13,7 +13,7 @@ class BaselineHarnessMixin:
 
     # Path used by the Claude-specific template; remains available for fallback content.
     LOCOBENCH_CLAUDE_MD_TEMPLATE = Path(
-        "/home/stephanie_jarmak/CodeContextBench/benchmarks/locobench_agent/templates/CLAUDE.md"
+        "/home/stephanie_jarmak/CodeScaleBench/benchmarks/locobench_agent/templates/CLAUDE.md"
     )
 
     EVALUATION_CONTEXT_PROMPT = """## EVALUATION CONTEXT
diff --git a/configs/control_plane_ccb.yaml b/configs/control_plane_ccb.yaml
@@ -1,4 +1,4 @@
-# Deterministic control plane for CodeContextBench 2-config runs.
+# Deterministic control plane for CodeScaleBench 2-config runs.
 # Same file + same task source → same experiment_id and run list.
 #
 # Generate manifest:
diff --git a/configs/validate_one_per_benchmark.sh b/configs/validate_one_per_benchmark.sh
@@ -114,7 +114,7 @@ for t in sel['tasks']:
 ")
 
 echo "=============================================="
-echo "CodeContextBench Validation Run (parallel)"
+echo "CodeScaleBench Validation Run (parallel)"
 echo "=============================================="
 if [ "$SG_ONLY" = true ]; then
     echo "Mode:    sg_only_env smoke (Dockerfile.sg_only swap)"
diff --git a/scripts/daytona_task_registry.json b/scripts/daytona_task_registry.json
@@ -1,7 +1,7 @@
 {
   "version": "1.0",
   "generated_by": "scripts/build_daytona_registry.py",
-  "benchmarks_dir": "/home/stephanie_jarmak/CodeContextBench/benchmarks",
+  "benchmarks_dir": "/home/stephanie_jarmak/CodeScaleBench/benchmarks",
   "summary": {
     "total_tasks": 298,
     "total_suites": 20,
diff --git a/scripts/extract_build_diary.py b/scripts/extract_build_diary.py
@@ -119,7 +119,7 @@ def parse_transcripts(transcript_dir: str):
                                 fp = inp.get("file_path", "")
                                 if fp:
                                     fp = re.sub(
-                                        r"^/home/stephanie_jarmak/CodeContextBench/",
+                                        r"^/home/stephanie_jarmak/CodeScaleBench/",
                                         "", fp,
                                     )
                                     if name == "Read":
@@ -329,7 +329,7 @@ def main():
     parser.add_argument(
         "--transcript-dir",
         default=os.path.expanduser(
-            "~/.claude/projects/-home-stephanie-jarmak-CodeContextBench/"
+            "~/.claude/projects/-home-stephanie-jarmak-CodeScaleBench/"
         ),
     )
     parser.add_argument("--output-dir", default="data/build_diary")
diff --git a/scripts/plot_build_diary_supplementary.py b/scripts/plot_build_diary_supplementary.py

Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,7 @@ class BaselineHarnessMixin:`
`13`	`13`
`14`	`14`	`# Path used by the Claude-specific template; remains available for fallback content.`
`15`	`15`	`LOCOBENCH_CLAUDE_MD_TEMPLATE = Path(`
`16`		`- "/home/stephanie_jarmak/CodeContextBench/benchmarks/locobench_agent/templates/CLAUDE.md"`
	`16`	`+ "/home/stephanie_jarmak/CodeScaleBench/benchmarks/locobench_agent/templates/CLAUDE.md"`
`17`	`17`	`)`
`18`	`18`
`19`	`19`	`EVALUATION_CONTEXT_PROMPT = """## EVALUATION CONTEXT`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-# Deterministic control plane for CodeContextBench 2-config runs.`
	`1`	`+# Deterministic control plane for CodeScaleBench 2-config runs.`
`2`	`2`	`# Same file + same task source → same experiment_id and run list.`
`3`	`3`	`#`
`4`	`4`	`# Generate manifest:`
Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"version": "1.0",`
`3`	`3`	`"generated_by": "scripts/build_daytona_registry.py",`
`4`		`- "benchmarks_dir": "/home/stephanie_jarmak/CodeContextBench/benchmarks",`
	`4`	`+ "benchmarks_dir": "/home/stephanie_jarmak/CodeScaleBench/benchmarks",`
`5`	`5`	`"summary": {`
`6`	`6`	`"total_tasks": 298,`
`7`	`7`	`"total_suites": 20,`