|
146 | 146 | {"id":"CodeContextBench-yk3","title":"Run CodeReview benchmark: 3 tasks x 3 configs","description":"Execute first runs for CodeReview benchmark. 3 tasks (cr-ghost-001, cr-aspnetcore-001, cr-calcom-001) across baseline, sourcegraph_base, sourcegraph_full. Config script exists: configs/codereview_3config.sh. Verify Docker builds succeed first with a quick single-task test.","notes":"Baseline: 3/3 done (1.0, 1.0, 0.8). MCP-base launched (sg-benchmarks mirrors). MCP-full will auto-launch after base. Invalid MCP runs archived.","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-06T14:50:01.091878411Z","created_by":"LoCoBench Bot","updated_at":"2026-02-06T17:05:14.773296031Z","closed_at":"2026-02-06T17:05:14.773296031Z","close_reason":"CodeReview 3x3 complete. Baseline avg=0.93, SG_base avg=0.89, SG_full avg=0.93. Results in codereview_opus_20260206_155036 (baseline), codereview_opus_20260206_163958 (SG_base), codereview_opus_20260206_164838 (SG_full)."} |
147 | 147 | {"id":"CodeContextBench-yvz","title":"Fix CrossRepo verifier to accept standard unified diff","status":"closed","priority":1,"issue_type":"bug","owner":"locobench@anthropic.com","created_at":"2026-02-11T10:50:13.344399749Z","created_by":"LoCoBench Bot","updated_at":"2026-02-11T11:00:56.006455693Z","closed_at":"2026-02-11T11:00:56.006455693Z","close_reason":"Added fallback unified diff parser for +++ b/ headers when no diff --git headers found."} |
148 | 148 | {"id":"CodeContextBench-yzh","title":"Create configs/run_overnight.sh orchestrator","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-08T03:36:13.60028793Z","created_by":"LoCoBench Bot","updated_at":"2026-02-08T03:40:10.064123563Z","closed_at":"2026-02-08T03:40:10.064123563Z","close_reason":"Created configs/run_overnight.sh with sequential benchmark queue, token health checks, canary integration, dry-run, resume-from support"} |
149 | | -{"id":"CodeContextBench-z3h","title":"US-004: Populate 3 Go navprove tasks (teleport, vuls, flipt)","status":"in_progress","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T21:21:03.032952702Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T21:21:06.699104497Z"} |
| 149 | +{"id":"CodeContextBench-z3h","title":"US-004: Populate 3 Go navprove tasks (teleport, vuls, flipt)","status":"closed","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T21:21:03.032952702Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T21:24:56.388059078Z","closed_at":"2026-02-16T21:24:56.388059078Z","close_reason":"US-004 complete: all 3 Go navprove tasks populated"} |
150 | 150 | {"id":"CodeContextBench-z7n","title":"US-006b: Scaffold 3 architectural understanding tasks (Tier B repos)","status":"closed","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-15T23:21:56.729085935Z","created_by":"LoCoBench Bot","updated_at":"2026-02-15T23:23:08.101442069Z","closed_at":"2026-02-15T23:23:08.101442069Z","close_reason":"Scaffolded 3 Tier B architectural understanding tasks (Camel, Flink, QuantLib)"} |
151 | 151 | {"id":"CodeContextBench-zj6","title":"Rerun SG_base after fixing doubled github.com bug","description":"After fixing the doubled github.com prefix bug in claude_baseline_agent.py, ALL SG_base results for benchmarks using sg-benchmarks mirror repos need reruns. Known affected: LinuxFLBench (confirmed 2 tasks scored lower), plus any benchmark where instance_to_mirror.json maps to github.com/sg-benchmarks/*. SG_full may also be affected for keyword_search calls (Deep Search uses different mechanics so may be less impacted).","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-06T22:02:14.358067618Z","created_by":"LoCoBench Bot","updated_at":"2026-02-07T18:39:19.01740942Z","closed_at":"2026-02-07T18:39:19.01740942Z","close_reason":"SG_base reruns complete: CodeReview (0.98 vs BL 0.93), LinuxFLBench (0.82 vs BL 0.86)"} |
152 | 152 | {"id":"CodeContextBench-zku","title":"Phase 2: Add sourcegraph_only config to agent code","status":"closed","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T18:42:39.88870793Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T18:45:33.239193235Z","closed_at":"2026-02-16T18:45:33.239193235Z","close_reason":"sourcegraph_only added to claude_baseline_agent.py (9 locations), eval_matrix.json, aggregate_status.py, generate_manifest.py"} |
0 commit comments