Skip to content

Commit 84a4142

Browse files
sjarmak and claude committed
fix: clean selection file, fix Node.js Dockerfile conflict, run+promote DOE rebalance batch
- Remove 20 stale entries from selected_benchmark_tasks.json for tasks moved to benchmarks/backups/*_doe_trim/ during MCP-unique DOE shrink (390 → 370: 150 SDLC + 220 MCP-unique)
- Add task_dir field to mcp_doe_rebalance_newruns.json for run_selected_tasks.sh
- Fix Dockerfile for ccx-migration-276 and ccx-vuln-remed-281: remove nodejs/npm apt packages that conflict with cloned node--v22.13.0
- All 20 DOE rebalance tasks ran (40 agent runs), 38/40 succeeded, 2 errored baselines fixed and rerun successfully
- 6 batches promoted to runs/official/ (247 runs, 1315 scored tasks)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4b273e1 commit 84a4142

File tree

4 files changed

+629
-472
lines changed

4 files changed

+629
-472
lines changed

benchmarks/ccb_mcp_migration/ccx-migration-276/environment/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
88
ca-certificates \
99
curl \
1010
python3 \
11-
nodejs npm \
1211
&& rm -rf /var/lib/apt/lists/*
1312

1413
WORKDIR /workspace

benchmarks/ccb_mcp_security/ccx-vuln-remed-281/environment/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
88
ca-certificates \
99
curl \
1010
python3 \
11-
nodejs npm \
1211
&& rm -rf /var/lib/apt/lists/*
1312

1413
WORKDIR /workspace
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
{
2+
"metadata": {
3+
"description": "DOE rebalance: 13 scaffolded tasks (IDs 272-284) + 7 promoted onboarding tasks \u2014 first paired runs",
4+
"generated": "2026-03-01",
5+
"note": "13 scaffolded tasks have zero runs. 7 promoted onboarding tasks have 2-6 historical runs but need coverage under current DOE allocation."
6+
},
7+
"methodology": "first_result",
8+
"statistics": {
9+
"total_tasks": 20
10+
},
11+
"tasks": [
12+
{
13+
"task_id": "ccx-dep-trace-272",
14+
"suite": "ccb_mcp_crossrepo_tracing",
15+
"benchmark": "ccb_mcp_crossrepo_tracing",
16+
"task_dir": "ccb_mcp_crossrepo_tracing/ccx-dep-trace-272"
17+
},
18+
{
19+
"task_id": "ccx-dep-trace-273",
20+
"suite": "ccb_mcp_crossrepo_tracing",
21+
"benchmark": "ccb_mcp_crossrepo_tracing",
22+
"task_dir": "ccb_mcp_crossrepo_tracing/ccx-dep-trace-273"
23+
},
24+
{
25+
"task_id": "ccx-migration-274",
26+
"suite": "ccb_mcp_migration",
27+
"benchmark": "ccb_mcp_migration",
28+
"task_dir": "ccb_mcp_migration/ccx-migration-274"
29+
},
30+
{
31+
"task_id": "ccx-migration-275",
32+
"suite": "ccb_mcp_migration",
33+
"benchmark": "ccb_mcp_migration",
34+
"task_dir": "ccb_mcp_migration/ccx-migration-275"
35+
},
36+
{
37+
"task_id": "ccx-migration-276",
38+
"suite": "ccb_mcp_migration",
39+
"benchmark": "ccb_mcp_migration",
40+
"task_dir": "ccb_mcp_migration/ccx-migration-276"
41+
},
42+
{
43+
"task_id": "ccx-migration-277",
44+
"suite": "ccb_mcp_migration",
45+
"benchmark": "ccb_mcp_migration",
46+
"task_dir": "ccb_mcp_migration/ccx-migration-277"
47+
},
48+
{
49+
"task_id": "ccx-migration-278",
50+
"suite": "ccb_mcp_migration",
51+
"benchmark": "ccb_mcp_migration",
52+
"task_dir": "ccb_mcp_migration/ccx-migration-278"
53+
},
54+
{
55+
"task_id": "ccx-migration-279",
56+
"suite": "ccb_mcp_migration",
57+
"benchmark": "ccb_mcp_migration",
58+
"task_dir": "ccb_mcp_migration/ccx-migration-279"
59+
},
60+
{
61+
"task_id": "ccx-onboard-280",
62+
"suite": "ccb_mcp_onboarding",
63+
"benchmark": "ccb_mcp_onboarding",
64+
"task_dir": "ccb_mcp_onboarding/ccx-onboard-280"
65+
},
66+
{
67+
"task_id": "ccx-vuln-remed-281",
68+
"suite": "ccb_mcp_security",
69+
"benchmark": "ccb_mcp_security",
70+
"task_dir": "ccb_mcp_security/ccx-vuln-remed-281"
71+
},
72+
{
73+
"task_id": "ccx-vuln-remed-282",
74+
"suite": "ccb_mcp_security",
75+
"benchmark": "ccb_mcp_security",
76+
"task_dir": "ccb_mcp_security/ccx-vuln-remed-282"
77+
},
78+
{
79+
"task_id": "ccx-vuln-remed-283",
80+
"suite": "ccb_mcp_security",
81+
"benchmark": "ccb_mcp_security",
82+
"task_dir": "ccb_mcp_security/ccx-vuln-remed-283"
83+
},
84+
{
85+
"task_id": "ccx-vuln-remed-284",
86+
"suite": "ccb_mcp_security",
87+
"benchmark": "ccb_mcp_security",
88+
"task_dir": "ccb_mcp_security/ccx-vuln-remed-284"
89+
},
90+
{
91+
"task_id": "ccx-explore-042-ds",
92+
"suite": "ccb_mcp_onboarding",
93+
"benchmark": "ccb_mcp_onboarding",
94+
"task_dir": "ccb_mcp_onboarding/ccx-explore-042-ds"
95+
},
96+
{
97+
"task_id": "ccx-onboard-050-ds",
98+
"suite": "ccb_mcp_onboarding",
99+
"benchmark": "ccb_mcp_onboarding",
100+
"task_dir": "ccb_mcp_onboarding/ccx-onboard-050-ds"
101+
},
102+
{
103+
"task_id": "ccx-onboard-search-210",
104+
"suite": "ccb_mcp_onboarding",
105+
"benchmark": "ccb_mcp_onboarding",
106+
"task_dir": "ccb_mcp_onboarding/ccx-onboard-search-210"
107+
},
108+
{
109+
"task_id": "ccx-onboard-search-211",
110+
"suite": "ccb_mcp_onboarding",
111+
"benchmark": "ccb_mcp_onboarding",
112+
"task_dir": "ccb_mcp_onboarding/ccx-onboard-search-211"
113+
},
114+
{
115+
"task_id": "ccx-onboard-search-212",
116+
"suite": "ccb_mcp_onboarding",
117+
"benchmark": "ccb_mcp_onboarding",
118+
"task_dir": "ccb_mcp_onboarding/ccx-onboard-search-212"
119+
},
120+
{
121+
"task_id": "ccx-onboard-search-213",
122+
"suite": "ccb_mcp_onboarding",
123+
"benchmark": "ccb_mcp_onboarding",
124+
"task_dir": "ccb_mcp_onboarding/ccx-onboard-search-213"
125+
},
126+
{
127+
"task_id": "ccx-onboard-search-214",
128+
"suite": "ccb_mcp_onboarding",
129+
"benchmark": "ccb_mcp_onboarding",
130+
"task_dir": "ccb_mcp_onboarding/ccx-onboard-search-214"
131+
}
132+
]
133+
}

0 commit comments

Comments
 (0)