|
1 | 1 | { |
2 | 2 | "metadata": { |
3 | | - "title": "CodeContextBench Selected Benchmark Tasks (SDLC Suite Reorganization)", |
4 | | - "version": "2.0", |
| 3 | + "title": "CodeScaleBench Selected Benchmark Tasks", |
| 4 | + "version": "3.0", |
5 | 5 | "generated_by": "SDLC suite migration from migration_map.json", |
6 | 6 | "generated_date": "2026-02-18", |
7 | 7 | "total_available": 835, |
8 | 8 | "total_selected": 370, |
9 | 9 | "migration_source": "migration_map.json (157 mapped tasks across 8 SDLC suites)", |
10 | 10 | "target_total": 370, |
11 | 11 | "target_note": "150 SDLC tasks (9 suites, Neyman-optimal) + 220 MCP-unique tasks (11 suites, Neyman-optimal) = 370 active.", |
12 | | - "last_updated": "2026-03-01", |
| 12 | + "last_updated": "2026-03-03", |
13 | 13 | "note": "DOE-driven rebalance: 150 SDLC (Neyman-optimal) + 220 MCP-unique (Neyman-optimal). MCP-unique rebalance: 13 scaffolded (IDs 272-284), 7 promoted from onboarding_extra, 20 low-IV tasks moved to benchmarks/backups/*_doe_trim/.", |
14 | 14 | "per_suite": { |
15 | 15 | "csb_sdlc_debug": 18, |
|
103 | 103 | }, |
104 | 104 | "statistics": { |
105 | 105 | "tasks_per_sdlc_phase": { |
106 | | - "Analysis": 8, |
107 | | - "Architecture & Design": 1, |
108 | | - "Compliance audit": 2, |
109 | | - "Cross-org discovery": 2, |
110 | | - "Cross-repo tracing": 3, |
111 | | - "Debugging": 5, |
112 | | - "Documentation": 16, |
113 | | - "Implementation (bug fix)": 1, |
114 | | - "Implementation (feature)": 26, |
| 106 | + "unknown": 61, |
| 107 | + "Requirements & Discovery": 28, |
| 108 | + "cross-repo-dep-trace": 24, |
| 109 | + "Implementation (feature)": 21, |
| 110 | + "Testing & QA": 18, |
| 111 | + "fix": 15, |
| 112 | + "Refactoring": 13, |
| 113 | + "debug": 13, |
| 114 | + "migration-inventory": 13, |
| 115 | + "platform-knowledge": 13, |
| 116 | + "compliance-audit": 12, |
| 117 | + "cross-org-discovery": 12, |
| 118 | + "Documentation": 11, |
| 119 | + "domain-lineage": 10, |
| 120 | + "vuln-remediation": 10, |
| 121 | + "agentic-correctness": 10, |
| 122 | + "secure": 9, |
| 123 | + "Onboarding & comprehension": 9, |
| 124 | + "incident-debug": 9, |
| 125 | + "design": 8, |
| 126 | + "Migration analysis": 8, |
| 127 | + "Implementation (bug fix)": 7, |
| 128 | + "Analysis": 6, |
| 129 | + "Security remediation": 6, |
| 130 | + "Cross-repo tracing": 5, |
| 131 | + "Debugging": 4, |
| 132 | + "Incident investigation": 3, |
115 | 133 | "Implementation (refactor)": 2, |
| 134 | + "Platform engineering": 2, |
| 135 | + "Onboarding comprehension": 2, |
| 136 | + "Bug Repair": 2, |
116 | 137 | "Implementation (refactoring)": 1, |
117 | | - "Incident investigation": 3, |
118 | | - "Migration analysis": 2, |
119 | | - "Onboarding & comprehension": 3, |
120 | | - "Platform engineering": 3, |
121 | | - "Refactoring": 17, |
122 | | - "Requirements & Discovery": 34, |
123 | | - "Security remediation": 2, |
124 | | - "Security review": 3, |
125 | | - "Testing & QA": 18, |
126 | | - "debug": 14, |
127 | | - "design": 11, |
128 | | - "fix": 19, |
129 | | - "secure": 14 |
| 138 | + "Security review": 1, |
| 139 | + "Compliance audit": 1, |
| 140 | + "Platform knowledge": 1 |
130 | 141 | }, |
131 | 142 | "tasks_per_benchmark": { |
132 | | - "csb_sdlc_debug": 20, |
133 | | - "csb_sdlc_design": 20, |
134 | | - "csb_sdlc_document": 20, |
135 | | - "csb_sdlc_fix": 25, |
136 | | - "csb_org_compliance": 7, |
137 | | - "csb_org_crossorg": 5, |
138 | | - "csb_org_crossrepo": 1, |
139 | | - "csb_org_crossrepo_tracing": 9, |
140 | | - "csb_org_domain": 10, |
141 | | - "csb_org_incident": 11, |
142 | | - "csb_org_migration": 7, |
143 | | - "csb_org_onboarding": 11, |
144 | | - "csb_org_org": 5, |
145 | | - "csb_org_platform": 5, |
146 | | - "csb_org_security": 10, |
147 | | - "csb_sdlc_secure": 20, |
148 | | - "csb_sdlc_test": 20, |
149 | | - "csb_sdlc_understand": 20, |
150 | | - "csb_sdlc_feature": 20, |
151 | | - "csb_sdlc_refactor": 20 |
| 143 | + "csb_org_compliance": 18, |
| 144 | + "csb_org_crossorg": 15, |
| 145 | + "csb_org_crossrepo": 14, |
| 146 | + "csb_org_crossrepo_tracing": 22, |
| 147 | + "csb_org_domain": 20, |
| 148 | + "csb_org_incident": 20, |
| 149 | + "csb_org_migration": 26, |
| 150 | + "csb_org_onboarding": 28, |
| 151 | + "csb_org_org": 15, |
| 152 | + "csb_org_platform": 18, |
| 153 | + "csb_org_security": 24, |
| 154 | + "csb_sdlc_debug": 18, |
| 155 | + "csb_sdlc_design": 14, |
| 156 | + "csb_sdlc_document": 13, |
| 157 | + "csb_sdlc_feature": 23, |
| 158 | + "csb_sdlc_fix": 26, |
| 159 | + "csb_sdlc_refactor": 16, |
| 160 | + "csb_sdlc_secure": 12, |
| 161 | + "csb_sdlc_test": 18, |
| 162 | + "csb_sdlc_understand": 10 |
152 | 163 | }, |
153 | 164 | "tasks_per_language": { |
154 | | - "java": 56, |
155 | | - "go": 138, |
| 165 | + "go": 134, |
| 166 | + "cpp": 73, |
| 167 | + "java": 57, |
| 168 | + "python": 55, |
156 | 169 | "rust": 12, |
| 170 | + "c": 10, |
| 171 | + "javascript": 8, |
| 172 | + "typescript": 7, |
| 173 | + "java,cpp": 5, |
| 174 | + "cpp,c,javascript": 3, |
157 | 175 | "python,cpp": 1, |
158 | | - "typescript": 4, |
159 | | - "python": 52, |
160 | 176 | "csharp": 1, |
161 | | - "cpp": 75, |
162 | | - "javascript": 6, |
163 | | - "c": 9, |
164 | 177 | "go,protobuf": 1, |
165 | 178 | "go,cpp": 1, |
166 | 179 | "mixed": 1, |
167 | | - "java,cpp": 5, |
168 | | - "cpp,c,javascript": 3, |
169 | 180 | "unknown": 1 |
170 | 181 | }, |
171 | | - "avg_mcp_benefit_score": 0.868, |
172 | | - "total_tasks": 390, |
| 182 | + "avg_mcp_benefit_score": 0.891, |
| 183 | + "total_tasks": 370, |
173 | 184 | "per_suite": { |
174 | 185 | "csb_sdlc_feature": 23, |
175 | 186 | "csb_sdlc_refactor": 16, |
|
0 commit comments