Skip to content

Commit 570cbc8

Browse files
LoCoBench Bot and claude committed
fix: standardize 17 task.toml schemas + register 41 tasks
task.toml standardization (17 files across docgen, onboarding, security):
- Add missing version="1.0", [verification] with command/reward_type
- Flatten non-standard nesting ([runtime], [scoring], [task.language], etc.)
- Remove MCP contamination from [mcp] sections (3 tasks)
- Add missing id/repo fields, fix field names (limit_seconds→time_limit_sec)

Task registration (selected_benchmark_tasks.json):
- Register 41 new tasks: 13 docgen, 8 nlqa, 8 onboarding, 5 codereview, 7 crossrepo
- Total tasks: 175 → 216
- All entries include MCP benefit scores computed from category heuristics

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent aab9e97 commit 570cbc8

File tree

19 files changed

+978
-231
lines changed

19 files changed

+978
-231
lines changed

.beads/issues.jsonl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
{"id":"CodeContextBench-5kj","title":"US-002: Create workflow taxonomy module and methodology doc","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-15T13:29:45.828982776Z","created_by":"LoCoBench Bot","updated_at":"2026-02-15T13:32:45.597491177Z","closed_at":"2026-02-15T13:32:45.597491177Z","close_reason":"US-002 complete: workflow taxonomy module + methodology doc"}
3737
{"id":"CodeContextBench-5m5","title":"Document PyTorch sgt-025 as permanently excluded from SG_full","description":"sgt-025 Docker build fails because the referenced PyTorch commit is unreachable. Two attempts both failed with RuntimeError: Docker compose command failed. This is an unresolvable infrastructure issue. Document in TASK_CATALOG.md and potentially remove from SG_full task list.","status":"closed","priority":4,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-10T11:28:28.715732672Z","created_by":"LoCoBench Bot","updated_at":"2026-02-12T10:29:30.648519648Z","closed_at":"2026-02-12T10:29:30.648519648Z","close_reason":"Documented in benchmarks/ccb_pytorch/README.md — new Excluded Tasks section. sgt-025 Docker build fails due to unreachable pre_fix_rev commit. Permanently excluded from SG_full."}
3838
{"id":"CodeContextBench-5mp","title":"US-012: Build failure analysis engine","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-15T14:01:18.160263388Z","created_by":"LoCoBench Bot","updated_at":"2026-02-15T14:09:41.058430692Z","closed_at":"2026-02-15T14:09:41.058430692Z","close_reason":"US-012 already complete - failure_analysis.py exists and passes all acceptance criteria"}
39+
{"id":"CodeContextBench-5pg","title":"US-001: Select qutebrowser and flipt source tasks","status":"in_progress","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T21:06:03.285366916Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T21:06:06.996922022Z"}
3940
{"id":"CodeContextBench-6bl","title":"US-008: Scaffold 3 bug investigation tasks","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-15T23:48:13.666948824Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T00:10:09.807327379Z","closed_at":"2026-02-16T00:10:09.807327379Z","close_reason":"Scaffolded 3 bug investigation tasks: Django #36301, Kafka KAFKA-19012, K8s PR#126807"}
4041
{"id":"CodeContextBench-6o7","title":"SSH key configuration blocking git push","description":"git push fails with 'Permission denied (publickey)'. Need to configure SSH keys for git@github.com:sjarmak/CodeContextBench.git. All US-001 through US-014 are complete and ready to push.","status":"closed","priority":0,"issue_type":"bug","owner":"locobench@anthropic.com","created_at":"2026-02-16T16:48:11.289873552Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T17:16:54.032008507Z","closed_at":"2026-02-16T17:16:54.032008507Z","close_reason":"Resolved - push succeeded with HTTPS remote URL. No SSH configuration needed."}
4142
{"id":"CodeContextBench-6sr","title":"US-002: Add trajectory.json post-run warning to _common.sh","status":"closed","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T01:01:50.006070632Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T01:05:37.809300987Z","closed_at":"2026-02-16T01:05:37.809300987Z","close_reason":"US-002 implemented: per-task and per-batch trajectory.json checks in _common.sh, AGENTS.md docs updated"}
Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,18 @@
1+
version = "1.0"
2+
3+
[metadata]
4+
name = "docgen-api-001"
5+
description = "Generate API reference documentation for VS Code's Diagnostic API"
6+
17
[task]
8+
id = "docgen-api-001"
9+
repo = "microsoft/vscode"
210
category = "api_reference"
3-
suite = "ccb_docgen"
4-
name = "docgen-api-001"
511
language = "typescript"
612
difficulty = "hard"
713
time_limit_sec = 1200
814

9-
[task.metadata]
10-
description = "Generate API reference documentation for VS Code's Diagnostic API"
11-
tags = ["documentation", "api-reference", "vscode", "typescript"]
15+
[verification]
16+
type = "custom"
17+
command = "bash /tests/test.sh"
18+
reward_type = "continuous"
Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,18 @@
1-
[task]
2-
category = "api_reference"
1+
version = "1.0"
2+
3+
[metadata]
34
name = "docgen-api-002"
45
description = "Generate API reference documentation for Cilium's eBPF map API covering map types, CRUD operations, pinning, and lifecycle management"
6+
7+
[task]
8+
id = "docgen-api-002"
9+
repo = "cilium/cilium"
10+
category = "api_reference"
511
language = "go"
612
difficulty = "hard"
713
time_limit_sec = 1200
814

9-
[metadata]
10-
output_format = "/workspace/documentation.md"
11-
repo = "cilium/cilium"
15+
[verification]
16+
type = "custom"
17+
command = "bash /tests/test.sh"
18+
reward_type = "continuous"
Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,18 @@
1-
[task]
2-
category = "api_reference"
1+
version = "1.0"
2+
3+
[metadata]
34
name = "docgen-api-003"
45
description = "Generate API reference documentation for Apache Kafka KafkaConsumer Java API"
6+
7+
[task]
8+
id = "docgen-api-003"
9+
repo = "apache/kafka"
10+
category = "api_reference"
511
language = "java"
612
difficulty = "hard"
713
time_limit_sec = 1200
814

9-
[metadata]
10-
output_format = "/workspace/documentation.md"
11-
repo = "apache/kafka"
15+
[verification]
16+
type = "custom"
17+
command = "bash /tests/test.sh"
18+
reward_type = "continuous"
Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,18 @@
1-
[task]
1+
version = "1.0"
2+
3+
[metadata]
24
name = "docgen-arch-003"
3-
category = "architecture_doc"
45
description = "Generate architecture documentation for Terraform's plan/apply pipeline"
56

6-
[runtime]
7+
[task]
8+
id = "docgen-arch-003"
9+
repo = "hashicorp/terraform"
10+
category = "architecture_doc"
711
language = "go"
812
difficulty = "hard"
913
time_limit_sec = 1200
1014

11-
[scoring]
12-
method = "weighted_checklist"
13-
max_reward = 1.0
15+
[verification]
16+
type = "custom"
17+
command = "bash /tests/test.sh"
18+
reward_type = "continuous"
Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,23 @@
1-
[task]
2-
id = "docgen-migration-001"
3-
category = "migration_guide"
4-
language = "go"
5-
difficulty = "hard"
6-
time_limit_sec = 1200
1+
version = "1.0"
72

83
[metadata]
4+
name = "docgen-migration-001"
5+
description = "Migration guide for upgrading from Terraform v1.9.0 to v1.10.0"
96
source_repo = "hashicorp/terraform"
107
old_version = "v1.9.0"
118
new_version = "v1.10.0"
129
old_commit = "7637a9216e68f2812887eaf0722f9d3f9804c45e"
1310
new_commit = "24236f4f0bd10ada71d70868abe15f9d88099747"
14-
description = "Migration guide for upgrading from Terraform v1.9.0 to v1.10.0"
11+
12+
[task]
13+
id = "docgen-migration-001"
14+
repo = "hashicorp/terraform"
15+
category = "migration_guide"
16+
language = "go"
17+
difficulty = "hard"
18+
time_limit_sec = 1200
19+
20+
[verification]
21+
type = "custom"
22+
command = "bash /tests/test.sh"
23+
reward_type = "continuous"
Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,18 @@
1-
[task]
1+
version = "1.0"
2+
3+
[metadata]
24
name = "docgen-migration-002"
5+
description = "Generate migration guide for Envoy External Processing (ext_proc) filter upgrade from v1.30 to v1.31"
6+
7+
[task]
8+
id = "docgen-migration-002"
9+
repo = "envoyproxy/envoy"
310
category = "migration_guide"
411
language = "cpp"
512
difficulty = "hard"
613
time_limit_sec = 1200
714

8-
[metadata]
9-
description = "Generate migration guide for Envoy External Processing (ext_proc) filter upgrade from v1.30 to v1.31"
10-
tags = ["envoy", "migration", "ext_proc", "filter", "configuration"]
15+
[verification]
16+
type = "custom"
17+
command = "bash /tests/test.sh"
18+
reward_type = "continuous"
Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,18 @@
1-
[task]
1+
version = "1.0"
2+
3+
[metadata]
24
name = "onboard-handoff-001"
5+
description = "Team handoff for Envoy ext_authz filter ownership"
6+
7+
[task]
8+
id = "onboard-handoff-001"
9+
repo = "envoyproxy/envoy"
310
category = "team_handoff"
411
language = "cpp"
512
difficulty = "hard"
613
time_limit_sec = 1800
7-
environment = "docker"
8-
9-
[task.metadata]
10-
description = "Team handoff for Envoy ext_authz filter ownership"
11-
repo = "envoyproxy/envoy"
12-
target_component = "ext_authz HTTP filter"
13-
output_path = "/logs/agent/onboarding.md"
1414

15-
[task.metadata.mcp_benefit]
16-
context_complexity = 0.9
17-
cross_file_deps = 0.8
18-
semantic_search_potential = 0.85
19-
tool_chain_weight = 0.7
15+
[verification]
16+
type = "custom"
17+
command = "bash /tests/test.sh"
18+
reward_type = "continuous"
Lines changed: 11 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,46 +1,18 @@
1+
version = "1.0"
2+
3+
[metadata]
4+
name = "onboard-handoff-002"
5+
description = "Team handoff for Terraform provider framework ownership"
6+
17
[task]
28
id = "onboard-handoff-002"
9+
repo = "hashicorp/terraform"
310
category = "team_handoff"
411
language = "go"
512
difficulty = "hard"
613
time_limit_sec = 1200
7-
repo = "terraform"
8-
9-
[mcp]
10-
mcp_config_setup_script = """#!/bin/bash
11-
set -e
12-
13-
# Sourcegraph instance (public code)
14-
SG_ENDPOINT="https://sourcegraph.com"
15-
16-
# Repository for MCP context
17-
REPO_NAME="github.com/hashicorp/terraform"
18-
19-
# Write MCP server config
20-
cat > /workspace/.mcp.json << 'MCPEOF'
21-
{
22-
"mcpServers": {
23-
"sourcegraph": {
24-
"command": "npx",
25-
"args": [
26-
"-y",
27-
"@sourcegraph/mcp-server-cody@latest",
28-
"--endpoint",
29-
"SOURCEGRAPH_ENDPOINT_PLACEHOLDER",
30-
"--access-token",
31-
"SOURCEGRAPH_TOKEN_PLACEHOLDER",
32-
"--repository",
33-
"REPO_NAME_PLACEHOLDER"
34-
]
35-
}
36-
}
37-
}
38-
MCPEOF
39-
40-
# Replace placeholders
41-
sed -i "s|SOURCEGRAPH_ENDPOINT_PLACEHOLDER|${SG_ENDPOINT}|g" /workspace/.mcp.json
42-
sed -i "s|SOURCEGRAPH_TOKEN_PLACEHOLDER|${SOURCEGRAPH_ACCESS_TOKEN}|g" /workspace/.mcp.json
43-
sed -i "s|REPO_NAME_PLACEHOLDER|${REPO_NAME}|g" /workspace/.mcp.json
4414

45-
echo "MCP config written to /workspace/.mcp.json"
46-
"""
15+
[verification]
16+
type = "custom"
17+
command = "bash /tests/test.sh"
18+
reward_type = "continuous"
Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
1-
[task]
1+
version = "1.0"
2+
3+
[metadata]
24
name = "onboard-handoff-003"
5+
description = "Team handoff for Cilium eBPF datapath subsystem ownership"
6+
7+
[task]
8+
id = "onboard-handoff-003"
9+
repo = "cilium/cilium"
310
category = "team_handoff"
411
language = "go"
512
difficulty = "hard"
613
time_limit_sec = 1800
7-
environment = "docker"
8-
9-
[task.metadata]
10-
description = "Team handoff for Cilium eBPF datapath subsystem ownership"
11-
repo = "cilium/cilium"
12-
target_component = "eBPF datapath loader and policy enforcement"
13-
output_path = "/logs/agent/onboarding.md"
1414

15-
[task.metadata.mcp_benefit]
16-
context_complexity = 0.95
17-
cross_file_deps = 0.9
18-
semantic_search_potential = 0.9
19-
tool_chain_weight = 0.8
15+
[verification]
16+
type = "custom"
17+
command = "bash /tests/test.sh"
18+
reward_type = "continuous"

0 commit comments

Comments
 (0)