feat: US-008 - Register 9 navprove tasks in selected_benchmark_tasks.json

LoCoBench Bot · claude · LoCoBench Bot · commit 9cdbae52b55d · 2026-02-16T20:47:54.000Z
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/configs/selected_benchmark_tasks.json b/configs/selected_benchmark_tasks.json
@@ -5,7 +5,7 @@
     "generated_by": "scripts/select_benchmark_tasks.py",
     "random_seed": 42,
     "total_available": 835,
-    "total_selected": 166
+    "total_selected": 175
   },
   "methodology": {
     "description": "Tasks selected via stratified sampling across 15 benchmarks, covering all SDLC phases. Each task scored for MCP benefit using weighted combination of context complexity, cross-file dependencies, semantic search potential, and task category affinity.",
@@ -38,14 +38,15 @@
       "ccb_linuxflbench": "5 (representative tasks across kernel subsystems)",
       "ccb_investigation": "all (4, investigation-only: debug, impact, migration, regression)",
       "ccb_governance": "all (6, governance compliance: repo scope, sensitive exclusion, cross-team, audit trail, degraded context)",
-      "ccb_enterprise": "all (5, enterprise complexity: multi-team ownership, conflicting docs, legacy deps, polyglot)"
+      "ccb_enterprise": "all (5, enterprise complexity: multi-team ownership, conflicting docs, legacy deps, polyglot)",
+      "ccb_navprove": "all (9, navigation-verified: find bug + write regression test)"
     }
   },
   "statistics": {
     "tasks_per_sdlc_phase": {
       "Analysis": 8,
       "Architecture & Design": 10,
-      "Debugging": 3,
+      "Debugging": 12,
       "Documentation": 5,
       "Implementation (bug fix)": 62,
       "Implementation (feature)": 28,
@@ -69,6 +70,7 @@
       "ccb_largerepo": 25,
       "ccb_linuxflbench": 5,
       "ccb_locobench": 25,
+      "ccb_navprove": 9,
       "ccb_pytorch": 11,
       "ccb_security": 8,
       "ccb_swebenchpro": 36,
@@ -79,13 +81,13 @@
       "c": 16,
       "cpp": 19,
       "csharp": 6,
-      "go": 41,
+      "go": 44,
       "java": 11,
       "javascript": 6,
-      "python": 45,
+      "python": 50,
       "python,cpp": 1,
       "rust": 11,
-      "typescript": 10
+      "typescript": 11
     },
     "avg_mcp_benefit_score": 0.7648
   },
@@ -3077,6 +3079,168 @@
       },
       "selection_rationale": "Transitive dependency vulnerability analysis (NOT AFFECTED case) - analyze CVE-2023-39325 in grpcurl -> grpc-go -> golang.org/x/net chain. Agent must prove grpcurl is NOT affected despite having vulnerable x/net version because it uses HTTP/2 client (Transport) not server (Server.ServeConn). Harder than sec-transitive-001 - requires proving a negative by distinguishing client vs server code paths.",
       "task_dir": "ccb_security/sec-transitive-002"
+    },
+    {
+      "task_id": "navprove-qb-url-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "python",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "qutebrowser/qutebrowser",
+      "mcp_benefit_score": 0.83,
+      "mcp_breakdown": {
+        "context_complexity": 0.85,
+        "cross_file_deps": 0.80,
+        "semantic_search_potential": 0.90,
+        "task_category_weight": 0.80
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-qb-url-001"
+    },
+    {
+      "task_id": "navprove-qb-tab-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "python",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "qutebrowser/qutebrowser",
+      "mcp_benefit_score": 0.83,
+      "mcp_breakdown": {
+        "context_complexity": 0.85,
+        "cross_file_deps": 0.80,
+        "semantic_search_potential": 0.90,
+        "task_category_weight": 0.80
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-qb-tab-001"
+    },
+    {
+      "task_id": "navprove-qb-bookmark-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "python",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "qutebrowser/qutebrowser",
+      "mcp_benefit_score": 0.83,
+      "mcp_breakdown": {
+        "context_complexity": 0.85,
+        "cross_file_deps": 0.80,
+        "semantic_search_potential": 0.90,
+        "task_category_weight": 0.80
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-qb-bookmark-001"
+    },
+    {
+      "task_id": "navprove-qb-download-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "python",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "qutebrowser/qutebrowser",
+      "mcp_benefit_score": 0.83,
+      "mcp_breakdown": {
+        "context_complexity": 0.85,
+        "cross_file_deps": 0.80,
+        "semantic_search_potential": 0.90,
+        "task_category_weight": 0.80
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-qb-download-001"
+    },
+    {
+      "task_id": "navprove-ansible-vault-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "python",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "ansible/ansible",
+      "mcp_benefit_score": 0.86,
+      "mcp_breakdown": {
+        "context_complexity": 0.90,
+        "cross_file_deps": 0.85,
+        "semantic_search_potential": 0.90,
+        "task_category_weight": 0.80
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-ansible-vault-001"
+    },
+    {
+      "task_id": "navprove-teleport-ssh-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "go",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "gravitational/teleport",
+      "mcp_benefit_score": 0.85,
+      "mcp_breakdown": {
+        "context_complexity": 0.90,
+        "cross_file_deps": 0.85,
+        "semantic_search_potential": 0.85,
+        "task_category_weight": 0.80
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-teleport-ssh-001"
+    },
+    {
+      "task_id": "navprove-vuls-oval-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "go",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "future-architect/vuls",
+      "mcp_benefit_score": 0.77,
+      "mcp_breakdown": {
+        "context_complexity": 0.75,
+        "cross_file_deps": 0.70,
+        "semantic_search_potential": 0.85,
+        "task_category_weight": 0.80
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-vuls-oval-001"
+    },
+    {
+      "task_id": "navprove-flipt-cache-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "go",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "flipt-io/flipt",
+      "mcp_benefit_score": 0.80,
+      "mcp_breakdown": {
+        "context_complexity": 0.80,
+        "cross_file_deps": 0.75,
+        "semantic_search_potential": 0.85,
+        "task_category_weight": 0.80
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-flipt-cache-001"
+    },
+    {
+      "task_id": "navprove-tutanota-search-001",
+      "benchmark": "ccb_navprove",
+      "sdlc_phase": "Debugging",
+      "language": "typescript",
+      "difficulty": "hard",
+      "category": "navigation_verified",
+      "repo": "tutao/tutanota",
+      "mcp_benefit_score": 0.82,
+      "mcp_breakdown": {
+        "context_complexity": 0.85,
+        "cross_file_deps": 0.80,
+        "semantic_search_potential": 0.85,
+        "task_category_weight": 0.80
+      },
+      "selection_rationale": "All ccb_navprove tasks selected (navigation-verified benchmark, 9 tasks)",
+      "task_dir": "ccb_navprove/navprove-tutanota-search-001"
     }
   ]
 }