Skip to content

Commit dd4d62e

Browse files
sjarmakclaude
andcommitted
Add calibrated curator ground truth (311/367) and harden Daytona sandbox lifecycle
Re-curated ground truth using Opus 4.6 + phase1 prompt + hybrid backend: - SDLC: 104/160 tasks with ground_truth_agent.json (56 remaining, rate-limited) - Org: 207/207 tasks with oracle_answer_agent.json (complete) Curator runner hardening: - Add cleanup_orphaned_sandboxes() at startup/shutdown to reclaim CPU quota - Set auto_stop_interval=20, auto_archive_interval=60 to prevent sandbox leaks - Add SIGTERM/SIGINT handler for graceful shutdown with sandbox cleanup - Bump DEFAULT_PARALLEL from 20 to 55 (matches Tier 3 capacity) - Add --overwrite-existing and --skip-agent-variants flags - Add Strategy 5 (repo_fixture.local_checkout_repos) for Org task repo resolution - Add signal.alarm(840) OS-level timeout in embedded curator runner - Add onboarding-search task skip filter Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 60ea357 commit dd4d62e

File tree

779 files changed

+57429
-216
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

779 files changed

+57429
-216
lines changed
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
{
2+
"files": [
3+
"envoy::source/common/access_log/access_log_impl.h",
4+
"envoy::source/common/access_log/access_log_impl.cc",
5+
"envoy::source/common/access_log/access_log_manager_impl.h",
6+
"envoy::source/common/access_log/access_log_manager_impl.cc",
7+
"envoy::source/extensions/access_loggers/common/access_log_base.h",
8+
"envoy::source/extensions/access_loggers/common/access_log_base.cc",
9+
"envoy::source/extensions/access_loggers/common/file_access_log_impl.h",
10+
"envoy::source/extensions/access_loggers/common/file_access_log_impl.cc",
11+
"envoy::source/extensions/access_loggers/common/stream_access_log_common_impl.h",
12+
"envoy::source/extensions/access_loggers/common/grpc_access_logger.h",
13+
"envoy::source/extensions/access_loggers/common/grpc_access_logger_utils.h",
14+
"envoy::source/extensions/access_loggers/common/grpc_access_logger_utils.cc",
15+
"envoy::source/extensions/access_loggers/common/grpc_access_logger_clients.h",
16+
"envoy::source/extensions/access_loggers/file/config.h",
17+
"envoy::source/extensions/access_loggers/file/config.cc",
18+
"envoy::source/extensions/access_loggers/grpc/http_config.h",
19+
"envoy::source/extensions/access_loggers/grpc/http_config.cc",
20+
"envoy::source/extensions/access_loggers/grpc/tcp_config.h",
21+
"envoy::source/extensions/access_loggers/grpc/tcp_config.cc",
22+
"envoy::source/extensions/access_loggers/grpc/http_grpc_access_log_impl.h",
23+
"envoy::source/extensions/access_loggers/grpc/http_grpc_access_log_impl.cc",
24+
"envoy::source/extensions/access_loggers/grpc/tcp_grpc_access_log_impl.h",
25+
"envoy::source/extensions/access_loggers/grpc/tcp_grpc_access_log_impl.cc",
26+
"envoy::source/extensions/access_loggers/grpc/grpc_access_log_impl.h",
27+
"envoy::source/extensions/access_loggers/grpc/grpc_access_log_impl.cc",
28+
"envoy::source/extensions/access_loggers/grpc/grpc_access_log_utils.h",
29+
"envoy::source/extensions/access_loggers/grpc/grpc_access_log_utils.cc",
30+
"envoy::source/extensions/access_loggers/grpc/config_utils.h",
31+
"envoy::source/extensions/access_loggers/grpc/config_utils.cc",
32+
"envoy::source/extensions/access_loggers/open_telemetry/access_log_impl.h",
33+
"envoy::source/extensions/access_loggers/open_telemetry/access_log_impl.cc",
34+
"envoy::source/extensions/access_loggers/open_telemetry/grpc_access_log_impl.h",
35+
"envoy::source/extensions/access_loggers/open_telemetry/grpc_access_log_impl.cc",
36+
"envoy::source/extensions/access_loggers/open_telemetry/config.h",
37+
"envoy::source/extensions/access_loggers/open_telemetry/config.cc",
38+
"envoy::source/extensions/access_loggers/open_telemetry/substitution_formatter.h",
39+
"envoy::source/extensions/access_loggers/open_telemetry/substitution_formatter.cc",
40+
"envoy::source/extensions/access_loggers/fluentd/fluentd_access_log_impl.h",
41+
"envoy::source/extensions/access_loggers/fluentd/fluentd_access_log_impl.cc",
42+
"envoy::source/extensions/access_loggers/fluentd/config.h",
43+
"envoy::source/extensions/access_loggers/fluentd/config.cc",
44+
"envoy::source/extensions/access_loggers/fluentd/substitution_formatter.h",
45+
"envoy::source/extensions/access_loggers/fluentd/substitution_formatter.cc",
46+
"envoy::source/extensions/access_loggers/stream/config.h",
47+
"envoy::source/extensions/access_loggers/stream/config.cc",
48+
"envoy::source/extensions/access_loggers/wasm/config.h",
49+
"envoy::source/extensions/access_loggers/wasm/config.cc",
50+
"envoy::source/extensions/access_loggers/wasm/wasm_access_log_impl.h",
51+
"envoy::source/extensions/access_loggers/filters/cel/cel.h",
52+
"envoy::source/extensions/access_loggers/filters/cel/cel.cc",
53+
"envoy::source/extensions/access_loggers/filters/cel/config.h",
54+
"envoy::source/extensions/access_loggers/filters/cel/config.cc",
55+
"data-plane-api::envoy/extensions/access_loggers/file/v3/file.proto",
56+
"data-plane-api::envoy/extensions/access_loggers/grpc/v3/als.proto",
57+
"data-plane-api::envoy/extensions/access_loggers/open_telemetry/v3/logs_service.proto",
58+
"data-plane-api::envoy/extensions/access_loggers/fluentd/v3/fluentd.proto",
59+
"data-plane-api::envoy/extensions/access_loggers/stream/v3/stream.proto",
60+
"data-plane-api::envoy/extensions/access_loggers/wasm/v3/wasm.proto",
61+
"data-plane-api::envoy/extensions/access_loggers/filters/cel/v3/cel.proto",
62+
"data-plane-api::envoy/extensions/access_loggers/stats/v3/stats.proto",
63+
"data-plane-api::envoy/extensions/access_loggers/dynamic_modules/v3/dynamic_modules.proto",
64+
"data-plane-api::envoy/extensions/access_loggers/filters/process_ratelimit/v3/process_ratelimit.proto"
65+
],
66+
"symbols": [
67+
{
68+
"file": "source/common/access_log/access_log_impl.h",
69+
"symbol": "FilterFactory",
70+
"repo": "sg-evals/envoy--v1.31.2"
71+
},
72+
{
73+
"file": "source/common/access_log/access_log_impl.h",
74+
"symbol": "ComparisonFilter",
75+
"repo": "sg-evals/envoy--v1.31.2"
76+
},
77+
{
78+
"file": "source/common/access_log/access_log_impl.h",
79+
"symbol": "StatusCodeFilter",
80+
"repo": "sg-evals/envoy--v1.31.2"
81+
},
82+
{
83+
"file": "source/common/access_log/access_log_impl.h",
84+
"symbol": "DurationFilter",
85+
"repo": "sg-evals/envoy--v1.31.2"
86+
},
87+
{
88+
"file": "source/common/access_log/access_log_impl.h",
89+
"symbol": "OperatorFilter",
90+
"repo": "sg-evals/envoy--v1.31.2"
91+
},
92+
{
93+
"file": "source/common/access_log/access_log_impl.h",
94+
"symbol": "AndFilter",
95+
"repo": "sg-evals/envoy--v1.31.2"
96+
},
97+
{
98+
"file": "source/common/access_log/access_log_impl.h",
99+
"symbol": "OrFilter",
100+
"repo": "sg-evals/envoy--v1.31.2"
101+
},
102+
{
103+
"file": "source/common/access_log/access_log_impl.h",
104+
"symbol": "NotHealthCheckFilter",
105+
"repo": "sg-evals/envoy--v1.31.2"
106+
},
107+
{
108+
"file": "source/common/access_log/access_log_impl.h",
109+
"symbol": "TraceableRequestFilter",
110+
"repo": "sg-evals/envoy--v1.31.2"
111+
},
112+
{
113+
"file": "source/common/access_log/access_log_impl.h",
114+
"symbol": "RuntimeFilter",
115+
"repo": "sg-evals/envoy--v1.31.2"
116+
},
117+
{
118+
"file": "source/common/access_log/access_log_impl.h",
119+
"symbol": "HeaderFilter",
120+
"repo": "sg-evals/envoy--v1.31.2"
121+
},
122+
{
123+
"file": "source/common/access_log/access_log_impl.h",
124+
"symbol": "ResponseFlagFilter",
125+
"repo": "sg-evals/envoy--v1.31.2"
126+
},
127+
{
128+
"file": "source/common/access_log/access_log_impl.h",
129+
"symbol": "GrpcStatusFilter",
130+
"repo": "sg-evals/envoy--v1.31.2"
131+
},
132+
{
133+
"file": "source/common/access_log/access_log_impl.h",
134+
"symbol": "LogTypeFilter",
135+
"repo": "sg-evals/envoy--v1.31.2"
136+
},
137+
{
138+
"file": "source/common/access_log/access_log_impl.h",
139+
"symbol": "MetadataFilter",
140+
"repo": "sg-evals/envoy--v1.31.2"
141+
},
142+
{
143+
"file": "source/common/access_log/access_log_impl.h",
144+
"symbol": "AccessLogFactory",
145+
"repo": "sg-evals/envoy--v1.31.2"
146+
},
147+
{
148+
"file": "source/extensions/access_loggers/common/access_log_base.h",
149+
"symbol": "ImplBase",
150+
"repo": "sg-evals/envoy--v1.31.2"
151+
},
152+
{
153+
"file": "source/extensions/access_loggers/common/file_access_log_impl.h",
154+
"symbol": "FileAccessLog",
155+
"repo": "sg-evals/envoy--v1.31.2"
156+
},
157+
{
158+
"file": "source/extensions/access_loggers/file/config.cc",
159+
"symbol": "FileAccessLogFactory",
160+
"repo": "sg-evals/envoy--v1.31.2"
161+
},
162+
{
163+
"file": "source/extensions/access_loggers/grpc/http_grpc_access_log_impl.h",
164+
"symbol": "HttpGrpcAccessLog",
165+
"repo": "sg-evals/envoy--v1.31.2"
166+
},
167+
{
168+
"file": "source/extensions/access_loggers/grpc/tcp_grpc_access_log_impl.h",
169+
"symbol": "TcpGrpcAccessLog",
170+
"repo": "sg-evals/envoy--v1.31.2"
171+
},
172+
{
173+
"file": "source/extensions/access_loggers/open_telemetry/access_log_impl.h",
174+
"symbol": "AccessLog",
175+
"repo": "sg-evals/envoy--v1.31.2"
176+
},
177+
{
178+
"file": "source/extensions/access_loggers/open_telemetry/substitution_formatter.h",
179+
"symbol": "OpenTelemetryFormatter",
180+
"repo": "sg-evals/envoy--v1.31.2"
181+
},
182+
{
183+
"file": "source/extensions/access_loggers/fluentd/fluentd_access_log_impl.h",
184+
"symbol": "FluentdAccessLog",
185+
"repo": "sg-evals/envoy--v1.31.2"
186+
},
187+
{
188+
"file": "source/extensions/access_loggers/fluentd/fluentd_access_log_impl.h",
189+
"symbol": "FluentdAccessLoggerImpl",
190+
"repo": "sg-evals/envoy--v1.31.2"
191+
},
192+
{
193+
"file": "source/extensions/access_loggers/fluentd/substitution_formatter.h",
194+
"symbol": "FluentdFormatterImpl",
195+
"repo": "sg-evals/envoy--v1.31.2"
196+
},
197+
{
198+
"file": "source/extensions/access_loggers/stream/config.cc",
199+
"symbol": "StdoutAccessLogFactory",
200+
"repo": "sg-evals/envoy--v1.31.2"
201+
},
202+
{
203+
"file": "source/extensions/access_loggers/stream/config.cc",
204+
"symbol": "StderrAccessLogFactory",
205+
"repo": "sg-evals/envoy--v1.31.2"
206+
},
207+
{
208+
"file": "source/extensions/access_loggers/wasm/wasm_access_log_impl.h",
209+
"symbol": "WasmAccessLog",
210+
"repo": "sg-evals/envoy--v1.31.2"
211+
},
212+
{
213+
"file": "source/extensions/access_loggers/wasm/config.h",
214+
"symbol": "WasmAccessLogFactory",
215+
"repo": "sg-evals/envoy--v1.31.2"
216+
},
217+
{
218+
"file": "source/extensions/access_loggers/filters/cel/cel.h",
219+
"symbol": "CELAccessLogExtensionFilter",
220+
"repo": "sg-evals/envoy--v1.31.2"
221+
},
222+
{
223+
"file": "source/common/access_log/access_log_manager_impl.h",
224+
"symbol": "AccessLogManagerImpl",
225+
"repo": "sg-evals/envoy--v1.31.2"
226+
},
227+
{
228+
"file": "source/common/access_log/access_log_manager_impl.h",
229+
"symbol": "AccessLogFileImpl",
230+
"repo": "sg-evals/envoy--v1.31.2"
231+
}
232+
]
233+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"has_ground_truth": true,
3+
"has_chunk_ground_truth": false,
4+
"ground_truth_source": "curator_agent",
5+
"ground_truth_confidence": "medium",
6+
"task_name": "ccx-compliance-052",
7+
"curator_agent_version": "2.0",
8+
"model": "claude-opus-4-6",
9+
"backend": "hybrid",
10+
"timestamp": "2026-03-04T01:05:23Z",
11+
"files_count": 62,
12+
"edit_files_count": 0,
13+
"chunks_count": 0,
14+
"symbols_count": 33,
15+
"cost_usd": 0.7186535,
16+
"elapsed_sec": 117.0,
17+
"exploration_notes": "## Access Log Filter Implementations (source/common/access_log/)\n\nAll filter implementations are in `access_log_impl.h/.cc`. Filters are boolean evaluators (not loggers), so they do NOT produce text/JSON output. They are:\n- **ComparisonFilter** (base), **StatusCodeFilter**, **DurationFilter** \u2014 compare numeric values\n- **OperatorFilter** (base), **AndFilter**, **OrFilter** \u2014 boolean combinators\n- **NotHealthCheckFilter**, **TraceableRequestFilter**, **RuntimeFilter** \u2014 conditional filters\n- **He"
18+
}

0 commit comments

Comments
 (0)