Skip to content

Commit 4d522d3

Browse files
sjarmakclaude
andcommitted
fix: add normalize_repo to 3 design F1 scorers + fix Daytona disk limit
- Add normalize_repo() to make_key() in 3 ccb_design F1 scorer test.sh files (envoy-stream-aggregated-sym, k8s-sharedinformer-sym, terraform-provider-iface-sym) so sg-evals mirror names match upstream repo names in ground truth
- Fix daytona_runner.py disk floor from 20GB to 10GB to fit Daytona Tier 3 sandbox limit

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent f20444c commit 4d522d3

File tree

4 files changed

+109
-7
lines changed

4 files changed

+109
-7
lines changed

benchmarks/ccb_design/envoy-stream-aggregated-sym-001/tests/test.sh

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,44 @@ try:
9898
write_reward(0.0)
9999
sys.exit(0)
100100
101+
# ── Repo name normalization ────────────────────────────────────────────
102+
def normalize_repo(name):
    """Normalize a repo identifier to its bare repo basename for matching.

    Accepts sg-evals mirror names, upstream ``org/repo`` names and GitHub
    URLs, and reduces each one to its last path segment with any trailing
    ``--<hexhash>`` mirror suffix (7 or more lowercase hex chars) removed.

        sg-evals/envoy--2300e245              -> envoy
        github.com/sg-evals/envoy--2300e245   -> envoy
        envoyproxy/envoy                      -> envoy
        grpc/grpc-go                          -> grpc-go
        https://github.com/envoyproxy/envoy/  -> envoy
        https://github.com/grpc/grpc-go.git   -> grpc-go

    Args:
        name: Repo identifier string.

    Returns:
        The normalized repo basename (empty string for empty input).
    """
    n = name.strip()
    # Strip URL prefixes
    for prefix in ("github.com/", "https://github.com/", "http://github.com/"):
        if n.startswith(prefix):
            n = n[len(prefix):]
    # Strip sg-evals/ prefix
    if n.startswith("sg-evals/"):
        n = n[len("sg-evals/"):]
    # Drop trailing slashes; otherwise the basename below would be empty
    n = n.rstrip("/")
    # Drop a clone-URL style ".git" suffix
    if n.endswith(".git"):
        n = n[:-len(".git")]
    # Strip --hexhash suffix (7+ hex chars after --)
    n = re.sub(r'--[0-9a-f]{7,}$', '', n)
    # Take just the last path segment (repo basename)
    if "/" in n:
        n = n.rsplit("/", 1)[-1]
    return n
127+
101128
# ── Build composite keys ─────────────────────────────────────────────
102129
def make_key(entry, fields):
    """Return the composite key tuple for ``entry`` over ``fields``.

    Each field value is read with ``entry.get`` (missing fields become
    the empty string), converted to ``str`` and stripped of surrounding
    whitespace. The 'repo' field is additionally passed through
    ``normalize_repo`` so sg-evals mirror names compare equal to upstream
    repo names.
    """
    def field_value(field):
        text = str(entry.get(field, "")).strip()
        return normalize_repo(text) if field == "repo" else text

    return tuple(field_value(field) for field in fields)
105139
106140
expected_keys = [make_key(e, key_fields) for e in expected]
107141
reported_keys = [make_key(r, key_fields) for r in reported]

benchmarks/ccb_design/k8s-sharedinformer-sym-001/tests/test.sh

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,44 @@ try:
9999
write_reward(0.0)
100100
sys.exit(0)
101101
102+
# ── Repo name normalization ────────────────────────────────────────────
103+
def normalize_repo(name):
    """Normalize a repo identifier to its bare repo basename for matching.

    Accepts sg-evals mirror names, upstream ``org/repo`` names and GitHub
    URLs, and reduces each one to its last path segment with any trailing
    ``--<hexhash>`` mirror suffix (7 or more lowercase hex chars) removed.

        sg-evals/kubernetes--8c9c67c0              -> kubernetes
        github.com/sg-evals/kubernetes--8c9c67c0   -> kubernetes
        kubernetes/kubernetes                      -> kubernetes
        kubernetes/autoscaler                      -> autoscaler
        https://github.com/kubernetes/kubernetes/  -> kubernetes
        https://github.com/kubernetes/autoscaler.git -> autoscaler

    Args:
        name: Repo identifier string.

    Returns:
        The normalized repo basename (empty string for empty input).
    """
    n = name.strip()
    # Strip URL prefixes
    for prefix in ("github.com/", "https://github.com/", "http://github.com/"):
        if n.startswith(prefix):
            n = n[len(prefix):]
    # Strip sg-evals/ prefix
    if n.startswith("sg-evals/"):
        n = n[len("sg-evals/"):]
    # Drop trailing slashes; otherwise the basename below would be empty
    n = n.rstrip("/")
    # Drop a clone-URL style ".git" suffix
    if n.endswith(".git"):
        n = n[:-len(".git")]
    # Strip --hexhash suffix (7+ hex chars after --)
    n = re.sub(r'--[0-9a-f]{7,}$', '', n)
    # Take just the last path segment (repo basename)
    if "/" in n:
        n = n.rsplit("/", 1)[-1]
    return n
128+
102129
# ── Build composite keys ─────────────────────────────────────────────
103130
def make_key(entry, fields):
    """Return the composite key tuple for ``entry`` over ``fields``.

    Each field value is read with ``entry.get`` (missing fields become
    the empty string), converted to ``str`` and stripped of surrounding
    whitespace. The 'repo' field is additionally passed through
    ``normalize_repo`` so sg-evals mirror names compare equal to upstream
    repo names.
    """
    def field_value(field):
        text = str(entry.get(field, "")).strip()
        return normalize_repo(text) if field == "repo" else text

    return tuple(field_value(field) for field in fields)
106140
107141
expected_keys = [make_key(e, key_fields) for e in expected]
108142
reported_keys = [make_key(r, key_fields) for r in reported]

benchmarks/ccb_design/terraform-provider-iface-sym-001/tests/test.sh

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,44 @@ try:
9999
write_reward(0.0)
100100
sys.exit(0)
101101
102+
# ── Repo name normalization ────────────────────────────────────────────
103+
def normalize_repo(name):
    """Normalize a repo identifier to its bare repo basename for matching.

    Accepts sg-evals mirror names, upstream ``org/repo`` names and GitHub
    URLs, and reduces each one to its last path segment with any trailing
    ``--<hexhash>`` mirror suffix (7 or more lowercase hex chars) removed.

        sg-evals/terraform--7f3a9c1e              -> terraform
        github.com/sg-evals/terraform--7f3a9c1e   -> terraform
        hashicorp/terraform                       -> terraform
        hashicorp/terraform-provider-aws          -> terraform-provider-aws
        https://github.com/hashicorp/terraform/   -> terraform
        https://github.com/hashicorp/terraform.git -> terraform

    Args:
        name: Repo identifier string.

    Returns:
        The normalized repo basename (empty string for empty input).
    """
    n = name.strip()
    # Strip URL prefixes
    for prefix in ("github.com/", "https://github.com/", "http://github.com/"):
        if n.startswith(prefix):
            n = n[len(prefix):]
    # Strip sg-evals/ prefix
    if n.startswith("sg-evals/"):
        n = n[len("sg-evals/"):]
    # Drop trailing slashes; otherwise the basename below would be empty
    n = n.rstrip("/")
    # Drop a clone-URL style ".git" suffix
    if n.endswith(".git"):
        n = n[:-len(".git")]
    # Strip --hexhash suffix (7+ hex chars after --)
    n = re.sub(r'--[0-9a-f]{7,}$', '', n)
    # Take just the last path segment (repo basename)
    if "/" in n:
        n = n.rsplit("/", 1)[-1]
    return n
128+
102129
# ── Build composite keys ─────────────────────────────────────────────
103130
def make_key(entry, fields):
    """Return the composite key tuple for ``entry`` over ``fields``.

    Each field value is read with ``entry.get`` (missing fields become
    the empty string), converted to ``str`` and stripped of surrounding
    whitespace. The 'repo' field is additionally passed through
    ``normalize_repo`` so sg-evals mirror names compare equal to upstream
    repo names.
    """
    def field_value(field):
        text = str(entry.get(field, "")).strip()
        return normalize_repo(text) if field == "repo" else text

    return tuple(field_value(field) for field in fields)
106140
107141
expected_keys = [make_key(e, key_fields) for e in expected]
108142
reported_keys = [make_key(r, key_fields) for r in reported]

scripts/daytona_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ def create_sandbox(self, task: TaskSpec):
400400
resources=Resources(
401401
cpu=task.cpus,
402402
memory=max(task.memory_mb // 1024, 1),
403-
disk=max(task.storage_mb // 1024, 20),
403+
disk=max(task.storage_mb // 1024, 10),
404404
),
405405
auto_stop_interval=0,
406406
)

0 commit comments

Comments
 (0)