Skip to content

Commit cc4815a

Browse files
committed
test(eval): add external-context regression fixtures
1 parent b7ba04b commit cc4815a

11 files changed

Lines changed: 194 additions & 1 deletion

File tree

TODO.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ This roadmap is derived from deep research into Greptile's public docs, blog, MC
139 139

140 140
## 10. Eval, Benchmarking, and Model Governance
141 141

142-
91. [ ] Add eval fixtures for external-context alignment, not just diff-local correctness.
142+
91. [x] Add eval fixtures for external-context alignment, not just diff-local correctness.
143 143
92. [x] Add eval fixtures for merge-readiness judgments and unresolved-blocker classification.
144 144
93. [x] Add eval fixtures for addressed-vs-stale finding lifecycle inference.
145 145
94. [x] Add eval fixtures for multi-hop graph reasoning across call chains and contract edges.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
name: repo regression - API status breaks documented contract
2+
repo_path: api_contract_repo
3+
diff: |
4+
diff --git a/handlers.py b/handlers.py
5+
index 1111111..2222222 100644
6+
--- a/handlers.py
7+
+++ b/handlers.py
8+
@@ -1,3 +1,3 @@
9+
def serialize_order(order):
10+
- status = "shipped" if order.shipped_at else "pending"
11+
+ status = "fulfilled" if order.shipped_at else "pending"
12+
return {"id": order.id, "status": status}
13+
expect:
14+
must_find:
15+
- file: handlers.py
16+
contains_any:
17+
- API contract drift
18+
- openapi schema
19+
- fulfilled is not part of the documented status enum
20+
- documented enum only allows pending shipped or cancelled
21+
must_not_find:
22+
- contains: style
23+
min_total: 1
24+
max_total: 8
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
def serialize_order(order):
2+
status = "shipped" if order.shipped_at else "pending"
3+
return {"id": order.id, "status": status}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
openapi: 3.1.0
2+
info:
3+
title: Orders API
4+
version: 1.0.0
5+
components:
6+
schemas:
7+
Order:
8+
type: object
9+
properties:
10+
id:
11+
type: string
12+
status:
13+
type: string
14+
enum:
15+
- pending
16+
- shipped
17+
- cancelled
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
env:
2+
- name: AUTOMATION_WEBHOOK_SECRET
3+
valueFrom:
4+
secretKeyRef:
5+
name: diffscope-secrets
6+
key: automationWebhookSecret
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
export function webhookSecret(env: NodeJS.ProcessEnv): string | undefined {
2+
return env.AUTOMATION_WEBHOOK_SECRET
3+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
name: repo regression - deployment env var rename not wired through charts
2+
repo_path: deploy_env_repo
3+
diff: |
4+
diff --git a/settings.ts b/settings.ts
5+
index 1111111..2222222 100644
6+
--- a/settings.ts
7+
+++ b/settings.ts
8+
@@ -1,3 +1,3 @@
9+
export function webhookSecret(env: NodeJS.ProcessEnv): string | undefined {
10+
- return env.AUTOMATION_WEBHOOK_SECRET
11+
+ return env.REVIEW_WEBHOOK_SECRET
12+
}
13+
expect:
14+
must_find:
15+
- file: settings.ts
16+
contains_any:
17+
- deployment configuration drift
18+
- Helm chart still injects AUTOMATION_WEBHOOK_SECRET
19+
- renamed environment variable is not wired through deployment manifests
20+
- update charts or keep the old environment variable name
21+
must_not_find:
22+
- contains: style
23+
min_total: 1
24+
max_total: 8
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
create table review_runs (
2+
id text primary key,
3+
status text not null
4+
);
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pub fn insert_review(db: &Db, review: &Review) {
2+
db.execute(
3+
"insert into review_runs (id, status) values ($1, $2)",
4+
&[&review.id, &review.status],
5+
);
6+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
name: repo regression - review run column added without migration
2+
repo_path: migration_drift_repo
3+
diff: |
4+
diff --git a/user_repo.rs b/user_repo.rs
5+
index 1111111..2222222 100644
6+
--- a/user_repo.rs
7+
+++ b/user_repo.rs
8+
@@ -1,6 +1,6 @@
9+
pub fn insert_review(db: &Db, review: &Review) {
10+
db.execute(
11+
- "insert into review_runs (id, status) values ($1, $2)",
12+
+ "insert into review_runs (id, status, last_reviewed_at) values ($1, $2, $3)",
13+
- &[&review.id, &review.status],
14+
+ &[&review.id, &review.status, &review.last_reviewed_at],
15+
);
16+
}
17+
expect:
18+
must_find:
19+
- file: user_repo.rs
20+
contains_any:
21+
- missing migration
22+
- schema drift
23+
- review_runs table does not define last_reviewed_at
24+
- add a database migration for the new column
25+
must_not_find:
26+
- contains: style
27+
min_total: 1
28+
max_total: 8

0 commit comments

Comments
 (0)