Skip to content

Commit 2d2ff2a

Browse files
committed
test(eval): add lifecycle regression fixtures
1 parent 2b7cdb9 commit 2d2ff2a

File tree

5 files changed

+195
-1
lines changed

5 files changed

+195
-1
lines changed

TODO.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ This roadmap is derived from deep research into Greptile's public docs, blog, MC
141141

142142
91. [ ] Add eval fixtures for external-context alignment, not just diff-local correctness.
143143
92. [x] Add eval fixtures for merge-readiness judgments and unresolved-blocker classification.
144-
93. [ ] Add eval fixtures for addressed-vs-stale finding lifecycle inference.
144+
93. [x] Add eval fixtures for addressed-vs-stale finding lifecycle inference.
145145
94. [x] Add eval fixtures for multi-hop graph reasoning across call chains and contract edges.
146146
95. [ ] Add eval runs that compare single-pass review against agentic loop review.
147147
96. [ ] Add production replay evals using anonymized accepted/rejected review outcomes.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: repo regression - API drops follow-up addressed outcome
2+
repo_path: ../../..
3+
diff: |
4+
diff --git a/src/server/api/types.rs b/src/server/api/types.rs
5+
index 6666666..deadbeef 100644
6+
--- a/src/server/api/types.rs
7+
+++ b/src/server/api/types.rs
8+
@@ -111,10 +111,8 @@ pub(crate) fn build_api_review_session(
9+
comments: session
10+
.comments
11+
.into_iter()
12+
.map(|comment| {
13+
- let addressed_by_follow_up =
14+
- addressed_by_follow_up_comment_ids.contains(&comment.id);
15+
- ApiComment::from_comment(comment, stale_review, addressed_by_follow_up)
16+
+ ApiComment::from_comment(comment, stale_review, false)
17+
})
18+
.collect(),
19+
summary: session.summary,
20+
expect:
21+
must_find:
22+
- file: src/server/api/types.rs
23+
contains_any:
24+
- API review sessions discard addressed-by-follow-up state
25+
- comments touched by later commits will no longer surface as addressed
26+
- stale review responses can report fixed open findings as merely stale
27+
rule_id: bug.lifecycle.api-drops-followup-addressed
28+
must_not_find:
29+
- contains: style
30+
summary:
31+
merge_readiness: NeedsAttention
32+
min_open_blockers: 1
33+
min_total: 1
34+
max_total: 8
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: repo regression - context-only edits marked addressed
2+
repo_path: ../../..
3+
diff: |
4+
diff --git a/src/core/comment/outcomes.rs b/src/core/comment/outcomes.rs
5+
index 4444444..deadbeef 100644
6+
--- a/src/core/comment/outcomes.rs
7+
+++ b/src/core/comment/outcomes.rs
8+
@@ -37,9 +37,7 @@ pub fn infer_addressed_by_follow_up_comments(
9+
for hunk in &diff.hunks {
10+
for line in &hunk.changes {
11+
- if line.change_type != ChangeType::Context {
12+
- if let Some(old_line_no) = line.old_line_no {
13+
- changed_old_lines.insert(old_line_no);
14+
- }
15+
+ if let Some(old_line_no) = line.old_line_no {
16+
+ changed_old_lines.insert(old_line_no);
17+
}
18+
}
19+
}
20+
expect:
21+
must_find:
22+
- file: src/core/comment/outcomes.rs
23+
contains_any:
24+
- context-only lines are being treated as addressed follow-up edits
25+
- inserting lines above a finding can falsely mark it addressed
26+
- follow-up inference should only count non-context old lines
27+
rule_id: bug.lifecycle.context-only-addressed
28+
must_not_find:
29+
- contains: style
30+
summary:
31+
merge_readiness: NeedsAttention
32+
min_open_blockers: 1
33+
min_total: 1
34+
max_total: 8
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
name: repo regression - persistent findings dropped from not-addressed inference
2+
repo_path: ../../..
3+
diff: |
4+
diff --git a/src/core/comment/outcomes.rs b/src/core/comment/outcomes.rs
5+
index 5555555..deadbeef 100644
6+
--- a/src/core/comment/outcomes.rs
7+
+++ b/src/core/comment/outcomes.rs
8+
@@ -74,8 +74,8 @@ pub fn infer_follow_up_comment_resolution_outcomes(
9+
let not_addressed_comment_ids = previous_comments
10+
.iter()
11+
.filter(|comment| comment.status == CommentStatus::Open)
12+
.filter(|comment| !addressed_comment_ids.contains(&comment.id))
13+
- .filter(|comment| current_comment_ids.contains(comment.id.as_str()))
14+
+ .filter(|comment| !current_comment_ids.contains(comment.id.as_str()))
15+
.map(|comment| comment.id.clone())
16+
.collect();
17+
18+
expect:
19+
must_find:
20+
- file: src/core/comment/outcomes.rs
21+
contains_any:
22+
- repeated findings stop being recorded as not addressed
23+
- the persistence check is inverted so surviving comments are dropped
24+
- this breaks addressed-vs-still-open lifecycle inference for recurring findings
25+
rule_id: bug.lifecycle.not-addressed-persistence-inversion
26+
must_not_find:
27+
- contains: style
28+
summary:
29+
merge_readiness: NeedsAttention
30+
min_open_blockers: 1
31+
min_total: 1
32+
max_total: 8

src/commands/eval/fixtures.rs

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,100 @@ expect:
318318
);
319319
}
320320

321+
// Loads the checked-in `lifecycle_context_only_addressed.yml` regression fixture and
// checks the parsed name, the first `must_find` rule id, and the summary expectations.
#[test]
fn test_checked_in_lifecycle_context_only_fixture_loads_summary_expectations() {
    // Resolve the fixture relative to the crate root so the test does not depend on the
    // working directory it is launched from.
    let mut fixture_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    fixture_path.push("eval/fixtures/repo_regressions/lifecycle_context_only_addressed.yml");

    let loaded = load_eval_fixtures_from_path(&fixture_path).unwrap();
    assert_eq!(loaded.len(), 1);

    let fixture = &loaded[0].fixture;
    let expected_name = "repo regression - context-only edits marked addressed";
    assert_eq!(fixture.name.as_deref(), Some(expected_name));

    let expected_rule_id = "bug.lifecycle.context-only-addressed";
    assert_eq!(
        fixture.expect.must_find[0].rule_id.as_deref(),
        Some(expected_rule_id)
    );

    let summary = &fixture.expect.summary;
    assert_eq!(summary.merge_readiness.as_deref(), Some("NeedsAttention"));
    assert_eq!(summary.min_open_blockers, Some(1));
}
351+
352+
// Loads the checked-in `lifecycle_not_addressed_persistence_inversion.yml` regression
// fixture and checks the parsed name, the first `must_find` rule id, and the summary
// expectations.
#[test]
fn test_checked_in_lifecycle_persistence_fixture_loads_summary_expectations() {
    // Resolve the fixture relative to the crate root so the test does not depend on the
    // working directory it is launched from.
    let mut fixture_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    fixture_path.push(
        "eval/fixtures/repo_regressions/lifecycle_not_addressed_persistence_inversion.yml",
    );

    let loaded = load_eval_fixtures_from_path(&fixture_path).unwrap();
    assert_eq!(loaded.len(), 1);

    let fixture = &loaded[0].fixture;
    let expected_name =
        "repo regression - persistent findings dropped from not-addressed inference";
    assert_eq!(fixture.name.as_deref(), Some(expected_name));

    let expected_rule_id = "bug.lifecycle.not-addressed-persistence-inversion";
    assert_eq!(
        fixture.expect.must_find[0].rule_id.as_deref(),
        Some(expected_rule_id)
    );

    let summary = &fixture.expect.summary;
    assert_eq!(summary.merge_readiness.as_deref(), Some("NeedsAttention"));
    assert_eq!(summary.min_open_blockers, Some(1));
}
383+
384+
// Loads the checked-in `lifecycle_api_drops_followup_addressed.yml` regression fixture
// and checks the parsed name, the first `must_find` rule id, and the summary expectations.
#[test]
fn test_checked_in_lifecycle_api_fixture_loads_summary_expectations() {
    // Resolve the fixture relative to the crate root so the test does not depend on the
    // working directory it is launched from.
    let mut fixture_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    fixture_path.push("eval/fixtures/repo_regressions/lifecycle_api_drops_followup_addressed.yml");

    let loaded = load_eval_fixtures_from_path(&fixture_path).unwrap();
    assert_eq!(loaded.len(), 1);

    let fixture = &loaded[0].fixture;
    let expected_name = "repo regression - API drops follow-up addressed outcome";
    assert_eq!(fixture.name.as_deref(), Some(expected_name));

    let expected_rule_id = "bug.lifecycle.api-drops-followup-addressed";
    assert_eq!(
        fixture.expect.must_find[0].rule_id.as_deref(),
        Some(expected_rule_id)
    );

    let summary = &fixture.expect.summary;
    assert_eq!(summary.merge_readiness.as_deref(), Some("NeedsAttention"));
    assert_eq!(summary.min_open_blockers, Some(1));
}
414+
321415
#[test]
322416
fn test_collect_eval_fixtures_expands_pack_entries_in_sorted_order() {
323417
let dir = tempdir().unwrap();

0 commit comments

Comments
 (0)