Skip to content

Commit f411a3f

Browse files
jahooma and claude committed
Add unit tests for evalbuff (35 tests across 4 files)
Tests for criteria (promotion logic, level accumulation), docs-optimizer (apply/overwrite/reject/AGENTS.md creation, compareScores, readCurrentDocs), cli-runner (happy path, diff capture, crash, timeout, CLI not found), and morning-report (normal/empty/error reports, score trajectory, JSONL append). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ebaf37b commit f411a3f

File tree

4 files changed

+505
-0
lines changed

4 files changed

+505
-0
lines changed
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import fs from 'fs'
2+
import os from 'os'
3+
import path from 'path'
4+
import { execSync } from 'child_process'
5+
6+
import { afterEach, beforeEach, describe, expect, it } from 'bun:test'
7+
8+
import { runCliAgent } from '../cli-runner'
9+
10+
let tmpDir: string
11+
12+
/**
 * Per-test setup: creates a fresh temporary directory and turns it into a git
 * repository containing a single empty commit, so `git diff` has a baseline.
 */
beforeEach(() => {
  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'evalbuff-cli-test-'))
  // Configure a repo-local identity and turn off commit signing before
  // committing. Without this, `git commit` can abort on machines that have no
  // global git identity (typical CI runners) or that enforce GPG signing,
  // which would fail every test in this file during setup.
  const setupCommand = [
    'git init',
    'git config user.name "evalbuff-test"',
    'git config user.email "evalbuff-test@example.com"',
    'git config commit.gpgsign false',
    'git add .',
    'git commit --allow-empty -m "init"',
  ].join(' && ')
  execSync(setupCommand, {
    cwd: tmpDir,
    stdio: 'ignore',
  })
})
20+
21+
// Per-test teardown: remove the temporary repository and everything in it.
afterEach(() => {
  const removeOptions = { recursive: true, force: true }
  fs.rmSync(tmpDir, removeOptions)
})
24+
25+
/**
 * Exercises `runCliAgent` with real child processes running inside the
 * per-test git repository (`tmpDir`).
 */
describe('runCliAgent', () => {
  const DEFAULT_TIMEOUT_MS = 10_000

  // Starts the agent in the current test's repo. `tmpDir` is read at call
  // time because it is reassigned before every test.
  const launch = (
    command: string,
    prompt: string,
    timeoutMs: number = DEFAULT_TIMEOUT_MS,
  ) => runCliAgent({ command, prompt, cwd: tmpDir, timeoutMs })

  it('happy path: captures stdout and exit code 0', async () => {
    const { exitCode, stdout, durationMs } = await launch(
      'echo',
      'hello world',
    )

    expect(exitCode).toBe(0)
    expect(stdout.trim()).toBe('hello world')
    expect(durationMs).toBeGreaterThan(0)
  })

  it('captures git diff when agent creates a file', async () => {
    // The "agent" is a small bash script that writes a new file.
    const agentScript = path.join(tmpDir, 'agent.sh')
    const scriptBody = '#!/bin/bash\necho "new content" > newfile.txt\n'
    fs.writeFileSync(agentScript, scriptBody)
    fs.chmodSync(agentScript, '755')

    const { exitCode, diff } = await launch(agentScript, 'create a file')

    expect(exitCode).toBe(0)
    expect(diff).toContain('newfile.txt')
    expect(diff).toContain('new content')
  })

  it('handles agent crash with non-zero exit code', async () => {
    const { exitCode } = await launch('bash -c', 'exit 42')

    expect(exitCode).toBe(42)
  })

  it('returns empty diff when agent makes no changes', async () => {
    const { diff } = await launch('echo', 'do nothing')

    expect(diff).toBe('')
  })

  it('rejects when agent CLI is not found', async () => {
    const pending = launch('nonexistent-agent-binary-xyz', 'test')

    // Both fragments must appear in the rejection message.
    await expect(pending).rejects.toThrow('CLI agent failed to start')
    await expect(pending).rejects.toThrow('nonexistent-agent-binary-xyz')
  })

  it('kills agent on timeout', async () => {
    // `sleep 30` far outlives the 500ms budget.
    const { durationMs, exitCode } = await launch('sleep', '30', 500)

    // The run must end long before the sleep would have finished...
    expect(durationMs).toBeLessThan(5000)
    // ...and a killed agent must not be reported as a success.
    expect(exitCode).not.toBe(0)
  })
})
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import { describe, expect, it } from 'bun:test'
2+
3+
import {
4+
formatCriteriaForPrompt,
5+
getCriteriaForLevel,
6+
maybePromoteCriteria,
7+
} from '../criteria'
8+
9+
import type { QualityCriteria } from '../criteria'
10+
11+
/**
 * Test fixture builder for `QualityCriteria`.
 *
 * @param level - criteria level; also passed to `getCriteriaForLevel` to fill `criteria`
 * @param threshold - value stored as `promotionThreshold` (defaults to 8.0)
 * @param window - value stored as `promotionWindow` (defaults to 10)
 * @returns the assembled `QualityCriteria` object
 */
function makeCriteria(
  level: number,
  threshold = 8.0,
  window = 10,
): QualityCriteria {
  const criteria = getCriteriaForLevel(level)
  const fixture: QualityCriteria = {
    level,
    criteria,
    promotionThreshold: threshold,
    promotionWindow: window,
  }
  return fixture
}
23+
24+
// Verifies which criteria `getCriteriaForLevel` returns at various levels.
describe('getCriteriaForLevel', () => {
  it('returns only L1 criteria at level 1', () => {
    const levelOne = getCriteriaForLevel(1)
    const names = levelOne.map(({ name }) => name)

    expect(levelOne).toHaveLength(3)
    expect(names).toEqual(['Correctness', 'Completeness', 'Basic Style'])
  })

  it('accumulates criteria up to level 3', () => {
    const names = getCriteriaForLevel(3).map(({ name }) => name)

    expect(names).toEqual([
      'Correctness',
      'Completeness',
      'Basic Style',
      'Pattern Consistency',
      'Test Quality',
    ])
  })

  it('includes all criteria at level 5', () => {
    const everything = getCriteriaForLevel(5)
    expect(everything).toHaveLength(7)

    // The final entry in the returned list is expected to be 'Fluency'.
    const lastCriterion = everything[everything.length - 1]
    expect(lastCriterion.name).toBe('Fluency')
  })

  it('caps at level 5 even if higher number passed', () => {
    expect(getCriteriaForLevel(10)).toHaveLength(7)
  })
})
57+
58+
// Verifies the level returned by `maybePromoteCriteria` for various score histories.
describe('maybePromoteCriteria', () => {
  // Every case below uses the same promotion threshold.
  const THRESHOLD = 8.0

  // Builds a fixture at `level` with the given window and returns the level
  // `maybePromoteCriteria` reports for `scores`.
  const levelAfter = (level: number, window: number, scores: number[]) =>
    maybePromoteCriteria(makeCriteria(level, THRESHOLD, window), scores)

  it('promotes when avg above threshold over window', () => {
    expect(levelAfter(1, 5, [8.5, 9.0, 8.2, 8.8, 8.6])).toBe(2)
  })

  it('does NOT promote when avg below threshold', () => {
    expect(levelAfter(1, 5, [7.0, 6.5, 8.0, 7.5, 7.0])).toBe(1)
  })

  it('does NOT promote when already at max level (5)', () => {
    expect(levelAfter(5, 3, [9.0, 9.5, 9.0])).toBe(5)
  })

  it('does NOT promote when fewer iterations than window size', () => {
    expect(levelAfter(1, 10, [9.0, 9.5, 9.0])).toBe(1)
  })

  it('uses only the last N scores in the window', () => {
    // Early scores are low; only the trailing three are high.
    const history = [3.0, 4.0, 5.0, 8.5, 9.0, 8.5]
    expect(levelAfter(2, 3, history)).toBe(3)
  })
})
95+
96+
// Verifies the text produced by `formatCriteriaForPrompt`.
describe('formatCriteriaForPrompt', () => {
  it('includes level and all criteria names', () => {
    const rendered = formatCriteriaForPrompt(makeCriteria(2))

    for (const fragment of ['Level 2/5', 'Correctness', 'Pattern Consistency']) {
      expect(rendered).toContain(fragment)
    }
  })

  it('includes weights', () => {
    const rendered = formatCriteriaForPrompt(makeCriteria(1))

    for (const fragment of ['weight: 3', 'weight: 1']) {
      expect(rendered).toContain(fragment)
    }
  })
})
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import fs from 'fs'
2+
import os from 'os'
3+
import path from 'path'
4+
5+
import { afterEach, beforeEach, describe, expect, it } from 'bun:test'
6+
7+
import { applyDocEdit, compareScores, readCurrentDocs } from '../docs-optimizer'
8+
9+
let tmpDir: string
10+
11+
// Per-test setup: allocate a fresh scratch directory under the OS temp dir.
beforeEach(() => {
  const prefix = path.join(os.tmpdir(), 'evalbuff-test-')
  tmpDir = fs.mkdtempSync(prefix)
})
14+
15+
// Per-test teardown: delete the scratch directory and its contents.
afterEach(() => {
  const removeOptions = { recursive: true, force: true }
  fs.rmSync(tmpDir, removeOptions)
})
18+
19+
// Verifies how `applyDocEdit` writes docs under docs/ and maintains AGENTS.md.
describe('applyDocEdit', () => {
  // Both helpers read `tmpDir` lazily because it changes for every test.
  const agentsFile = () => path.join(tmpDir, 'AGENTS.md')
  const readAgents = () => fs.readFileSync(agentsFile(), 'utf-8')

  it('creates new file under docs/ and updates AGENTS.md TOC', () => {
    const applied = applyDocEdit(
      tmpDir,
      'patterns/error-handling.md',
      '# Error Handling\n\nAlways use try/catch.',
    )
    expect(applied).toBe(true)

    const writtenDoc = path.join(
      tmpDir,
      'docs',
      'patterns',
      'error-handling.md',
    )
    expect(fs.existsSync(writtenDoc)).toBe(true)
    expect(fs.readFileSync(writtenDoc, 'utf-8')).toContain('Error Handling')

    expect(readAgents()).toContain('docs/patterns/error-handling.md')
  })

  it('overwrites existing file content', () => {
    // Write the doc once, then write it again with different content.
    applyDocEdit(tmpDir, 'conventions/naming.md', 'Original content')
    applyDocEdit(tmpDir, 'conventions/naming.md', 'Updated content')

    const namingDoc = path.join(tmpDir, 'docs', 'conventions', 'naming.md')
    expect(fs.readFileSync(namingDoc, 'utf-8')).toBe('Updated content')
  })

  it('does not duplicate AGENTS.md entry on overwrite', () => {
    applyDocEdit(tmpDir, 'test.md', 'v1')
    applyDocEdit(tmpDir, 'test.md', 'v2')

    // Count only the "- [docs/test.md]" link label: a single entry mentions
    // the path twice (label and target), so a plain path count would be 2.
    const linkLabels = readAgents().match(/- \[docs\/test\.md\]/g)
    expect(linkLabels).toHaveLength(1)
  })

  it('rejects path starting with /', () => {
    expect(applyDocEdit(tmpDir, '/etc/passwd', 'bad')).toBe(false)
  })

  it('rejects path with ..', () => {
    expect(applyDocEdit(tmpDir, '../outside/file.md', 'bad')).toBe(false)
  })

  it('creates AGENTS.md if it does not exist', () => {
    expect(fs.existsSync(agentsFile())).toBe(false)
    applyDocEdit(tmpDir, 'new-doc.md', 'content')
    expect(fs.existsSync(agentsFile())).toBe(true)

    const agentsText = readAgents()
    expect(agentsText).toContain('# Documentation')
    expect(agentsText).toContain('docs/new-doc.md')
  })
})
89+
90+
// Verifies the verdict `compareScores` returns for an (old, new) score pair.
describe('compareScores', () => {
  const cases = [
    {
      title: 'returns improved when new > old',
      oldScore: 5.0,
      newScore: 7.0,
      verdict: 'improved',
    },
    {
      title: 'returns same when new == old',
      oldScore: 5.0,
      newScore: 5.0,
      verdict: 'same',
    },
    {
      title: 'returns worse when new < old',
      oldScore: 7.0,
      newScore: 5.0,
      verdict: 'worse',
    },
  ] as const

  for (const { title, oldScore, newScore, verdict } of cases) {
    it(title, () => {
      expect(compareScores(oldScore, newScore)).toBe(verdict)
    })
  }
})
103+
104+
// Verifies what `readCurrentDocs` returns for a project directory.
describe('readCurrentDocs', () => {
  it('returns empty object when docs/ does not exist', () => {
    expect(readCurrentDocs(tmpDir)).toEqual({})
  })

  it('reads all markdown files recursively', () => {
    const docsRoot = path.join(tmpDir, 'docs')
    const nestedDir = path.join(docsRoot, 'patterns')
    fs.mkdirSync(nestedDir, { recursive: true })

    fs.writeFileSync(path.join(docsRoot, 'intro.md'), 'intro content')
    fs.writeFileSync(path.join(nestedDir, 'api.md'), 'api patterns')
    // A non-markdown file that must not appear in the result.
    fs.writeFileSync(path.join(docsRoot, 'notes.txt'), 'ignored')

    const loaded = readCurrentDocs(tmpDir)
    const keys = Object.keys(loaded).sort()

    expect(keys).toEqual(['intro.md', 'patterns/api.md'])
    expect(loaded['intro.md']).toBe('intro content')
    expect(loaded['patterns/api.md']).toBe('api patterns')
  })
})

0 commit comments

Comments
 (0)