Skip to content

Commit 663bcd9

Browse files
committed
cancel all evals when ctrl-c
1 parent ea1777d commit 663bcd9

File tree

3 files changed

+125
-4
lines changed

3 files changed

+125
-4
lines changed

evals/git-evals/run-eval-set.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
mockRunGitEvals,
1111
runGitEvals,
1212
setGlobalConcurrencyLimit,
13+
terminateAllEvalChildren,
1314
} from './run-git-evals'
1415

1516
import type { EvalConfig, EvalResult } from './types'
@@ -107,6 +108,17 @@ async function runEvalSet(options: {
107108
console.log('Starting eval set run...')
108109
console.log(`Output directory: ${outputDir}`)
109110

111+
// Set up signal handlers to clean up child processes.
//
// `shuttingDown` makes the handler idempotent. terminateAllEvalChildren()
// returns immediately while a cleanup is already running, so without this
// guard a second Ctrl-C would report "Cleanup complete." and exit the parent
// while the first cleanup was still waiting to force-kill its children.
let shuttingDown = false

/**
 * Terminates all evaluation children, then exits with the conventional
 * 128 + signal-number status (130 for SIGINT, 143 for SIGTERM).
 * Repeated signals received during cleanup are acknowledged and ignored.
 */
const signalHandler = async (signal: 'SIGINT' | 'SIGTERM'): Promise<void> => {
  if (shuttingDown) {
    console.log(`\nReceived ${signal} again, cleanup already in progress...`)
    return
  }
  shuttingDown = true

  console.log(`\nReceived ${signal}, cleaning up evaluation processes...`)
  try {
    await terminateAllEvalChildren()
    console.log('Cleanup complete.')
  } catch (error) {
    // Cleanup is best-effort; report the failure but still exit below.
    console.error('Cleanup failed:', error)
  } finally {
    process.exit(signal === 'SIGINT' ? 130 : 143)
  }
}

// `void` marks the promise as intentionally not awaited; the handler never
// rejects because every failure path is handled inside it.
process.on('SIGINT', () => void signalHandler('SIGINT'))
process.on('SIGTERM', () => void signalHandler('SIGTERM'))
121+
110122
setGlobalConcurrencyLimit(options.concurrency ?? 5)
111123

112124
// Define the eval configurations

evals/git-evals/run-git-evals.ts

Lines changed: 88 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import type {
2929
EvalData,
3030
} from './types'
3131
import type { z } from 'zod/v4'
32+
import type { ChildProcess } from 'child_process'
3233

3334
disableLiveUserInputCheck()
3435

@@ -257,7 +258,7 @@ function getCodebuffFileStates(
257258
cwd: projectPath,
258259
stdio: ['ignore', 'pipe', 'pipe'],
259260
})
260-
261+
261262
// Get diff of staged files to include new files
262263
return execFileSync('git', ['diff', '--staged'], {
263264
cwd: projectPath,
@@ -274,17 +275,88 @@ export function mockRunGitEvals(path: string) {
274275
// Global concurrency limiter that can be shared across multiple repository evaluations
275276
let globalConcurrencyLimiter: ReturnType<typeof pLimit> | null = null
276277

278+
// Track all active child processes for cleanup
279+
const activeChildProcesses = new Set<ChildProcess>()
280+
let isCleaningUp = false
281+
277282
/**
 * Replaces the module-level `globalConcurrencyLimiter` with a new `pLimit`
 * instance created from `limit`.
 *
 * @param limit - Value passed straight to `pLimit`.
 */
export function setGlobalConcurrencyLimit(limit: number) {
278283
globalConcurrencyLimiter = pLimit(limit)
279284
}
280285

286+
/** How long a child gets to exit after the polite request before it is force-killed. */
const EVAL_CHILD_GRACE_PERIOD_MS = 2000

/**
 * True once the child has actually exited.
 *
 * `child.killed` is not usable for this: it only records that
 * `child.kill()` delivered a signal through the ChildProcess object, and the
 * children here are signalled via `process.kill(-pid)` / `taskkill` instead.
 */
function hasEvalChildExited(child: ChildProcess): boolean {
  return child.exitCode !== null || child.signalCode !== null
}

/** True when `error` reports that the target process (group) no longer exists. */
function isNoSuchProcessError(error: unknown): boolean {
  return error instanceof Error && 'code' in error && error.code === 'ESRCH'
}

/** Resolves when the child exits or after `timeoutMs`, whichever comes first. */
function waitForEvalChildExit(
  child: ChildProcess,
  timeoutMs: number,
): Promise<void> {
  return new Promise((resolve) => {
    if (hasEvalChildExited(child)) {
      resolve()
      return
    }
    const onExit = () => {
      clearTimeout(timer)
      resolve()
    }
    const timer = setTimeout(() => {
      child.off('exit', onExit)
      resolve()
    }, timeoutMs)
    child.once('exit', onExit)
  })
}

/**
 * Asks the child's whole process tree to stop (`force = false`) or kills it
 * outright (`force = true`). Throws if the request cannot be delivered.
 */
function signalEvalChildTree(pid: number, force: boolean): void {
  if (process.platform === 'win32') {
    // Windows: kill process tree
    const args = force
      ? ['/F', '/PID', String(pid), '/T']
      : ['/PID', String(pid), '/T']
    execFileSync('taskkill', args, {
      stdio: 'ignore',
      timeout: force ? 1000 : 3000,
    })
  } else {
    // POSIX: a negative pid addresses the process group led by the child
    process.kill(-pid, force ? 'SIGKILL' : 'SIGTERM')
  }
}

/** Stops one child: polite request, bounded wait, then force kill if it is still alive. */
async function terminateEvalChild(child: ChildProcess): Promise<void> {
  const pid = child.pid
  if (!pid || hasEvalChildExited(child)) {
    return
  }

  try {
    signalEvalChildTree(pid, false)
  } catch (error) {
    // A failed polite request must not skip the force-kill stage below.
    if (!isNoSuchProcessError(error)) {
      console.warn(`Failed to gracefully stop process ${pid}:`, error)
    }
  }

  await waitForEvalChildExit(child, EVAL_CHILD_GRACE_PERIOD_MS)
  if (hasEvalChildExited(child)) {
    return
  }

  try {
    signalEvalChildTree(pid, true)
  } catch (error) {
    // The process may have exited between the check and the kill.
    if (!isNoSuchProcessError(error)) {
      console.warn(`Failed to kill process ${pid}:`, error)
    }
  }
}

/**
 * Terminates all active evaluation child processes.
 *
 * Each tracked child is first asked to stop (SIGTERM to its process group on
 * POSIX, `taskkill /T` on Windows). A child that has not exited within the
 * grace period is force-killed. Failures are logged, never thrown.
 *
 * Returns immediately when a cleanup is already in progress or when no
 * children are tracked. The tracking set is emptied before returning.
 */
export async function terminateAllEvalChildren(): Promise<void> {
  if (isCleaningUp || activeChildProcesses.size === 0) {
    return
  }

  isCleaningUp = true
  console.log(
    `\nTerminating ${activeChildProcesses.size} active evaluation processes...`,
  )

  try {
    // Snapshot first: 'exit' listeners remove children from the set while we wait.
    const children = Array.from(activeChildProcesses)
    await Promise.allSettled(children.map((child) => terminateEvalChild(child)))
    activeChildProcesses.clear()
  } finally {
    isCleaningUp = false
  }
}
341+
281342
export async function runGitEvals(
282343
evalDataPath: string,
283344
outputDir: string,
284345
codingAgent: 'codebuff' | 'claude',
285346
limit?: number,
286347
logToStdout: boolean = false,
287348
): Promise<FullEvalLog> {
349+
// Set up signal handlers if this is the main module
350+
if (require.main === module) {
351+
const signalHandler = async (signal: string) => {
352+
console.log(`\nReceived ${signal}, cleaning up...`)
353+
await terminateAllEvalChildren()
354+
process.exit(signal === 'SIGINT' ? 130 : 143)
355+
}
356+
357+
process.on('SIGINT', () => signalHandler('SIGINT'))
358+
process.on('SIGTERM', () => signalHandler('SIGTERM'))
359+
}
288360
console.log(`Loading eval data from: ${evalDataPath}`)
289361
const evalData = JSON.parse(
290362
fs.readFileSync(evalDataPath, 'utf-8'),
@@ -379,9 +451,16 @@ export async function runGitEvals(
379451
fingerprintId,
380452
codingAgent,
381453
],
382-
{ stdio: ['pipe', 'pipe', 'pipe', 'ipc'], env: process.env },
454+
{
455+
stdio: ['pipe', 'pipe', 'pipe', 'ipc'],
456+
env: process.env,
457+
detached: true, // Create new process group for proper signal handling
458+
},
383459
)
384460

461+
// Track child process for cleanup
462+
activeChildProcesses.add(child)
463+
385464
child.stdout?.pipe(logStream)
386465
child.stderr?.pipe(logStream)
387466

@@ -421,7 +500,13 @@ export async function runGitEvals(
421500
)
422501

423502
child.on('exit', (code) => {
424-
logStream.end()
503+
// Remove from tracking
504+
activeChildProcesses.delete(child)
505+
506+
if (!logToStdout && logStream !== process.stdout) {
507+
logStream.end()
508+
}
509+
425510
if (code !== 0) {
426511
console.error(
427512
`Eval process for ${evalCommit.sha} exited with code ${code}. See logs at ${logPath}`,

evals/git-evals/run-single-eval-process.ts

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,17 @@ import { runSingleEval } from './run-git-evals'
1313
import type { EvalCommit } from './types'
1414

1515
async function main() {
16+
// Set up signal handlers so this eval process stops promptly when signalled.
// `shouldExit` is kept because later code in main() reads it.
let shouldExit = false

/**
 * Logs the signal and exits with the conventional 128 + signal-number status
 * (130 for SIGINT, 143 for SIGTERM). The parent treats a non-zero exit code
 * as a failed eval, so an interrupted run must not exit with 0.
 */
const signalHandler = (signal: 'SIGINT' | 'SIGTERM') => {
  console.log(`Child process received ${signal}, exiting gracefully...`)
  shouldExit = true
  process.exit(signal === 'SIGINT' ? 130 : 143)
}

process.on('SIGINT', () => signalHandler('SIGINT'))
process.on('SIGTERM', () => signalHandler('SIGTERM'))
26+
1627
const [
1728
evalCommitFilePath,
1829
projectPath,
@@ -49,13 +60,24 @@ async function main() {
4960
recreateShell(projectPath)
5061
setWorkingDirectory(projectPath)
5162

63+
// Check if we should exit early due to signal
64+
if (shouldExit) {
65+
process.exit(0)
66+
}
67+
5268
const result = await runSingleEval(
5369
evalCommit,
5470
projectPath,
5571
clientSessionId,
5672
fingerprintId,
5773
codingAgent as any,
5874
)
75+
76+
// Check again after long-running operation
77+
if (shouldExit) {
78+
process.exit(0)
79+
}
80+
5981
console.log('Final result:', { result })
6082
if (process.send) {
6183
process.send({ type: 'result', result })
@@ -71,9 +93,11 @@ async function main() {
7193
})
7294
}
7395
} finally {
96+
// Exit more quickly if signal received, otherwise wait briefly
97+
const exitDelay = shouldExit ? 100 : 2000
7498
setTimeout(() => {
7599
process.exit(0)
76-
}, 2000)
100+
}, exitDelay)
77101
}
78102
}
79103

0 commit comments

Comments
 (0)