@@ -21,6 +21,7 @@ import { judgeEvalRun } from './judge-git-eval'
2121import { extractRepoNameFromUrl , setupTestRepo } from './setup-test-repo'
2222import { AgentDecisionSchema } from './types'
2323
24+ import type { AgentStep } from '../scaffolding'
2425import type {
2526 AgentDecision ,
2627 CodebuffTrace ,
@@ -87,10 +88,18 @@ export async function runSingleEval(
8788 throw new Error ( processError )
8889 }
8990
/**
 * Renders a single agent step as three labelled, fenced sections —
 * `text_response`, `tool_calls` and `tool_results` — separated by blank lines.
 *
 * `toolCalls` and `toolResults` are serialized with `JSON.stringify` using a
 * 2-space indent; `response` is inserted as text.
 *
 * @param step - The step whose `response`, `toolCalls` and `toolResults` are rendered.
 * @returns The three fenced sections joined by a blank line.
 */
function renderAgentStep(step: AgentStep): string {
  const { response, toolCalls, toolResults } = step

  // Wraps `body` in a backtick fence tagged with `label`. The fence is made
  // longer than the longest backtick run inside `body` (never shorter than
  // three), so a code block embedded in the content cannot close the section
  // early. Content without such runs gets the plain ``` fence.
  const fenced = (label: string, body: unknown): string => {
    const text = String(body)
    const longestRun = (text.match(/`+/g) ?? []).reduce(
      (longest, run) => Math.max(longest, run.length),
      0,
    )
    const fence = '`'.repeat(Math.max(3, longestRun + 1))
    return `${fence}${label}\n${text}\n${fence}`
  }

  return [
    fenced('text_response', response),
    fenced('tool_calls', JSON.stringify(toolCalls, null, 2)),
    fenced('tool_results', JSON.stringify(toolResults, null, 2)),
  ].join('\n\n')
}
9099 const renderedTrace = trace
91100 . map (
92101 ( { prompt, steps } ) =>
93- `You: ${ prompt } \n\nCodebuff:${ steps . map ( ( { response } ) => response ) . join ( '\n\n' ) } ` ,
102+ `You: ${ prompt } \n\nCodebuff:${ steps . map ( renderAgentStep ) . join ( '\n\n' ) } ` ,
94103 )
95104 . join ( '\n\n' )
96105
@@ -109,6 +118,8 @@ Current spec to implement:
109118Your conversation with Codebuff so far:
110119<conversation>${ renderedTrace } </conversation>
111120
121+ Note that files can only be changed with tools. If no tools are called, no files were changed.
122+
112123You must decide whether to:
1131241. 'continue' - Generate a follow-up prompt for Codebuff
1141252. 'complete' - The implementation is done and satisfies the spec
0 commit comments