Skip to content

Commit ea98ed5

Browse files
committed
update instructions for eval superagent
1 parent 8b18473 commit ea98ed5

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

evals/git-evals/run-git-evals.ts

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ import { judgeEvalRun } from './judge-git-eval'
2121
import { extractRepoNameFromUrl, setupTestRepo } from './setup-test-repo'
2222
import { AgentDecisionSchema } from './types'
2323

24+
import type { AgentStep } from '../scaffolding'
2425
import type {
2526
AgentDecision,
2627
CodebuffTrace,
@@ -87,10 +88,18 @@ export async function runSingleEval(
8788
throw new Error(processError)
8889
}
8990

91+
/**
 * Renders a single agent step as three fenced blocks separated by blank
 * lines: the text response, the tool calls, and the tool results.
 *
 * The tool calls and tool results are pretty-printed as JSON with a
 * two-space indent so they stay readable when the rendered step is embedded
 * in a larger prompt.
 *
 * @param step - The step to render; its `response`, `toolCalls` and
 *   `toolResults` fields are read.
 * @returns The three fenced blocks joined by a blank line.
 */
function renderAgentStep(step: AgentStep): string {
  const { response, toolCalls, toolResults } = step

  // JSON.stringify(undefined) yields undefined, which a template literal
  // would print as the word "undefined". Fall back to an empty list so a
  // step without tool activity renders as "[]" instead.
  const toolCallsJson = JSON.stringify(toolCalls ?? [], null, 2)
  const toolResultsJson = JSON.stringify(toolResults ?? [], null, 2)

  return [
    `\`\`\`text_response\n${response}\n\`\`\``,
    `\`\`\`tool_calls\n${toolCallsJson}\n\`\`\``,
    `\`\`\`tool_results\n${toolResultsJson}\n\`\`\``,
  ].join('\n\n')
}
9099
const renderedTrace = trace
91100
.map(
92101
({ prompt, steps }) =>
93-
`You: ${prompt}\n\nCodebuff:${steps.map(({ response }) => response).join('\n\n')}`,
102+
`You: ${prompt}\n\nCodebuff:${steps.map(renderAgentStep).join('\n\n')}`,
94103
)
95104
.join('\n\n')
96105

@@ -109,6 +118,8 @@ Current spec to implement:
109118
Your conversation with Codebuff so far:
110119
<conversation>${renderedTrace}</conversation>
111120
121+
Note that files can only be changed with tools. If no tools are called, no files were changed.
122+
112123
You must decide whether to:
113124
1. 'continue' - Generate a follow-up prompt for Codebuff
114125
2. 'complete' - The implementation is done and satisfies the spec

0 commit comments

Comments
 (0)