@@ -70,6 +70,10 @@ class RunEvalSetCommand extends Command {
7070 description: 'Coding agent to use',
7171 default: 'codebuff',
7272 }),
73+ agent: Flags.string({
74+ description: 'Codebuff agent id to use',
75+ default: 'base',
76+ }),
7377 help: Flags.help({ char: 'h' }),
7478 }
7579
@@ -89,6 +93,7 @@ async function runEvalSet(options: {
8993 title?: string
9094 concurrency?: number
9195 'coding-agent': string
96+ agent: string
9297}): Promise<void> {
9398 const {
9499 'output-dir': outputDir,
@@ -98,6 +103,7 @@ async function runEvalSet(options: {
98103 insert: shouldInsert,
99104 title,
100105 'coding-agent': codingAgentstr,
106+ agent,
101107 } = options
102108
103109 if (!['codebuff', 'claude'].includes(codingAgentstr)) {
@@ -127,32 +133,28 @@ async function runEvalSet(options: {
127133 name: 'codebuff',
128134 evalDataPath: path.join(__dirname, 'eval-codebuff2.json'),
129135 outputDir,
130- agentType: undefined,
131136 },
132137 {
133138 name: 'manifold',
134139 evalDataPath: path.join(__dirname, 'eval-manifold2.json'),
135140 outputDir,
136- agentType: undefined,
137141 },
138142 {
139143 name: 'plane',
140144 evalDataPath: path.join(__dirname, 'eval-plane.json'),
141145 outputDir,
142- agentType: undefined,
143146 },
144147 {
145148 name: 'saleor',
146149 evalDataPath: path.join(__dirname, 'eval-saleor.json'),
147150 outputDir,
148- agentType: undefined,
149151 },
150152 ]
151153
152154 console.log(`Running ${evalConfigs.length} evaluations:`)
153155 evalConfigs.forEach((config) => {
154156 console.log(
155- ` - ${config.name}: ${config.evalDataPath} -> ${config.outputDir} (${config.agentType })`,
157+ ` - ${config.name}: ${config.evalDataPath} -> ${config.outputDir} (${agent })`,
156158 )
157159 })
158160
@@ -174,6 +176,7 @@ async function runEvalSet(options: {
174176 codingAgent,
175177 config.limit,
176178 options.concurrency === 1,
179+ agent,
177180 )
178181 } catch (error) {
179182 const evalDuration = Date.now() - evalStartTime
@@ -360,7 +363,7 @@ async function runEvalSet(options: {
360363 const payload: GitEvalResultRequest = {
361364 cost_mode: 'normal', // You can modify this based on your needs
362365 reasoner_model: undefined, // No longer using model config
363- agent_model: config?.agentType ,
366+ agent_model: agent ,
364367 metadata: {
365368 numCases: evalResult?.overall_metrics?.total_runs,
366369 avgScore: evalResult?.overall_metrics?.average_overall,
0 commit comments