Revert using Codebuff sdk for prompting agent

jahooma · jahooma · commit 964f44122173 · 2025-08-25T14:05:58.000-07:00
diff --git a/evals/git-evals/run-git-evals.ts b/evals/git-evals/run-git-evals.ts
@@ -3,19 +3,19 @@ import fs from 'fs'
 import path from 'path'
 
 import { disableLiveUserInputCheck } from '@codebuff/backend/live-user-inputs'
+import { promptAiSdkStructured } from '@codebuff/backend/llm-apis/vercel-ai-sdk/ai-sdk'
+import { models } from '@codebuff/common/constants'
 import { withTimeout } from '@codebuff/common/util/promise'
 import { generateCompactId } from '@codebuff/common/util/string'
 import pLimit from 'p-limit'
-import { getUserCredentials } from '@codebuff/npm-app/credentials'
 
 import { resetRepoToCommit } from '../scaffolding'
 import { createInitialSessionState } from '../test-setup'
-import { judgeEvalRun } from './judge-git-eval'
 import { ClaudeRunner } from './runners/claude'
 import { CodebuffRunner } from './runners/codebuff'
 import { extractRepoNameFromUrl, setupTestRepo } from './setup-test-repo'
 import { AgentDecisionSchema } from './types'
-import { getNextEvalPrompt } from './prompting-agent'
+import { judgeEvalRun } from './judge-git-eval'
 
 import type { AgentStep } from '../scaffolding'
 import type { Runner } from './runners/runner'
@@ -30,8 +30,6 @@ import type {
 } from './types'
 import type { z } from 'zod/v4'
 import type { ChildProcess } from 'child_process'
-import { CodebuffClient } from '../../sdk/src/client'
-import { API_KEY_ENV_VAR } from '@codebuff/common/constants'
 
 disableLiveUserInputCheck()
 
@@ -66,16 +64,6 @@ export async function runSingleEval(
   process.on('uncaughtException', uncaughtHandler)
   process.on('unhandledRejection', unhandledHandler)
 
-  // SDK client for prompting agent
-  const apiKey = process.env[API_KEY_ENV_VAR] || getLocalAuthToken()
-  const sdkClient = new CodebuffClient({
-    apiKey,
-    cwd: projectPath,
-    onError: (error) => {
-      throw new Error(`Prompting agent error: ${error.message}`)
-    },
-  })
-
   try {
     // Reset to the commit before the target commit
     resetRepoToCommit(projectPath, `${evalCommit.sha}^`)
@@ -122,18 +110,40 @@ export async function runSingleEval(
         )
         .join('\n\n')
 
-      // Get next prompt from prompting agent using Codebuff SDK
+      // Get next prompt from prompting agent with timeout
       let agentResponse: z.infer<typeof AgentDecisionSchema>
       try {
-        agentResponse = await withTimeout(
-          getNextEvalPrompt({
-            client: sdkClient,
-            spec: evalCommit.spec,
-            conversationHistory: renderedTrace,
-            attemptsRemaining: MAX_ATTEMPTS - attempts,
-          }),
-          5 * 60_000, // 5 minute timeout
-        )
+        agentResponse = await promptAiSdkStructured({
+          messages: [
+            {
+              role: 'user',
+              content: `You are an expert software engineer tasked with implementing a specification using CodeBuff, an AI coding assistant. Your goal is to prompt CodeBuff to implement the spec correctly. You are in a conversation with this coding agent.
+
+Current spec to implement:
+<spec>${evalCommit.spec}</spec>
+
+Your conversation with Codebuff so far:
+<conversation>${renderedTrace}</conversation>
+
+Note that files can only be changed with tools. If no tools are called, no files were changed.
+
+You must decide whether to:
+1. 'continue' - Generate a follow-up prompt for Codebuff
+2. 'complete' - The implementation is done and satisfies the spec
+3. 'halt' - The implementation is off track and unlikely to be completed within ${MAX_ATTEMPTS - attempts} more attempts
+
+If deciding to continue, include a clear, focused prompt for Codebuff in next_prompt. Note that Codebuff does not have access to the spec, so you must describe the changes you want Codebuff to make in a way that is clear and concise.
+Explain your reasoning in detail.`,
+            },
+          ],
+          schema: AgentDecisionSchema,
+          model: models.gemini2_5_flash,
+          clientSessionId,
+          fingerprintId,
+          userInputId: generateCompactId(),
+          userId: undefined,
+          timeout: 5 * 60_000, // 5 minute timeout
+        })
       } catch (agentError) {
         throw new Error(
           `Agent decision failed: ${agentError instanceof Error ? `${agentError.message}\n${JSON.stringify(agentError)}\n${agentError.stack}` : String(agentError)}`,
@@ -187,8 +197,6 @@ export async function runSingleEval(
         process.on('unhandledRejection', handler)
       }
     })
-
-    sdkClient.closeConnection()
   }
 
   // If we caught a process-level error, use that
@@ -241,10 +249,6 @@ export async function runSingleEval(
   }
 }
 
-const getLocalAuthToken = () => {
-  return getUserCredentials()?.authToken
-}
-
 function getCodebuffFileStates(
   evalCommitSha: string,
   projectPath: string,