Skip to content

Commit 19ff372

Browse files
committed
loop-agent-steps.test.ts
1 parent 8b6285b commit 19ff372

File tree

1 file changed

+286
-0
lines changed

1 file changed

+286
-0
lines changed
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
import * as analytics from '@codebuff/common/analytics'
2+
import { TEST_USER_ID } from '@codebuff/common/constants'
3+
import {
4+
clearMockedModules,
5+
mockModule,
6+
} from '@codebuff/common/testing/mock-modules'
7+
import { getInitialSessionState } from '@codebuff/common/types/session-state'
8+
import {
9+
afterAll,
10+
afterEach,
11+
beforeAll,
12+
beforeEach,
13+
describe,
14+
expect,
15+
it,
16+
mock,
17+
spyOn,
18+
} from 'bun:test'
19+
20+
import { loopAgentSteps } from '../run-agent-step'
21+
import { clearAgentGeneratorCache } from '../run-programmatic-step'
22+
import { mockFileContext, MockWebSocket } from './test-utils'
23+
import * as promptAgentStream from '../prompt-agent-stream'
24+
import * as requestContext from '../websockets/request-context'
25+
26+
import type { AgentTemplate, StepGenerator } from '../templates/types'
27+
import type {
28+
AgentState,
29+
} from '@codebuff/common/types/session-state'
30+
import type { WebSocket } from 'ws'
31+
32+
describe('loopAgentSteps STEP behavior', () => {
33+
// Fixtures shared by every test; both are reassigned in beforeEach.
let mockTemplate: AgentTemplate
let mockAgentState: AgentState

// Handles to the spies installed in beforeEach.
let getAgentStreamFromTemplateSpy: any
let getRequestContextSpy: any

// Incremented by the mocked processStreamWithTools; reset in beforeEach.
let llmCallCount: number
38+
39+
beforeAll(() => {
  // One shared do-nothing stub for every dependency whose effects we discard.
  const noop = () => {}

  // Logger: swallow all output; withLoggerContext just runs the callback.
  mockModule('@codebuff/backend/util/logger', () => ({
    logger: { debug: noop, error: noop, info: noop, warn: noop },
    withLoggerContext: async (_context: any, run: () => Promise<any>) => run(),
  }))

  // BigQuery: tracing is a no-op.
  mockModule('@codebuff/bigquery', () => ({ insertTrace: noop }))

  // Agent registry: look the agent up in the templates passed by the caller,
  // falling back to the suite's current mockTemplate.
  mockModule('@codebuff/backend/templates/agent-registry', () => ({
    getAgentTemplate: async (agentType: string, localTemplates: any) =>
      localTemplates[agentType] || mockTemplate,
  }))

  // Template strings: every prompt resolves to the same fixed text.
  mockModule('@codebuff/backend/templates/strings', () => ({
    getAgentPrompt: async () => 'Mock prompt',
  }))

  // Live user inputs: false by default; individual tests spy over this.
  mockModule('@codebuff/backend/live-user-inputs', () => ({
    checkLiveUserInput: () => false,
  }))

  // File reading updates: report that nothing changed.
  mockModule('@codebuff/backend/get-file-reading-updates', () => ({
    getFileReadingUpdates: async () => ({
      addedFiles: [],
      updatedFilePaths: [],
      clearReadFileToolResults: false,
    }),
  }))

  // Async agent manager: no registered agents and no queued messages.
  mockModule('@codebuff/backend/async-agent-manager', () => ({
    asyncAgentManager: {
      getAgent: () => null,
      registerAgent: noop,
      updateAgentState: noop,
      getAndClearMessages: () => [],
      getMessages: () => [],
    },
  }))

  // Stream parser: this is where the stream is processed, so each invocation
  // is counted as one LLM call. It reports no tool calls and a fixed response.
  mockModule('@codebuff/backend/tools/stream-parser', () => ({
    processStreamWithTools: async (options: any) => {
      llmCallCount += 1

      const state = {
        agentState: options.agentState || mockAgentState,
        agentContext: {},
        messages: options.messages || [],
      }

      return {
        toolCalls: [],
        toolResults: [],
        state,
        fullResponse: 'LLM response',
        fullResponseChunks: ['LLM response'],
      }
    },
  }))
})
111+
112+
beforeEach(() => {
  // Every test starts with a zeroed LLM call counter.
  llmCallCount = 0

  // Analytics: stub both entry points, then call the stubbed initializer.
  spyOn(analytics, 'initAnalytics').mockImplementation(() => {})
  analytics.initAnalytics()
  spyOn(analytics, 'trackEvent').mockImplementation(() => {})

  // getAgentStreamFromTemplate: hand back a factory whose stream yields a
  // single fixed chunk.
  async function* mockLlmStream() {
    yield 'Mock LLM response'
  }
  getAgentStreamFromTemplateSpy = spyOn(
    promptAgentStream,
    'getAgentStreamFromTemplate',
  ).mockImplementation(() => (_messages: any) => mockLlmStream())

  // getRequestContext: always report the same repo id.
  getRequestContextSpy = spyOn(
    requestContext,
    'getRequestContext',
  ).mockImplementation(() => ({ processedRepoId: 'test-repo-id' }))

  // crypto.randomUUID: return a fixed value so generated ids are stable.
  spyOn(crypto, 'randomUUID').mockImplementation(
    () =>
      'mock-uuid-0000-0000-0000-000000000000' as `${string}-${string}-${string}-${string}-${string}`,
  )

  // Template for a programmatic agent; each test supplies its own handleSteps.
  mockTemplate = {
    id: 'test-agent',
    displayName: 'Test Agent',
    parentPrompt: 'Testing',
    model: 'claude-3-5-sonnet-20241022',
    inputSchema: {},
    outputMode: 'json',
    includeMessageHistory: true,
    toolNames: ['read_files', 'write_file', 'end_turn'],
    subagents: [],
    systemPrompt: 'Test system prompt',
    instructionsPrompt: 'Test user prompt',
    stepPrompt: 'Test agent step prompt',
    handleSteps: undefined,
  } as AgentTemplate

  // Agent state: the initial main agent state with a short message history
  // and enough remaining steps that the step limit is not hit.
  const { mainAgentState } = getInitialSessionState(mockFileContext)
  mockAgentState = {
    ...mainAgentState,
    agentId: 'test-agent-id',
    messageHistory: [
      { role: 'user', content: 'Initial message' },
      { role: 'assistant', content: 'Initial response' },
    ],
    output: undefined,
    stepsRemaining: 10,
  }
})
177+
178+
// Per-test teardown: restore the spies installed in beforeEach (and any added
// by the test itself) and clear the agent generator cache.
// llmCallCount is deliberately not reset here: beforeEach already zeroes it,
// and a bare assignment in the describe body would only run once, while the
// suite is being collected.
afterEach(() => {
  mock.restore()
  clearAgentGeneratorCache()
})

// Suite teardown: clear the module mocks registered through mockModule.
afterAll(() => {
  clearMockedModules()
})
187+
188+
it('should verify correct STEP behavior - LLM called once after STEP', async () => {
  // The programmatic agent runs one tool and then yields STEP. This test pins
  // that the LLM step is then processed exactly once, and that the generator
  // body is entered exactly once.

  // Counts how many times the generator body starts executing.
  let stepCount = 0
  const mockGenerator = (function* () {
    stepCount++

    // Execute a tool, then STEP. A generator body runs only once per
    // instance, so if it is resumed after STEP it simply completes.
    yield { toolName: 'read_files', args: { paths: ['file1.txt'] } }
    yield 'STEP'
  })() as StepGenerator

  mockTemplate.handleSteps = () => mockGenerator

  const localAgentTemplates = {
    'test-agent': mockTemplate,
  }

  // Make checkLiveUserInput return true for its first three calls and false
  // afterwards, leaving room for several loop iterations.
  // NOTE(review): presumably require() is used to obtain the mutable module
  // object registered via mockModule - confirm before converting to an import.
  let checkCallCount = 0
  const liveUserInputs = require('@codebuff/backend/live-user-inputs')
  spyOn(liveUserInputs, 'checkLiveUserInput').mockImplementation(() => {
    checkCallCount++
    return checkCallCount <= 3
  })

  await loopAgentSteps(new MockWebSocket() as unknown as WebSocket, {
    userInputId: 'test-user-input',
    agentType: 'test-agent',
    agentState: mockAgentState,
    prompt: 'Test prompt',
    params: undefined,
    fingerprintId: 'test-fingerprint',
    fileContext: mockFileContext,
    toolResults: [],
    localAgentTemplates,
    userId: TEST_USER_ID,
    clientSessionId: 'test-session',
    onResponseChunk: () => {},
  })

  // After STEP the LLM should be called once, and no more.
  expect(llmCallCount).toBe(1)

  // The generator body should have been entered once (it yielded STEP).
  expect(stepCount).toBe(1)
})
249+
250+
it('should demonstrate correct behavior when programmatic agent completes without STEP', async () => {
  // A fully programmatic run: two tool calls followed by end_turn, never
  // yielding STEP, so the LLM should not be called at all.
  function* toolsThenEndTurn() {
    yield { toolName: 'read_files', args: { paths: ['file1.txt'] } }
    yield { toolName: 'write_file', args: { path: 'output.txt', content: 'test' } }
    yield { toolName: 'end_turn', args: {} }
  }
  const generator = toolsThenEndTurn() as StepGenerator
  mockTemplate.handleSteps = () => generator

  const socket = new MockWebSocket() as unknown as WebSocket
  const result = await loopAgentSteps(socket, {
    userInputId: 'test-user-input',
    agentType: 'test-agent',
    agentState: mockAgentState,
    prompt: 'Test prompt',
    params: undefined,
    fingerprintId: 'test-fingerprint',
    fileContext: mockFileContext,
    toolResults: [],
    localAgentTemplates: { 'test-agent': mockTemplate },
    userId: TEST_USER_ID,
    clientSessionId: 'test-session',
    onResponseChunk: () => {},
  })

  // The agent ended with end_turn, so no LLM call is expected.
  expect(llmCallCount).toBe(0)
  // Only agentState is asserted; hasEndTurn might be undefined.
  expect(result.agentState).toBeDefined()
})
286+
})

0 commit comments

Comments
 (0)