Skip to content

Commit f590eae

Browse files
committed
Fix running the programmatic step after the LLM finishes. Add a test.
1 parent f2e54e5 commit f590eae

File tree

3 files changed

+85
-3
lines changed

3 files changed

+85
-3
lines changed

backend/src/__tests__/loop-agent-steps.test.ts

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
189189
spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation(
190190
() => {
191191
checkCallCount++
192-
// Allow enough iterations to see the bug
193-
return checkCallCount <= 3
192+
return true
194193
},
195194
)
196195

@@ -634,4 +633,83 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
634633
expect(result.agentState).toBeDefined()
635634
expect(getMessagesCallCount).toBeGreaterThan(0)
636635
})
636+
637+
it('should pass shouldEndTurn: true as stepsComplete when end_turn tool is called', async () => {
638+
// Test that when LLM calls end_turn, shouldEndTurn is correctly passed to runProgrammaticStep
639+
640+
let runProgrammaticStepCalls: any[] = []
641+
642+
// Mock runProgrammaticStep module to capture calls and verify stepsComplete parameter
643+
mockModule('@codebuff/backend/run-programmatic-step', () => ({
644+
runProgrammaticStep: async (agentState: any, options: any) => {
645+
runProgrammaticStepCalls.push({ agentState, options })
646+
// Return default behavior
647+
return { agentState, endTurn: false }
648+
},
649+
clearAgentGeneratorCache: () => {},
650+
agentIdToStepAll: new Set(),
651+
}))
652+
653+
const mockGeneratorFunction = function* () {
654+
yield 'STEP' // Hand control to LLM
655+
} as () => StepGenerator
656+
657+
mockTemplate.handleSteps = mockGeneratorFunction
658+
659+
const localAgentTemplates = {
660+
'test-agent': mockTemplate,
661+
}
662+
663+
// Mock the stream parser to simulate LLM calling end_turn tool
664+
mockModule('@codebuff/backend/tools/stream-parser', () => ({
665+
processStreamWithTools: async (options: any) => {
666+
llmCallCount++
667+
return {
668+
toolCalls: [
669+
{ toolName: 'end_turn', input: {}, toolCallId: 'test-id' },
670+
],
671+
toolResults: [],
672+
state: {
673+
agentState: options.agentState,
674+
agentContext: {},
675+
messages: options.messages,
676+
},
677+
fullResponse: 'LLM response with end_turn',
678+
fullResponseChunks: ['LLM response with end_turn'],
679+
}
680+
},
681+
}))
682+
683+
// Mock checkLiveUserInput to allow the loop to run
684+
const mockCheckLiveUserInput = require('@codebuff/backend/live-user-inputs')
685+
spyOn(mockCheckLiveUserInput, 'checkLiveUserInput').mockImplementation(
686+
() => true,
687+
)
688+
689+
await loopAgentSteps(new MockWebSocket() as unknown as WebSocket, {
690+
userInputId: 'test-user-input',
691+
agentType: 'test-agent',
692+
agentState: mockAgentState,
693+
prompt: 'Test shouldEndTurn to stepsComplete flow',
694+
params: undefined,
695+
fingerprintId: 'test-fingerprint',
696+
fileContext: mockFileContext,
697+
toolResults: [],
698+
localAgentTemplates,
699+
userId: TEST_USER_ID,
700+
clientSessionId: 'test-session',
701+
onResponseChunk: () => {},
702+
})
703+
704+
// Verify that runProgrammaticStep was called twice:
705+
// 1. First with stepsComplete: false (initial call)
706+
// 2. Second with stepsComplete: true (after LLM called end_turn)
707+
expect(runProgrammaticStepCalls).toHaveLength(2)
708+
709+
// First call should have stepsComplete: false
710+
expect(runProgrammaticStepCalls[0].options.stepsComplete).toBe(false)
711+
712+
// Second call should have stepsComplete: true (after end_turn tool was called)
713+
expect(runProgrammaticStepCalls[1].options.stepsComplete).toBe(true)
714+
})
637715
})

backend/src/run-agent-step.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ export const loopAgentSteps = async (
536536
localAgentTemplates,
537537
prompt: currentPrompt,
538538
params: currentParams,
539-
stepsComplete: false,
539+
stepsComplete: shouldEndTurn,
540540
})
541541
currentAgentState = programmaticAgentState
542542

bunfig.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@
22
# In CI, skip local linking and pull prebuilt artifacts from npm
33
linkWorkspacePackages = false
44

5+
[test]
6+
# Exclude test repositories from test execution to prevent timeouts
7+
exclude = ["evals/test-repos/**"]
8+

0 commit comments

Comments
 (0)