Skip to content

Commit dac33f3

Browse files
committed
Tool to spawn agent inline!
1 parent 99fde68 commit dac33f3

File tree

10 files changed

+561
-141
lines changed

10 files changed

+561
-141
lines changed

backend/src/__tests__/run-agent-step-tools.test.ts

Lines changed: 183 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import {
2121

2222
// Mock imports
2323
import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
24+
import * as liveUserInputs from '../live-user-inputs'
2425
import { runAgentStep } from '../run-agent-step'
2526
import { clearAgentGeneratorCache } from '../run-programmatic-step'
2627
import { assembleLocalAgentTemplates } from '../templates/agent-registry'
@@ -53,6 +54,12 @@ describe('runAgentStep - set_output tool', () => {
5354
Promise.resolve(true),
5455
)
5556

57+
// Mock live user input tracking: checkLiveUserInput always reports an active session; the other hooks are no-ops
58+
spyOn(liveUserInputs, 'checkLiveUserInput').mockImplementation(() => true)
59+
spyOn(liveUserInputs, 'startUserInput').mockImplementation(() => {})
60+
spyOn(liveUserInputs, 'endUserInput').mockImplementation(() => {})
61+
spyOn(liveUserInputs, 'setSessionConnected').mockImplementation(() => {})
62+
5663
spyOn(websocketAction, 'requestFiles').mockImplementation(
5764
async (ws: any, paths: string[]) => {
5865
const results: Record<string, string | null> = {}
@@ -80,10 +87,7 @@ describe('runAgentStep - set_output tool', () => {
8087
},
8188
)
8289

83-
spyOn(websocketAction, 'requestToolCall').mockImplementation(async () => ({
84-
success: true,
85-
result: 'Tool call success' as any,
86-
}))
90+
// Don't mock requestToolCall for integration test - let real tool execution happen
8791

8892
// Mock LLM APIs
8993
spyOn(aisdk, 'promptAiSdk').mockImplementation(() =>
@@ -148,7 +152,8 @@ describe('runAgentStep - set_output tool', () => {
148152

149153
const sessionState = getInitialSessionState(mockFileContext)
150154
const agentState = sessionState.mainAgentState
151-
const { agentTemplates: localAgentTemplates } = assembleLocalAgentTemplates(mockFileContext)
155+
const { agentTemplates: localAgentTemplates } =
156+
assembleLocalAgentTemplates(mockFileContext)
152157

153158
const result = await runAgentStep(
154159
new MockWebSocket() as unknown as WebSocket,
@@ -180,15 +185,15 @@ describe('runAgentStep - set_output tool', () => {
180185
status: 'success',
181186
findings: ['Bug in auth.ts', 'Missing validation'],
182187
}) + getToolCallString('end_turn', {})
183-
console.log('mockResponse', mockResponse)
184188

185189
spyOn(aisdk, 'promptAiSdkStream').mockImplementation(async function* () {
186190
yield mockResponse
187191
})
188192

189193
const sessionState = getInitialSessionState(mockFileContext)
190194
const agentState = sessionState.mainAgentState
191-
const { agentTemplates: localAgentTemplates } = assembleLocalAgentTemplates(mockFileContext)
195+
const { agentTemplates: localAgentTemplates } =
196+
assembleLocalAgentTemplates(mockFileContext)
192197

193198
const result = await runAgentStep(
194199
new MockWebSocket() as unknown as WebSocket,
@@ -233,7 +238,8 @@ describe('runAgentStep - set_output tool', () => {
233238
existingField: 'original value',
234239
anotherField: 'unchanged',
235240
}
236-
const { agentTemplates: localAgentTemplates } = assembleLocalAgentTemplates(mockFileContext)
241+
const { agentTemplates: localAgentTemplates } =
242+
assembleLocalAgentTemplates(mockFileContext)
237243

238244
const result = await runAgentStep(
239245
new MockWebSocket() as unknown as WebSocket,
@@ -269,7 +275,8 @@ describe('runAgentStep - set_output tool', () => {
269275
const sessionState = getInitialSessionState(mockFileContext)
270276
const agentState = sessionState.mainAgentState
271277
agentState.output = { existingField: 'value' }
272-
const { agentTemplates: localAgentTemplates } = assembleLocalAgentTemplates(mockFileContext)
278+
const { agentTemplates: localAgentTemplates } =
279+
assembleLocalAgentTemplates(mockFileContext)
273280

274281
const result = await runAgentStep(
275282
new MockWebSocket() as unknown as WebSocket,
@@ -305,7 +312,7 @@ describe('runAgentStep - set_output tool', () => {
305312
toolNames: ['read_files', 'end_turn'],
306313
subagents: [],
307314
systemPrompt: 'Test system prompt',
308-
instructionsPrompt: 'Test user prompt',
315+
instructionsPrompt: 'Test instructions prompt',
309316
stepPrompt: 'Test agent step prompt',
310317
handleSteps: function* ({ agentState, prompt, params }) {
311318
// Yield one tool call
@@ -368,51 +375,180 @@ describe('runAgentStep - set_output tool', () => {
368375
// (The programmatic step tool results don't count toward this calculation)
369376
expect(result.shouldEndTurn).toBe(true)
370377

371-
const messageHistory = result.agentState.messageHistory
372-
373-
// Verify exactly five messages were added: user prompt, user input prompt, tool call, tool result, and assistant response
374-
expect(messageHistory.length).toBe(initialMessageCount + 5)
378+
const finalMessages = result.agentState.messageHistory
375379

376-
// Get the five new messages
377-
const newMessages = messageHistory.slice(initialMessageCount)
380+
// Verify the exact sequence of messages in the final message history
381+
// The stepPrompt with timeToLive: 'agentStep' is removed by expireMessages
382+
const expectedMessages = [
383+
{
384+
role: 'user',
385+
content: expect.stringContaining('Test the handleSteps functionality'),
386+
},
387+
{
388+
role: 'user',
389+
content: expect.stringContaining('Test instructions prompt'),
390+
},
391+
{
392+
role: 'user',
393+
content: expect.stringContaining('read_files'),
394+
},
395+
{
396+
role: 'user',
397+
content: expect.stringContaining('testFunction'),
398+
},
399+
{
400+
role: 'assistant',
401+
content: 'Continuing with the analysis...',
402+
},
403+
]
378404

379-
// First message: user prompt (user role)
380-
const userPromptMessage = newMessages[0]
381-
expect(userPromptMessage.role).toBe('user')
382-
expect(typeof userPromptMessage.content).toBe('string')
383-
expect(userPromptMessage.content).toContain(
384-
'Test the handleSteps functionality',
385-
)
405+
const newMessages = finalMessages.slice(initialMessageCount)
386406

387-
// Second message: user input prompt (user role)
388-
const instructionsPromptMessage = newMessages[1]
389-
expect(instructionsPromptMessage.role).toBe('user')
390-
expect(typeof instructionsPromptMessage.content).toBe('string')
391-
expect(instructionsPromptMessage.content).toContain('Test user prompt')
392-
393-
// Third message: read_files tool call (user role)
394-
const toolCallMessage = newMessages[2]
395-
expect(toolCallMessage.role).toBe('user')
396-
expect(typeof toolCallMessage.content).toBe('string')
397-
expect(toolCallMessage.content).toContain('read_files')
398-
expect(toolCallMessage.content).toContain('src/test.ts')
399-
400-
// Fourth message: read_files tool result (user role)
401-
const toolResultMessage = newMessages[3]
402-
expect(toolResultMessage.role).toBe('user')
403-
expect(typeof toolResultMessage.content).toBe('string')
404-
expect(toolResultMessage.content).toContain('testFunction')
405-
406-
// Fifth message: assistant response (assistant role)
407-
const assistantMessage = newMessages[4]
408-
expect(assistantMessage.role).toBe('assistant')
409-
expect(typeof assistantMessage.content).toBe('string')
410-
expect(assistantMessage.content).toBe('Continuing with the analysis...')
407+
expectedMessages.forEach((expected, index) => {
408+
expect(newMessages[index]).toMatchObject(expected)
409+
})
410+
expect(newMessages).toHaveLength(expectedMessages.length)
411411

412412
// Verify requestFiles was called with correct parameters
413413
expect(websocketAction.requestFiles).toHaveBeenCalledWith(
414414
expect.any(Object), // WebSocket
415415
['src/test.ts'],
416416
)
417417
})
418+
419+
it('should spawn agent inline that deletes last two assistant messages', async () => {
420+
// Create a mock inline agent template that deletes messages
421+
const mockInlineAgentTemplate: AgentTemplate = {
422+
id: 'message-deleter-agent',
423+
displayName: 'Message Deleter Agent',
424+
parentPrompt: 'Deletes assistant messages',
425+
model: 'claude-3-5-sonnet-20241022',
426+
inputSchema: {},
427+
outputMode: 'json' as const,
428+
includeMessageHistory: true,
429+
toolNames: ['set_messages', 'end_turn'],
430+
subagents: [],
431+
systemPrompt: 'Delete messages system prompt',
432+
instructionsPrompt: 'Delete messages instructions prompt',
433+
stepPrompt: 'Delete messages step prompt',
434+
handleSteps: function* ({ agentState, prompt, params }) {
435+
// Delete the last two assistant messages by doing two iterations
436+
const messages = [...agentState.messageHistory]
437+
438+
// First iteration: find and remove the last assistant message
439+
for (let i = messages.length - 1; i >= 0; i--) {
440+
if (messages[i].role === 'assistant') {
441+
messages.splice(i, 1)
442+
break
443+
}
444+
}
445+
446+
// Second iteration: find and remove the next-to-last assistant message
447+
for (let i = messages.length - 1; i >= 0; i--) {
448+
if (messages[i].role === 'assistant') {
449+
messages.splice(i, 1)
450+
break
451+
}
452+
}
453+
454+
// Set the updated messages
455+
yield {
456+
toolName: 'set_messages',
457+
args: { messages },
458+
}
459+
},
460+
}
461+
462+
// Create a parent agent template that can spawn the inline agent
463+
const mockParentAgentTemplate: AgentTemplate = {
464+
id: 'parent-agent',
465+
displayName: 'Parent Agent',
466+
parentPrompt: 'Parent agent that spawns inline agents',
467+
model: 'claude-3-5-sonnet-20241022',
468+
inputSchema: {},
469+
outputMode: 'json' as const,
470+
includeMessageHistory: true,
471+
toolNames: ['spawn_agent_inline', 'end_turn'],
472+
subagents: ['message-deleter-agent'],
473+
systemPrompt: 'Parent system prompt',
474+
instructionsPrompt: 'Parent instructions prompt',
475+
stepPrompt: 'Parent step prompt',
476+
}
477+
478+
// Mock the agent registry to include both agents
479+
const mockAgentRegistry = {
480+
'parent-agent': mockParentAgentTemplate,
481+
'message-deleter-agent': mockInlineAgentTemplate,
482+
}
483+
484+
// Mock the LLM stream to spawn the inline agent
485+
spyOn(aisdk, 'promptAiSdkStream').mockImplementation(async function* () {
486+
yield getToolCallString('spawn_agent_inline', {
487+
agent_type: 'message-deleter-agent',
488+
prompt: 'Delete the last two assistant messages',
489+
})
490+
})
491+
492+
const sessionState = getInitialSessionState(mockFileContext)
493+
const agentState = sessionState.mainAgentState
494+
495+
// Add some initial messages including assistant messages to delete
496+
agentState.messageHistory = [
497+
{ role: 'user', content: 'Hello' },
498+
{ role: 'assistant', content: 'Hi there!' },
499+
{ role: 'user', content: 'How are you?' },
500+
{ role: 'assistant', content: 'I am doing well, thank you!' },
501+
{ role: 'user', content: 'Can you help me?' },
502+
{ role: 'assistant', content: 'Of course, I would be happy to help!' },
503+
]
504+
505+
const result = await runAgentStep(
506+
new MockWebSocket() as unknown as WebSocket,
507+
{
508+
userId: TEST_USER_ID,
509+
userInputId: 'test-input',
510+
clientSessionId: 'test-session',
511+
fingerprintId: 'test-fingerprint',
512+
onResponseChunk: () => {},
513+
agentType: 'parent-agent' as any,
514+
fileContext: mockFileContext,
515+
localAgentTemplates: mockAgentRegistry,
516+
agentState,
517+
prompt: 'Spawn an inline agent to clean up messages',
518+
params: undefined,
519+
},
520+
)
521+
522+
const finalMessages = result.agentState.messageHistory
523+
524+
// This integration test demonstrates that spawn_agent_inline tool calls are executed successfully!
525+
// The inline agent runs its handleSteps function and executes tool calls
526+
527+
// Verify the exact sequence of messages in the final message history
528+
// The inline agent's instructionsPrompt and stepPrompt should be removed by expireMessages
529+
const expectedMessages = [
530+
{ role: 'user', content: 'Hello' },
531+
{ role: 'assistant', content: 'Hi there!' },
532+
{ role: 'user', content: 'How are you?' },
533+
{ role: 'assistant', content: 'I am doing well, thank you!' },
534+
{ role: 'user', content: 'Can you help me?' },
535+
{
536+
role: 'user',
537+
content: expect.stringContaining(
538+
'Spawn an inline agent to clean up messages',
539+
),
540+
},
541+
{
542+
role: 'user',
543+
content: expect.stringContaining(
544+
'Delete the last two assistant messages',
545+
),
546+
},
547+
]
548+
549+
expectedMessages.forEach((expected, index) => {
550+
expect(finalMessages[index]).toMatchObject(expected)
551+
})
552+
expect(finalMessages).toHaveLength(expectedMessages.length)
553+
})
418554
})

backend/src/tools/definitions/list.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import { setMessagesTool } from './tool/set-messages'
1616
import { setOutputTool } from './tool/set-output'
1717
import { spawnAgentsTool } from './tool/spawn-agents'
1818
import { spawnAgentsAsyncTool } from './tool/spawn-agents-async'
19+
import { spawnAgentInlineTool } from './tool/spawn-agent-inline'
1920
import { strReplaceTool } from './tool/str-replace'
2021
import { thinkDeeplyTool } from './tool/think-deeply'
2122
import { updateSubgoalTool } from './tool/update-subgoal'
@@ -43,6 +44,7 @@ const toolDescriptions = {
4344
set_output: setOutputTool,
4445
spawn_agents: spawnAgentsTool,
4546
spawn_agents_async: spawnAgentsAsyncTool,
47+
spawn_agent_inline: spawnAgentInlineTool,
4648
str_replace: strReplaceTool,
4749
think_deeply: thinkDeeplyTool,
4850
update_subgoal: updateSubgoalTool,
backend/src/tools/definitions/tool/spawn-agent-inline.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import { getToolCallString } from '@codebuff/common/tools/utils'
2+
3+
import type { ToolDescription } from '../tool-def-type'
4+
5+
const toolName = 'spawn_agent_inline'
6+
export const spawnAgentInlineTool = {
7+
toolName,
8+
description: `
9+
Spawn a single agent that runs within the current message history.
10+
The spawned agent sees all previous messages and any messages it adds
11+
are preserved when control returns to you.
12+
13+
This is useful for:
14+
- Delegating specific tasks while maintaining context
15+
- Having specialized agents process information inline
16+
- Managing message history (e.g., summarization)
17+
The agent will run until it calls end_turn, then control returns to you. There is no tool result for this tool.
18+
Example:
19+
${getToolCallString(toolName, {
20+
agent_type: 'file-picker',
21+
prompt: 'Find files related to authentication',
22+
params: { paths: ['src/auth.ts', 'src/user.ts'] },
23+
})}
24+
`.trim(),
25+
} satisfies ToolDescription

backend/src/tools/handlers/list.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { handleSetMessages } from './tool/set-messages'
1414
import { handleSetOutput } from './tool/set-output'
1515
import { handleSpawnAgents } from './tool/spawn-agents'
1616
import { handleSpawnAgentsAsync } from './tool/spawn-agents-async'
17+
import { handleSpawnAgentInline } from './tool/spawn-agent-inline'
1718
import { handleStrReplace } from './tool/str-replace'
1819
import { handleThinkDeeply } from './tool/think-deeply'
1920
import { handleUpdateSubgoal } from './tool/update-subgoal'
@@ -49,6 +50,7 @@ export const codebuffToolHandlers = {
4950
set_output: handleSetOutput,
5051
spawn_agents: handleSpawnAgents,
5152
spawn_agents_async: handleSpawnAgentsAsync,
53+
spawn_agent_inline: handleSpawnAgentInline,
5254
str_replace: handleStrReplace,
5355
think_deeply: handleThinkDeeply,
5456
update_subgoal: handleUpdateSubgoal,

0 commit comments

Comments
 (0)