Skip to content

Commit 98cd731

Browse files
committed
Add context pruner agent, spawn it inline before each STEP of base. Add unit tests for .agents
1 parent 800eb52 commit 98cd731

File tree

9 files changed

+679
-48
lines changed

9 files changed

+679
-48
lines changed

.agents/__tests__/context-pruner.test.ts

Lines changed: 445 additions & 0 deletions
Large diffs are not rendered by default.

.agents/context-pruner.ts

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
import { publisher } from './constants'
2+
import type { AgentDefinition, Message } from './types/agent-definition'
3+
4+
/**
 * Agent definition for the context pruner.
 *
 * When run, it rewrites the message history through a single `set_messages`
 * tool call. The history is reduced in progressively more aggressive passes,
 * stopping as soon as the estimated size drops below `maxMessageTokens`:
 *
 *   0. Drop the trailing assistant message if it is the `spawn_agent_inline`
 *      call that invoked this agent.
 *   1. Replace the output of all but the most recent terminal commands.
 *   2. Replace the results inside any tool-result message longer than
 *      `largeToolResultChars` characters.
 *   3. Replace the oldest messages with a short placeholder until the history
 *      is back down to roughly `shortenedMessageTokenFactor` of the budget.
 */
const definition: AgentDefinition = {
  id: 'context-pruner',
  publisher,
  displayName: 'Context Pruner',
  model: 'openai/gpt-5-mini',

  toolNames: ['set_messages'],

  spawnerPrompt: `Spawn this agent between steps to prune context, starting with old tool results and then old messages.`,

  inputSchema: {},

  includeMessageHistory: true,

  handleSteps: function* ({ agentState }) {
    // NOTE(review): every helper lives inside this generator, as in the
    // original implementation — presumably so the function stays
    // self-contained if it is ever serialized; confirm before hoisting any of
    // them to module scope.

    /** A message that may carry the optional "never prune me" marker. */
    type PrunableMessage = Message & { keepDuringTruncation?: boolean }

    const maxMessageTokens = 200_000
    const numTerminalCommandsToKeep = 5
    const largeToolResultChars = 1000
    // Pass 3 shrinks the prunable part of the history to this fraction of the
    // budget left over after the must-keep messages.
    const shortenedMessageTokenFactor = 0.5

    const terminalCommandMarker = '<tool>run_terminal_command</tool>'
    const terminalResultPattern =
      /<tool_result>\s*<tool>run_terminal_command<\/tool>\s*<result>[\s\S]*?<\/result>\s*<\/tool_result>/g
    const terminalResultReplacement =
      '<tool_result><tool>run_terminal_command</tool><result>[Output omitted]</result></tool_result>'
    const anyResultPattern = /<result>[\s\S]*?<\/result>/g
    const anyResultReplacement = '<result>[Large tool result omitted]</result>'

    /** Very rough token estimate: one token per three characters of JSON. */
    const countTokensJson = (obj: unknown): number =>
      Math.ceil(JSON.stringify(obj).length / 3)

    /** Text used for pattern matching; non-string content is JSON-encoded. */
    const contentAsText = (message: Message): string =>
      typeof message.content === 'string'
        ? message.content
        : JSON.stringify(message.content)

    /**
     * Returns `message` untouched when the rewrite changed nothing, so that
     * structured (non-string) content is only flattened to a string when
     * something was actually pruned from it.
     */
    const withContent = (
      message: Message,
      before: string,
      after: string,
    ): Message => (after === before ? message : { ...message, content: after })

    const mustKeep = (message: Message): boolean =>
      Boolean((message as PrunableMessage).keepDuringTruncation)

    // PASS 0: remove the last assistant message if it contains the spawn call
    // that invoked this context-pruner.
    const history: Message[] = [...agentState.messageHistory]
    const last = history[history.length - 1]
    if (
      last !== undefined &&
      last.role === 'assistant' &&
      typeof last.content === 'string' &&
      last.content.includes('spawn_agent_inline') &&
      last.content.includes('context-pruner')
    ) {
      history.pop()
    }

    /** Runs the pruning passes and returns the history to install. */
    const prune = (): Message[] => {
      // Already under the limit: only the spawn call (if any) was removed.
      if (countTokensJson(history) < maxMessageTokens) {
        return history
      }

      // PASS 1: walk newest -> oldest, keep the most recent terminal command
      // messages intact and strip the output of all older ones.
      const afterTerminalPass = history.slice()
      let numKeptTerminalCommands = 0
      for (let i = history.length - 1; i >= 0; i--) {
        const message = history[i]
        const text = contentAsText(message)
        if (!text.includes(terminalCommandMarker)) {
          continue
        }
        if (numKeptTerminalCommands < numTerminalCommandsToKeep) {
          numKeptTerminalCommands++
          continue
        }
        afterTerminalPass[i] = withContent(
          message,
          text,
          text.replace(terminalResultPattern, terminalResultReplacement),
        )
      }
      if (countTokensJson(afterTerminalPass) < maxMessageTokens) {
        return afterTerminalPass
      }

      // PASS 2: in every tool-result message whose whole text is longer than
      // the threshold, replace all <result> bodies with a short notice.
      const afterToolResultsPass = afterTerminalPass.map((message) => {
        const text = contentAsText(message)
        if (
          !text.includes('<tool_result>') ||
          text.length <= largeToolResultChars
        ) {
          return message
        }
        return withContent(
          message,
          text,
          text.replace(anyResultPattern, anyResultReplacement),
        )
      })
      const tokensAfterToolResults = countTokensJson(afterToolResultsPass)
      if (tokensAfterToolResults < maxMessageTokens) {
        return afterToolResultsPass
      }

      // PASS 3: message-level pruning, oldest first. Each run of consecutive
      // removed messages collapses into one placeholder message.
      const replacementMessage: Message = {
        role: 'user',
        content: '<system>Previous message(s) omitted due to length</system>',
      }
      const replacementTokens = countTokensJson(replacementMessage)

      const requiredTokens = countTokensJson(
        afterToolResultsPass.filter(mustKeep),
      )
      // Aim for the must-keep messages plus a fraction of the remaining
      // budget. The amount to remove is measured from the ACTUAL current size,
      // so a history far above the limit is still brought back under it.
      const prunableBudget = Math.max(0, maxMessageTokens - requiredTokens)
      const targetTokens =
        requiredTokens + prunableBudget * shortenedMessageTokenFactor
      const tokensToRemove = tokensAfterToolResults - targetTokens

      let removedTokens = 0
      const finalMessages: Message[] = []
      for (const message of afterToolResultsPass) {
        if (removedTokens >= tokensToRemove || mustKeep(message)) {
          finalMessages.push(message)
          continue
        }

        removedTokens += countTokensJson(message)
        // `replacementMessage` is compared by reference, so it can only match
        // a placeholder pushed by this loop, never a real message.
        if (finalMessages[finalMessages.length - 1] !== replacementMessage) {
          finalMessages.push(replacementMessage)
          // The placeholder itself costs tokens; do not count them as removed.
          removedTokens -= replacementTokens
        }
      }
      return finalMessages
    }

    // Apply the pruned message history.
    yield {
      toolName: 'set_messages',
      input: {
        messages: prune(),
      },
    }
  },
}
181+
182+
export default definition

.agents/factory/base.ts

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,7 @@ import { AgentTemplateTypes } from '../types/secret-agent-definition'
1010
import type { SecretAgentDefinition } from '../types/secret-agent-definition'
1111
import type { ModelName } from 'types/agent-definition'
1212

13-
export const base = (
14-
model: ModelName,
15-
allAvailableAgents?: string[],
16-
): Omit<SecretAgentDefinition, 'id'> => ({
13+
export const base = (model: ModelName): Omit<SecretAgentDefinition, 'id'> => ({
1714
model,
1815
displayName: AGENT_PERSONAS.base.displayName,
1916
spawnerPrompt: AGENT_PERSONAS.base.purpose,
@@ -31,6 +28,7 @@ export const base = (
3128
'str_replace',
3229
'write_file',
3330
'spawn_agents',
31+
'spawn_agent_inline',
3432
'add_subgoal',
3533
'browser_logs',
3634
'code_search',
@@ -39,17 +37,30 @@ export const base = (
3937
'think_deeply',
4038
'update_subgoal',
4139
],
42-
spawnableAgents: allAvailableAgents
43-
? (allAvailableAgents as any[])
44-
: [
45-
AgentTemplateTypes.file_explorer,
46-
AgentTemplateTypes.file_picker,
47-
AgentTemplateTypes.researcher,
48-
AgentTemplateTypes.thinker,
49-
AgentTemplateTypes.reviewer,
50-
],
40+
spawnableAgents: [
41+
AgentTemplateTypes.file_explorer,
42+
AgentTemplateTypes.file_picker,
43+
AgentTemplateTypes.researcher,
44+
AgentTemplateTypes.thinker,
45+
AgentTemplateTypes.reviewer,
46+
'context-pruner',
47+
],
5148

5249
systemPrompt: baseAgentSystemPrompt(model),
5350
instructionsPrompt: baseAgentUserInputPrompt(model),
5451
stepPrompt: baseAgentAgentStepPrompt(model),
52+
53+
handleSteps: function* ({ agentState }) {
54+
while (true) {
55+
// Run context-pruner before each step
56+
yield {
57+
toolName: 'spawn_agent_inline',
58+
input: {
59+
agent_type: 'context-pruner',
60+
},
61+
} as any
62+
63+
yield 'STEP'
64+
}
65+
},
5566
})

.agents/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"private": true,
55
"type": "module",
66
"scripts": {
7-
"typecheck": "bun x tsc --noEmit -p tsconfig.json"
7+
"typecheck": "bun x tsc --noEmit -p tsconfig.json",
8+
"test": "bun test"
89
}
910
}

.github/workflows/ci.yml

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,12 @@ jobs:
8282
needs: [build-and-check]
8383
strategy:
8484
matrix:
85-
package: [npm-app, backend, common]
85+
package: [npm-app, backend, common, .agents]
8686
include:
8787
- package: npm-app
8888
- package: backend
8989
- package: common
90+
- package: .agents
9091
name: test-${{ matrix.package }}
9192
runs-on: ubuntu-latest
9293
steps:
@@ -130,7 +131,13 @@ jobs:
130131
with:
131132
timeout_minutes: 10
132133
max_attempts: 5
133-
command: cd ${{ matrix.package }} && find src -name '*.test.ts' ! -name '*.integration.test.ts' | sort | xargs -I {} bun test {}
134+
command: |
135+
cd ${{ matrix.package }}
136+
if [ "${{ matrix.package }}" = ".agents" ]; then
137+
find __tests__ -name '*.test.ts' ! -name '*.integration.test.ts' 2>/dev/null | sort | xargs -I {} bun test {} || echo "No regular tests found in .agents"
138+
else
139+
find src -name '*.test.ts' ! -name '*.integration.test.ts' | sort | xargs -I {} bun test {}
140+
fi
134141
135142
# - name: Open interactive debug shell
136143
# if: ${{ failure() }}
@@ -142,11 +149,12 @@ jobs:
142149
needs: [build-and-check]
143150
strategy:
144151
matrix:
145-
package: [npm-app, backend, common]
152+
package: [npm-app, backend, common, .agents]
146153
include:
147154
- package: npm-app
148155
- package: backend
149156
- package: common
157+
- package: .agents
150158
name: test-integration-${{ matrix.package }}
151159
runs-on: ubuntu-latest
152160
steps:
@@ -190,7 +198,13 @@ jobs:
190198
with:
191199
timeout_minutes: 15
192200
max_attempts: 3
193-
command: cd ${{ matrix.package }} && find src -name '*.integration.test.ts' | sort | xargs -I {} bun test {}
201+
command: |
202+
cd ${{ matrix.package }}
203+
if [ "${{ matrix.package }}" = ".agents" ]; then
204+
find __tests__ -name '*.integration.test.ts' 2>/dev/null | sort | xargs -I {} bun test {} || echo "No integration tests found in .agents"
205+
else
206+
find src -name '*.integration.test.ts' | sort | xargs -I {} bun test {}
207+
fi
194208
195209
# - name: Open interactive debug shell
196210
# if: ${{ failure() }}

npm-app/src/agents/agent-utils.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ export function getAllTsFiles(dir: string): string[] {
2323
} else if (
2424
entry.isFile() &&
2525
entry.name.endsWith('.ts') &&
26-
!entry.name.endsWith('.d.ts')
26+
!entry.name.endsWith('.d.ts') &&
27+
!entry.name.endsWith('.test.ts')
2728
) {
2829
files.push(fullPath)
2930
}

npm-app/src/client.ts

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,6 +1225,10 @@ export class Client {
12251225
}
12261226

12271227
const xmlStreamParser = createXMLStreamParser(toolRenderers, (chunk) => {
1228+
if (!streamStarted) {
1229+
streamStarted = true
1230+
onStreamStart()
1231+
}
12281232
onChunk(chunk)
12291233
})
12301234

@@ -1254,13 +1258,6 @@ export class Client {
12541258
}
12551259
}
12561260

1257-
if (chunk && chunk.trim()) {
1258-
if (!streamStarted && chunk.trim()) {
1259-
streamStarted = true
1260-
onStreamStart()
1261-
}
1262-
}
1263-
12641261
try {
12651262
xmlStreamParser.write(chunk, 'utf8')
12661263
} catch (e) {

npm-app/src/utils/tool-renderers.ts

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -319,27 +319,7 @@ export const toolRenderers: Record<ToolName, ToolCallRenderer> = {
319319
},
320320
},
321321
spawn_agent_inline: {
322-
onToolStart: (toolName) => {
323-
return '\n\n' + gray(`[${bold('Spawn Agent Inline')}]`) + '\n'
324-
},
325-
onParamEnd: (paramName, toolName, content) => {
326-
if (paramName === 'agent_type') {
327-
const client = Client.getInstance(false)
328-
const agentName =
329-
(client?.agentNames && client.agentNames[content]) || content
330-
return gray(`@${bold(agentName)}\n`)
331-
}
332-
if (paramName === 'prompt') {
333-
return gray(content + '\n')
334-
}
335-
return null
336-
},
337-
onToolEnd: () => {
338-
return () => {
339-
Spinner.get().start('Agent running inline...')
340-
return '\n'
341-
}
342-
},
322+
// Don't render anything
343323
},
344324
add_message: {
345325
// Don't render anything

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
"release:npm-app": "bun run --cwd npm-app release",
3333
"clean-ts": "find . -name '*.tsbuildinfo' -type f -delete && find . -name '.next' -type d -exec rm -rf {} + 2>/dev/null || true && find . -name 'node_modules' -type d -exec rm -rf {} + 2>/dev/null || true && bun install",
3434
"typecheck": "bun --filter='*' run typecheck && echo '✅ All type checks passed!'",
35-
"test": "bun --filter='{@codebuff/backend,@codebuff/common,@codebuff/npm-app}' run test",
35+
"test": "bun --filter='{@codebuff/backend,@codebuff/common,@codebuff/npm-app,@codebuff/agents}' run test",
3636
"init-worktree": "bun scripts/init-worktree.ts",
3737
"cleanup-worktree": "bash scripts/cleanup-worktree.sh",
3838
"generate-tool-definitions": "bun scripts/generate-tool-definitions.ts"

0 commit comments

Comments
 (0)