Add context pruner agent, spawn it inline before each STEP of base. Add unit tests for .agents

jahooma · jahooma · commit 98cd731e9079 · 2025-08-17T17:13:00.000-07:00
diff --git a/.agents/__tests__/context-pruner.test.ts b/.agents/__tests__/context-pruner.test.ts
diff --git a/.agents/context-pruner.ts b/.agents/context-pruner.ts
@@ -0,0 +1,175 @@
+import { publisher } from './constants'
+import type { AgentDefinition, Message } from './types/agent-definition'
+
+/**
+ * Agent that shrinks the message history it receives so that it fits
+ * within a token budget. `handleSteps` computes the pruned history
+ * programmatically and applies it with a `set_messages` tool call.
+ */
+const definition: AgentDefinition = {
+  id: 'context-pruner',
+  publisher,
+  displayName: 'Context Pruner',
+  model: 'openai/gpt-5-mini',
+
+  toolNames: ['set_messages'],
+
+  spawnerPrompt: `Spawn this agent between steps to prune context, starting with old tool results and then old messages.`,
+
+  inputSchema: {},
+
+  includeMessageHistory: true,
+
+  /**
+   * Prunes `agentState.messageHistory` in up to three increasingly
+   * aggressive passes, stopping as soon as the estimate is under budget:
+   *   1. omit the output of all but the most recent terminal commands,
+   *   2. omit every tool result whose body exceeds a size threshold,
+   *   3. replace the oldest messages with a short placeholder.
+   * Exactly one `set_messages` tool call is yielded with the outcome.
+   */
+  handleSteps: function* ({ agentState }) {
+    const maxMessageTokens = 200_000
+    const numTerminalCommandsToKeep = 5
+    const maxToolResultChars = 1000
+    const shortenedMessageTokenFactor = 0.5
+
+    const terminalToolTag = '<tool>run_terminal_command</tool>'
+    const terminalResultPattern =
+      /<tool_result>\s*<tool>run_terminal_command<\/tool>\s*<result>[\s\S]*?<\/result>\s*<\/tool_result>/g
+    const anyResultPattern = /<result>([\s\S]*?)<\/result>/g
+
+    // Very rough approximation: about three JSON characters per token.
+    const countTokensJson = (obj: unknown): number =>
+      Math.ceil(JSON.stringify(obj).length / 3)
+
+    // Flattens message content to text so it can be searched and rewritten.
+    const contentToText = (content: Message['content']): string =>
+      typeof content === 'string' ? content : JSON.stringify(content)
+
+    // Swaps in rewritten content, but returns the original message when
+    // nothing changed so untouched structured content is not stringified.
+    const withContent = (
+      message: Message,
+      before: string,
+      after: string,
+    ): Message => (after === before ? message : { ...message, content: after })
+
+    // Pin flag checked in pass 3; read through `any`, presumably because
+    // `Message` does not declare it (TODO confirm).
+    const mustKeep = (message: Message): boolean =>
+      Boolean((message as any).keepDuringTruncation)
+
+    // Builds the tool call that replaces the message history.
+    const setMessages = (messages: Message[]) => ({
+      toolName: 'set_messages' as const,
+      input: { messages },
+    })
+
+    // Drop the trailing assistant message if it is the spawn call that
+    // invoked this pruner, so the call itself does not stay in history.
+    const currentMessages = [...agentState.messageHistory]
+    const lastMessage = currentMessages[currentMessages.length - 1]
+    if (
+      lastMessage &&
+      lastMessage.role === 'assistant' &&
+      typeof lastMessage.content === 'string' &&
+      lastMessage.content.includes('spawn_agent_inline') &&
+      lastMessage.content.includes('context-pruner')
+    ) {
+      currentMessages.pop()
+    }
+
+    // Already under budget: apply history with only the spawn call removed.
+    if (countTokensJson(currentMessages) < maxMessageTokens) {
+      yield setMessages(currentMessages)
+      return
+    }
+
+    // PASS 1: omit terminal command output, leaving the most recent messages
+    // that mention the tool untouched. Walk newest-to-oldest so "recent" is
+    // decided before older messages are simplified.
+    let numKeptTerminalCommands = 0
+    const afterTerminalPass: Message[] = []
+    for (let i = currentMessages.length - 1; i >= 0; i--) {
+      const message = currentMessages[i]
+      const text = contentToText(message.content)
+
+      if (!text.includes(terminalToolTag)) {
+        afterTerminalPass.unshift(message)
+      } else if (numKeptTerminalCommands < numTerminalCommandsToKeep) {
+        numKeptTerminalCommands++
+        afterTerminalPass.unshift(message)
+      } else {
+        const simplified = text.replace(
+          terminalResultPattern,
+          '<tool_result><tool>run_terminal_command</tool><result>[Output omitted]</result></tool_result>',
+        )
+        afterTerminalPass.unshift(withContent(message, text, simplified))
+      }
+    }
+
+    if (countTokensJson(afterTerminalPass) < maxMessageTokens) {
+      yield setMessages(afterTerminalPass)
+      return
+    }
+
+    // PASS 2: omit large tool results. The size check is applied to each
+    // <result> body individually, so small results (including the
+    // "[Output omitted]" markers from pass 1) survive even inside a long
+    // message.
+    const afterToolResultsPass: Message[] = afterTerminalPass.map((message) => {
+      const text = contentToText(message.content)
+      if (!text.includes('<tool_result>')) {
+        return message
+      }
+      const pruned = text.replace(anyResultPattern, (match, body: string) =>
+        body.length > maxToolResultChars
+          ? '<result>[Large tool result omitted]</result>'
+          : match,
+      )
+      return withContent(message, text, pruned)
+    })
+
+    if (countTokensJson(afterToolResultsPass) < maxMessageTokens) {
+      yield setMessages(afterToolResultsPass)
+      return
+    }
+
+    // PASS 3: message-level pruning (like trimMessagesToFitTokenLimit).
+    // Remove messages from the oldest end until roughly half of the budget
+    // not reserved for pinned messages has been freed; each run of removed
+    // messages collapses into one placeholder message.
+    const replacementMessage: Message = {
+      role: 'user',
+      content: '<system>Previous message(s) omitted due to length</system>',
+    }
+    const requiredTokens = countTokensJson(
+      afterToolResultsPass.filter(mustKeep),
+    )
+    const tokensToRemove =
+      (maxMessageTokens - requiredTokens) * (1 - shortenedMessageTokenFactor)
+
+    let removedTokens = 0
+    const finalMessages: Message[] = []
+    for (const message of afterToolResultsPass) {
+      if (removedTokens >= tokensToRemove || mustKeep(message)) {
+        finalMessages.push(message)
+        continue
+      }
+
+      removedTokens += countTokensJson(message)
+      // Start a placeholder only at the beginning of a run of removals, and
+      // charge its own size against the tokens counted as removed.
+      if (finalMessages[finalMessages.length - 1] !== replacementMessage) {
+        finalMessages.push(replacementMessage)
+        removedTokens -= countTokensJson(replacementMessage)
+      }
+    }
+
+    // Apply the final pruned message history
+    yield setMessages(finalMessages)
+  },
+}
+
+export default definition
diff --git a/.agents/factory/base.ts b/.agents/factory/base.ts
@@ -10,10 +10,7 @@ import { AgentTemplateTypes } from '../types/secret-agent-definition'
 import type { SecretAgentDefinition } from '../types/secret-agent-definition'
 import type { ModelName } from 'types/agent-definition'
 
-export const base = (
-  model: ModelName,
-  allAvailableAgents?: string[],
-): Omit<SecretAgentDefinition, 'id'> => ({
+export const base = (model: ModelName): Omit<SecretAgentDefinition, 'id'> => ({
   model,
   displayName: AGENT_PERSONAS.base.displayName,
   spawnerPrompt: AGENT_PERSONAS.base.purpose,
@@ -31,6 +28,7 @@ export const base = (
     'str_replace',
     'write_file',
     'spawn_agents',
+    'spawn_agent_inline',
     'add_subgoal',
     'browser_logs',
     'code_search',
@@ -39,17 +37,30 @@ export const base = (
     'think_deeply',
     'update_subgoal',
   ],
-  spawnableAgents: allAvailableAgents
-    ? (allAvailableAgents as any[])
-    : [
-        AgentTemplateTypes.file_explorer,
-        AgentTemplateTypes.file_picker,
-        AgentTemplateTypes.researcher,
-        AgentTemplateTypes.thinker,
-        AgentTemplateTypes.reviewer,
-      ],
+  spawnableAgents: [
+    AgentTemplateTypes.file_explorer,
+    AgentTemplateTypes.file_picker,
+    AgentTemplateTypes.researcher,
+    AgentTemplateTypes.thinker,
+    AgentTemplateTypes.reviewer,
+    'context-pruner',
+  ],
 
   systemPrompt: baseAgentSystemPrompt(model),
   instructionsPrompt: baseAgentUserInputPrompt(model),
   stepPrompt: baseAgentAgentStepPrompt(model),
+
+  handleSteps: function* ({ agentState }) {
+    while (true) { // no exit condition in this loop; presumably the runtime stops resuming the generator — TODO confirm
+      // Before every step, yield an inline spawn of the 'context-pruner' agent
+      yield {
+        toolName: 'spawn_agent_inline',
+        input: {
+          agent_type: 'context-pruner',
+        },
+      } as any // NOTE(review): the cast disables type checking of this tool call
+
+      yield 'STEP'
+    }
+  },
 })
diff --git a/.agents/package.json b/.agents/package.json
@@ -4,6 +4,7 @@
   "private": true,
   "type": "module",
   "scripts": {
-    "typecheck": "bun x tsc --noEmit -p tsconfig.json"
+    "typecheck": "bun x tsc --noEmit -p tsconfig.json",
+    "test": "bun test"
   }
 }
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -82,11 +82,12 @@ jobs:
     needs: [build-and-check]
     strategy:
       matrix:
-        package: [npm-app, backend, common]
+        package: [npm-app, backend, common, .agents]
         include:
           - package: npm-app
           - package: backend
           - package: common
+          - package: .agents
     name: test-${{ matrix.package }}
     runs-on: ubuntu-latest
     steps:
@@ -130,7 +131,13 @@ jobs:
         with:
           timeout_minutes: 10
           max_attempts: 5
-          command: cd ${{ matrix.package }} && find src -name '*.test.ts' ! -name '*.integration.test.ts' | sort | xargs -I {} bun test {}
+          command: |
+            cd ${{ matrix.package }}
+            if [ "${{ matrix.package }}" = ".agents" ]; then
+              find __tests__ -name '*.test.ts' ! -name '*.integration.test.ts' 2>/dev/null | sort | xargs -I {} bun test {}  # no "|| echo" fallback: it only ran when a test failed and hid the failure; empty input already exits 0
+            else
+              find src -name '*.test.ts' ! -name '*.integration.test.ts' | sort | xargs -I {} bun test {}
+            fi
 
       # - name: Open interactive debug shell
       #   if: ${{ failure() }}
@@ -142,11 +149,12 @@ jobs:
     needs: [build-and-check]
     strategy:
       matrix:
-        package: [npm-app, backend, common]
+        package: [npm-app, backend, common, .agents]
         include:
           - package: npm-app
           - package: backend
           - package: common
+          - package: .agents
     name: test-integration-${{ matrix.package }}
     runs-on: ubuntu-latest
     steps:
@@ -190,7 +198,13 @@ jobs:
         with:
           timeout_minutes: 15
           max_attempts: 3
-          command: cd ${{ matrix.package }} && find src -name '*.integration.test.ts' | sort | xargs -I {} bun test {}
+          command: |
+            cd ${{ matrix.package }}
+            if [ "${{ matrix.package }}" = ".agents" ]; then
+              find __tests__ -name '*.integration.test.ts' 2>/dev/null | sort | xargs -I {} bun test {}  # no "|| echo" fallback: it only ran when a test failed and hid the failure; empty input already exits 0
+            else
+              find src -name '*.integration.test.ts' | sort | xargs -I {} bun test {}
+            fi
 
       # - name: Open interactive debug shell
       #   if: ${{ failure() }}
diff --git a/npm-app/src/agents/agent-utils.ts b/npm-app/src/agents/agent-utils.ts
@@ -23,7 +23,8 @@ export function getAllTsFiles(dir: string): string[] {
       } else if (
         entry.isFile() &&
         entry.name.endsWith('.ts') &&
-        !entry.name.endsWith('.d.ts')
+        !entry.name.endsWith('.d.ts') &&
+        !entry.name.endsWith('.test.ts')
       ) {
         files.push(fullPath)
       }
diff --git a/npm-app/src/client.ts b/npm-app/src/client.ts
@@ -1225,6 +1225,10 @@ export class Client {
     }
 
     const xmlStreamParser = createXMLStreamParser(toolRenderers, (chunk) => {
+      if (!streamStarted) {
+        streamStarted = true
+        onStreamStart()
+      }
       onChunk(chunk)
     })
 
@@ -1254,13 +1258,6 @@ export class Client {
           }
         }
 
-        if (chunk && chunk.trim()) {
-          if (!streamStarted && chunk.trim()) {
-            streamStarted = true
-            onStreamStart()
-          }
-        }
-
         try {
           xmlStreamParser.write(chunk, 'utf8')
         } catch (e) {
diff --git a/npm-app/src/utils/tool-renderers.ts b/npm-app/src/utils/tool-renderers.ts
@@ -319,27 +319,7 @@ export const toolRenderers: Record<ToolName, ToolCallRenderer> = {
     },
   },
   spawn_agent_inline: {
-    onToolStart: (toolName) => {
-      return '\n\n' + gray(`[${bold('Spawn Agent Inline')}]`) + '\n'
-    },
-    onParamEnd: (paramName, toolName, content) => {
-      if (paramName === 'agent_type') {
-        const client = Client.getInstance(false)
-        const agentName =
-          (client?.agentNames && client.agentNames[content]) || content
-        return gray(`@${bold(agentName)}\n`)
-      }
-      if (paramName === 'prompt') {
-        return gray(content + '\n')
-      }
-      return null
-    },
-    onToolEnd: () => {
-      return () => {
-        Spinner.get().start('Agent running inline...')
-        return '\n'
-      }
-    },
+    // Don't render anything
   },
   add_message: {
     // Don't render anything
diff --git a/package.json b/package.json
@@ -32,7 +32,7 @@
     "release:npm-app": "bun run --cwd npm-app release",
     "clean-ts": "find . -name '*.tsbuildinfo' -type f -delete && find . -name '.next' -type d -exec rm -rf {} + 2>/dev/null || true && find . -name 'node_modules' -type d -exec rm -rf {} + 2>/dev/null || true && bun install",
     "typecheck": "bun --filter='*' run typecheck && echo '✅ All type checks passed!'",
-    "test": "bun --filter='{@codebuff/backend,@codebuff/common,@codebuff/npm-app}' run test",
+    "test": "bun --filter='{@codebuff/backend,@codebuff/common,@codebuff/npm-app,@codebuff/agents}' run test",
     "init-worktree": "bun scripts/init-worktree.ts",
     "cleanup-worktree": "bash scripts/cleanup-worktree.sh",
     "generate-tool-definitions": "bun scripts/generate-tool-definitions.ts"

Original file line number	Diff line number	Diff line change
`@@ -4,6 +4,7 @@`
`4`	`4`	`"private": true,`
`5`	`5`	`"type": "module",`
`6`	`6`	`"scripts": {`
`7`		`- "typecheck": "bun x tsc --noEmit -p tsconfig.json"`
	`7`	`+ "typecheck": "bun x tsc --noEmit -p tsconfig.json",`
	`8`	`+ "test": "bun test"`
`8`	`9`	`}`
`9`	`10`	`}`