Skip to content

Commit 37d56de

Browse files
committed
Switch planner to gpt-5-high thinking now that it is specifically for complex tasks
1 parent ec9620c commit 37d56de

File tree

5 files changed

+36
-132
lines changed

5 files changed

+36
-132
lines changed

.agents/base2/gpt-5-high/base2-gpt-5-high.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ const definition: SecretAgentDefinition = {
99
...base2('anthropic/claude-4-sonnet-20250522'),
1010
displayName: 'Base2 GPT-5 High',
1111
spawnableAgents: [
12-
'planner-gpt-5-high',
12+
'planner',
1313
'editor-gpt-5-high',
1414
'reviewer-gpt-5-high',
1515
'context-pruner',

.agents/base2/gpt-5-high/planner-gpt-5-high.ts

Lines changed: 0 additions & 104 deletions
This file was deleted.

.agents/base2/planner-factory.ts

Lines changed: 33 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,46 +13,54 @@ export const plannerFactory = (
1313
model,
1414
displayName: 'Peter Plan',
1515
spawnerPrompt:
16-
'Creates comprehensive plans by exploring the codebase, doing research on the web, and thinking deeply. You can also use it get deep answer to any question.',
16+
'Creates comprehensive plans by exploring the codebase, doing research on the web, and thinking deeply. You can also use it get deep answer to any question. This is a slow agent -- use it only for complex tasks.',
1717
inputSchema: {
1818
prompt: {
1919
type: 'string',
2020
description: 'The task to plan for',
2121
},
2222
},
23-
outputMode: 'last_message',
23+
outputMode: 'structured_output',
2424
includeMessageHistory: true,
25-
toolNames: ['spawn_agents', 'end_turn'],
26-
spawnableAgents: ['file-explorer', 'researcher', 'gemini-thinker-high'],
25+
toolNames: ['spawn_agents', 'read_files', 'end_turn', 'set_output'],
26+
spawnableAgents: [
27+
'file-explorer',
28+
'web-researcher',
29+
'docs-researcher',
30+
'thinker-gpt-5-high',
31+
],
2732

2833
systemPrompt: `You are an expert programmer, architect, researcher, and general problem solver.
29-
You spawn agents to help you gather information and think through the problems.
34+
You spawn agents to help you gather information which will be used to create a plan.
3035
3136
${PLACEHOLDER.FILE_TREE_PROMPT}
3237
${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}`,
3338

34-
instructionsPrompt: `Create a comprehensive plan for the given task.
35-
36-
Process:
37-
- Spawn a file-explorer to understand the relevant codebase. You may also spawn a researcher to search the web for relevant information at the same time.
38-
- After gathering information, spawn a thinker to analyze the best approach and craft a plan.`,
39+
instructionsPrompt: `You are gathering information which will be used to create a plan.
40+
41+
- It's helpful to spawn a file-explorer to find all the relevant parts of the codebase. In parallel as part of the same spawn_agents tool call, you may also spawn a web-researcher or docs-researcher to search the web or technical documentation for relevant information.
42+
- After you are satisfied with the information you have gathered from these agents, stop and use the end_turn tool. The plan will be created in a separate step. Do not spawn thinker-gpt-5-high in this step.`,
3943

4044
handleSteps: function* ({ prompt }) {
41-
// Step 1: Spawn file-explorer and parse out the file paths
42-
const { agentState: stateAfterFileExplorer } = yield 'STEP'
43-
const { messageHistory } = stateAfterFileExplorer
44-
const lastAssistantMessageIndex =
45-
stateAfterFileExplorer.messageHistory.findLastIndex(
46-
(message) => message.role === 'assistant',
47-
)
48-
const toolResultMessage = (messageHistory[
49-
lastAssistantMessageIndex + 1
50-
] as { content: string }) ?? {
51-
content: '',
52-
}
53-
const filePaths = parseFilePathsFromToolResult(toolResultMessage.content)
45+
// Step 1: Gather information
46+
const { agentState } = yield 'STEP_ALL'
47+
48+
// Step 2: Parse out all the file paths and read them.
49+
const messagesBlob =
50+
// Exclude the first two messages, which are system prompt + context
51+
agentState.messageHistory
52+
.slice(2)
53+
.map((message) =>
54+
typeof message.content === 'string'
55+
? message.content
56+
: message.content
57+
.map((content) => (content.type === 'text' ? content.text : ''))
58+
.join('\n'),
59+
)
60+
.join('\n')
61+
62+
const filePaths = parseFilePathsFromToolResult(messagesBlob)
5463

55-
// Step 2: Read the files
5664
yield {
5765
toolName: 'read_files',
5866
input: {
@@ -66,7 +74,7 @@ Process:
6674
input: {
6775
agents: [
6876
{
69-
agent_type: 'gemini-thinker-high',
77+
agent_type: 'thinker-gpt-5-high',
7078
prompt: `Create a clear implementation plan for the following task, with a focus on simplicity and making the minimal changes necessary for an awesome implementation. Prompt: ${prompt}`,
7179
},
7280
],

.agents/factory/docs-researcher.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ export const docsResearcher = (
66
): Omit<SecretAgentDefinition, 'id'> => ({
77
model,
88
displayName: 'Doc',
9-
spawnerPrompt: `Expert at reading technical documentation to find relevant information.`,
9+
spawnerPrompt: `Expert at reading technical documentation of major public libraries and frameworks to find relevant information. (e.g. React, MongoDB, Postgres, etc.)`,
1010
inputSchema: {
1111
prompt: {
1212
type: 'string',

evals/git-evals/run-git-evals.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ export async function runSingleEval(
5858

5959
const unhandledHandler = (reason: any, promise: Promise<any>) => {
6060
console.error('Unhandled rejection during eval:', reason)
61-
processError = `Unhandled rejection: ${reason instanceof Error ? { message: reason.message, stack: reason.stack } : String(reason)}`
61+
processError = `Unhandled rejection: ${reason instanceof Error ? `${reason.message}\n${reason.stack}` : String(reason)}`
6262
}
6363

6464
process.on('uncaughtException', uncaughtHandler)

0 commit comments

Comments
 (0)