From 9fff8cb5bcbaebaff1788b7756fdb534fbcc1c41 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 30 Jan 2026 11:26:41 -0800 Subject: [PATCH 1/5] remove for bedrock since they handle on their own --- .cursor/commands/council.md | 7 +++++++ apps/sim/providers/anthropic/index.ts | 21 +++++++++++++++------ apps/sim/providers/bedrock/index.ts | 13 ++++++++++--- 3 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 .cursor/commands/council.md diff --git a/.cursor/commands/council.md b/.cursor/commands/council.md new file mode 100644 index 0000000000..f754d92f74 --- /dev/null +++ b/.cursor/commands/council.md @@ -0,0 +1,7 @@ +Based on the given area of interest, please: + +1. Dig around the codebase in terms of that given area of interest, gather general information such as keywords and architecture overview. +2. Spawn off n=10 (unless specified otherwise) task agents to dig deeper into the codebase in terms of that given area of interest, some of them should be out of the box for variance. +3. Once the task agents are done, use the information to do what the user wants. + +If user is in plan mode, use the information to create the plan. 
\ No newline at end of file diff --git a/apps/sim/providers/anthropic/index.ts b/apps/sim/providers/anthropic/index.ts index 29e277825e..ad56c5efcb 100644 --- a/apps/sim/providers/anthropic/index.ts +++ b/apps/sim/providers/anthropic/index.ts @@ -302,13 +302,18 @@ export const anthropicProvider: ProviderConfig = { const providerStartTime = Date.now() const providerStartTimeISO = new Date(providerStartTime).toISOString() + const nonStreamingMaxTokens = + Number.parseInt(String(request.maxTokens)) || + getMaxOutputTokensForModel(request.model, false) + const intermediatePayload = { ...payload, max_tokens: nonStreamingMaxTokens } + try { const initialCallTime = Date.now() - const originalToolChoice = payload.tool_choice + const originalToolChoice = intermediatePayload.tool_choice const forcedTools = preparedTools?.forcedTools || [] let usedForcedTools: string[] = [] - let currentResponse = await anthropic.messages.create(payload) + let currentResponse = await anthropic.messages.create(intermediatePayload) const firstResponseTime = Date.now() - initialCallTime let content = '' @@ -491,7 +496,7 @@ export const anthropicProvider: ProviderConfig = { toolsTime += thisToolsTime const nextPayload = { - ...payload, + ...intermediatePayload, messages: currentMessages, } @@ -674,13 +679,17 @@ export const anthropicProvider: ProviderConfig = { const providerStartTime = Date.now() const providerStartTimeISO = new Date(providerStartTime).toISOString() + const toolLoopMaxTokens = + Number.parseInt(String(request.maxTokens)) || getMaxOutputTokensForModel(request.model, false) + const toolLoopPayload = { ...payload, max_tokens: toolLoopMaxTokens } + try { const initialCallTime = Date.now() - const originalToolChoice = payload.tool_choice + const originalToolChoice = toolLoopPayload.tool_choice const forcedTools = preparedTools?.forcedTools || [] let usedForcedTools: string[] = [] - let currentResponse = await anthropic.messages.create(payload) + let currentResponse = await 
anthropic.messages.create(toolLoopPayload) const firstResponseTime = Date.now() - initialCallTime let content = '' @@ -867,7 +876,7 @@ export const anthropicProvider: ProviderConfig = { toolsTime += thisToolsTime const nextPayload = { - ...payload, + ...toolLoopPayload, messages: currentMessages, } diff --git a/apps/sim/providers/bedrock/index.ts b/apps/sim/providers/bedrock/index.ts index 1edee08d81..83ae98911f 100644 --- a/apps/sim/providers/bedrock/index.ts +++ b/apps/sim/providers/bedrock/index.ts @@ -268,6 +268,13 @@ export const bedrockProvider: ProviderConfig = { getMaxOutputTokensForModel(request.model, request.stream ?? false), } + const toolLoopInferenceConfig = { + ...inferenceConfig, + maxTokens: + Number.parseInt(String(request.maxTokens)) || + getMaxOutputTokensForModel(request.model, false), + } + const shouldStreamToolCalls = request.streamToolCalls ?? false if (request.stream && (!bedrockTools || bedrockTools.length === 0)) { @@ -374,7 +381,7 @@ export const bedrockProvider: ProviderConfig = { modelId: bedrockModelId, messages, system: systemPromptWithSchema.length > 0 ? systemPromptWithSchema : undefined, - inferenceConfig, + inferenceConfig: toolLoopInferenceConfig, toolConfig, }) @@ -620,7 +627,7 @@ export const bedrockProvider: ProviderConfig = { modelId: bedrockModelId, messages: currentMessages, system: systemPromptWithSchema.length > 0 ? systemPromptWithSchema : undefined, - inferenceConfig, + inferenceConfig: toolLoopInferenceConfig, toolConfig: bedrockTools?.length ? { tools: bedrockTools, toolChoice: nextToolChoice } : undefined, @@ -687,7 +694,7 @@ export const bedrockProvider: ProviderConfig = { modelId: bedrockModelId, messages: currentMessages, system: systemPromptWithSchema.length > 0 ? 
systemPromptWithSchema : undefined, - inferenceConfig, + inferenceConfig: toolLoopInferenceConfig, toolConfig: { tools: [structuredOutputTool], toolChoice: { tool: { name: structuredOutputToolName } }, From 6c57663503e184ce67cf711964a53473900f6d27 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 30 Jan 2026 11:26:51 -0800 Subject: [PATCH 2/5] fix --- apps/sim/providers/bedrock/index.ts | 25 +++++++------------------ apps/sim/providers/models.ts | 11 ++++++----- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/apps/sim/providers/bedrock/index.ts b/apps/sim/providers/bedrock/index.ts index 83ae98911f..9ca8d17cda 100644 --- a/apps/sim/providers/bedrock/index.ts +++ b/apps/sim/providers/bedrock/index.ts @@ -20,11 +20,7 @@ import { generateToolUseId, getBedrockInferenceProfileId, } from '@/providers/bedrock/utils' -import { - getMaxOutputTokensForModel, - getProviderDefaultModel, - getProviderModels, -} from '@/providers/models' +import { getProviderDefaultModel, getProviderModels } from '@/providers/models' import type { ProviderConfig, ProviderRequest, @@ -261,18 +257,11 @@ export const bedrockProvider: ProviderConfig = { const systemPromptWithSchema = systemContent - const inferenceConfig = { + const inferenceConfig: { temperature: number; maxTokens?: number } = { temperature: Number.parseFloat(String(request.temperature ?? 0.7)), - maxTokens: - Number.parseInt(String(request.maxTokens)) || - getMaxOutputTokensForModel(request.model, request.stream ?? false), } - - const toolLoopInferenceConfig = { - ...inferenceConfig, - maxTokens: - Number.parseInt(String(request.maxTokens)) || - getMaxOutputTokensForModel(request.model, false), + if (request.maxTokens != null) { + inferenceConfig.maxTokens = Number.parseInt(String(request.maxTokens)) } const shouldStreamToolCalls = request.streamToolCalls ?? 
false @@ -381,7 +370,7 @@ export const bedrockProvider: ProviderConfig = { modelId: bedrockModelId, messages, system: systemPromptWithSchema.length > 0 ? systemPromptWithSchema : undefined, - inferenceConfig: toolLoopInferenceConfig, + inferenceConfig, toolConfig, }) @@ -627,7 +616,7 @@ export const bedrockProvider: ProviderConfig = { modelId: bedrockModelId, messages: currentMessages, system: systemPromptWithSchema.length > 0 ? systemPromptWithSchema : undefined, - inferenceConfig: toolLoopInferenceConfig, + inferenceConfig: inferenceConfig, toolConfig: bedrockTools?.length ? { tools: bedrockTools, toolChoice: nextToolChoice } : undefined, @@ -694,7 +683,7 @@ export const bedrockProvider: ProviderConfig = { modelId: bedrockModelId, messages: currentMessages, system: systemPromptWithSchema.length > 0 ? systemPromptWithSchema : undefined, - inferenceConfig: toolLoopInferenceConfig, + inferenceConfig: inferenceConfig, toolConfig: { tools: [structuredOutputTool], toolChoice: { tool: { name: structuredOutputToolName } }, diff --git a/apps/sim/providers/models.ts b/apps/sim/providers/models.ts index 5922bf8e72..b89172f270 100644 --- a/apps/sim/providers/models.ts +++ b/apps/sim/providers/models.ts @@ -34,10 +34,15 @@ export interface ModelCapabilities { toolUsageControl?: boolean computerUse?: boolean nativeStructuredOutputs?: boolean + /** + * Max output tokens configuration for Anthropic SDK's streaming timeout workaround. + * The Anthropic SDK throws an error for non-streaming requests that may take >10 minutes. + * This only applies to direct Anthropic API calls, not Bedrock (which uses AWS SDK). 
+ */ maxOutputTokens?: { /** Maximum tokens for streaming requests */ max: number - /** Safe default for non-streaming requests (to avoid timeout issues) */ + /** Safe default for non-streaming requests (to avoid Anthropic SDK timeout errors) */ default: number } reasoningEffort?: { @@ -1709,7 +1714,6 @@ export const PROVIDER_DEFINITIONS: Record = { capabilities: { temperature: { min: 0, max: 1 }, nativeStructuredOutputs: true, - maxOutputTokens: { max: 64000, default: 8192 }, }, contextWindow: 200000, }, @@ -1723,7 +1727,6 @@ export const PROVIDER_DEFINITIONS: Record = { capabilities: { temperature: { min: 0, max: 1 }, nativeStructuredOutputs: true, - maxOutputTokens: { max: 64000, default: 8192 }, }, contextWindow: 200000, }, @@ -1737,7 +1740,6 @@ export const PROVIDER_DEFINITIONS: Record = { capabilities: { temperature: { min: 0, max: 1 }, nativeStructuredOutputs: true, - maxOutputTokens: { max: 64000, default: 8192 }, }, contextWindow: 200000, }, @@ -1751,7 +1753,6 @@ export const PROVIDER_DEFINITIONS: Record = { capabilities: { temperature: { min: 0, max: 1 }, nativeStructuredOutputs: true, - maxOutputTokens: { max: 64000, default: 8192 }, }, contextWindow: 200000, }, From e09879dd9544c50154695057f0569146c5939beb Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 30 Jan 2026 11:31:29 -0800 Subject: [PATCH 3/5] fix inference config reference --- apps/sim/providers/bedrock/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/sim/providers/bedrock/index.ts b/apps/sim/providers/bedrock/index.ts index 9ca8d17cda..57935394a5 100644 --- a/apps/sim/providers/bedrock/index.ts +++ b/apps/sim/providers/bedrock/index.ts @@ -616,7 +616,7 @@ export const bedrockProvider: ProviderConfig = { modelId: bedrockModelId, messages: currentMessages, system: systemPromptWithSchema.length > 0 ? systemPromptWithSchema : undefined, - inferenceConfig: inferenceConfig, + inferenceConfig, toolConfig: bedrockTools?.length ? 
{ tools: bedrockTools, toolChoice: nextToolChoice } : undefined, @@ -683,7 +683,7 @@ export const bedrockProvider: ProviderConfig = { modelId: bedrockModelId, messages: currentMessages, system: systemPromptWithSchema.length > 0 ? systemPromptWithSchema : undefined, - inferenceConfig: inferenceConfig, + inferenceConfig, toolConfig: { tools: [structuredOutputTool], toolChoice: { tool: { name: structuredOutputToolName } }, From 5b0a9413d39bf919df0d7a6aedebd9b835c72a7d Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 30 Jan 2026 11:34:46 -0800 Subject: [PATCH 4/5] add to docs --- apps/docs/content/docs/en/blocks/agent.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/docs/content/docs/en/blocks/agent.mdx b/apps/docs/content/docs/en/blocks/agent.mdx index 377cb8c1c7..fa96784044 100644 --- a/apps/docs/content/docs/en/blocks/agent.mdx +++ b/apps/docs/content/docs/en/blocks/agent.mdx @@ -58,7 +58,7 @@ Controls response randomness and creativity: ### Max Output Tokens -Controls the maximum length of the model's response. For Anthropic models, Sim uses reliable defaults: streaming executions use the model's full capacity (e.g. 64,000 tokens for Claude 4.5), while non-streaming executions default to 8,192 to avoid timeout issues. For long-form content generation via API, explicitly set a higher value. +Controls the maximum length of the model's response. For Anthropic models, Sim uses reliable defaults: streaming executions use the model's full capacity (e.g. 64,000 tokens for Claude 4.5), while non-streaming executions default to 8,192 to avoid timeout issues. When using tools with Anthropic models, intermediate tool-calling requests are capped at 8,192 tokens to avoid SDK timeout errors: a lower configured max tokens value is still honored, but a higher one applies only to the final streaming response, which uses your full configured limit. This only affects Anthropic's direct API; AWS Bedrock handles this automatically. 
For long-form content generation via API, explicitly set a higher value. ### API Key From c2aef6a3a555df5d9c3955c2a2a75df2724258aa Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 30 Jan 2026 11:55:46 -0800 Subject: [PATCH 5/5] make it min between max tokens --- apps/sim/providers/anthropic/index.ts | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/apps/sim/providers/anthropic/index.ts b/apps/sim/providers/anthropic/index.ts index ad56c5efcb..ebb111ffc4 100644 --- a/apps/sim/providers/anthropic/index.ts +++ b/apps/sim/providers/anthropic/index.ts @@ -302,9 +302,12 @@ export const anthropicProvider: ProviderConfig = { const providerStartTime = Date.now() const providerStartTimeISO = new Date(providerStartTime).toISOString() - const nonStreamingMaxTokens = - Number.parseInt(String(request.maxTokens)) || - getMaxOutputTokensForModel(request.model, false) + // Cap intermediate calls at non-streaming limit to avoid SDK timeout errors, + // but allow users to set lower values if desired + const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false) + const nonStreamingMaxTokens = request.maxTokens + ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit) + : nonStreamingLimit const intermediatePayload = { ...payload, max_tokens: nonStreamingMaxTokens } try { @@ -679,8 +682,12 @@ export const anthropicProvider: ProviderConfig = { const providerStartTime = Date.now() const providerStartTimeISO = new Date(providerStartTime).toISOString() - const toolLoopMaxTokens = - Number.parseInt(String(request.maxTokens)) || getMaxOutputTokensForModel(request.model, false) + // Cap intermediate calls at non-streaming limit to avoid SDK timeout errors, + // but allow users to set lower values if desired + const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false) + const toolLoopMaxTokens = request.maxTokens + ? 
Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit) + : nonStreamingLimit const toolLoopPayload = { ...payload, max_tokens: toolLoopMaxTokens } try {