7 changes: 7 additions & 0 deletions .cursor/commands/council.md
@@ -0,0 +1,7 @@
+Based on the given area of interest, please:
+
+1. Explore the codebase around the given area of interest and gather general information, such as keywords and an architecture overview.
+2. Spawn n=10 task agents (unless specified otherwise) to dig deeper into the codebase around that area of interest; make some of them unconventional for variance.
+3. Once the task agents are done, use the gathered information to do what the user wants.
+
+If the user is in plan mode, use the information to create the plan.
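The fan-out this command describes can be pictured as a small orchestration sketch. This is purely illustrative: `spawnTaskAgent` is an invented stand-in for however the host tool launches task agents, not a real Cursor API.

```ts
// Hypothetical sketch of the council fan-out. spawnTaskAgent is an
// invented placeholder; only the shape of the workflow comes from the
// command file above (survey, then n agents, a few unconventional).
declare function spawnTaskAgent(prompt: string): Promise<string>

async function council(areaOfInterest: string, n = 10): Promise<string[]> {
  // Step 1: gather keywords and an architecture overview first.
  const overview = await spawnTaskAgent(
    `Survey the codebase for "${areaOfInterest}": keywords, architecture overview.`
  )
  // Step 2: most agents dig in directly; the last two take
  // unconventional angles for variance.
  const prompts = Array.from({ length: n }, (_, i) =>
    i < n - 2
      ? `Dig deeper into "${areaOfInterest}". Context: ${overview}`
      : `Investigate "${areaOfInterest}" from an unconventional angle. Context: ${overview}`
  )
  return Promise.all(prompts.map((p) => spawnTaskAgent(p)))
}
```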
2 changes: 1 addition & 1 deletion apps/docs/content/docs/en/blocks/agent.mdx
@@ -58,7 +58,7 @@ Controls response randomness and creativity:

### Max Output Tokens

-Controls the maximum length of the model's response. For Anthropic models, Sim uses reliable defaults: streaming executions use the model's full capacity (e.g. 64,000 tokens for Claude 4.5), while non-streaming executions default to 8,192 to avoid timeout issues. For long-form content generation via API, explicitly set a higher value.
+Controls the maximum length of the model's response. For Anthropic models, Sim uses reliable defaults: streaming executions use the model's full capacity (e.g. 64,000 tokens for Claude 4.5), while non-streaming executions default to 8,192 to avoid timeout issues. When using tools with Anthropic models, intermediate tool-calling requests are capped at 8,192 tokens to avoid SDK timeout errors, regardless of your configured max tokens; the final streaming response uses your full configured limit. This only affects Anthropic's direct API; AWS Bedrock handles this automatically. For long-form content generation via API, explicitly set a higher value.

### API Key

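The Max Output Tokens behavior documented above reduces to a small rule, sketched below. This is a worked example of the documented behavior, not Sim's actual code; the 64,000 and 8,192 figures come from the doc paragraph itself.

```ts
// Sketch of the documented capping rule for Anthropic models, assuming
// Claude 4.5-style limits: 64,000 streaming tokens, 8,192 non-streaming.
const STREAMING_MAX = 64_000
const NON_STREAMING_DEFAULT = 8_192

function effectiveMaxTokens(
  configuredMaxTokens: number | undefined,
  isFinalStreamingResponse: boolean
): number {
  if (isFinalStreamingResponse) {
    // The final streaming response honors the user's configured limit,
    // falling back to the model's full streaming capacity.
    return configuredMaxTokens ?? STREAMING_MAX
  }
  // Intermediate tool-calling requests are capped so the Anthropic SDK's
  // long-request timeout guard never trips, but a lower user value wins.
  return Math.min(configuredMaxTokens ?? NON_STREAMING_DEFAULT, NON_STREAMING_DEFAULT)
}

effectiveMaxTokens(32_000, false) // 8192: intermediate tool call is capped
effectiveMaxTokens(32_000, true) // 32000: final streaming response
effectiveMaxTokens(4_096, false) // 4096: lower user values are respected
```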
28 changes: 22 additions & 6 deletions apps/sim/providers/anthropic/index.ts
@@ -302,13 +302,21 @@ export const anthropicProvider: ProviderConfig = {
const providerStartTime = Date.now()
const providerStartTimeISO = new Date(providerStartTime).toISOString()

+// Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
+// but allow users to set lower values if desired
+const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
+const nonStreamingMaxTokens = request.maxTokens
+  ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
+  : nonStreamingLimit
+const intermediatePayload = { ...payload, max_tokens: nonStreamingMaxTokens }
+
try {
const initialCallTime = Date.now()
-const originalToolChoice = payload.tool_choice
+const originalToolChoice = intermediatePayload.tool_choice
const forcedTools = preparedTools?.forcedTools || []
let usedForcedTools: string[] = []

-let currentResponse = await anthropic.messages.create(payload)
+let currentResponse = await anthropic.messages.create(intermediatePayload)
const firstResponseTime = Date.now() - initialCallTime

let content = ''
@@ -491,7 +499,7 @@ export const anthropicProvider: ProviderConfig = {
toolsTime += thisToolsTime

const nextPayload = {
-  ...payload,
+  ...intermediatePayload,
messages: currentMessages,
}

@@ -674,13 +682,21 @@ export const anthropicProvider: ProviderConfig = {
const providerStartTime = Date.now()
const providerStartTimeISO = new Date(providerStartTime).toISOString()

+// Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
+// but allow users to set lower values if desired
+const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
+const toolLoopMaxTokens = request.maxTokens
+  ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
+  : nonStreamingLimit
+const toolLoopPayload = { ...payload, max_tokens: toolLoopMaxTokens }
+
try {
const initialCallTime = Date.now()
-const originalToolChoice = payload.tool_choice
+const originalToolChoice = toolLoopPayload.tool_choice
const forcedTools = preparedTools?.forcedTools || []
let usedForcedTools: string[] = []

-let currentResponse = await anthropic.messages.create(payload)
+let currentResponse = await anthropic.messages.create(toolLoopPayload)
const firstResponseTime = Date.now() - initialCallTime

let content = ''
@@ -867,7 +883,7 @@ export const anthropicProvider: ProviderConfig = {
toolsTime += thisToolsTime

const nextPayload = {
-  ...payload,
+  ...toolLoopPayload,
messages: currentMessages,
}

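Condensed, the pattern this diff applies in both code paths looks like the sketch below. It is hand-written for illustration: the tool-execution loop body is elided, and `hasToolCalls` and `executeToolsAndAppend` are hypothetical helpers standing in for the provider's real loop logic, while `request`, `payload`, and `anthropic` come from the surrounding provider code.

```ts
// Illustration of the intermediate-payload pattern from the diff above.
// Every tool-loop request is capped at the non-streaming limit so the
// Anthropic SDK's long-request timeout guard never fires; users may
// still configure a lower limit of their own.
const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
const intermediateMaxTokens = request.maxTokens
  ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
  : nonStreamingLimit
const intermediatePayload = { ...payload, max_tokens: intermediateMaxTokens }

let currentResponse = await anthropic.messages.create(intermediatePayload)
while (hasToolCalls(currentResponse)) {
  const currentMessages = await executeToolsAndAppend(currentResponse)
  // Follow-up calls spread the capped payload, not the original one,
  // so the cap survives every iteration of the tool loop.
  currentResponse = await anthropic.messages.create({
    ...intermediatePayload,
    messages: currentMessages,
  })
}
```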
14 changes: 5 additions & 9 deletions apps/sim/providers/bedrock/index.ts
@@ -20,11 +20,7 @@ import {
generateToolUseId,
getBedrockInferenceProfileId,
} from '@/providers/bedrock/utils'
-import {
-  getMaxOutputTokensForModel,
-  getProviderDefaultModel,
-  getProviderModels,
-} from '@/providers/models'
+import { getProviderDefaultModel, getProviderModels } from '@/providers/models'
import type {
ProviderConfig,
ProviderRequest,
@@ -261,11 +257,11 @@ export const bedrockProvider: ProviderConfig = {

const systemPromptWithSchema = systemContent

-const inferenceConfig = {
+const inferenceConfig: { temperature: number; maxTokens?: number } = {
  temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
-  maxTokens:
-    Number.parseInt(String(request.maxTokens)) ||
-    getMaxOutputTokensForModel(request.model, request.stream ?? false),
}
+if (request.maxTokens != null) {
+  inferenceConfig.maxTokens = Number.parseInt(String(request.maxTokens))
+}

const shouldStreamToolCalls = request.streamToolCalls ?? false
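The Bedrock change is the inverse: the AWS SDK does not share the Anthropic SDK's long-request timeout guard, so no cap is needed, and `maxTokens` is now only sent when the user actually set one. A minimal sketch of the resulting behavior, with `request` standing in for the provider request:

```ts
// maxTokens is included only when the user configured one; otherwise
// the field is omitted entirely and Bedrock's Converse API applies the
// model's own default output limit.
const inferenceConfig: { temperature: number; maxTokens?: number } = {
  temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
}
if (request.maxTokens != null) {
  inferenceConfig.maxTokens = Number.parseInt(String(request.maxTokens))
}
```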
11 changes: 6 additions & 5 deletions apps/sim/providers/models.ts
@@ -34,10 +34,15 @@ export interface ModelCapabilities {
toolUsageControl?: boolean
computerUse?: boolean
nativeStructuredOutputs?: boolean
+/**
+ * Max output tokens configuration for Anthropic SDK's streaming timeout workaround.
+ * The Anthropic SDK throws an error for non-streaming requests that may take >10 minutes.
+ * This only applies to direct Anthropic API calls, not Bedrock (which uses AWS SDK).
+ */
maxOutputTokens?: {
/** Maximum tokens for streaming requests */
max: number
-/** Safe default for non-streaming requests (to avoid timeout issues) */
+/** Safe default for non-streaming requests (to avoid Anthropic SDK timeout errors) */
default: number
}
reasoningEffort?: {
@@ -1709,7 +1714,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
capabilities: {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
-maxOutputTokens: { max: 64000, default: 8192 },
},
contextWindow: 200000,
},
@@ -1723,7 +1727,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
capabilities: {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
-maxOutputTokens: { max: 64000, default: 8192 },
},
contextWindow: 200000,
},
@@ -1737,7 +1740,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
capabilities: {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
-maxOutputTokens: { max: 64000, default: 8192 },
},
contextWindow: 200000,
},
@@ -1751,7 +1753,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
capabilities: {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
-maxOutputTokens: { max: 64000, default: 8192 },
},
contextWindow: 200000,
},
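Taken together, the models.ts changes document the `maxOutputTokens` capability as an Anthropic-SDK-specific workaround and remove it from four model entries. A lookup consuming the capability might look like the sketch below; this is not Sim's `getMaxOutputTokensForModel`, whose real fallback values are not shown in this diff, so the 8,192 default here is illustrative.

```ts
// Hypothetical consumer of the maxOutputTokens capability. Entries that
// omit it (like the four this diff strips it from) fall through to the
// caller-supplied fallback.
interface MaxOutputTokensCapability {
  max: number // ceiling for streaming requests
  default: number // safe value for non-streaming requests
}

function maxOutputTokensFor(
  capability: MaxOutputTokensCapability | undefined,
  streaming: boolean,
  fallback = 8_192
): number {
  if (!capability) return fallback
  // Streaming requests may use the model's full capacity; non-streaming
  // requests stay at the safe default to avoid SDK timeout errors.
  return streaming ? capability.max : capability.default
}
```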