diff --git a/.cursor/commands/council.md b/.cursor/commands/council.md
new file mode 100644
index 0000000000..f754d92f74
--- /dev/null
+++ b/.cursor/commands/council.md
@@ -0,0 +1,7 @@
+Based on the given area of interest, please:
+
+1. Explore the codebase for that area of interest, gathering general information such as keywords and an architecture overview.
+2. Spawn n=10 (unless specified otherwise) task agents to dig deeper into the codebase around that area of interest; some of them should take out-of-the-box angles for variance.
+3. Once the task agents are done, use the gathered information to do what the user wants.
+
+If the user is in plan mode, use the information to create the plan.
\ No newline at end of file
diff --git a/apps/docs/content/docs/en/blocks/agent.mdx b/apps/docs/content/docs/en/blocks/agent.mdx
index 377cb8c1c7..fa96784044 100644
--- a/apps/docs/content/docs/en/blocks/agent.mdx
+++ b/apps/docs/content/docs/en/blocks/agent.mdx
@@ -58,7 +58,7 @@ Controls response randomness and creativity:
 
 ### Max Output Tokens
 
-Controls the maximum length of the model's response. For Anthropic models, Sim uses reliable defaults: streaming executions use the model's full capacity (e.g. 64,000 tokens for Claude 4.5), while non-streaming executions default to 8,192 to avoid timeout issues. For long-form content generation via API, explicitly set a higher value.
+Controls the maximum length of the model's response. For Anthropic models, Sim uses reliable defaults: streaming executions use the model's full capacity (e.g. 64,000 tokens for Claude 4.5), while non-streaming executions default to 8,192 to avoid timeout issues. When using tools with Anthropic models, intermediate tool-calling requests are capped at 8,192 tokens to avoid SDK timeout errors, regardless of your configured max tokens; the final streaming response still uses your full configured limit. This only affects Anthropic's direct API; AWS Bedrock handles this automatically. For long-form content generation via API, explicitly set a higher value.
 
 ### API Key
 
diff --git a/apps/sim/providers/anthropic/index.ts b/apps/sim/providers/anthropic/index.ts
index 29e277825e..ebb111ffc4 100644
--- a/apps/sim/providers/anthropic/index.ts
+++ b/apps/sim/providers/anthropic/index.ts
@@ -302,13 +302,21 @@ export const anthropicProvider: ProviderConfig = {
     const providerStartTime = Date.now()
     const providerStartTimeISO = new Date(providerStartTime).toISOString()
 
+    // Cap intermediate calls at the non-streaming limit to avoid SDK timeout errors,
+    // but allow users to set lower values if desired
+    const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
+    const nonStreamingMaxTokens = request.maxTokens
+      ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
+      : nonStreamingLimit
+    const intermediatePayload = { ...payload, max_tokens: nonStreamingMaxTokens }
+
     try {
       const initialCallTime = Date.now()
-      const originalToolChoice = payload.tool_choice
+      const originalToolChoice = intermediatePayload.tool_choice
       const forcedTools = preparedTools?.forcedTools || []
       let usedForcedTools: string[] = []
 
-      let currentResponse = await anthropic.messages.create(payload)
+      let currentResponse = await anthropic.messages.create(intermediatePayload)
       const firstResponseTime = Date.now() - initialCallTime
 
       let content = ''
@@ -491,7 +499,7 @@ export const anthropicProvider: ProviderConfig = {
           toolsTime += thisToolsTime
 
           const nextPayload = {
-            ...payload,
+            ...intermediatePayload,
             messages: currentMessages,
           }
 
@@ -674,13 +682,21 @@ export const anthropicProvider: ProviderConfig = {
     const providerStartTime = Date.now()
     const providerStartTimeISO = new Date(providerStartTime).toISOString()
 
+    // Cap intermediate calls at the non-streaming limit to avoid SDK timeout errors,
+    // but allow users to set lower values if desired
+    const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
+    const toolLoopMaxTokens = request.maxTokens
+      ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
+      : nonStreamingLimit
+    const toolLoopPayload = { ...payload, max_tokens: toolLoopMaxTokens }
+
     try {
       const initialCallTime = Date.now()
-      const originalToolChoice = payload.tool_choice
+      const originalToolChoice = toolLoopPayload.tool_choice
       const forcedTools = preparedTools?.forcedTools || []
       let usedForcedTools: string[] = []
 
-      let currentResponse = await anthropic.messages.create(payload)
+      let currentResponse = await anthropic.messages.create(toolLoopPayload)
       const firstResponseTime = Date.now() - initialCallTime
 
       let content = ''
@@ -867,7 +883,7 @@ export const anthropicProvider: ProviderConfig = {
           toolsTime += thisToolsTime
 
           const nextPayload = {
-            ...payload,
+            ...toolLoopPayload,
             messages: currentMessages,
           }
 
diff --git a/apps/sim/providers/bedrock/index.ts b/apps/sim/providers/bedrock/index.ts
index 1edee08d81..57935394a5 100644
--- a/apps/sim/providers/bedrock/index.ts
+++ b/apps/sim/providers/bedrock/index.ts
@@ -20,11 +20,7 @@ import {
   generateToolUseId,
   getBedrockInferenceProfileId,
 } from '@/providers/bedrock/utils'
-import {
-  getMaxOutputTokensForModel,
-  getProviderDefaultModel,
-  getProviderModels,
-} from '@/providers/models'
+import { getProviderDefaultModel, getProviderModels } from '@/providers/models'
 import type {
   ProviderConfig,
   ProviderRequest,
@@ -261,11 +257,11 @@ export const bedrockProvider: ProviderConfig = {
 
     const systemPromptWithSchema = systemContent
 
-    const inferenceConfig = {
+    const inferenceConfig: { temperature: number; maxTokens?: number } = {
       temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
-      maxTokens:
-        Number.parseInt(String(request.maxTokens)) ||
-        getMaxOutputTokensForModel(request.model, request.stream ?? false),
+    }
+    if (request.maxTokens != null) {
+      inferenceConfig.maxTokens = Number.parseInt(String(request.maxTokens))
     }
 
     const shouldStreamToolCalls = request.streamToolCalls ?? false
diff --git a/apps/sim/providers/models.ts b/apps/sim/providers/models.ts
index 5922bf8e72..b89172f270 100644
--- a/apps/sim/providers/models.ts
+++ b/apps/sim/providers/models.ts
@@ -34,10 +34,15 @@ export interface ModelCapabilities {
   toolUsageControl?: boolean
   computerUse?: boolean
   nativeStructuredOutputs?: boolean
+  /**
+   * Max output tokens configuration for the Anthropic SDK's streaming timeout workaround.
+   * The Anthropic SDK throws an error for non-streaming requests that may take >10 minutes.
+   * This only applies to direct Anthropic API calls, not Bedrock (which uses the AWS SDK).
+   */
   maxOutputTokens?: {
     /** Maximum tokens for streaming requests */
     max: number
-    /** Safe default for non-streaming requests (to avoid timeout issues) */
+    /** Safe default for non-streaming requests (to avoid Anthropic SDK timeout errors) */
     default: number
   }
   reasoningEffort?: {
@@ -1709,7 +1714,6 @@ export const PROVIDER_DEFINITIONS: Record = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
-        maxOutputTokens: { max: 64000, default: 8192 },
       },
       contextWindow: 200000,
     },
@@ -1723,7 +1727,6 @@ export const PROVIDER_DEFINITIONS: Record = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
-        maxOutputTokens: { max: 64000, default: 8192 },
       },
       contextWindow: 200000,
     },
@@ -1737,7 +1740,6 @@ export const PROVIDER_DEFINITIONS: Record = {
      capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
-        maxOutputTokens: { max: 64000, default: 8192 },
       },
       contextWindow: 200000,
     },
@@ -1751,7 +1753,6 @@ export const PROVIDER_DEFINITIONS: Record = {
       capabilities: {
         temperature: { min: 0, max: 1 },
         nativeStructuredOutputs: true,
-        maxOutputTokens: { max: 64000, default: 8192 },
       },
       contextWindow: 200000,
     },
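To make the capping behaviour above easier to follow, here is a minimal sketch of the token-limit selection, assuming Anthropic-style limits of `{ max: 64000, default: 8192 }`. `MaxOutputTokens` mirrors the `maxOutputTokens` capability shape in `apps/sim/providers/models.ts`, but `effectiveMaxTokens` is a hypothetical helper that condenses the logic from `apps/sim/providers/anthropic/index.ts`, not an actual Sim function:

```ts
// Illustrative only: condenses the capping logic this diff adds into one function.
interface MaxOutputTokens {
  max: number // full capacity, used for streaming responses
  default: number // safe cap for non-streaming requests (SDK timeout workaround)
}

function effectiveMaxTokens(
  limits: MaxOutputTokens,
  requestedMaxTokens: number | undefined,
  isIntermediateToolCall: boolean
): number {
  if (isIntermediateToolCall) {
    // Intermediate tool-calling requests are non-streaming, so they are
    // capped at the non-streaming default; a lower user value still wins.
    return requestedMaxTokens !== undefined
      ? Math.min(requestedMaxTokens, limits.default)
      : limits.default
  }
  // The final streaming response honors the user's configured limit,
  // up to the model's full streaming capacity.
  return requestedMaxTokens !== undefined
    ? Math.min(requestedMaxTokens, limits.max)
    : limits.max
}

// With limits = { max: 64000, default: 8192 }:
//   effectiveMaxTokens(limits, 32000, true)  // -> 8192  (intermediate call capped)
//   effectiveMaxTokens(limits, 4096, true)   // -> 4096  (lower user value respected)
//   effectiveMaxTokens(limits, 32000, false) // -> 32000 (final streaming response)
```

The key design point is that only the intermediate, non-streaming calls are capped; Bedrock is unaffected because it goes through the AWS SDK rather than the Anthropic SDK, which is why the Bedrock provider drops `getMaxOutputTokensForModel` entirely and only forwards a user-supplied `maxTokens`.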