Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion src/crates/adapters/ai-adapters/src/client/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,15 @@ use std::time::Duration;
use tokio::sync::mpsc;

const BASE_RETRY_DELAY_MS: u64 = 500;
const MAX_RETRY_AFTER_DELAY_MS: u64 = 10_000;
/// Upper bound, in milliseconds, on the delay applied for a `Retry-After` header value.
///
/// Some providers (especially TPM-based rate limits on aggregator platforms
/// like NVIDIA's integrate API) return large `Retry-After` values of 30-60
/// seconds. The previous 10s cap caused tight retry loops that burned through
/// the user's request budget without actually waiting for the TPM window to
/// reset. 60s is a reasonable upper bound that respects provider guidance
/// without locking the user into an interminable stall.
const MAX_RETRY_AFTER_DELAY_MS: u64 = 60_000;

enum StreamSendOutcome {
Response(reqwest::Response),
Expand Down Expand Up @@ -328,6 +336,14 @@ mod tests {
);
}

/// A `Retry-After` value of 45 seconds must come back as 45 000 ms, unchanged.
#[test]
fn retry_after_preserves_sub_cap_values() {
    let forty_five_seconds = HeaderValue::from_static("45");
    let mut header_map = HeaderMap::new();
    header_map.insert(RETRY_AFTER, forty_five_seconds);

    let delay_ms = retry_after_delay_ms(&header_map);

    assert_eq!(delay_ms, Some(45_000));
}

#[test]
fn retry_delay_falls_back_to_exponential_backoff() {
let headers = HeaderMap::new();
Expand Down
87 changes: 86 additions & 1 deletion src/crates/assembly/core/src/agentic/execution/round_executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,21 @@ impl RoundExecutor {
max_attempts, err_msg
)));
}
return Err(BitFunError::AIClient(err_msg));
// Non-transient errors (429 budget exhausted, context
// overflow, auth, etc.) are returned directly. The error
// message is classified downstream via
// `BitFunError::error_category()` into `ErrorCategory` for
// frontend recovery actions (wait_and_retry, switch_model,
// etc.).
let error = BitFunError::AIClient(err_msg);
warn!(
"AI request terminal failure: session_id={}, round_id={}, category={:?}, error={}",
context.session_id,
round_id,
error.error_category(),
error
);
return Err(error);
}
};

Expand Down Expand Up @@ -1095,9 +1109,29 @@ impl RoundExecutor {
Self::RETRY_BASE_DELAY_MS * (1u64 << attempt_index.min(3))
}

/// Check whether an error message represents a transient (retryable) condition.
///
/// Errors that already exhausted the SSE-layer retry budget (e.g. "failed
/// after N attempts:" or "Stream retry budget exhausted") are **not**
/// transient from the round-executor perspective — the SSE transport layer
/// already retried with exponential backoff and `Retry-After` parsing.
/// Re-entering the send loop would multiply attempts (10 × 10 = 100) and
/// hold the user in a long silent stall.
fn is_transient_network_error(error_message: &str) -> bool {
let msg = error_message.to_lowercase();

// The SSE layer already exhausted its own retry budget — do not
// re-enter another round of attempts from the round executor.
// We require BOTH "failed after " and "attempts:" to co-occur,
// which uniquely identifies the SSE/round-executor budget-exhausted
// format without catching generic errors like "failed after timeout".
if msg.contains("failed after ") && msg.contains("attempts:") {
return false;
}
if msg.contains("retry budget exhausted") {
return false;
}

let non_retryable_keywords = [
"invalid api key",
"unauthorized",
Expand Down Expand Up @@ -1471,4 +1505,55 @@ mod tests {
assert!(trace.partial_recovery_reason.is_none());
assert_eq!(trace.error.as_deref(), Some("request failed"));
}

/// Rate-limit style messages must be classified as transient.
#[test]
fn is_transient_error_treats_rate_limit_as_transient() {
    let is_transient = RoundExecutor::is_transient_network_error;

    let http_429 = "OpenAI Streaming API error 429 Too Many Requests";
    assert!(is_transient(http_429));

    let plain_rate_limit = "rate limit exceeded";
    assert!(is_transient(plain_rate_limit));
}

/// Connection-level failures and timeouts must be classified as transient.
#[test]
fn is_transient_error_treats_network_errors_as_transient() {
    let is_transient = RoundExecutor::is_transient_network_error;

    let connection_reset = "connection reset by peer";
    assert!(is_transient(connection_reset));

    let timed_out = "timeout";
    assert!(is_transient(timed_out));
}

/// A context-overflow message must not be classified as transient.
#[test]
fn is_transient_error_treats_context_overflow_as_non_transient() {
    let context_overflow = "prompt is too long";

    let transient = RoundExecutor::is_transient_network_error(context_overflow);

    assert!(!transient);
}

/// Messages reporting an already-exhausted retry budget must not be transient.
#[test]
fn is_transient_error_treats_budget_exhausted_as_non_transient() {
    // Once the SSE layer has used up its own retry budget, the round executor
    // must not start a second series of attempts on top of it (that would
    // compound to 10×10 = 100 retries).
    let sse_budget_exhausted = concat!(
        "OpenAI Streaming API failed after 10 attempts: ",
        "OpenAI Streaming API error 429 Too Many Requests",
    );
    let stream_budget_exhausted = "Stream retry budget exhausted after 10 attempts: timeout";

    assert!(!RoundExecutor::is_transient_network_error(
        sse_budget_exhausted
    ));
    assert!(!RoundExecutor::is_transient_network_error(
        stream_budget_exhausted
    ));
}

/// "failed after " alone, without "attempts:", must still count as transient.
#[test]
fn is_transient_error_does_not_misclassify_failed_after_without_attempts() {
    // Only the combination of "failed after " and "attempts:" marks a
    // budget-exhausted error; "failed after " by itself may describe a
    // legitimately retryable transient failure.
    let is_transient = RoundExecutor::is_transient_network_error;

    let reset_mid_stream = "stream failed after connection reset";
    assert!(is_transient(reset_mid_stream));

    let timed_out_request = "request failed after timeout";
    assert!(is_transient(timed_out_request));
}
}
2 changes: 1 addition & 1 deletion src/web-ui/src/locales/en-US/errors.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
"networkError": "AI response interrupted",
"networkErrorSuggestion": "The connection was unstable or the model server closed it prematurely. Check your network and retry",
"rateLimit": "Model rate limit exceeded",
"rateLimitSuggestion": "Please retry later, or switch to a different model in settings",
"rateLimitSuggestion": "The provider rejected this request due to rate limiting. This may be caused by too many requests in a short period, or by tokens-per-minute (TPM) limits on certain provider platforms. Wait a moment and retry, or switch to a different model in settings",
"authError": "API authentication failed",
"authErrorSuggestion": "The API key is invalid or expired. Check the key in your model configuration",
"contextOverflow": "Conversation exceeds context limit",
Expand Down
2 changes: 1 addition & 1 deletion src/web-ui/src/locales/zh-CN/errors.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
"networkError": "AI 响应中断",
"networkErrorSuggestion": "网络连接不稳定或模型服务端提前关闭了连接,请检查网络后重试",
"rateLimit": "模型请求频率超限",
"rateLimitSuggestion": "请稍后重试,或在模型设置中切换到其他模型",
"rateLimitSuggestion": "服务商因频率限制拒绝了本次请求。可能是短时间内请求过多,也可能是某些服务商平台存在每分钟 token 数(TPM)限制。请稍候重试,或在设置中切换到其他模型",
"authError": "API 认证失败",
"authErrorSuggestion": "API 密钥无效或已过期,请检查模型配置中的密钥设置",
"contextOverflow": "对话内容超出限制",
Expand Down
2 changes: 1 addition & 1 deletion src/web-ui/src/locales/zh-TW/errors.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
"networkError": "AI 響應中斷",
"networkErrorSuggestion": "網絡連接不穩定或模型服務端提前關閉了連接,請檢查網絡後重試",
"rateLimit": "模型請求頻率超限",
"rateLimitSuggestion": "請稍後重試,或在模型設置中切換到其他模型",
"rateLimitSuggestion": "服務商因頻率限制拒絕了本次請求。可能是短時間內請求過多,也可能是某些服務商平台存在每分鐘 token 數(TPM)限制。請稍候重試,或在設定中切換到其他模型",
"authError": "API 認證失敗",
"authErrorSuggestion": "API 密鑰無效或已過期,請檢查模型設定中的密鑰設置",
"contextOverflow": "對話內容超出限制",
Expand Down
Loading