Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions src/client/ollama.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,11 @@ impl OllamaClient {
let status = response.status().as_u16();
let body = response.text().await.unwrap_or_default();
error!("Ollama chat failed with status {}: {}", status, body);
return Err(NodeTokenError::Ollama(format!(
"Chat failed: HTTP {} - {}",
status, body
)));
// 用 HttpError 保留 status code, 让 executor 据此判断 is_client_error (B1 修复支撑)
return Err(NodeTokenError::HttpError {
status,
message: body,
});
}

let chat_response: OllamaChatResponse = response.json().await.map_err(|e| {
Expand Down
12 changes: 11 additions & 1 deletion src/protocol/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,10 @@ pub enum NodeTaskResult {
code: String,
/// 错误消息
message: String,
/// 该失败是否由请求本身的问题引起(如模型不支持、参数非法)。
/// 老 server 忽略该字段时仍按通常失败处理, 保留向后兼容。
#[serde(default)]
is_client_error: bool,
},
}

Expand Down Expand Up @@ -709,6 +713,7 @@ mod tests {
result: NodeTaskResult::Failed {
code: "ollama_error".to_string(),
message: "Model not found".to_string(),
is_client_error: false,
},
};
let json = serde_json::to_string(&req).unwrap();
Expand All @@ -718,9 +723,14 @@ mod tests {
assert_eq!(parsed.protocol_version, "node.v1");
assert_eq!(parsed.task_id, task_id);
match parsed.result {
NodeTaskResult::Failed { code, message } => {
NodeTaskResult::Failed {
code,
message,
is_client_error,
} => {
assert_eq!(code, "ollama_error");
assert_eq!(message, "Model not found");
assert!(!is_client_error);
}
NodeTaskResult::Succeeded { .. } => panic!("Expected Failed variant"),
}
Expand Down
74 changes: 69 additions & 5 deletions src/runtime/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,7 @@ impl TaskExecutor {
}
Err(e) => {
error!("Task {} execution failed: {}", task_id, e);
NodeTaskResult::Failed {
code: "ollama_error".to_string(),
message: e.to_string(),
}
classify_ollama_error(&e)
}
};

Expand Down Expand Up @@ -307,6 +304,32 @@ impl TaskExecutor {
}
}

/// Converts an error from an Ollama call into a `NodeTaskResult::Failed`,
/// setting the `is_client_error` flag according to the kind of error:
///
/// - `HttpError` with a status in `400..500` (e.g. "model does not support chat"):
///   code `ollama_http_<status>`, `is_client_error = true`.
/// - `HttpError` with any other status: code `ollama_http_<status>`,
///   `is_client_error = false`.
/// - `Network`: code `ollama_network`, `is_client_error = false`.
/// - Anything else: code `ollama_error`, `is_client_error = false`.
///
/// Per the original author's note (server behavior is not visible from this
/// file): a client error is meant to make the server fail the task terminally
/// without counting it against the node's failure count, while the default
/// (`false`) keeps the requeue + failure-count behavior.
fn classify_ollama_error(err: &NodeTokenError) -> NodeTaskResult {
    match err {
        // A single arm covers every HTTP status; only the flag depends on the range.
        NodeTokenError::HttpError { status, message } => NodeTaskResult::Failed {
            code: format!("ollama_http_{}", status),
            message: message.clone(),
            is_client_error: (400..500).contains(status),
        },
        NodeTokenError::Network(source) => NodeTaskResult::Failed {
            code: "ollama_network".to_string(),
            message: source.to_string(),
            is_client_error: false,
        },
        // Every remaining variant is reported generically and treated as retryable.
        fallback => NodeTaskResult::Failed {
            code: "ollama_error".to_string(),
            message: fallback.to_string(),
            is_client_error: false,
        },
    }
}

// 实现 Clone 以便在 tokio::spawn 中使用
impl Clone for TaskExecutor {
fn clone(&self) -> Self {
Expand Down Expand Up @@ -604,18 +627,59 @@ mod tests {
let result = NodeTaskResult::Failed {
code: "ollama_error".to_string(),
message: error_msg.to_string(),
is_client_error: false,
};

// 验证结果结构
match result {
NodeTaskResult::Failed { code, message } => {
NodeTaskResult::Failed {
code,
message,
is_client_error,
} => {
assert_eq!(code, "ollama_error");
assert_eq!(message, error_msg);
assert!(!is_client_error);
}
NodeTaskResult::Succeeded { .. } => panic!("Expected Failed variant"),
}
}

/// Regression test for fix B1: an HTTP 4xx error must be classified with
/// `is_client_error = true` and carry the status-specific error code.
#[test]
fn test_classify_ollama_4xx_is_client_error() {
    let bad_request = NodeTokenError::HttpError {
        status: 400,
        message: "model does not support chat".to_string(),
    };
    let classified = classify_ollama_error(&bad_request);
    if let NodeTaskResult::Failed {
        is_client_error,
        code,
        ..
    } = classified
    {
        assert!(is_client_error, "4xx must mark is_client_error=true");
        assert_eq!(code, "ollama_http_400");
    } else {
        panic!("expected Failed");
    }
}

/// An HTTP 5xx error is a node-side failure: `is_client_error` must stay
/// `false` so the existing retry behavior is preserved.
#[test]
fn test_classify_ollama_5xx_is_not_client_error() {
    let unavailable = NodeTokenError::HttpError {
        status: 503,
        message: "service unavailable".to_string(),
    };
    let flagged_as_client = match classify_ollama_error(&unavailable) {
        NodeTaskResult::Failed {
            is_client_error, ..
        } => is_client_error,
        _ => panic!("expected Failed"),
    };
    assert!(!flagged_as_client);
}

#[test]
/// 验证任务 deadline 和 grace period 的计算逻辑
fn test_task_deadline_and_grace_period() {
Expand Down
6 changes: 4 additions & 2 deletions tests/integration_error_scenarios.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,17 @@ async fn test_ollama_connection_failure() {
// 验证连接失败
assert!(result.is_err(), "Ollama 连接应该失败");

// 验证错误信息(可能是 Ollama 错误或 HTTP 错误)
// 验证错误信息(HttpError / Ollama / 状态码任一种描述都接受)
let error_msg = format!("{:?}", result.unwrap_err());
assert!(
error_msg.contains("connection")
|| error_msg.contains("refused")
|| error_msg.contains("Connection")
|| error_msg.contains("Ollama")
|| error_msg.contains("503")
|| error_msg.contains("HTTP"),
|| error_msg.contains("502")
|| error_msg.contains("HTTP")
|| error_msg.contains("HttpError"),
"错误信息应该包含 Ollama 或连接失败描述: {}",
error_msg
);
Expand Down
Loading