diff --git a/specs/llm-call-lifecycle.md b/specs/llm-call-lifecycle.md index f3a36e0..100a549 100644 --- a/specs/llm-call-lifecycle.md +++ b/specs/llm-call-lifecycle.md @@ -58,12 +58,21 @@ pub enum Role { User, Assistant, System, Tool } pub enum ContentBlock { Text { text: String }, - ToolUse { id: String, name: String, input: serde_json::Value }, - ToolResult { tool_use_id: String, content: String, is_error: bool }, + ImageUrl { url: String }, // 多模态支持 + ToolUse { id: String, name: String, input: Value }, // 预留,暂不实现 tool 自动执行循环 + ToolResult { tool_use_id: String, content: String }, // 预留,暂不实现 tool 自动执行循环 } -pub struct Message { pub role: Role, pub content: Vec } -pub struct ToolDefinition { pub name: String, pub description: String, pub input_schema: Value } +pub struct Message { + pub role: Role, + pub content: Vec, +} + +pub struct ToolDefinition { + pub name: String, + pub description: String, + pub input_schema: Value, +} pub struct ChatRequest { pub model: String, @@ -72,6 +81,7 @@ pub struct ChatRequest { pub tools: Vec, pub max_tokens: Option, pub temperature: Option, + pub extra_body: Option, // 用于 enable_thinking 等扩展参数(如阿里云 DashScope) } pub struct ChatResponse { @@ -80,23 +90,49 @@ pub struct ChatResponse { pub stop_reason: Option, } -pub enum StopReason { Stop, ToolUse, MaxTokens, ContentFilter, Other(String) } +pub enum StopReason { + Stop, + ToolUse, // 预留,暂不实现 tool 自动执行循环 + MaxTokens, // 达到 max_tokens 限制 + ContentFilter, + Length, // 同 MaxTokens,兼容某些 API 的 finish_reason + Other(String), +} ``` +> **注意**:`ContentBlock::ToolUse`、`ContentBlock::ToolResult` 以及 `StopReason::ToolUse` 均为预留类型,暂不实现 tool 自动执行循环。 + ### 2. 
llm/error.rs — 错误体系 ```rust #[derive(thiserror::Error)] pub enum LlmError { + #[error("认证失败: {0}")] Authentication(String), + + #[error("限流{retry_after:?}")] RateLimit { retry_after: Option }, + + #[error("请求失败({status}): {body}")] Request { status: u16, body: String }, + + #[error("请求超时({duration:?})")] Timeout { duration: Duration }, + + #[error("流式响应错误: {0}")] Stream(String), + + #[error("上下文超限(actual:{actual}, limit:{limit})")] ContextLength { actual: u32, limit: u32 }, + + #[error("LLM 调用失败: {0}")] + Other(String), } ``` +**可重试错误**:`RateLimit`、`Timeout`、`Request` 状态码 `5xx` 或 `429`。 +**不可重试**:`Authentication`、`Request` 状态码 `4xx`(除 429)、`ContextLength`。 + ### 3. llm/provider.rs — Provider 接口 trait 单独存放,具体实现在 `provider/` 子模块。 @@ -114,7 +150,6 @@ pub trait LlmProvider: Send + Sync { #### 3.1 llm/provider/openai.rs — OpenAI 兼容实现 ```rust -// llm/provider/openai.rs use super::LlmProvider; pub struct OpenaiProvider { @@ -127,12 +162,15 @@ pub struct OpenaiProvider { impl LlmProvider for OpenaiProvider { async fn chat(&self, request: ChatRequest) -> Result { // POST {base_url}/chat/completions + // extra_body 会被合并到请求体中(如 enable_thinking) // 解析 response → ChatResponse todo!() } } ``` +> **注意**:`extra_body` 中的字段需与目标 API 兼容。部分 API(如阿里云 DashScope)通过 `extra_body` 传递扩展参数(如 `enable_thinking`)。 + 后续新增实现: `provider/anthropic.rs`、`provider/azure.rs` 等。 ### 4. llm/cycle.rs — 生命周期引擎 @@ -189,7 +227,14 @@ pub struct RetryConfig { 指数退避 + jitter: `delay = min(base * 2^attempt, max_delay) + random(0, delay * jitter_factor)` -可重试错误: RateLimit, Timeout, 5xx +**可重试错误**: `RateLimit`、`Timeout`、`Request` 状态码 `5xx` 或 `429` +**不可重试**: `Authentication`、`Request` 状态码 `4xx`(除 429)、`ContextLength` + +`should_retry(err: &LlmError) -> bool` 判断逻辑: +- `RateLimit` → true +- `Timeout` → true +- `Request { status, .. }` → status >= 500 || status == 429 +- 其他 → false #### 4.2 llm/cycle/usage.rs — Token 用量