docs(llm): 增补 LLM 调用周期控制实施方案
This commit is contained in:
@@ -58,12 +58,21 @@ pub enum Role { User, Assistant, System, Tool }
|
|||||||
|
|
||||||
pub enum ContentBlock {
|
pub enum ContentBlock {
|
||||||
Text { text: String },
|
Text { text: String },
|
||||||
ToolUse { id: String, name: String, input: serde_json::Value },
|
ImageUrl { url: String }, // 多模态支持
|
||||||
ToolResult { tool_use_id: String, content: String, is_error: bool },
|
ToolUse { id: String, name: String, input: Value }, // 预留,暂不实现 tool 自动执行循环
|
||||||
|
ToolResult { tool_use_id: String, content: String }, // 预留,暂不实现 tool 自动执行循环
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Message { pub role: Role, pub content: Vec<ContentBlock> }
|
pub struct Message {
|
||||||
pub struct ToolDefinition { pub name: String, pub description: String, pub input_schema: Value }
|
pub role: Role,
|
||||||
|
pub content: Vec<ContentBlock>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ToolDefinition {
|
||||||
|
pub name: String,
|
||||||
|
pub description: String,
|
||||||
|
pub input_schema: Value,
|
||||||
|
}
|
||||||
|
|
||||||
pub struct ChatRequest {
|
pub struct ChatRequest {
|
||||||
pub model: String,
|
pub model: String,
|
||||||
@@ -72,6 +81,7 @@ pub struct ChatRequest {
|
|||||||
pub tools: Vec<ToolDefinition>,
|
pub tools: Vec<ToolDefinition>,
|
||||||
pub max_tokens: Option<u32>,
|
pub max_tokens: Option<u32>,
|
||||||
pub temperature: Option<f32>,
|
pub temperature: Option<f32>,
|
||||||
|
pub extra_body: Option<Value>, // 用于 enable_thinking 等扩展参数(如阿里云 DashScope)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ChatResponse {
|
pub struct ChatResponse {
|
||||||
@@ -80,23 +90,49 @@ pub struct ChatResponse {
|
|||||||
pub stop_reason: Option<StopReason>,
|
pub stop_reason: Option<StopReason>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum StopReason { Stop, ToolUse, MaxTokens, ContentFilter, Other(String) }
|
pub enum StopReason {
|
||||||
|
Stop,
|
||||||
|
ToolUse, // 预留,暂不实现 tool 自动执行循环
|
||||||
|
MaxTokens, // 达到 max_tokens 限制
|
||||||
|
ContentFilter,
|
||||||
|
Length, // 同 MaxTokens,兼容某些 API 的 finish_reason
|
||||||
|
Other(String),
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> **注意**:`ContentBlock::ToolUse`、`ContentBlock::ToolResult` 以及 `StopReason::ToolUse` 均为预留类型,当前版本暂不实现 tool 自动执行循环。
|
||||||
|
|
||||||
### 2. llm/error.rs — 错误体系
|
### 2. llm/error.rs — 错误体系
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
#[derive(thiserror::Error)]
|
#[derive(thiserror::Error)]
|
||||||
pub enum LlmError {
|
pub enum LlmError {
|
||||||
|
#[error("认证失败: {0}")]
|
||||||
Authentication(String),
|
Authentication(String),
|
||||||
|
|
||||||
|
#[error("限流{retry_after:?}")]
|
||||||
RateLimit { retry_after: Option<Duration> },
|
RateLimit { retry_after: Option<Duration> },
|
||||||
|
|
||||||
|
#[error("请求失败({status}): {body}")]
|
||||||
Request { status: u16, body: String },
|
Request { status: u16, body: String },
|
||||||
|
|
||||||
|
#[error("请求超时({duration:?})")]
|
||||||
Timeout { duration: Duration },
|
Timeout { duration: Duration },
|
||||||
|
|
||||||
|
#[error("流式响应错误: {0}")]
|
||||||
Stream(String),
|
Stream(String),
|
||||||
|
|
||||||
|
#[error("上下文超限(actual:{actual}, limit:{limit})")]
|
||||||
ContextLength { actual: u32, limit: u32 },
|
ContextLength { actual: u32, limit: u32 },
|
||||||
|
|
||||||
|
#[error("LLM 调用失败: {0}")]
|
||||||
|
Other(String),
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**可重试错误**:`RateLimit`、`Timeout`,以及状态码为 `429` 或 `5xx` 的 `Request`。
|
||||||
|
**不可重试**:`Authentication`、`ContextLength`、状态码为 `4xx`(除 429)的 `Request`,以及 `Stream`、`Other`。
|
||||||
|
|
||||||
### 3. llm/provider.rs — Provider 接口
|
### 3. llm/provider.rs — Provider 接口
|
||||||
|
|
||||||
trait 单独存放,具体实现在 `provider/` 子模块。
|
trait 单独存放,具体实现在 `provider/` 子模块。
|
||||||
@@ -114,7 +150,6 @@ pub trait LlmProvider: Send + Sync {
|
|||||||
#### 3.1 llm/provider/openai.rs — OpenAI 兼容实现
|
#### 3.1 llm/provider/openai.rs — OpenAI 兼容实现
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
// llm/provider/openai.rs
|
|
||||||
use super::LlmProvider;
|
use super::LlmProvider;
|
||||||
|
|
||||||
pub struct OpenaiProvider {
|
pub struct OpenaiProvider {
|
||||||
@@ -127,12 +162,15 @@ pub struct OpenaiProvider {
|
|||||||
impl LlmProvider for OpenaiProvider {
|
impl LlmProvider for OpenaiProvider {
|
||||||
async fn chat(&self, request: ChatRequest) -> Result<ChatResponse, LlmError> {
|
async fn chat(&self, request: ChatRequest) -> Result<ChatResponse, LlmError> {
|
||||||
// POST {base_url}/chat/completions
|
// POST {base_url}/chat/completions
|
||||||
|
// extra_body 会被合并到请求体中(如 enable_thinking)
|
||||||
// 解析 response → ChatResponse
|
// 解析 response → ChatResponse
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> **注意**:`extra_body` 中的字段需与目标 API 兼容。部分 API(如阿里云 DashScope)通过 `extra_body` 传递扩展参数(如 `enable_thinking`)。
|
||||||
|
|
||||||
后续新增实现: `provider/anthropic.rs`、`provider/azure.rs` 等。
|
后续新增实现: `provider/anthropic.rs`、`provider/azure.rs` 等。
|
||||||
|
|
||||||
### 4. llm/cycle.rs — 生命周期引擎
|
### 4. llm/cycle.rs — 生命周期引擎
|
||||||
@@ -189,7 +227,14 @@ pub struct RetryConfig {
|
|||||||
|
|
||||||
指数退避 + jitter: `backoff = min(base * 2^attempt, max_delay)`,`delay = backoff + random(0, backoff * jitter_factor)`
|
指数退避 + jitter: `backoff = min(base * 2^attempt, max_delay)`,`delay = backoff + random(0, backoff * jitter_factor)`
|
||||||
|
|
||||||
可重试错误: RateLimit, Timeout, 5xx
|
**可重试错误**: `RateLimit`、`Timeout`,以及状态码为 `429` 或 `5xx` 的 `Request`
|
||||||
|
**不可重试**: `Authentication`、状态码 `4xx`(除 429)、`ContextLength`
|
||||||
|
|
||||||
|
`should_retry(err: &LlmError) -> bool` 判断逻辑:
|
||||||
|
- `RateLimit` → true
|
||||||
|
- `Timeout` → true
|
||||||
|
- `Request { status, .. }` → status >= 500 || status == 429
|
||||||
|
- 其他 → false
|
||||||
|
|
||||||
#### 4.2 llm/cycle/usage.rs — Token 用量
|
#### 4.2 llm/cycle/usage.rs — Token 用量
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user