diff --git a/specs/llm-call-lifecycle.md b/specs/llm-call-lifecycle.md new file mode 100644 index 0000000..f3a36e0 --- /dev/null +++ b/specs/llm-call-lifecycle.md @@ -0,0 +1,233 @@ +# LLM 调用周期控制 — 实施方案 + +> 参考实现: [HKUDS/OpenHarness](https://github.com/HKUDS/OpenHarness) + +## 目标 + +实现大模型基础调用周期控制,作为 agcore 的核心底层件。 + +## 范围 + +- 仅支持 OpenAI-compatible API (`POST /v1/chat/completions`) +- 仅非流式调用(后续可扩展流式) +- 支持传入 tool definitions 和解析 tool_use response,但**不含 tool 自动执行循环** +- 单次请求-响应周期控制 + +## 领域模块结构 + +所有 LLM 调用周期相关代码归入 `llm` 领域目录,未来其他功能(工具、记忆、提示词等)以同样方式组织。 + +``` +src/ + lib.rs # crate 根 + llm.rs # mod llm — 领域根(声明 + 重导出) + llm/ + types.rs # llm::types — Message, ContentBlock, ChatRequest/Response, ToolDefinition + error.rs # llm::error — LlmError + provider.rs # llm::provider — LlmProvider trait(仅接口) + provider/ + openai.rs # llm::provider::openai — OpenaiProvider 实现 + cycle.rs # llm::cycle — 生命周期引擎(子模块根) + cycle/ + retry.rs # llm::cycle::retry — 重试策略 + usage.rs # llm::cycle::usage — Token 用量 + +# 未来领域示例(占位): +# tools.rs + tools/ # 工具调用、MCP +# memory.rs + memory/ # 记忆系统 +# prompt.rs + prompt/ # 提示词工程 +# agent.rs + agent/ # Agent 运行时 +``` + +`llm.rs` 根模块声明: + +```rust +// llm.rs +pub mod types; +pub mod error; +pub mod provider; +pub mod cycle; +``` + +## 模块设计 + +### 1. 
llm/types.rs — 核心数据类型 + +```rust +pub enum Role { User, Assistant, System, Tool } + +pub enum ContentBlock { + Text { text: String }, + ToolUse { id: String, name: String, input: serde_json::Value }, + ToolResult { tool_use_id: String, content: String, is_error: bool }, +} + +pub struct Message { pub role: Role, pub content: Vec<ContentBlock> } +pub struct ToolDefinition { pub name: String, pub description: String, pub input_schema: serde_json::Value } + +pub struct ChatRequest { + pub model: String, + pub messages: Vec<Message>, + pub system_prompt: Option<String>, + pub tools: Vec<ToolDefinition>, + pub max_tokens: Option<u32>, + pub temperature: Option<f32>, +} + +pub struct ChatResponse { + pub message: Message, + pub usage: Usage, + pub stop_reason: Option<StopReason>, +} + +pub enum StopReason { Stop, ToolUse, MaxTokens, ContentFilter, Other(String) } +``` + +### 2. llm/error.rs — 错误体系 + +```rust +#[derive(thiserror::Error)] +pub enum LlmError { + Authentication(String), + RateLimit { retry_after: Option<Duration> }, + Request { status: u16, body: String }, + Timeout { duration: Duration }, + Stream(String), + ContextLength { actual: u32, limit: u32 }, +} +``` + +### 3. llm/provider.rs — Provider 接口 + +trait 单独存放,具体实现在 `provider/` 子模块。 + +```rust +// llm/provider.rs +pub mod openai; + +#[async_trait] +pub trait LlmProvider: Send + Sync { + async fn chat(&self, request: ChatRequest) -> Result<ChatResponse, LlmError>; +} +``` + +#### 3.1 llm/provider/openai.rs — OpenAI 兼容实现 + +```rust +// llm/provider/openai.rs +use super::LlmProvider; + +pub struct OpenaiProvider { + http_client: reqwest::Client, + base_url: String, + api_key: String, + model: String, +} + +#[async_trait] +impl LlmProvider for OpenaiProvider { + async fn chat(&self, request: ChatRequest) -> Result<ChatResponse, LlmError> { + // POST {base_url}/chat/completions + // 解析 response → ChatResponse + todo!() + } +} +``` + +后续新增实现: `provider/anthropic.rs`、`provider/azure.rs` 等。 + +### 4. 
llm/cycle.rs — 生命周期引擎 + +```rust +mod retry; +mod usage; + +pub use retry::RetryConfig; +pub use usage::{CostTracker, Usage}; + +pub struct CycleConfig { + pub model: String, + pub max_tokens: Option<u32>, + pub temperature: Option<f32>, + pub max_turns: Option<u32>, + pub retry: RetryConfig, +} + +pub struct LlmCycle { + provider: Box<dyn LlmProvider>, + config: CycleConfig, + usage: CostTracker, + messages: Vec<Message>, + system_prompt: Option<String>, +} +``` + +`submit()` 完整流程: + +``` +submit(prompt, tools) + │ + ├─ ① push Message(user, [Text(prompt)]) + ├─ ② 构建 ChatRequest { model, messages, system_prompt, tools, max_tokens, temperature } + ├─ ③ [重试循环] provider.chat(request) + │ ├─ Ok → 解析 ChatResponse + │ └─ Err(可重试) → compute_delay → sleep → retry + ├─ ④ push Message(assistant, [Text(...) | ToolUse(...)]) + ├─ ⑤ usage.add(&response.usage) + └─ ⑥ return ChatResponse +``` + +#### 4.1 llm/cycle/retry.rs — 重试策略 + +```rust +pub struct RetryConfig { + pub max_retries: u32, // 默认 3 + pub base_delay: Duration, // 默认 1s + pub max_delay: Duration, // 默认 30s + pub jitter_factor: f64, // 默认 0.25 +} +``` + +指数退避 + jitter: 先计算 `capped = min(base_delay * 2^attempt, max_delay)`,再取 `delay = capped + random(0, capped * jitter_factor)` + +可重试错误: RateLimit, Timeout, Request(status 为 5xx) + +#### 4.2 llm/cycle/usage.rs — Token 用量 + +```rust +#[derive(Default)] +pub struct Usage { pub input_tokens: u32, pub output_tokens: u32 } + +pub struct CostTracker { accumulated: Usage } +impl CostTracker { + pub fn add(&mut self, usage: &Usage); + pub fn total(&self) -> &Usage; + pub fn reset(&mut self); +} +``` + +## 依赖 + +```toml +[dependencies] +tokio = { version = "1", features = ["full"] } +reqwest = { version = "0.12", features = ["json"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +thiserror = "2" +async-trait = "0.1" +tracing = "0.1" +``` + +## 测试 + +- Unit: types 序列化、retry 退避计算、usage 累计 +- Mock: HTTP mock server 测试 provider 请求/响应/错误处理 +- Integration (可选): Ollama 本地真实调用验证 + +## 后续扩展 + +- 流式接口 (`Stream`) +- Tool 自动执行循环 (参考 OpenHarness `run_query()`) +- 多 Provider 注册发现 
(参考 OpenHarness `ProviderRegistry`) +- 上下文压缩 (auto-compaction) +- 生命周期钩子 (pre/post tool use hooks)