feat(llm): 实现 Phase 0 剩余四个模块

实现 ProviderRegistry、HookExecutor、StreamEvents 和 Auto-compaction 模块,并集成到 LlmCycle 中
This commit is contained in:
徐涛
2026-06-02 08:51:42 +08:00
parent 69b6dd942b
commit 32f3edaf19
13 changed files with 1299 additions and 9 deletions
+159
View File
@@ -0,0 +1,159 @@
//! 上下文自动压缩 —— 当对话历史过长时自动压缩。
use crate::llm::types::{ContentField, OpenaiChatMessage, OpenaiContentPart};
/// Safety margin, in tokens, subtracted from the usable window when computing the
/// auto-compaction threshold.
const AUTOCOMPACT_BUFFER_TOKENS: u32 = 13_000;
/// Default number of tokens reserved for the model's output.
const RESERVED_OUTPUT_TOKENS: u32 = 20_000;
/// Number of consecutive compaction failures at which the circuit breaker trips.
const MAX_CONSECUTIVE_FAILURES: u32 = 3;
/// Default number of most recent messages that micro-compaction leaves untouched.
const KEEP_RECENT: usize = 6;
/// Configuration for automatic context compaction.
#[derive(Debug, Clone)]
pub struct CompactConfig {
    /// Size of the model's context window, in tokens.
    pub context_window: u32,
    /// Number of tokens reserved for the model's output.
    pub reserved_tokens: u32,
    /// Number of most recent messages that micro-compaction keeps intact.
    pub keep_recent: usize,
}
impl Default for CompactConfig {
fn default() -> Self {
Self {
context_window: 128_000,
reserved_tokens: RESERVED_OUTPUT_TOKENS,
keep_recent: KEEP_RECENT,
}
}
}
impl CompactConfig {
    /// Returns the estimated token count at which auto-compaction is triggered.
    ///
    /// Computed as `context_window - reserved_tokens - AUTOCOMPACT_BUFFER_TOKENS`,
    /// where each subtraction saturates at zero instead of underflowing.
    pub fn threshold(&self) -> u32 {
        let usable_window = self.context_window.saturating_sub(self.reserved_tokens);
        usable_window.saturating_sub(AUTOCOMPACT_BUFFER_TOKENS)
    }
}
/// Compaction state: tracks the number of consecutive failures (circuit-breaker pattern).
#[derive(Debug, Clone)]
pub struct CompactState {
    /// Failures recorded since the last successful compaction.
    consecutive_failures: u32,
}
impl Default for CompactState {
fn default() -> Self {
Self::new()
}
}
impl CompactState {
/// 创建一个新的压缩状态。
pub fn new() -> Self {
Self {
consecutive_failures: 0,
}
}
/// 记录一次成功的压缩。
pub fn record_success(&mut self) {
self.consecutive_failures = 0;
}
/// 记录一次压缩失败。
///
/// 返回 `true` 表示已达断路器上限,不再尝试。
pub fn record_failure(&mut self) -> bool {
self.consecutive_failures += 1;
self.consecutive_failures >= MAX_CONSECUTIVE_FAILURES
}
}
/// 粗略估计消息列表的 token 数(基于字符数,4 字符 ≈ 1 token)。
pub fn estimate_message_tokens(messages: &[OpenaiChatMessage]) -> u32 {
messages
.iter()
.map(estimate_single_message_tokens)
.sum()
}
/// Estimates the token cost of one message: a fixed per-message overhead of 4 tokens
/// plus the estimate for its `content` field.
fn estimate_single_message_tokens(msg: &OpenaiChatMessage) -> u32 {
    const ROLE_OVERHEAD: u32 = 4;

    ROLE_OVERHEAD
        + match msg {
            OpenaiChatMessage::Tool { content, .. } => estimate_content_tokens(content),
            OpenaiChatMessage::Developer { content, .. }
            | OpenaiChatMessage::System { content, .. }
            | OpenaiChatMessage::User { content, .. }
            | OpenaiChatMessage::Assistant { content, .. }
            | OpenaiChatMessage::Function { content, .. } => estimate_content_tokens(content),
        }
}
fn estimate_content_tokens(content: &ContentField) -> u32 {
match content {
ContentField::String(s) => estimate_text_tokens(s),
ContentField::Array(parts) => parts.iter().map(estimate_part_tokens).sum(),
}
}
/// Estimates the token cost of a single content part.
///
/// Text parts are measured from their text; every other kind of part is charged a
/// flat 50 tokens.
fn estimate_part_tokens(part: &OpenaiContentPart) -> u32 {
    const NON_TEXT_PART_TOKENS: u32 = 50;

    if let OpenaiContentPart::Text { text } = part {
        estimate_text_tokens(text)
    } else {
        NON_TEXT_PART_TOKENS
    }
}
/// Roughly estimates the token count of a text fragment using the
/// "4 characters ≈ 1 token" rule of thumb.
///
/// The length is measured in UTF-8 bytes (`str::len`) and divided by four, rounding
/// up, so any non-empty text costs at least one token and empty text costs zero.
/// Lengths whose estimate would not fit in a `u32` saturate at `u32::MAX` instead of
/// truncating or overflowing.
fn estimate_text_tokens(text: &str) -> u32 {
    const BYTES_PER_TOKEN: usize = 4;

    // Divide in `usize` first so no intermediate product can overflow.
    let tokens = text.len().div_ceil(BYTES_PER_TOKEN);
    // The value is clamped to the `u32` range first, so the cast cannot truncate.
    tokens.min(u32::MAX as usize) as u32
}
/// Decides whether auto-compaction should be triggered.
///
/// Returns `false` when the circuit breaker is open (the failure streak in `state`
/// has reached `MAX_CONSECUTIVE_FAILURES`); in that case the messages are not even
/// measured. Otherwise returns `true` when the estimated token count of `messages`
/// is at or above `config.threshold()`.
pub fn should_compact(
    messages: &[OpenaiChatMessage],
    config: &CompactConfig,
    state: &CompactState,
) -> bool {
    let breaker_open = state.consecutive_failures >= MAX_CONSECUTIVE_FAILURES;
    !breaker_open && estimate_message_tokens(messages) >= config.threshold()
}
/// Micro-compacts the history by replacing the content of old tool results with
/// `[pruned]`.
///
/// This is the cheapest form of compaction: it needs no LLM call. The last
/// `keep_recent` messages are left untouched; among the older messages only `Tool`
/// messages are rewritten, each to a single text part containing `[pruned]`.
///
/// Returns the estimated number of tokens freed. This is the net saving: for each
/// rewritten message, its estimate before pruning minus its estimate after pruning
/// (the placeholder still costs tokens). A message that does not shrink contributes
/// zero, so running this again over already-pruned history reports `0`.
pub fn microcompact(messages: &mut [OpenaiChatMessage], keep_recent: usize) -> u32 {
    if messages.len() <= keep_recent {
        return 0;
    }

    let prune_end = messages.len() - keep_recent;
    let mut freed_tokens: u32 = 0;

    for msg in &mut messages[..prune_end] {
        if !matches!(*msg, OpenaiChatMessage::Tool { .. }) {
            continue;
        }

        let before = estimate_single_message_tokens(msg);
        if let OpenaiChatMessage::Tool { content, .. } = msg {
            *content = ContentField::Array(vec![OpenaiContentPart::Text {
                text: "[pruned]".to_string(),
            }]);
        }
        let after = estimate_single_message_tokens(msg);

        // Count only the real reduction; the placeholder may be no smaller than a
        // tiny or already-pruned tool result.
        freed_tokens = freed_tokens.saturating_add(before.saturating_sub(after));
    }

    freed_tokens
}