//! 上下文自动压缩 —— 当对话历史过长时自动压缩。 use crate::llm::types::{ContentField, OpenaiChatMessage, OpenaiContentPart}; const AUTOCOMPACT_BUFFER_TOKENS: u32 = 13_000; const RESERVED_OUTPUT_TOKENS: u32 = 20_000; const MAX_CONSECUTIVE_FAILURES: u32 = 3; const KEEP_RECENT: usize = 6; /// 上下文压缩配置。 #[derive(Debug, Clone)] pub struct CompactConfig { /// 模型上下文窗口大小(token 数)。 pub context_window: u32, /// 为输出预留的 token 数。 pub reserved_tokens: u32, /// 微压缩保留的最近消息数。 pub keep_recent: usize, } impl Default for CompactConfig { fn default() -> Self { Self { context_window: 128_000, reserved_tokens: RESERVED_OUTPUT_TOKENS, keep_recent: KEEP_RECENT, } } } impl CompactConfig { /// 计算自动压缩触发的阈值。 pub fn threshold(&self) -> u32 { self.context_window .saturating_sub(self.reserved_tokens) .saturating_sub(AUTOCOMPACT_BUFFER_TOKENS) } } /// 压缩状态 —— 跟踪连续失败次数(断路器模式)。 #[derive(Debug, Clone)] pub struct CompactState { consecutive_failures: u32, } impl Default for CompactState { fn default() -> Self { Self::new() } } impl CompactState { /// 创建一个新的压缩状态。 pub fn new() -> Self { Self { consecutive_failures: 0, } } /// 记录一次成功的压缩。 pub fn record_success(&mut self) { self.consecutive_failures = 0; } /// 记录一次压缩失败。 /// /// 返回 `true` 表示已达断路器上限,不再尝试。 pub fn record_failure(&mut self) -> bool { self.consecutive_failures += 1; self.consecutive_failures >= MAX_CONSECUTIVE_FAILURES } } /// 粗略估计消息列表的 token 数(基于字符数,4 字符 ≈ 1 token)。 pub fn estimate_message_tokens(messages: &[OpenaiChatMessage]) -> u32 { messages .iter() .map(estimate_single_message_tokens) .sum() } fn estimate_single_message_tokens(msg: &OpenaiChatMessage) -> u32 { let role_overhead: u32 = 4; let content_tokens = match msg { OpenaiChatMessage::Developer { content, .. } | OpenaiChatMessage::System { content, .. } | OpenaiChatMessage::User { content, .. } | OpenaiChatMessage::Assistant { content, .. } | OpenaiChatMessage::Function { content, .. } => estimate_content_tokens(content), OpenaiChatMessage::Tool { content, .. } => estimate_content_tokens(content), }; role_overhead + content_tokens } fn estimate_content_tokens(content: &ContentField) -> u32 { match content { ContentField::String(s) => estimate_text_tokens(s), ContentField::Array(parts) => parts.iter().map(estimate_part_tokens).sum(), } } fn estimate_part_tokens(part: &OpenaiContentPart) -> u32 { match part { OpenaiContentPart::Text { text } => estimate_text_tokens(text), _ => 50, } } fn estimate_text_tokens(text: &str) -> u32 { if text.is_empty() { return 0; } let len = text.len() as u32; (len * 4).div_ceil(3) } /// 判断是否需要触发自动压缩。 pub fn should_compact( messages: &[OpenaiChatMessage], config: &CompactConfig, state: &CompactState, ) -> bool { if state.consecutive_failures >= MAX_CONSECUTIVE_FAILURES { return false; } let tokens = estimate_message_tokens(messages); tokens >= config.threshold() } /// 执行微压缩 —— 用 `[pruned]` 替换旧的 tool result 内容。 /// /// 这是最便宜的压缩方式,不需要 LLM 调用。 /// 保留最近的 `keep_recent` 条消息不变。 /// /// 返回释放的估算 token 数。 pub fn microcompact(messages: &mut [OpenaiChatMessage], keep_recent: usize) -> u32 { if messages.len() <= keep_recent { return 0; } let prune_start = messages.len() - keep_recent; let mut freed_tokens: u32 = 0; for msg in &messages[..prune_start] { if matches!(msg, OpenaiChatMessage::Tool { .. }) { freed_tokens += estimate_single_message_tokens(msg); } } for msg in &mut messages[..prune_start] { if let OpenaiChatMessage::Tool { content, .. } = msg { *content = ContentField::Array(vec![OpenaiContentPart::Text { text: "[pruned]".to_string(), }]); } } freed_tokens }