32f3edaf19
实现 ProviderRegistry、HookExecutor、StreamEvents 和 Auto-compaction 模块,并集成到 LlmCycle 中
160 lines
4.5 KiB
Rust
160 lines
4.5 KiB
Rust
//! 上下文自动压缩 —— 当对话历史过长时自动压缩。
|
|
|
|
use crate::llm::types::{ContentField, OpenaiChatMessage, OpenaiContentPart};
|
|
|
|
/// Extra safety margin, in tokens, subtracted from the usable window when
/// `CompactConfig::threshold` computes the auto-compaction trigger point.
const AUTOCOMPACT_BUFFER_TOKENS: u32 = 13_000;
|
|
/// Default value of `CompactConfig::reserved_tokens`: tokens set aside for model output.
const RESERVED_OUTPUT_TOKENS: u32 = 20_000;
|
|
/// Number of consecutive compaction failures at which the circuit breaker trips
/// and `should_compact` stops requesting further attempts.
const MAX_CONSECUTIVE_FAILURES: u32 = 3;
|
|
/// Default value of `CompactConfig::keep_recent`: how many of the most recent
/// messages micro-compaction leaves untouched.
const KEEP_RECENT: usize = 6;
|
|
|
|
/// Configuration for context compaction.
///
/// Derives `PartialEq`/`Eq` in addition to `Debug`/`Clone` so that configurations
/// can be compared directly (for example in tests or change detection).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompactConfig {
    /// Size of the model's context window, in tokens.
    pub context_window: u32,
    /// Number of tokens reserved for the model's output.
    pub reserved_tokens: u32,
    /// Number of most recent messages that micro-compaction keeps untouched.
    pub keep_recent: usize,
}
|
|
|
|
impl Default for CompactConfig {
|
|
fn default() -> Self {
|
|
Self {
|
|
context_window: 128_000,
|
|
reserved_tokens: RESERVED_OUTPUT_TOKENS,
|
|
keep_recent: KEEP_RECENT,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl CompactConfig {
|
|
/// 计算自动压缩触发的阈值。
|
|
pub fn threshold(&self) -> u32 {
|
|
self.context_window
|
|
.saturating_sub(self.reserved_tokens)
|
|
.saturating_sub(AUTOCOMPACT_BUFFER_TOKENS)
|
|
}
|
|
}
|
|
|
|
/// Compaction state: tracks the number of consecutive compaction failures
/// (circuit-breaker pattern).
///
/// Derives `PartialEq`/`Eq` in addition to `Debug`/`Clone` so that states can be
/// compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompactState {
    /// Failures recorded since the last successful compaction.
    consecutive_failures: u32,
}
|
|
|
|
impl Default for CompactState {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
impl CompactState {
|
|
/// 创建一个新的压缩状态。
|
|
pub fn new() -> Self {
|
|
Self {
|
|
consecutive_failures: 0,
|
|
}
|
|
}
|
|
|
|
/// 记录一次成功的压缩。
|
|
pub fn record_success(&mut self) {
|
|
self.consecutive_failures = 0;
|
|
}
|
|
|
|
/// 记录一次压缩失败。
|
|
///
|
|
/// 返回 `true` 表示已达断路器上限,不再尝试。
|
|
pub fn record_failure(&mut self) -> bool {
|
|
self.consecutive_failures += 1;
|
|
self.consecutive_failures >= MAX_CONSECUTIVE_FAILURES
|
|
}
|
|
}
|
|
|
|
/// 粗略估计消息列表的 token 数(基于字符数,4 字符 ≈ 1 token)。
|
|
pub fn estimate_message_tokens(messages: &[OpenaiChatMessage]) -> u32 {
|
|
messages
|
|
.iter()
|
|
.map(estimate_single_message_tokens)
|
|
.sum()
|
|
}
|
|
|
|
fn estimate_single_message_tokens(msg: &OpenaiChatMessage) -> u32 {
|
|
let role_overhead: u32 = 4;
|
|
let content_tokens = match msg {
|
|
OpenaiChatMessage::Developer { content, .. }
|
|
| OpenaiChatMessage::System { content, .. }
|
|
| OpenaiChatMessage::User { content, .. }
|
|
| OpenaiChatMessage::Assistant { content, .. }
|
|
| OpenaiChatMessage::Function { content, .. } => estimate_content_tokens(content),
|
|
OpenaiChatMessage::Tool { content, .. } => estimate_content_tokens(content),
|
|
};
|
|
role_overhead + content_tokens
|
|
}
|
|
|
|
fn estimate_content_tokens(content: &ContentField) -> u32 {
|
|
match content {
|
|
ContentField::String(s) => estimate_text_tokens(s),
|
|
ContentField::Array(parts) => parts.iter().map(estimate_part_tokens).sum(),
|
|
}
|
|
}
|
|
|
|
fn estimate_part_tokens(part: &OpenaiContentPart) -> u32 {
|
|
match part {
|
|
OpenaiContentPart::Text { text } => estimate_text_tokens(text),
|
|
_ => 50,
|
|
}
|
|
}
|
|
|
|
/// Estimates the token count of a text fragment from its UTF-8 byte length.
///
/// The estimate is `ceil(len * 4 / 3)`, where `len` is `text.len()` (bytes, not
/// characters); an empty string yields 0. The arithmetic is carried out in `u64`
/// and the result saturates at `u32::MAX`, so very large inputs can neither
/// overflow the multiplication nor be truncated by a narrowing cast.
///
/// NOTE(review): this formula yields roughly 4/3 tokens per byte, far above the
/// common "about 4 characters per token" rule of thumb — confirm the ratio is
/// intentional.
fn estimate_text_tokens(text: &str) -> u32 {
    // Widening cast: `usize` is at most 64 bits on supported targets, so no bits are lost.
    let byte_len = text.len() as u64;
    let estimate = byte_len.saturating_mul(4).div_ceil(3);
    // The clamp guarantees the value fits, so this narrowing cast cannot truncate.
    estimate.min(u64::from(u32::MAX)) as u32
}
|
|
|
|
/// 判断是否需要触发自动压缩。
|
|
pub fn should_compact(
|
|
messages: &[OpenaiChatMessage],
|
|
config: &CompactConfig,
|
|
state: &CompactState,
|
|
) -> bool {
|
|
if state.consecutive_failures >= MAX_CONSECUTIVE_FAILURES {
|
|
return false;
|
|
}
|
|
let tokens = estimate_message_tokens(messages);
|
|
tokens >= config.threshold()
|
|
}
|
|
|
|
/// 执行微压缩 —— 用 `[pruned]` 替换旧的 tool result 内容。
|
|
///
|
|
/// 这是最便宜的压缩方式,不需要 LLM 调用。
|
|
/// 保留最近的 `keep_recent` 条消息不变。
|
|
///
|
|
/// 返回释放的估算 token 数。
|
|
pub fn microcompact(messages: &mut [OpenaiChatMessage], keep_recent: usize) -> u32 {
|
|
if messages.len() <= keep_recent {
|
|
return 0;
|
|
}
|
|
|
|
let prune_start = messages.len() - keep_recent;
|
|
let mut freed_tokens: u32 = 0;
|
|
|
|
for msg in &messages[..prune_start] {
|
|
if matches!(msg, OpenaiChatMessage::Tool { .. }) {
|
|
freed_tokens += estimate_single_message_tokens(msg);
|
|
}
|
|
}
|
|
|
|
for msg in &mut messages[..prune_start] {
|
|
if let OpenaiChatMessage::Tool { content, .. } = msg {
|
|
*content = ContentField::Array(vec![OpenaiContentPart::Text {
|
|
text: "[pruned]".to_string(),
|
|
}]);
|
|
}
|
|
}
|
|
|
|
freed_tokens
|
|
}
|