记忆系统
约 1866 字 · 大约 6 分钟
Agent虾学智能体入门
2026-03-08
记忆系统
本系列第六篇,深入讲解智能体的记忆系统,包括短期记忆、长期记忆、向量记忆和总结记忆。
为什么智能体需要记忆?
没有记忆的智能体就像金鱼,每次对话都从零开始。记忆系统让智能体能够:
- 记住对话历史和上下文
- 累积知识和经验
- 跨会话保持一致性
- 从过去的交互中学习
记忆类型概览
| 类型 | 存储时长 | 容量 | 用途 |
|---|---|---|---|
| 短期记忆 | 当前会话 | 有限 | 对话上下文 |
| 长期记忆 | 永久 | 大量 | 知识存储 |
| 向量记忆 | 永久 | 大量 | 语义检索 |
| 总结记忆 | 永久 | 压缩 | 关键信息 |
1. 短期记忆(Short-term Memory)
短期记忆存储当前会话的对话历史,是最基础的记忆形式。
实现方式
from typing import List, Dict
from dataclasses import dataclass
@dataclass
class Message:
    """A single chat message in the short-term memory buffer."""
    role: str  # "user", "assistant" or "system"
    content: str  # message text
    timestamp: str  # ISO-8601 creation time (set by ShortTermMemory.add)
class ShortTermMemory:
    """Short-term memory: rolling conversation history for the current session.

    Holds at most ``max_messages`` messages. Backed by
    ``collections.deque(maxlen=...)`` so evicting the oldest message is O(1);
    the original ``list.pop(0)`` was O(n) per eviction.
    """

    def __init__(self, max_messages: int = 50):
        # Local import keeps the snippet self-contained, matching the
        # file's style of function-scope imports.
        from collections import deque

        # deque with maxlen drops the oldest entry automatically on append.
        self.messages = deque(maxlen=max_messages)
        self.max_messages = max_messages

    def add(self, role: str, content: str):
        """Append one message stamped with the current ISO-8601 time.

        Args:
            role: "user", "assistant" or "system".
            content: message text.
        """
        from datetime import datetime

        message = Message(
            role=role,
            content=content,
            timestamp=datetime.now().isoformat(),
        )
        # Appending beyond maxlen silently evicts the oldest message.
        self.messages.append(message)

    def get_context(self, max_tokens: int = 4000) -> List[Dict]:
        """Return the newest messages that fit within ``max_tokens``.

        Walks the history newest-to-oldest and stops before the budget is
        exceeded, so the most recent messages always survive. The token
        count is a rough chars/4 estimate — NOTE(review): this undercounts
        CJK text; confirm against the real tokenizer.
        """
        context: List[Dict] = []
        total_tokens = 0
        for msg in reversed(self.messages):
            msg_tokens = len(msg.content) // 4  # rough estimate
            if total_tokens + msg_tokens > max_tokens:
                break
            # insert(0, ...) restores chronological order for the caller.
            context.insert(0, {"role": msg.role, "content": msg.content})
            total_tokens += msg_tokens
        return context

    def clear(self):
        """Drop all stored messages."""
        self.messages.clear()
# Usage example
memory = ShortTermMemory(max_messages=20)
memory.add("user", "你好,我是小明")
memory.add("assistant", "你好小明!有什么可以帮你的?")
memory.add("user", "帮我查一下北京天气")
context = memory.get_context()
print(f"上下文消息数: {len(context)}")

# --- 滑动窗口策略 (sliding-window strategy) ---
class SlidingWindowMemory:
    """Sliding-window memory: retains only the most recent messages.

    Backed by ``collections.deque(maxlen=window_size)`` so dropping the
    oldest message is O(1); the original ``list.pop(0)`` was O(n).
    """

    def __init__(self, window_size: int = 10):
        from collections import deque  # local import, matching file style

        self.window_size = window_size
        self.buffer = deque(maxlen=window_size)

    def add(self, message: dict):
        """Append a message; entries beyond the window are evicted."""
        self.buffer.append(message)

    def get(self) -> list:
        """Return a list copy of the current window, oldest first."""
        return list(self.buffer)


# --- 2. 长期记忆(Long-term Memory) ---
长期记忆将重要信息持久化存储,支持跨会话访问。
基于数据库的实现
import sqlite3
import json
from datetime import datetime
class LongTermMemory:
    """Long-term memory persisted in SQLite.

    Each memory is a key/value row with JSON metadata plus usage
    statistics (creation time, last access time, access count).
    """

    def __init__(self, db_path: str = "memory.db"):
        self.conn = sqlite3.connect(db_path)
        self._init_db()

    def _init_db(self):
        """Create the memories table if it does not exist yet."""
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                key TEXT UNIQUE,
                value TEXT,
                metadata TEXT,
                created_at TIMESTAMP,
                last_accessed TIMESTAMP,
                access_count INTEGER DEFAULT 0
            )
        """)
        self.conn.commit()

    def store(self, key: str, value: str, metadata: dict = None):
        """Insert or overwrite a memory under ``key``.

        The COALESCE subquery preserves the existing access_count when a
        key is replaced (INSERT OR REPLACE deletes, then re-inserts).
        """
        now = datetime.now().isoformat()
        self.conn.execute("""
            INSERT OR REPLACE INTO memories
            (key, value, metadata, created_at, last_accessed, access_count)
            VALUES (?, ?, ?, ?, ?, COALESCE(
                (SELECT access_count FROM memories WHERE key = ?), 0
            ))
        """, (key, value, json.dumps(metadata or {}), now, now, key))
        self.conn.commit()

    def recall(self, key: str) -> "str | None":
        """Return the stored value for ``key`` (or None) and bump its stats.

        Rewritten as SELECT + UPDATE instead of ``UPDATE ... RETURNING``:
        RETURNING requires SQLite >= 3.35, and the original version never
        committed the stats update, so it could be lost.
        """
        cursor = self.conn.execute(
            "SELECT value FROM memories WHERE key = ?", (key,)
        )
        row = cursor.fetchone()
        if row is None:
            return None
        self.conn.execute(
            "UPDATE memories"
            " SET last_accessed = ?, access_count = access_count + 1"
            " WHERE key = ?",
            (datetime.now().isoformat(), key),
        )
        self.conn.commit()
        return row[0]

    def search(self, query: str) -> list:
        """Substring search over keys and values, most-used rows first."""
        cursor = self.conn.execute("""
            SELECT key, value, metadata FROM memories
            WHERE key LIKE ? OR value LIKE ?
            ORDER BY access_count DESC, last_accessed DESC
        """, (f"%{query}%", f"%{query}%"))
        return [
            {"key": row[0], "value": row[1], "metadata": json.loads(row[2])}
            for row in cursor.fetchall()
        ]

    def forget(self, key: str):
        """Delete the memory stored under ``key`` (no-op if absent)."""
        self.conn.execute("DELETE FROM memories WHERE key = ?", (key,))
        self.conn.commit()

    def close(self):
        """Release the underlying SQLite connection."""
        self.conn.close()
# Usage example (creates/opens memory.db in the working directory)
ltm = LongTermMemory()
ltm.store("user_preference", "喜欢简洁的回复", {"category": "preference"})
ltm.store("user_name", "小明", {"category": "profile"})
print(ltm.recall("user_name"))  # prints: 小明
print(ltm.search("喜欢"))  # substring search over stored memories

# --- 3. 向量记忆(Vector Memory) ---
向量记忆使用嵌入技术实现语义检索,是最强大的记忆形式。
基于向量数据库的实现
from dataclasses import dataclass
from typing import List, Optional
import numpy as np
@dataclass
class MemoryItem:
    """One stored memory together with its embedding vector."""
    id: str  # unique identifier (uuid4 string, assigned by VectorMemory.store)
    content: str  # original text
    embedding: List[float]  # embedding vector used for similarity search
    metadata: dict  # arbitrary caller-supplied tags
class VectorMemory:
    """Vector memory: semantic retrieval over embedded text snippets."""

    def __init__(self, embedding_model, similarity_threshold: float = 0.7):
        self.embedding_model = embedding_model
        self.similarity_threshold = similarity_threshold
        # Quoted annotation so MemoryItem need not be in scope at runtime.
        self.memories: List["MemoryItem"] = []

    def store(self, content: str, metadata: dict = None) -> str:
        """Embed ``content``, append it as a memory, and return its id."""
        import uuid

        memory_id = str(uuid.uuid4())
        embedding = self._get_embedding(content)
        self.memories.append(
            MemoryItem(
                id=memory_id,
                content=content,
                embedding=embedding,
                metadata=metadata or {},
            )
        )
        return memory_id

    def recall(self, query: str, top_k: int = 5) -> List[dict]:
        """Return up to ``top_k`` memories semantically similar to ``query``.

        Only memories whose cosine similarity reaches the configured
        threshold are candidates; results are sorted by similarity, best
        first.
        """
        query_embedding = self._get_embedding(query)
        scored = []
        for memory in self.memories:
            sim = self._cosine_similarity(query_embedding, memory.embedding)
            if sim >= self.similarity_threshold:
                scored.append((memory, sim))
        scored.sort(key=lambda item: item[1], reverse=True)
        return [
            {
                "content": mem.content,
                "metadata": mem.metadata,
                "similarity": sim,
            }
            for mem, sim in scored[:top_k]
        ]

    def _get_embedding(self, text: str) -> List[float]:
        """Embed ``text``.

        Real deployments should call the injected model, e.g.
        ``return self.embedding_model.embed(text)``.
        This demo returns a random 768-dim vector instead.
        """
        return list(np.random.randn(768))

    def _cosine_similarity(self, a: List[float], b: List[float]) -> float:
        """Cosine similarity of two vectors.

        Returns 0.0 when either vector has zero norm — the original
        divided by zero there and produced NaN.
        """
        a = np.array(a)
        b = np.array(b)
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        if denom == 0.0:  # guard against division by zero
            return 0.0
        return float(np.dot(a, b) / denom)
# Usage example (requires a real embedding model):
# vector_memory = VectorMemory(embedding_model=openai_embeddings)
# vector_memory.store("用户喜欢用 Python 编程", {"category": "preference"})
# vector_memory.store("用户是后端工程师", {"category": "profile"})
#
# results = vector_memory.recall("编程相关偏好")
# for r in results:
#     print(f"[{r['similarity']:.2f}] {r['content']}")

# --- 使用 LangChain 向量存储 (using a LangChain vector store) ---
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document

# Create the vector store (persisted on disk under ./chroma_db)
embeddings = OpenAIEmbeddings()
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db"
)

# Store memories as documents with metadata
vectorstore.add_documents([
    Document(
        page_content="用户偏好:喜欢简洁的技术文档",
        metadata={"type": "preference", "importance": "high"}
    ),
    Document(
        page_content="用户项目:正在开发一个 AI Agent 系统",
        metadata={"type": "project", "importance": "medium"}
    )
])

# Retrieve the 3 most similar memories for a query
results = vectorstore.similarity_search(
    "用户的项目是什么?",
    k=3
)
for doc in results:
    print(doc.page_content)

# --- 4. 总结记忆(Summary Memory) ---
总结记忆将长对话压缩为关键信息摘要。
实现方式
class SummaryMemory:
    """Summary memory: compresses long dialogue history into a summary.

    Raw messages accumulate until ``max_raw_messages`` is exceeded; then
    all but the two most recent are folded into ``self.summary`` via the
    injected LLM.
    """

    def __init__(self, llm, max_raw_messages: int = 10):
        self.llm = llm  # object with a .generate(prompt) -> str method
        self.max_raw_messages = max_raw_messages
        self.raw_messages = []
        self.summary = ""

    def add(self, role: str, content: str):
        """Append a message; triggers summarization past the threshold."""
        self.raw_messages.append({"role": role, "content": content})
        if len(self.raw_messages) > self.max_raw_messages:
            self._summarize()

    def _summarize(self):
        """Fold older messages (and any previous summary) into a new summary.

        Bug fix: the original prompt ignored ``self.summary``, so every
        re-summarization silently discarded all earlier context. The
        previous summary is now included so information accumulates.
        """
        previous = f"[已有总结]\n{self.summary}\n" if self.summary else ""
        prompt = f"""
请总结以下对话的关键信息,保留重要细节:
{previous}对话历史:
{self._format_messages(self.raw_messages[:-2])}
总结要求:
1. 保留用户偏好和重要信息
2. 保留决策和结论
3. 简洁明了
总结:
"""
        self.summary = self.llm.generate(prompt)
        # Keep the two most recent messages verbatim.
        self.raw_messages = self.raw_messages[-2:]

    def get_context(self) -> str:
        """Return the summary (if any) followed by the recent raw messages."""
        if self.summary:
            return f"[历史总结]\n{self.summary}\n\n[最近对话]\n{self._format_messages(self.raw_messages)}"
        return self._format_messages(self.raw_messages)

    def _format_messages(self, messages: list) -> str:
        """Render messages as "role: content", one per line."""
        return "\n".join(
            f"{m['role']}: {m['content']}"
            for m in messages
        )
# Usage example:
# summary_memory = SummaryMemory(llm=openai_client)
# for i in range(15):
#     summary_memory.add("user", f"这是第{i+1}条消息")
#     summary_memory.add("assistant", f"收到第{i+1}条消息")
#
# print(summary_memory.get_context())

# --- 记忆架构设计 (memory architecture design) ---
分层记忆系统
class HierarchicalMemory:
    """Layered memory system combining the four memory tiers (L1-L4)."""

    def __init__(self, llm, embedding_model):
        self.short_term = ShortTermMemory(max_messages=20)  # L1: dialogue history
        self.working = {}                                   # L2: task-scoped scratch
        self.long_term = LongTermMemory()                   # L3: persistent store
        self.vector = VectorMemory(embedding_model)         # L4: semantic retrieval
        self.llm = llm

    def remember(self, content: str, importance: str = "normal"):
        """Record ``content``; persist it when ``importance == "high"``.

        Bug fix: the original keyed long-term entries on ``id(content)``,
        a memory address that is neither stable across runs nor unique
        over time (CPython reuses ids after GC). A content hash is stable
        and deduplicates identical content.
        """
        # Short-term memory is always updated.
        self.short_term.add("system", content)
        if importance == "high":
            import hashlib  # local import, matching file style

            key = "mem_" + hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]
            self.long_term.store(key, content)
            self.vector.store(content)

    def recall(self, query: str) -> str:
        """Assemble context from working, short-term and vector memory."""
        # 1. An exact hit in working memory wins outright.
        if query in self.working:
            return self.working[query]
        # 2. Recent dialogue context.
        recent = self.short_term.get_context()
        # 3. Semantically relevant long-term memories.
        relevant = self.vector.recall(query, top_k=3)
        context = f"[最近对话]\n{recent}\n\n[相关记忆]\n"
        for r in relevant:
            context += f"- {r['content']}\n"
        return context


# --- 记忆管理策略 (memory-management strategies) ---
| 策略 | 描述 | 适用场景 |
|---|---|---|
| FIFO | 先进先出,删除最旧 | 固定窗口 |
| 重要性 | 保留重要信息 | 混合记忆 |
| 频率 | 保留高频访问 | 缓存场景 |
| 时间衰减 | 新信息权重高 | 动态场景 |
| 语义去重 | 去除相似内容 | 避免冗余 |
小结
- 短期记忆处理当前会话,长期记忆持久化存储
- 向量记忆支持语义检索,是最强大的记忆形式
- 总结记忆压缩历史,节省 token
- 分层架构结合各种记忆类型,实现高效记忆管理
下一篇
反思机制 - 学习智能体的自我纠错和反思能力