反思机制
约 2072 字大约 7 分钟
Agent虾学智能体入门
2026-03-08
反思机制(Reflection)
本系列第七篇,深入讲解智能体的自我纠错和反思能力,包括 Self-Correction 和 Reflexion 技术。
什么是反思机制?
反思机制让智能体能够审视自己的行为和结果,发现错误并自我纠正。这是智能体从"执行者"进化为"学习者"的关键能力。
核心价值
| 能力 | 描述 | 价值 |
|---|---|---|
| 错误检测 | 发现执行中的问题 | 减少失败 |
| 原因分析 | 理解为什么会出错 | 深度理解 |
| 策略调整 | 改进执行方法 | 持续优化 |
| 经验积累 | 记住教训避免重犯 | 长期进步 |
1. Self-Correction(自我纠正)
自我纠正是最基础的反思形式,智能体检查输出并修正错误。
基本实现
import json
from dataclasses import dataclass
from typing import Optional
@dataclass
class CorrectionResult:
    """Outcome of one self-correction pass."""
    original: str       # the output exactly as it was received, before any fixing
    corrected: str      # the output after correction attempts
    issues_found: list  # issues reported by the last check (list of strings)
    confidence: float   # 1.0 when the check found no issues, 0.5 when attempts ran out
class SelfCorrection:
    """Self-correction module: iteratively check an output and fix detected issues."""

    def __init__(self, llm, max_attempts: int = 3):
        self.llm = llm
        self.max_attempts = max_attempts

    def correct(self, output: str, criteria: str = None) -> CorrectionResult:
        """Check `output` against `criteria` and repair it, up to `max_attempts` rounds.

        Returns a CorrectionResult whose `original` field always holds the
        untouched input. Confidence is 1.0 if a round found no issues,
        0.5 if the attempt budget was exhausted with issues remaining.
        """
        original = output  # keep the untouched input; `output` is reassigned below
        issues = []
        for attempt in range(self.max_attempts):
            # Check for problems in the current candidate.
            issues = self._check_issues(output, criteria)
            if not issues:
                return CorrectionResult(
                    original=original,
                    corrected=output,
                    issues_found=[],
                    confidence=1.0
                )
            print(f"[尝试 {attempt + 1}] 发现问题: {issues}")
            # Ask the LLM for a fixed version and re-check on the next round.
            output = self._fix_issues(output, issues, criteria)
        # Attempt budget exhausted: return best effort with reduced confidence.
        return CorrectionResult(
            original=original,
            corrected=output,
            issues_found=issues,
            confidence=0.5
        )

    def _check_issues(self, output: str, criteria: str) -> list:
        """Ask the LLM for a JSON array of issues; an empty list means clean."""
        prompt = f"""
检查以下输出是否存在问题:
输出:{output}
检查标准:{criteria or '准确性、完整性、格式正确性'}
如果发现问题,返回 JSON 数组:
["问题1", "问题2"]
如果没有问题,返回空数组:[]
"""
        response = self.llm.generate(prompt)
        try:
            # json.loads instead of eval(): never execute model output as code.
            issues = json.loads(response)
            # Guard against a non-array JSON reply.
            return issues if isinstance(issues, list) else []
        except (json.JSONDecodeError, TypeError):
            # Unparseable reply is treated as "no issues found", as before.
            return []

    def _fix_issues(self, output: str, issues: list, criteria: str) -> str:
        """Ask the LLM to rewrite `output` so the listed issues are resolved."""
        prompt = f"""
修正以下输出的问题:
原始输出:{output}
发现的问题:{issues}
检查标准:{criteria or '准确性、完整性、格式正确性'}
请输出修正后的版本:
"""
        return self.llm.generate(prompt)
# 使用示例
# corrector = SelfCorrection(llm=openai_client)
# result = corrector.correct(
# "答案是 42",
# criteria="必须是数学计算的详细步骤和结果"
# )
# print(result.corrected)

代码纠正示例
class CodeSelfCorrection:
    """Self-correction specialized for generated code: fix from error messages or tests."""

    def __init__(self, llm):
        self.llm = llm

    def correct_code(self, code: str, error_message: str = None) -> str:
        """Ask the LLM to analyze and repair `code`.

        The error message (if any) is included in the prompt — the original
        version dropped it, leaving the parameter unused.
        """
        prompt = f"""
以下代码存在问题,请修正:
```python
{code}
```
错误信息:{error_message or '未提供'}
请输出:
1. 问题分析
2. 修正后的代码
3. 修改说明
"""
        return self.llm.generate(prompt)

    def verify_and_fix(self, code: str, test_cases: list) -> str:
        """Run each test; on failure, self-correct using the error and continue."""
        for test in test_cases:
            try:
                # WARNING: exec() on LLM-generated code is unsafe —
                # sandbox this in production.
                exec(code)
                print(f"✅ 测试通过: {test}")
            except Exception as e:
                print(f"❌ 测试失败: {test} - {e}")
                # Feed the error back for a self-corrected version.
                code = self.correct_code(code, str(e))
        return code
# 使用示例
# code_fixer = CodeSelfCorrection(llm)
# fixed_code = code_fixer.correct_code(
#     "def add(a, b): return a - b",  # 错误的减法
#     "期望 add(1, 2) == 3,但得到 -1"
# )
---
## 2. Reflexion(反思学习)
**Reflexion** 是一种更高级的反思机制,通过 verbal reinforcement(语言强化)让智能体从失败中学习。
### 核心流程

行动 → 观察 → 评估 → 反思 → 存储 → 下次使用
### 实现方式
```python
from dataclasses import dataclass
from typing import List, Optional
@dataclass
class Reflection:
    """A stored lesson distilled from one failed task attempt."""
    task: str               # the task that was attempted
    trajectory: List[dict]  # recorded action steps of the attempt
    failure_reason: str     # why the attempt failed
    insight: str            # lesson to apply next time
    improvement: str        # concrete suggestion for the next attempt
class ReflexionAgent:
    """Reflexion agent: retries a task, turning each failure into a stored lesson.

    Implements the verbal-reinforcement loop: act -> observe -> evaluate ->
    reflect -> store -> reuse on the next attempt.
    """

    def __init__(self, llm, tools: dict, max_reflections: int = 3):
        self.llm = llm
        self.tools = tools
        self.max_reflections = max_reflections
        # Lessons accumulated from failed attempts, newest last.
        self.reflection_memory: List[Reflection] = []

    def run_with_reflection(self, task: str, max_attempts: int = 3) -> str:
        """Execute `task`, reflecting after every failed attempt.

        Returns the successful result, or a give-up message after
        `max_attempts` failures.
        """
        for attempt in range(max_attempts):
            print(f"\n{'='*50}")
            print(f"尝试 {attempt + 1}/{max_attempts}")
            # Inject lessons from earlier failures into this attempt.
            relevant_reflections = self._get_relevant_reflections(task)
            result, trajectory, success = self._execute_task(
                task,
                relevant_reflections
            )
            if success:
                print("✅ 任务成功完成!")
                return result
            # Failure: distill a lesson and remember it for the next round.
            print("❌ 任务失败,开始反思...")
            reflection = self._reflect(task, trajectory, result)
            self.reflection_memory.append(reflection)
            print(f"[反思洞察] {reflection.insight}")
        return f"经过 {max_attempts} 次尝试仍未能完成任务"

    def _execute_task(self, task: str, reflections: List[Reflection]) -> tuple:
        """Run one attempt; returns (result, trajectory, success)."""
        trajectory = []
        # Fold past insights into the prompt as hints.
        reflection_hints = "\n".join([
            f"- {r.insight}" for r in reflections
        ])
        prompt = f"""
任务:{task}
过去的经验教训:
{reflection_hints if reflection_hints else '暂无'}
请执行任务,记录你的行动步骤。
"""
        # Simulated execution (a real agent would invoke tools here).
        response = self.llm.generate(prompt)
        trajectory.append({"action": "execute", "response": response})
        # Simplification: success iff the response claims it.
        success = "成功" in response
        return response, trajectory, success

    def _reflect(self, task: str, trajectory: List[dict], result: str) -> Reflection:
        """Distill a failed attempt into a structured Reflection via the LLM."""
        prompt = f"""
分析以下失败案例,提取经验教训:
任务:{task}
执行轨迹:
{trajectory}
结果:{result}
请提供:
1. 失败原因分析
2. 关键洞察(下次可以如何避免)
3. 具体改进建议
以 JSON 格式返回:
{{
"failure_reason": "...",
"insight": "...",
"improvement": "..."
}}
"""
        response = self.llm.generate(prompt)
        try:
            # json.loads instead of eval(): never execute model output as code.
            analysis = json.loads(response)
        except json.JSONDecodeError:
            # Unparseable reflection: degrade gracefully instead of crashing.
            analysis = {}
        # .get with the raw response as fallback avoids KeyError on partial JSON.
        return Reflection(
            task=task,
            trajectory=trajectory,
            failure_reason=analysis.get("failure_reason", response),
            insight=analysis.get("insight", response),
            improvement=analysis.get("improvement", response)
        )

    def _get_relevant_reflections(self, task: str) -> List[Reflection]:
        """Return reflections relevant to `task`.

        Simplified to the three most recent; a real system would use
        vector retrieval keyed on the task.
        """
        return self.reflection_memory[-3:]
# 使用示例
# agent = ReflexionAgent(llm=openai_client, tools={})
# result = agent.run_with_reflection("帮我写一个排序算法")

## 3. CRITIC 框架
CRITIC(Critique-Then-Improve)是另一种反思模式,引入外部工具验证输出。
架构
生成输出 → 工具验证 → 发现问题 → 修正输出

### 实现
class CRITICAgent:
    """CRITIC loop: draft with the LLM, verify with external tools, revise until clean."""

    def __init__(self, llm, verification_tools: dict):
        self.llm = llm
        self.verification_tools = verification_tools

    def generate_and_verify(self, prompt: str, max_iterations: int = 3) -> str:
        """Produce an answer for `prompt`, revising up to `max_iterations` times."""
        # Initial draft.
        draft = self.llm.generate(prompt)
        for step in range(max_iterations):
            # Verify the current draft against every tool.
            report = self._critique(draft)
            if report["is_valid"]:
                print("✅ 验证通过")
                return draft
            print(f"[迭代 {step + 1}] 发现问题: {report['issues']}")
            # Revise and loop.
            draft = self._improve(draft, report)
        return draft

    def _critique(self, output: str) -> dict:
        """Run every verification tool over `output` and collect reported problems."""
        problems = [
            {"tool": name, "issue": verdict["message"]}
            for name, check in self.verification_tools.items()
            for verdict in (check(output),)
            if not verdict["valid"]
        ]
        return {
            "is_valid": not problems,
            "issues": problems
        }

    def _improve(self, output: str, critique: dict) -> str:
        """Ask the LLM for a revision that addresses the critique's issue list."""
        bullets = [
            f"- {item['tool']}: {item['issue']}"
            for item in critique["issues"]
        ]
        issues_text = "\n".join(bullets)
        prompt = f"""
改进以下输出:
原始输出:
{output}
发现的问题:
{issues_text}
请输出改进后的版本:
"""
        return self.llm.generate(prompt)
# 验证工具示例
def fact_check_tool(text: str) -> dict:
    """Toy fact-check tool: flag over-confident phrasing.

    Stands in for a real fact-checking API; returns the standard
    {"valid": bool, "message": str} verdict shape.
    """
    overconfident = ("肯定是", "绝对是", "100%")
    # First matching phrase wins, in declaration order.
    hit = next((phrase for phrase in overconfident if phrase in text), None)
    if hit is not None:
        return {
            "valid": False,
            "message": f"发现过于绝对的表述: '{hit}'"
        }
    return {"valid": True, "message": "通过"}
# 使用示例
# critic = CRITICAgent(
# llm=openai_client,
# verification_tools={"fact_check": fact_check_tool}
# )
# result = critic.generate_and_verify("解释量子力学")

## 4. Tree of Thoughts(思维树)
思维树通过探索多条推理路径,选择最优解。
from typing import List
from dataclasses import dataclass
@dataclass
class Thought:
    """A node in the thought tree."""
    content: str               # the thought text produced by the LLM
    score: float               # promise score (prompted to be 0-1); 0.0 until evaluated
    children: List['Thought']  # expansions generated from this thought
class TreeOfThoughts:
    """Tree-of-Thoughts search: breadth-first expansion of LLM-scored reasoning steps."""

    def __init__(self, llm, branching_factor: int = 3, max_depth: int = 3):
        self.llm = llm
        self.branching_factor = branching_factor  # thoughts generated per expansion
        self.max_depth = max_depth                # BFS depth limit

    def solve(self, problem: str) -> str:
        """Search for a solution path to `problem` and render it."""
        # Seed the frontier with initial thoughts.
        thoughts = self._generate_thoughts(problem, None, self.branching_factor)
        # Breadth-first search over the thought tree.
        for depth in range(self.max_depth):
            next_thoughts = []
            for thought in thoughts:
                thought.score = self._evaluate_thought(thought, problem)
                # Only promising thoughts (score > 0.5) get expanded.
                if thought.score > 0.5:
                    new_thoughts = self._generate_thoughts(
                        problem,
                        thought,
                        self.branching_factor
                    )
                    thought.children = new_thoughts
                    next_thoughts.extend(new_thoughts)
            thoughts = next_thoughts
            if not thoughts:
                break
        # Bug fix: the final frontier was never scored, so the "best" pick
        # was arbitrary. Score any still-unscored thought before selecting.
        for thought in thoughts:
            if thought.score == 0.0:
                thought.score = self._evaluate_thought(thought, problem)
        best_path = self._find_best_path(thoughts)
        return self._generate_solution(problem, best_path)

    def _generate_thoughts(self, problem: str, parent: Thought, n: int) -> List[Thought]:
        """Generate up to `n` candidate next thoughts (one per non-empty response line)."""
        context = f"问题:{problem}"
        if parent:
            context += f"\n当前思路:{parent.content}"
        prompt = f"""
{context}
生成 {n} 个可能的下一步思考方向:
"""
        response = self.llm.generate(prompt)
        # Each non-empty line of the response becomes an unscored Thought.
        thoughts = []
        for line in response.split("\n"):
            if line.strip():
                thoughts.append(Thought(
                    content=line.strip(),
                    score=0.0,
                    children=[]
                ))
        return thoughts[:n]

    def _evaluate_thought(self, thought: Thought, problem: str) -> float:
        """Score a thought's promise; falls back to 0.5 on unparseable output."""
        prompt = f"""
问题:{problem}
思考:{thought.content}
这个思考方向有多大可能解决问题?给出 0-1 的分数。
只返回数字。
"""
        try:
            # strip() tolerates whitespace/newlines around the number.
            score = float(self.llm.generate(prompt).strip())
        except (ValueError, TypeError):
            # Narrowed from a bare except: only parse failures fall back.
            return 0.5
        # Clamp: the model may ignore the 0-1 instruction.
        return max(0.0, min(1.0, score))

    def _find_best_path(self, thoughts: List[Thought]) -> List[str]:
        """Pick the highest-scoring frontier thought (simplified single-step path)."""
        if not thoughts:
            return []
        best = max(thoughts, key=lambda t: t.score)
        return [best.content]

    def _generate_solution(self, problem: str, path: List[str]) -> str:
        """Render the chosen path as the final answer."""
        return f"解决路径:{' → '.join(path)}"
# 使用示例
# tot = TreeOfThoughts(llm=openai_client)
# result = tot.solve("如何设计一个高效的内存缓存系统?")

## 反思机制对比
| 方法 | 核心思想 | 优势 | 劣势 |
|---|---|---|---|
| Self-Correction | 检查并修正 | 简单直接 | 可能陷入循环 |
| Reflexion | 从失败学习 | 长期改进 | 需要多次尝试 |
| CRITIC | 工具验证 | 客观准确 | 依赖工具质量 |
| Tree of Thoughts | 多路径探索 | 全面考虑 | 计算成本高 |
小结
- 反思机制让智能体能够自我纠错和持续改进
- Self-Correction 是最基础的反思,检查并修正输出
- Reflexion 从失败中学习,积累经验教训
- CRITIC 引入外部工具验证,提高准确性
- Tree of Thoughts 探索多条路径,选择最优解
下一篇
多智能体协作 - 学习多智能体系统的设计与协作