框架调研 · 06
手搓 Agent:从原理到实现
框架是别人手搓的 Agent。理解了底层原理,框架才是透明的。
这一篇不依赖任何 Agent 框架,只用 LLM 的原始 API,从零实现一个支持 Tool Calling、多工具并发、循环执行的 Agent。
Agent 的本质
一个 Agent 就是一个循环:
# Conceptual pseudocode for the core agent loop (not runnable as-is).
while True:
    decision = llm(history + system_prompt)
    if decision.wants_tool:
        result = execute_tool(decision.tool_name, decision.args)
        history.append(result)
    else:
        return decision.text  # final answer: leave the loop
所有框架做的事,都是在这个循环上加抽象。
完整实现(约 200 行)
import json
import os
from typing import Any, Callable
from anthropic import Anthropic
# Shared API client; Agent.run sends its model requests through client.messages.create.
# NOTE(review): constructed with no arguments — presumably credentials come from the
# environment (e.g. ANTHROPIC_API_KEY); confirm against the SDK documentation.
client = Anthropic()
# ===== Tool registration system =====
class ToolRegistry:
    """Keep track of every callable tool: its schema and its implementation."""

    def __init__(self):
        # Both maps are keyed by tool name.
        self._tools: dict[str, dict] = {}  # name -> schema sent to the model
        self._functions: dict[str, Callable] = {}  # name -> Python callable

    def register(self, name: str, description: str, parameters: dict, func: Callable):
        """Store the schema and the implementation of one tool under ``name``.

        Registering the same name again overwrites the previous entry.
        """
        self._functions[name] = func
        self._tools[name] = dict(
            name=name,
            description=description,
            input_schema=parameters,
        )

    def tool(self, name: str, description: str, parameters: dict):
        """Return a decorator that registers the decorated function as a tool.

        The decorated function is handed back unchanged, so it stays directly
        callable under its own name.
        """

        def _bind(func: Callable):
            self.register(name, description, parameters, func)
            return func

        return _bind

    def schemas(self) -> list[dict]:
        """Return the schemas of all registered tools, in registration order."""
        return [*self._tools.values()]

    def call(self, name: str, args: dict) -> Any:
        """Invoke tool ``name`` with ``args`` expanded as keyword arguments.

        Failures are reported as strings rather than raised: an unknown name
        and any exception raised by the tool both come back as an error
        message, so the caller can pass the text on as the tool result.
        """
        try:
            handler = self._functions[name]
        except KeyError:
            return f"错误:工具 {name} 不存在"

        try:
            outcome = handler(**args)
        except Exception as exc:
            return f"工具执行失败:{exc}"
        return outcome
# Module-wide registry: the @registry.tool decorators below add tools to it, and
# Agent.run reads tool schemas from it and dispatches tool calls through it.
registry = ToolRegistry()
# ===== Tool definitions =====
@registry.tool(
    name="get_weather",
    description="获取指定城市的当前天气",
    parameters={
        "type": "object",
        "properties": {
            "city": {"type": "string", "description": "城市名称"},
        },
        "required": ["city"],
    },
)
def get_weather(city: str) -> str:
    """Return the canned weather report for ``city``.

    The data is a hard-coded table of three cities; any other city yields a
    "no data" message that echoes the requested name.
    """
    forecasts = {"北京": "晴天 25°C", "上海": "多云 22°C", "广州": "小雨 30°C"}
    if city in forecasts:
        return forecasts[city]
    return f"{city}:暂无天气数据"
@registry.tool(
    name="calculate",
    description="计算数学表达式",
    parameters={
        "type": "object",
        "properties": {
            "expression": {"type": "string", "description": "数学表达式,如 '2 + 3 * 4'"},
        },
        "required": ["expression"],
    },
)
def calculate(expression: str) -> str:
    """Evaluate a basic arithmetic expression and return the result as text.

    Supported syntax: integer and decimal literals, parentheses, the binary
    operators ``+ - * / //`` and unary ``+``/``-``.

    SECURITY NOTE: the previous implementation passed the text to ``eval()``
    after a character allow-list. The text comes from the model, i.e. it is
    untrusted, and the allow-list still admitted ``**`` (``9**9**9**9`` hangs
    the process and cannot be interrupted by ``except``), ``...``, call
    syntax such as ``1(2)`` and tuples such as ``1,000``. The expression is
    now parsed with ``ast`` and only the node types listed above are
    evaluated; everything else is rejected.

    Args:
        expression: The arithmetic expression to evaluate.

    Returns:
        ``str(result)`` on success; ``"不支持的表达式"`` when the text contains
        a disallowed character or unsupported syntax; ``"计算错误: <reason>"``
        when parsing or evaluation raises (syntax error, division by zero,
        non-string input, ...).
    """
    # Function-scope imports keep this block self-contained.
    import ast
    import operator

    class UnsupportedExpression(Exception):
        """Signals syntax outside the supported arithmetic subset."""

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
    }
    unary_ops = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }

    def evaluate(node):
        """Recursively compute the value of a whitelisted AST node."""
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        # Power, tuples, calls, Ellipsis, ... are deliberately not evaluated.
        raise UnsupportedExpression

    try:
        # Cheap first-line filter, kept so the same inputs as before are
        # rejected with the same message.
        allowed = set("0123456789+-*/()., ")
        if not all(c in allowed for c in expression):
            return "不支持的表达式"
        # compile()/ast.parse() do not strip leading whitespace the way
        # eval() does, so strip explicitly.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree.body))
    except UnsupportedExpression:
        return "不支持的表达式"
    except Exception as e:
        # Boundary of a tool call: report the failure as text for the model.
        return f"计算错误: {e}"
@registry.tool(
    name="search_knowledge",
    description="在知识库中搜索信息",
    parameters={
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "搜索关键词"},
        },
        "required": ["query"],
    },
)
def search_knowledge(query: str) -> str:
    """Look up ``query`` in a small in-memory knowledge base.

    Returns the entry of the first topic (in definition order) whose name
    occurs in the query, compared case-insensitively, or a "not found"
    message that echoes the query.
    """
    # Stand-in for a real knowledge-base search.
    knowledge_base = {
        "langchain": "LangChain 是最流行的 LLM 框架,提供 Chain、Agent、Tool 抽象。",
        "langgraph": "LangGraph 是基于图的 Agent 框架,支持状态管理和条件分支。",
        "agentscope": "AgentScope 是阿里巴巴开源的分布式多 Agent 框架。",
    }
    lowered_query = query.lower()
    first_hit = next(
        (
            answer
            for topic, answer in knowledge_base.items()
            if topic.lower() in lowered_query
        ),
        None,
    )
    if first_hit is None:
        return f"未找到关于 '{query}' 的相关信息"
    return first_hit
# ===== Agent core =====
class Agent:
    """Minimal tool-calling agent built directly on the Messages API.

    The agent keeps the conversation in ``self.history`` and, on each call to
    :meth:`run`, alternates between asking the model and executing the tools
    it requests until the model stops asking for tools or the iteration
    budget is exhausted.

    Args:
        system_prompt: System prompt sent with every model request.
        model: Model identifier passed to the API.
        max_iterations: Upper bound on model requests per :meth:`run` call.
        verbose: When true, progress is printed to stdout.
        client: Optional API client exposing ``messages.create(...)``. When
            omitted, the module-level ``client`` is used.
        tools: Optional registry exposing ``schemas()`` and
            ``call(name, args)``. When omitted, the module-level ``registry``
            is used.
    """

    def __init__(
        self,
        system_prompt: str,
        model: str = "claude-opus-4-5",
        max_iterations: int = 10,
        verbose: bool = True,
        client: Any = None,
        tools: Any = None,
    ):
        self.system_prompt = system_prompt
        self.model = model
        self.max_iterations = max_iterations
        self.verbose = verbose
        self.history: list[dict] = []
        # None means "fall back to the module-level object at call time".
        self._client = client
        self._tools = tools

    def _log(self, msg: str):
        """Print ``msg`` when verbose output is enabled."""
        if self.verbose:
            print(msg)

    def _execute_tool_calls(self, content, toolbox) -> list[dict]:
        """Run every ``tool_use`` block in ``content`` and build the result blocks."""
        tool_results = []
        for block in content:
            if block.type != "tool_use":
                continue
            self._log(f" → 调用工具: {block.name}({block.input})")
            result = toolbox.call(block.name, block.input)
            self._log(f" ← 结果: {result}")
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": str(result),
            })
        return tool_results

    def run(self, user_input: str) -> str:
        """Process one user message and return the assistant's final text.

        The message is appended to the history, then the model is queried up
        to ``max_iterations`` times. A ``tool_use`` stop reason triggers tool
        execution and another round. Any other stop reason (``end_turn``,
        ``max_tokens``, ...) ends the turn: re-sending an unchanged history
        could only reproduce the same response, so looping on it would just
        burn the iteration budget.

        Returns:
            The concatenated text blocks of the final response (possibly an
            empty string when the response has no text), or a fixed notice
            when the iteration budget runs out first.
        """
        llm = client if self._client is None else self._client
        toolbox = registry if self._tools is None else self._tools

        self.history.append({"role": "user", "content": user_input})
        for iteration in range(self.max_iterations):
            self._log(f"\n[迭代 {iteration + 1}]")
            response = llm.messages.create(
                model=self.model,
                max_tokens=2048,
                system=self.system_prompt,
                tools=toolbox.schemas(),
                messages=self.history,
            )

            if response.stop_reason == "tool_use":
                tool_results = self._execute_tool_calls(response.content, toolbox)
                # The assistant turn (with its tool_use blocks) must precede
                # the user turn that carries the matching tool results.
                self.history.append({"role": "assistant", "content": response.content})
                self.history.append({"role": "user", "content": tool_results})
                continue

            # Final response: keep all text blocks, not only the first one.
            answer = "".join(
                block.text for block in response.content if hasattr(block, "text")
            )
            if answer:
                # An empty assistant message is not recorded in the history.
                self.history.append({"role": "assistant", "content": answer})
            return answer

        return "已达到最大迭代次数,任务未完成"

    def reset(self):
        """Clear the conversation history."""
        self.history = []
# ===== Usage =====
if __name__ == "__main__":
    # Demo driver: ask two questions, starting each from an empty history and
    # printing a separator line before each run.
    agent = Agent(
        verbose=True,
        system_prompt="""你是一个智能助手,可以查询天气、做数学计算和搜索知识库。
根据用户需求,合理调用工具来完成任务。""",
    )
    separator = "=" * 40
    demo_questions = (
        # Test 1: a single tool is enough.
        "北京今天天气怎样?",
        # Test 2: several tools are needed for one request.
        "上海天气怎样?另外 15 * 8 + 6 等于多少?再帮我搜索一下 LangGraph 是什么。",
    )
    for position, question in enumerate(demo_questions):
        if position == 0:
            print(separator)
        else:
            # Later questions must not see the previous conversation.
            agent.reset()
            print("\n" + separator)
        result = agent.run(question)
        print(f"\n最终回答:{result}")
扩展:支持多轮对话
# Keep a single Agent alive and never call reset(), so every turn is appended
# to the same history and the conversation carries over.
agent = Agent(system_prompt="你是一个助手。", verbose=False)
print("开始对话(输入 quit 退出)")
# Read until the user types "quit" (case-insensitive); blank lines are ignored.
while (user_input := input("\n用户: ").strip()).lower() != "quit":
    if user_input:
        response = agent.run(user_input)
        print(f"助手: {response}")
扩展:并发工具调用
当 LLM 在一次回复中同时请求多个工具时,可以并发执行以提速。下面的函数返回与 Agent.run 中 tool_results 结构相同的列表,可直接替换其中逐个调用工具的 for 循环:
import concurrent.futures
def run_tools_concurrently(tool_calls):
    """Execute several tool calls on a thread pool.

    Every call is submitted to the pool first; the results are then collected
    in the order of ``tool_calls`` and wrapped as ``tool_result`` blocks keyed
    by each call's id.
    """
    pending = {}
    with concurrent.futures.ThreadPoolExecutor() as pool:
        for call in tool_calls:
            pending[call.id] = pool.submit(registry.call, call.name, call.input)

    # Leaving the ``with`` block waits for all submitted work to finish.
    results = []
    for call in tool_calls:
        outcome = pending[call.id].result()
        results.append({
            "type": "tool_result",
            "tool_use_id": call.id,
            "content": str(outcome),
        })
    return results
你真正学到了什么
手写一遍,会发现框架在背后做的事:
| 框架能力 | 对应的手写代码 |
|---|---|
| 工具注册 | ToolRegistry.register() |
| 工具调用循环 | 循环体内判断 stop_reason == "tool_use"(概念上即 while True) |
| 对话历史管理 | self.history.append(...) |
| 最大迭代保护 | for iteration in range(max_iterations) |
| 工具结果回传 | tool_results + self.history.append(...) |
LangChain、OpenAI Agents SDK、AgentScope 做的都是这些——只是加了更多配置、更多工具、更多 Agent 间通信机制。
什么时候手搓而不是用框架
- 学习目的:理解底层机制,看透框架
- 极简场景:工具少,流程固定,不需要框架的复杂性
- 生产可控:框架升级频繁,手写代码依赖稳定
- 性能敏感:去掉框架抽象层,减少不必要开销