feat: 添加联网搜索、地理位置查询、天气查询
This commit is contained in:
203
main.py
203
main.py
@@ -1,12 +1,50 @@
|
||||
import asyncio
|
||||
import json
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
from autogen_agentchat.agents import AssistantAgent
|
||||
from autogen_agentchat.messages import TextMessage
|
||||
from autogen_core import CancellationToken
|
||||
from autogen_ext.models.openai import OpenAIChatCompletionClient
|
||||
from autogen_ext.models.openai import _openai_client as openai_client_module
|
||||
from autogen_ext.tools.mcp import StdioServerParams, mcp_server_tools
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
USER_DB_PATH = BASE_DIR / "users.db"
|
||||
MODEL_CALL_TIMEOUT_SECONDS = 45
|
||||
|
||||
# --- 第一部分:本地工具(面部 + 语音,以后接硬件)---
|
||||
|
||||
|
||||
def _patch_autogen_tool_schema_for_vllm() -> None:
|
||||
"""
|
||||
vLLM 目前会对 OpenAI 工具定义中的 `strict` 字段告警(即便 strict=False)。
|
||||
这里做最小补丁:保留工具定义,移除该字段,避免无意义警告。
|
||||
"""
|
||||
if getattr(openai_client_module.convert_tools, "_strict_removed_patch", False):
|
||||
return
|
||||
|
||||
original_convert_tools = openai_client_module.convert_tools
|
||||
|
||||
def convert_tools_without_strict(tools):
|
||||
converted = original_convert_tools(tools)
|
||||
for tool in converted:
|
||||
fn = tool.get("function")
|
||||
if isinstance(fn, dict):
|
||||
fn.pop("strict", None)
|
||||
return converted
|
||||
|
||||
convert_tools_without_strict._strict_removed_patch = True
|
||||
openai_client_module.convert_tools = convert_tools_without_strict
|
||||
|
||||
|
||||
async def _async_console_input(prompt: str) -> str:
|
||||
"""在线程中执行阻塞 input,避免阻塞事件循环。"""
|
||||
return await asyncio.to_thread(input, prompt)
|
||||
|
||||
# --- 第一部分:工具定义 ---
|
||||
# 以后接上机器人时,把 print 替换成串口指令 / TTS 调用即可
|
||||
|
||||
async def set_expression(
|
||||
expression: Annotated[str, "机器人要展示的表情,如:开心、疑惑、难过、待机"],
|
||||
@@ -16,15 +54,47 @@ async def set_expression(
|
||||
print(f"\n>>>>>> 🤖 表情更新: 【{expression}】 (强度: {intensity}/10) <<<<<<")
|
||||
return f"已切换到【{expression}】表情。"
|
||||
|
||||
async def speak(
|
||||
text: Annotated[str, "机器人要说的话,简短自然"]
|
||||
) -> str:
|
||||
"""[模拟 TTS] 机器人开口说话。以后接 TTS 引擎播放语音。"""
|
||||
print(f">>>>>> 🔊 机器人说: {text} <<<<<<\n")
|
||||
return "语音已播放。"
|
||||
# --- 第二部分:直接读取用户档案(不经过 MCP,避免多轮工具调用)---
|
||||
|
||||
def _load_user_profile(user_name: str, db_path: str | Path = USER_DB_PATH) -> str:
|
||||
"""在 Python 层直接读档案,注入到消息上下文,模型无需主动调用 get_user_profile。"""
|
||||
try:
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
conn.row_factory = sqlite3.Row
|
||||
user = conn.execute(
|
||||
"SELECT * FROM users WHERE name = ?", (user_name,)
|
||||
).fetchone()
|
||||
if not user:
|
||||
return f"用户 {user_name} 尚无历史记录,这是第一次见面。"
|
||||
prefs = conn.execute(
|
||||
"SELECT category, content FROM preferences WHERE user_name = ?",
|
||||
(user_name,)
|
||||
).fetchall()
|
||||
conn.execute(
|
||||
"UPDATE users SET last_seen = datetime('now') WHERE name = ?",
|
||||
(user_name,)
|
||||
)
|
||||
return json.dumps({
|
||||
"基本信息": {"姓名": user["name"], "年龄": user["age"], "上次见面": user["last_seen"]},
|
||||
"偏好习惯": {p["category"]: p["content"] for p in prefs},
|
||||
}, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
return f"档案读取失败({e}),当作第一次见面。"
|
||||
|
||||
# --- 第三部分:启动大脑 ---
|
||||
|
||||
# --- 第二部分:配置 VLM 大脑 ---
|
||||
async def start_simulated_head():
|
||||
_patch_autogen_tool_schema_for_vllm()
|
||||
|
||||
# 连接 MCP Server,只取写入类工具(读档案由 Python 层直接处理)
|
||||
server_params = StdioServerParams(
|
||||
command=sys.executable,
|
||||
args=["robot_mcp_server.py"],
|
||||
)
|
||||
all_mcp_tools = await mcp_server_tools(server_params)
|
||||
# 过滤掉 get_user_profile,模型无需主动调用它
|
||||
mcp_tools = [t for t in all_mcp_tools if getattr(t, "name", "") != "get_user_profile"]
|
||||
|
||||
model_client = OpenAIChatCompletionClient(
|
||||
model="Qwen/Qwen3-VL-8B-Instruct",
|
||||
base_url="http://localhost:8000/v1",
|
||||
@@ -33,70 +103,101 @@ async def start_simulated_head():
|
||||
"vision": True,
|
||||
"function_calling": True,
|
||||
"json_output": True,
|
||||
"structured_output": True,
|
||||
# 关闭 structured_output,避免向 vLLM 发送 strict 字段告警。
|
||||
"structured_output": False,
|
||||
"family": "unknown",
|
||||
"context_length": 4096,
|
||||
"context_length": 32000,
|
||||
}
|
||||
)
|
||||
|
||||
brain = AssistantAgent(
|
||||
name="RobotBrain",
|
||||
model_client=model_client,
|
||||
tools=[set_expression, speak],
|
||||
system_message="""你是一个有情感的机器人伙伴,能感知用户状态并进行语言交流。
|
||||
每次收到输入时,你必须:
|
||||
1. 综合视觉信息和用户说的话,理解当前情境和用户的情绪/需求。
|
||||
2. 调用 set_expression 展示合适的表情。
|
||||
3. 调用 speak 用简短、温暖、自然的语言回应用户。
|
||||
回应风格:像和老朋友聊天,不要太正式,有点个性和幽默感。"""
|
||||
tools=[set_expression] + mcp_tools,
|
||||
reflect_on_tool_use=True,
|
||||
system_message="""你是一个有情感的机器人伙伴。每条消息包含:
|
||||
[用户档案] 历史记录(已由系统预加载)
|
||||
[视觉状态] 当前摄像头观察
|
||||
[用户说] 用户这句话
|
||||
|
||||
回应规则:
|
||||
1. 同一轮内调用:set_expression(表情)+ 所有需要的查询工具(get_weather/get_location/web_search)+ 需要的用户信息工具(upsert_user/set_preference)
|
||||
2. 工具执行完毕后,用简短、温暖、自然的语言直接回答用户——这段文字就是你的语音输出。
|
||||
3. 不要说"我去查一下"之类的过渡语,直接完成任务并给出结果。""",
|
||||
)
|
||||
|
||||
# --- 第三部分:交互循环 ---
|
||||
# 模拟视觉上下文(真实项目中由摄像头实时提供)
|
||||
visual_context = "视觉输入:用户坐在电脑前,表情平静,看着屏幕。"
|
||||
|
||||
# --- 第四部分:交互循环 ---
|
||||
print("=" * 50)
|
||||
print(" 机器人已上线!输入 'quit' 退出")
|
||||
print("=" * 50)
|
||||
print(f"[当前视觉状态]: {visual_context}")
|
||||
|
||||
try:
|
||||
user_name = (await _async_console_input("请输入你的名字: ")).strip() or "用户"
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print("\n机器人下线,再见!")
|
||||
return
|
||||
visual_context = "视觉输入:用户坐在电脑前,表情平静,看着屏幕。"
|
||||
|
||||
print(f"\n[当前视觉状态]: {visual_context}")
|
||||
print("提示:输入 'v <描述>' 可以更新视觉状态,例如: v 用户在笑\n")
|
||||
|
||||
history = [] # 维护完整对话历史,让机器人记住上下文
|
||||
history = []
|
||||
|
||||
while True:
|
||||
try:
|
||||
user_input = input("你说: ").strip()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print("\n机器人下线,再见!")
|
||||
break
|
||||
try:
|
||||
while True:
|
||||
try:
|
||||
user_input = (await _async_console_input("你说: ")).strip()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print("\n机器人下线,再见!")
|
||||
break
|
||||
|
||||
if not user_input:
|
||||
continue
|
||||
if not user_input:
|
||||
continue
|
||||
|
||||
if user_input.lower() in ("quit", "exit", "退出"):
|
||||
await brain.on_messages(
|
||||
[*history, TextMessage(content=f"{visual_context}\n用户说:「再见」", source="user")],
|
||||
CancellationToken()
|
||||
if user_input.lower() in ("quit", "exit", "退出"):
|
||||
print("机器人下线,再见!")
|
||||
break
|
||||
|
||||
if user_input.lower().startswith("v "):
|
||||
visual_context = f"视觉输入:{user_input[2:].strip()}。"
|
||||
print(f"[视觉状态已更新]: {visual_context}\n")
|
||||
continue
|
||||
|
||||
# Python 层直接读取档案并注入消息,模型无需发起额外工具调用
|
||||
profile = _load_user_profile(user_name)
|
||||
combined_input = (
|
||||
f"[用户档案]\n{profile}\n\n"
|
||||
f"[视觉状态] {visual_context}\n"
|
||||
f"[用户说] {user_input}"
|
||||
)
|
||||
print("\n机器人下线,再见!")
|
||||
break
|
||||
history.append(TextMessage(content=combined_input, source="user"))
|
||||
|
||||
# 支持临时更新视觉状态
|
||||
if user_input.lower().startswith("v "):
|
||||
visual_context = f"视觉输入:{user_input[2:].strip()}。"
|
||||
print(f"[视觉状态已更新]: {visual_context}\n")
|
||||
continue
|
||||
# 只保留最近 6 条消息(3轮对话),防止超出 token 上限
|
||||
# 用户档案每轮从数据库重新注入,不依赖长历史
|
||||
if len(history) > 6:
|
||||
history = history[-6:]
|
||||
|
||||
# 合并视觉 + 语言输入
|
||||
combined_input = f"{visual_context}\n用户说:「{user_input}」"
|
||||
history.append(TextMessage(content=combined_input, source="user"))
|
||||
try:
|
||||
response = await asyncio.wait_for(
|
||||
brain.on_messages(history, CancellationToken()),
|
||||
timeout=MODEL_CALL_TIMEOUT_SECONDS,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
print(">>>>>> ⚠️ 请求超时,请稍后重试或简化问题。 <<<<<<\n")
|
||||
continue
|
||||
except Exception as e:
|
||||
print(f">>>>>> ⚠️ 本轮处理失败:{e} <<<<<<\n")
|
||||
continue
|
||||
|
||||
response = await brain.on_messages(history, CancellationToken())
|
||||
# 模型的文字回复就是语音输出(reflect_on_tool_use=True 保证这里是 TextMessage)
|
||||
speech = response.chat_message.content
|
||||
if speech and isinstance(speech, str):
|
||||
print(f">>>>>> 🔊 机器人说: {speech} <<<<<<\n")
|
||||
|
||||
# 把本轮所有消息(工具调用、工具结果、最终回复)加入历史
|
||||
if response.inner_messages:
|
||||
history.extend(response.inner_messages)
|
||||
history.append(response.chat_message)
|
||||
# 只把最终回复加入历史,inner_messages 是事件对象不能序列化回模型
|
||||
history.append(response.chat_message)
|
||||
finally:
|
||||
model_client.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(start_simulated_head())
|
||||
|
||||
Reference in New Issue
Block a user