import asyncio
import json
import sqlite3
import sys
from pathlib import Path
from typing import Annotated

from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage
from autogen_core import CancellationToken
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_ext.models.openai import _openai_client as openai_client_module
from autogen_ext.tools.mcp import StdioServerParams, mcp_server_tools

BASE_DIR = Path(__file__).resolve().parent
USER_DB_PATH = BASE_DIR / "users.db"
MODEL_CALL_TIMEOUT_SECONDS = 45


# --- Part 1: local tools (simulated face + voice; real hardware later) ---
def _patch_autogen_tool_schema_for_vllm() -> None:
    """Remove the ``strict`` field from converted OpenAI tool definitions.

    vLLM currently warns about the ``strict`` field in OpenAI tool
    definitions (even when ``strict=False``).  This minimal monkey-patch
    wraps ``convert_tools`` so the tool definitions stay intact but the
    field is dropped, silencing the pointless warning.

    Idempotent: a marker attribute on the wrapper prevents double-patching
    when called more than once.
    """
    if getattr(openai_client_module.convert_tools, "_strict_removed_patch", False):
        return

    original_convert_tools = openai_client_module.convert_tools

    def convert_tools_without_strict(tools):
        converted = original_convert_tools(tools)
        for tool in converted:
            fn = tool.get("function")
            if isinstance(fn, dict):
                fn.pop("strict", None)
        return converted

    convert_tools_without_strict._strict_removed_patch = True
    openai_client_module.convert_tools = convert_tools_without_strict


async def _async_console_input(prompt: str) -> str:
    """Run the blocking ``input`` call in a thread so the event loop stays free."""
    return await asyncio.to_thread(input, prompt)


# NOTE: the docstring and Annotated descriptions below are sent to the model
# as the tool's description/parameter schema, so they stay in Chinese — they
# are runtime-visible strings, not documentation for maintainers.
async def set_expression(
    expression: Annotated[str, "机器人要展示的表情,如:开心、疑惑、难过、待机"],
    intensity: Annotated[int, "表情强度 1-10"] = 5,
) -> str:
    """[模拟面部] 控制机器人头部的表情展示。"""
    print(f"\n>>>>>> 🤖 表情更新: 【{expression}】 (强度: {intensity}/10) <<<<<<")
    return f"已切换到【{expression}】表情。"


# --- Part 2: read the user profile directly (bypasses MCP to avoid an
# extra round of tool calls) ---
def _load_user_profile(user_name: str, db_path: str | Path = USER_DB_PATH) -> str:
    """Load a user's profile straight from SQLite and return it as text.

    The caller injects the result into the message context, so the model
    never needs to invoke ``get_user_profile`` itself.  As a side effect,
    ``last_seen`` is bumped to "now" (committed by the connection context
    manager on success).

    Returns a JSON string with basic info and preferences on success, or a
    human-readable fallback message when the user is unknown or the
    database cannot be read.  Never raises.
    """
    try:
        with sqlite3.connect(db_path) as conn:
            conn.row_factory = sqlite3.Row
            user = conn.execute(
                "SELECT * FROM users WHERE name = ?", (user_name,)
            ).fetchone()
            if not user:
                return f"用户 {user_name} 尚无历史记录,这是第一次见面。"
            prefs = conn.execute(
                "SELECT category, content FROM preferences WHERE user_name = ?",
                (user_name,),
            ).fetchall()
            conn.execute(
                "UPDATE users SET last_seen = datetime('now') WHERE name = ?",
                (user_name,),
            )
            return json.dumps(
                {
                    "基本信息": {
                        "姓名": user["name"],
                        "年龄": user["age"],
                        "上次见面": user["last_seen"],
                    },
                    "偏好习惯": {p["category"]: p["content"] for p in prefs},
                },
                ensure_ascii=False,
            )
    except Exception as e:
        # Best-effort: a missing/corrupt DB degrades to "first meeting"
        # rather than crashing the interaction loop.
        return f"档案读取失败({e}),当作第一次见面。"


# --- Part 3: boot the brain ---
async def start_simulated_head():
    """Wire up the model client, MCP tools and agent, then run the REPL loop."""
    _patch_autogen_tool_schema_for_vllm()

    # Connect to the MCP server; only the write-style tools are kept
    # (profile reads are handled directly in Python above).
    server_params = StdioServerParams(
        command=sys.executable,
        args=["robot_mcp_server.py"],
    )
    all_mcp_tools = await mcp_server_tools(server_params)
    # Filter out get_user_profile — the model never needs to call it.
    mcp_tools = [
        t for t in all_mcp_tools if getattr(t, "name", "") != "get_user_profile"
    ]

    model_client = OpenAIChatCompletionClient(
        model="Qwen/Qwen3-VL-8B-Instruct",
        base_url="http://localhost:8000/v1",
        api_key="EMPTY",
        model_info={
            "vision": True,
            "function_calling": True,
            "json_output": True,
            # structured_output disabled to avoid sending the `strict`
            # field that vLLM warns about.
            "structured_output": False,
            "family": "unknown",
            "context_length": 32000,
        },
    )

    brain = AssistantAgent(
        name="RobotBrain",
        model_client=model_client,
        tools=[set_expression] + mcp_tools,
        reflect_on_tool_use=True,
        system_message="""你是一个有情感的机器人伙伴。每条消息包含:
[用户档案] 历史记录(已由系统预加载)
[视觉状态] 当前摄像头观察
[用户说] 用户这句话
回应规则:
1. 同一轮内调用:set_expression(表情)+ 所有需要的查询工具(get_weather/get_location/web_search)+ 需要的用户信息工具(upsert_user/set_preference)
2. 工具执行完毕后,用简短、温暖、自然的语言直接回答用户——这段文字就是你的语音输出。
3. 不要说"我去查一下"之类的过渡语,直接完成任务并给出结果。""",
    )

    # --- Part 4: interaction loop ---
    print("=" * 50)
    print(" 机器人已上线!输入 'quit' 退出")
    print("=" * 50)

    try:
        user_name = (await _async_console_input("请输入你的名字: ")).strip() or "用户"
    except (EOFError, KeyboardInterrupt):
        print("\n机器人下线,再见!")
        return

    visual_context = "视觉输入:用户坐在电脑前,表情平静,看着屏幕。"
    print(f"\n[当前视觉状态]: {visual_context}")
    print("提示:输入 'v <描述>' 可以更新视觉状态,例如: v 用户在笑\n")

    history = []
    try:
        while True:
            try:
                user_input = (await _async_console_input("你说: ")).strip()
            except (EOFError, KeyboardInterrupt):
                print("\n机器人下线,再见!")
                break
            if not user_input:
                continue
            if user_input.lower() in ("quit", "exit", "退出"):
                print("机器人下线,再见!")
                break
            if user_input.lower().startswith("v "):
                visual_context = f"视觉输入:{user_input[2:].strip()}。"
                print(f"[视觉状态已更新]: {visual_context}\n")
                continue

            # Read the profile in Python and inject it into the message, so
            # the model never has to issue an extra tool call for it.
            profile = _load_user_profile(user_name)
            combined_input = (
                f"[用户档案]\n{profile}\n\n"
                f"[视觉状态] {visual_context}\n"
                f"[用户说] {user_input}"
            )
            history.append(TextMessage(content=combined_input, source="user"))

            # Keep only the last 6 messages (3 turns) to stay under the
            # token limit; the profile is re-injected from the DB every
            # turn, so long history is unnecessary.
            if len(history) > 6:
                history = history[-6:]

            try:
                response = await asyncio.wait_for(
                    brain.on_messages(history, CancellationToken()),
                    timeout=MODEL_CALL_TIMEOUT_SECONDS,
                )
            except asyncio.TimeoutError:
                print(">>>>>> ⚠️ 请求超时,请稍后重试或简化问题。 <<<<<<\n")
                continue
            except Exception as e:
                print(f">>>>>> ⚠️ 本轮处理失败:{e} <<<<<<\n")
                continue

            # The model's text reply is the speech output
            # (reflect_on_tool_use=True guarantees a TextMessage here).
            speech = response.chat_message.content
            if speech and isinstance(speech, str):
                print(f">>>>>> 🔊 机器人说: {speech} <<<<<<\n")

            # Only the final reply goes into history — inner_messages are
            # event objects that cannot be serialized back to the model.
            history.append(response.chat_message)
    finally:
        # BUGFIX: close() is a coroutine; without `await` the client was
        # never actually closed (and emitted a "never awaited" warning).
        await model_client.close()


if __name__ == "__main__":
    asyncio.run(start_simulated_head())