# face_agent/brain.py

import sys
from typing import Annotated

from autogen_agentchat.agents import AssistantAgent
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_ext.models.openai import _openai_client as openai_client_module
from autogen_ext.tools.mcp import StdioServerParams, mcp_server_tools

from config import MODEL_API_KEY, MODEL_BASE_URL, MODEL_NAME


def patch_autogen_tool_schema_for_vllm() -> None:
    """Strip the ``strict`` key from tool schemas produced by AutoGen.

    vLLM currently emits a warning for the ``strict`` field of OpenAI tool
    definitions (even when it is ``False``).  This installs a minimal
    monkey-patch over ``openai_client_module.convert_tools``: the tool
    definitions are kept as-is and only that field is dropped.

    The wrapper is tagged with ``_strict_removed_patch`` so that calling this
    function again does not wrap the converter a second time.
    """
    upstream_convert = openai_client_module.convert_tools

    # Already wrapped by an earlier call: nothing left to do.
    if getattr(upstream_convert, "_strict_removed_patch", False):
        return

    def convert_tools_without_strict(tools):
        schemas = upstream_convert(tools)
        for schema in schemas:
            function_spec = schema.get("function")
            # Only dict-shaped function specs can carry the `strict` key.
            if not isinstance(function_spec, dict):
                continue
            function_spec.pop("strict", None)
        return schemas

    # Tag the wrapper first, then swap it in for the module-level converter.
    setattr(convert_tools_without_strict, "_strict_removed_patch", True)
    openai_client_module.convert_tools = convert_tools_without_strict


async def set_expression(
    expression: Annotated[str, "机器人要展示的表情，如：开心、疑惑、难过、待机"],
    intensity: Annotated[int, "表情强度 1-10"] = 5,
) -> str:
    """[模拟面部] 控制机器人头部的表情展示。"""
    # NOTE(review): the docstring and the Annotated strings above are kept
    # verbatim on purpose. This function is handed to the agent as a tool, and
    # AutoGen presumably uses them as the tool/parameter descriptions shown to
    # the model -- confirm before rewording them.
    #
    # Purpose: simulated face -- show the requested expression on the robot's
    # head by printing it to stdout.
    #
    # Args:
    #   expression: label of the expression to show (e.g. happy, puzzled, sad,
    #       idle -- see the Annotated hint).
    #   intensity: strength of the expression; documented range is 1-10.
    #       Values outside that range are clamped to the nearest bound so a
    #       bad model-supplied number never shows up as e.g. "42/10".
    #
    # Returns:
    #   A short confirmation message naming the expression now shown.
    level = max(1, min(10, intensity))
    print(f"\n>>>>>> 🤖 表情更新: 【{expression}】 (强度: {level}/10) <<<<<<")
    return f"已切换到【{expression}】表情。"


async def create_brain() -> tuple[AssistantAgent, OpenAIChatCompletionClient]:
    """Build the ``RobotBrain`` assistant agent and its model client.

    Steps, in order: apply the vLLM tool-schema patch, load the tools exposed
    by the MCP server, create the OpenAI-compatible chat client from the
    ``config`` values, and assemble the agent with ``set_expression`` plus the
    retained MCP tools.

    Returns:
        A ``(agent, model_client)`` tuple.
    """
    patch_autogen_tool_schema_for_vllm()

    # The MCP server is described as a stdio process: the current interpreter
    # running `robot_mcp_server.py` (a relative path as written).
    launch_params = StdioServerParams(
        command=sys.executable,
        args=["robot_mcp_server.py"],
    )
    discovered_tools = await mcp_server_tools(launch_params)

    # Keep every MCP tool except the profile reader; per the original note,
    # reading the user profile is handled directly by the Python layer.
    excluded_tool = "get_user_profile"
    agent_tools = [
        tool
        for tool in discovered_tools
        if getattr(tool, "name", "") != excluded_tool
    ]

    capabilities = {
        "vision": True,
        "function_calling": True,
        "json_output": True,
        # structured_output stays off so that no `strict` field is sent to
        # vLLM (which would trigger its warning).
        "structured_output": False,
        "family": "unknown",
        "context_length": 32000,
    }
    model_client = OpenAIChatCompletionClient(
        model=MODEL_NAME,
        base_url=MODEL_BASE_URL,
        api_key=MODEL_API_KEY,
        model_info=capabilities,
    )

    # Runtime prompt: reproduced verbatim, do not reformat or translate.
    system_prompt = """你是一个有情感的机器人伙伴。每条消息包含：
[用户档案] 历史记录（已由系统预加载）
[视觉状态] 当前摄像头观察
[用户说]   用户这句话

回应规则：
1. 同一轮内调用：set_expression（表情）+ 所有需要的查询工具（get_weather/get_location/web_search）+ 需要的用户信息工具（upsert_user/set_preference）
2. 工具执行完毕后，用简短、温暖、自然的语言直接回答用户——这段文字就是你的语音输出。
3. 不要说"我去查一下"之类的过渡语，直接完成任务并给出结果。"""

    brain = AssistantAgent(
        name="RobotBrain",
        model_client=model_client,
        tools=[set_expression, *agent_tools],
        reflect_on_tool_use=True,
        system_message=system_prompt,
    )
    return brain, model_client