# face_agent/main.py

import asyncio
import inspect

from autogen_agentchat.messages import TextMessage
from autogen_core import CancellationToken

from brain import create_brain
from config import MODEL_BASE_URL, MODEL_CALL_TIMEOUT_SECONDS, MODEL_NAME
from profile_store import load_user_profile
from voice_io import (
    async_console_input,
    async_speak,
    find_audio_player,
    get_user_input,
    has_asr,
    has_tts,
)


# Size of the sliding window of messages passed to the agent on each turn.
_MAX_HISTORY_MESSAGES = 6


async def _close_model_client(model_client) -> None:
    """Close *model_client*, awaiting the result when ``close()`` is async.

    AutoGen declares ``ChatCompletionClient.close`` as a coroutine function;
    calling it without ``await`` only creates a coroutine object and never
    releases the client. A synchronous ``close()`` is tolerated as well,
    since the concrete client type returned by ``create_brain()`` is not
    visible from this module.
    """
    close_result = model_client.close()
    if inspect.isawaitable(close_result):
        await close_result


async def _run_session(brain) -> None:
    """Drive one interactive chat session against *brain*.

    Prompts for the user's name and the I/O mode, then loops reading input
    until the user quits or the input stream ends (EOF / Ctrl+C).

    Args:
        brain: Agent object returned by ``create_brain()``. It is called as
            ``brain.on_messages(messages, cancellation_token)`` and the
            result is expected to expose ``chat_message``.
    """
    print("=" * 50)
    print("  机器人已上线！输入 'quit' 退出")
    print(f"  模型: {MODEL_NAME}")
    print(f"  服务: {MODEL_BASE_URL}")
    print("=" * 50)

    try:
        user_name = (await async_console_input("请输入你的名字: ")).strip() or "用户"
    except (EOFError, KeyboardInterrupt):
        print("\n机器人下线，再见！")
        return

    asr_ready = has_asr()
    tts_ready = has_tts()
    # Voice is only offered as the default when both directions are usable.
    mode_tip = "voice" if (asr_ready and tts_ready) else "text"
    try:
        io_mode = (
            await async_console_input(f"输入模式 voice/text（默认 {mode_tip}）: ")
        ).strip().lower() or mode_tip
    except (EOFError, KeyboardInterrupt):
        print("\n机器人下线，再见！")
        return
    if io_mode not in ("voice", "text"):
        # Unrecognised answers silently fall back to the suggested default.
        io_mode = mode_tip

    if io_mode == "voice" and not asr_ready:
        print(">>>>>> ⚠️ 未安装 speech_recognition，已降级为文本输入。 <<<<<<")
        io_mode = "text"
    if io_mode == "voice" and not tts_ready:
        print(">>>>>> ⚠️ 未安装 edge-tts，将仅文本输出，不播报语音。 <<<<<<")
    if io_mode == "voice" and tts_ready and find_audio_player() is None:
        print(">>>>>> ⚠️ 未检测到播放器(ffplay/mpg123/afplay)，将仅文本输出。 <<<<<<")

    print(
        "\n[语音依赖状态] "
        f"ASR={'ok' if asr_ready else 'missing'}, "
        f"TTS={'ok' if tts_ready else 'missing'}"
    )
    if not asr_ready or not tts_ready:
        print("可安装: pip install SpeechRecognition pyaudio edge-tts")

    visual_context = "视觉输入：用户坐在电脑前，表情平静，看着屏幕。"
    print(f"\n[当前视觉状态]: {visual_context}")
    print("提示：输入 'v <描述>' 可以更新视觉状态，例如: v 用户在笑\n")

    history: list[TextMessage] = []

    while True:
        try:
            user_input = await get_user_input(io_mode)
        except (EOFError, KeyboardInterrupt):
            print("\n机器人下线，再见！")
            break

        if not user_input:
            continue
        if user_input.lower() in ("quit", "exit", "退出"):
            print("机器人下线，再见！")
            break
        if user_input.lower().startswith("v "):
            description = user_input[2:].strip()
            if not description:
                # An empty description would wipe the visual state; show the
                # usage hint again and keep the current state instead.
                print("提示：输入 'v <描述>' 可以更新视觉状态，例如: v 用户在笑\n")
                continue
            visual_context = f"视觉输入：{description}。"
            print(f"[视觉状态已更新]: {visual_context}\n")
            continue

        # The profile is re-read every turn and embedded in the message.
        profile = load_user_profile(user_name)
        combined_input = (
            f"[用户档案]\n{profile}\n\n"
            f"[视觉状态] {visual_context}\n"
            f"[用户说] {user_input}"
        )
        history.append(TextMessage(content=combined_input, source="user"))
        if len(history) > _MAX_HISTORY_MESSAGES:
            history = history[-_MAX_HISTORY_MESSAGES:]

        # NOTE(review): the whole window is re-sent on every turn. If `brain`
        # is a stateful AutoGen agent that keeps its own model context, it
        # expects only the *new* messages here — confirm against
        # create_brain().
        try:
            response = await asyncio.wait_for(
                brain.on_messages(history, CancellationToken()),
                timeout=MODEL_CALL_TIMEOUT_SECONDS,
            )
        except asyncio.TimeoutError:
            print(">>>>>> ⚠️ 请求超时，请稍后重试或简化问题。 <<<<<<\n")
            continue
        except Exception as e:
            # Top-level boundary of the chat loop: report and keep running.
            print(f">>>>>> ⚠️ 本轮处理失败：{e} <<<<<<\n")
            continue

        speech = response.chat_message.content
        if speech and isinstance(speech, str):
            print(f">>>>>> 🔊 机器人说: {speech} <<<<<<\n")
            if io_mode == "voice":
                spoken_ok = await async_speak(speech)
                if not spoken_ok:
                    print(">>>>>> ⚠️ TTS 不可用，当前仅文本输出。 <<<<<<\n")

        history.append(response.chat_message)


async def start_simulated_head() -> None:
    """Start the simulated robot head and run its interactive chat session.

    Builds the agent and model client with ``create_brain()``, runs the
    session, and always closes the model client afterwards — including when
    the user aborts at the name or mode prompt, or when the session raises.
    """
    brain, model_client = await create_brain()
    try:
        await _run_session(brain)
    finally:
        await _close_model_client(model_client)


if __name__ == "__main__":
    try:
        asyncio.run(start_simulated_head())
    except KeyboardInterrupt:
        # Ctrl+C while a model call or speech playback is in flight surfaces
        # here rather than at an input prompt; exit with the same goodbye
        # message instead of a traceback.
        print("\n机器人下线，再见！")