Files
face_agent/main.py
2026-03-04 15:35:57 +08:00

125 lines
4.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import asyncio
from autogen_agentchat.messages import TextMessage
from autogen_core import CancellationToken
from brain import create_brain
from config import MODEL_BASE_URL, MODEL_CALL_TIMEOUT_SECONDS, MODEL_NAME
from profile_store import load_user_profile
from voice_io import (
async_console_input,
async_speak,
find_audio_player,
get_user_input,
has_asr,
has_tts,
)
async def _close_model_client(model_client) -> None:
    """Release the model client, awaiting ``close()`` when it is asynchronous.

    Calling an ``async def close()`` without ``await`` only creates a coroutine
    object and never runs it, so the result is awaited whenever it is
    awaitable. A plain synchronous ``close()`` keeps working unchanged.
    """
    import inspect  # function-scope import: leaves the file's import block untouched

    outcome = model_client.close()
    if inspect.isawaitable(outcome):
        await outcome


def _settle_io_mode(
    requested: str, fallback: str, asr_ready: bool, tts_ready: bool
) -> str:
    """Validate the requested I/O mode, print dependency warnings, return the mode in effect.

    Args:
        requested: Mode typed by the user (already stripped and lower-cased).
        fallback: Mode used when ``requested`` is neither "voice" nor "text".
        asr_ready: Result of ``has_asr()``.
        tts_ready: Result of ``has_tts()``.

    Returns:
        "voice" or "text". Voice is downgraded to text when ASR is missing;
        a missing TTS backend or audio player only produces a warning.
    """
    io_mode = requested if requested in ("voice", "text") else fallback

    if io_mode == "voice" and not asr_ready:
        print(">>>>>> ⚠️ 未安装 speech_recognition已降级为文本输入。 <<<<<<")
        io_mode = "text"
    if io_mode == "voice" and not tts_ready:
        print(">>>>>> ⚠️ 未安装 edge-tts将仅文本输出不播报语音。 <<<<<<")
    if io_mode == "voice" and tts_ready and find_audio_player() is None:
        print(">>>>>> ⚠️ 未检测到播放器(ffplay/mpg123/afplay),将仅文本输出。 <<<<<<")

    print(
        "\n[语音依赖状态] "
        f"ASR={'ok' if asr_ready else 'missing'}, "
        f"TTS={'ok' if tts_ready else 'missing'}"
    )
    if not asr_ready or not tts_ready:
        print("可安装: pip install SpeechRecognition pyaudio edge-tts")
    return io_mode


async def start_simulated_head() -> None:
    """Run the interactive console loop for the simulated robot head.

    Creates the brain and model client, asks for the user's name and the
    preferred I/O mode, then repeatedly reads user input, sends it to the brain
    together with the user profile and the current visual-context string, and
    prints (and in voice mode speaks) the reply.

    Special inputs inside the loop:
        * ``quit`` / ``exit`` / ``退出`` end the session.
        * ``v <description>`` replaces the visual-context string.

    The model client is closed on every exit path, including an EOF/Ctrl-C at
    one of the start-up prompts.
    """
    brain, model_client = await create_brain()
    # Everything after client creation lives inside try/finally so that the
    # early returns below cannot leak the client.
    try:
        print("=" * 50)
        print(" 机器人已上线!输入 'quit' 退出")
        print(f" 模型: {MODEL_NAME}")
        print(f" 服务: {MODEL_BASE_URL}")
        print("=" * 50)

        try:
            user_name = (await async_console_input("请输入你的名字: ")).strip() or "用户"
        except (EOFError, KeyboardInterrupt):
            print("\n机器人下线,再见!")
            return

        asr_ready = has_asr()
        tts_ready = has_tts()
        # Voice is only suggested as the default when both directions work.
        mode_tip = "voice" if (asr_ready and tts_ready) else "text"
        try:
            requested_mode = (
                await async_console_input(f"输入模式 voice/text默认 {mode_tip}: ")
            ).strip().lower() or mode_tip
        except (EOFError, KeyboardInterrupt):
            print("\n机器人下线,再见!")
            return
        io_mode = _settle_io_mode(requested_mode, mode_tip, asr_ready, tts_ready)

        visual_context = "视觉输入:用户坐在电脑前,表情平静,看着屏幕。"
        print(f"\n[当前视觉状态]: {visual_context}")
        print("提示:输入 'v <描述>' 可以更新视觉状态,例如: v 用户在笑\n")

        history: list[TextMessage] = []
        while True:
            try:
                user_input = await get_user_input(io_mode)
            except (EOFError, KeyboardInterrupt):
                print("\n机器人下线,再见!")
                break
            if not user_input:
                continue

            command = user_input.lower()
            if command in ("quit", "exit", "退出"):
                print("机器人下线,再见!")
                break
            if command.startswith("v "):
                visual_context = f"视觉输入:{user_input[2:].strip()}"
                print(f"[视觉状态已更新]: {visual_context}\n")
                continue

            # The profile is reloaded every turn and embedded in the prompt.
            profile = load_user_profile(user_name)
            combined_input = (
                f"[用户档案]\n{profile}\n\n"
                f"[视觉状态] {visual_context}\n"
                f"[用户说] {user_input}"
            )
            history.append(TextMessage(content=combined_input, source="user"))
            # Keep only the six most recent messages before calling the model.
            if len(history) > 6:
                history = history[-6:]

            # NOTE(review): the whole trimmed history is re-sent on every turn.
            # If the agent built by create_brain keeps its own conversation
            # context, earlier turns end up duplicated — confirm against
            # create_brain.
            try:
                response = await asyncio.wait_for(
                    brain.on_messages(history, CancellationToken()),
                    timeout=MODEL_CALL_TIMEOUT_SECONDS,
                )
            except asyncio.TimeoutError:
                print(">>>>>> ⚠️ 请求超时,请稍后重试或简化问题。 <<<<<<\n")
                continue
            except Exception as e:
                # Loop-level boundary: report the failure and keep the session alive.
                print(f">>>>>> ⚠️ 本轮处理失败:{e} <<<<<<\n")
                continue

            speech = response.chat_message.content
            if speech and isinstance(speech, str):
                print(f">>>>>> 🔊 机器人说: {speech} <<<<<<\n")
                if io_mode == "voice":
                    spoken_ok = await async_speak(speech)
                    if not spoken_ok:
                        print(">>>>>> ⚠️ TTS 不可用,当前仅文本输出。 <<<<<<\n")
            history.append(response.chat_message)
    finally:
        await _close_model_client(model_client)
if __name__ == "__main__":
    try:
        asyncio.run(start_simulated_head())
    except KeyboardInterrupt:
        # On Ctrl-C asyncio.run() cancels the main task and then raises
        # KeyboardInterrupt to its caller, so handlers inside the coroutine do
        # not see it; say goodbye here instead of dumping a traceback.
        print("\n机器人下线,再见!")