import asyncio

from autogen_agentchat.messages import TextMessage
from autogen_core import CancellationToken

from brain import create_brain
from config import MODEL_BASE_URL, MODEL_CALL_TIMEOUT_SECONDS, MODEL_NAME
from profile_store import load_user_profile
from voice_io import (
    async_console_input,
    async_speak,
    find_audio_player,
    get_user_input,
    has_asr,
    has_tts,
)
|
||
|
||
|
||
async def start_simulated_head() -> None:
|
||
brain, model_client = await create_brain()
|
||
|
||
print("=" * 50)
|
||
print(" 机器人已上线!输入 'quit' 退出")
|
||
print(f" 模型: {MODEL_NAME}")
|
||
print(f" 服务: {MODEL_BASE_URL}")
|
||
print("=" * 50)
|
||
|
||
try:
|
||
user_name = (await async_console_input("请输入你的名字: ")).strip() or "用户"
|
||
except (EOFError, KeyboardInterrupt):
|
||
print("\n机器人下线,再见!")
|
||
return
|
||
|
||
asr_ready = has_asr()
|
||
tts_ready = has_tts()
|
||
mode_tip = "voice" if (asr_ready and tts_ready) else "text"
|
||
try:
|
||
io_mode = (
|
||
await async_console_input(f"输入模式 voice/text(默认 {mode_tip}): ")
|
||
).strip().lower() or mode_tip
|
||
except (EOFError, KeyboardInterrupt):
|
||
print("\n机器人下线,再见!")
|
||
return
|
||
if io_mode not in ("voice", "text"):
|
||
io_mode = mode_tip
|
||
|
||
if io_mode == "voice" and not asr_ready:
|
||
print(">>>>>> ⚠️ 未安装 speech_recognition,已降级为文本输入。 <<<<<<")
|
||
io_mode = "text"
|
||
if io_mode == "voice" and not tts_ready:
|
||
print(">>>>>> ⚠️ 未安装 edge-tts,将仅文本输出,不播报语音。 <<<<<<")
|
||
if io_mode == "voice" and tts_ready and find_audio_player() is None:
|
||
print(">>>>>> ⚠️ 未检测到播放器(ffplay/mpg123/afplay),将仅文本输出。 <<<<<<")
|
||
|
||
print(
|
||
"\n[语音依赖状态] "
|
||
f"ASR={'ok' if asr_ready else 'missing'}, "
|
||
f"TTS={'ok' if tts_ready else 'missing'}"
|
||
)
|
||
if not asr_ready or not tts_ready:
|
||
print("可安装: pip install SpeechRecognition pyaudio edge-tts")
|
||
|
||
visual_context = "视觉输入:用户坐在电脑前,表情平静,看着屏幕。"
|
||
print(f"\n[当前视觉状态]: {visual_context}")
|
||
print("提示:输入 'v <描述>' 可以更新视觉状态,例如: v 用户在笑\n")
|
||
|
||
history: list[TextMessage] = []
|
||
|
||
try:
|
||
while True:
|
||
try:
|
||
user_input = await get_user_input(io_mode)
|
||
except (EOFError, KeyboardInterrupt):
|
||
print("\n机器人下线,再见!")
|
||
break
|
||
|
||
if not user_input:
|
||
continue
|
||
if user_input.lower() in ("quit", "exit", "退出"):
|
||
print("机器人下线,再见!")
|
||
break
|
||
if user_input.lower().startswith("v "):
|
||
visual_context = f"视觉输入:{user_input[2:].strip()}。"
|
||
print(f"[视觉状态已更新]: {visual_context}\n")
|
||
continue
|
||
|
||
profile = load_user_profile(user_name)
|
||
combined_input = (
|
||
f"[用户档案]\n{profile}\n\n"
|
||
f"[视觉状态] {visual_context}\n"
|
||
f"[用户说] {user_input}"
|
||
)
|
||
history.append(TextMessage(content=combined_input, source="user"))
|
||
if len(history) > 6:
|
||
history = history[-6:]
|
||
|
||
try:
|
||
response = await asyncio.wait_for(
|
||
brain.on_messages(history, CancellationToken()),
|
||
timeout=MODEL_CALL_TIMEOUT_SECONDS,
|
||
)
|
||
except asyncio.TimeoutError:
|
||
print(">>>>>> ⚠️ 请求超时,请稍后重试或简化问题。 <<<<<<\n")
|
||
continue
|
||
except Exception as e:
|
||
print(f">>>>>> ⚠️ 本轮处理失败:{e} <<<<<<\n")
|
||
continue
|
||
|
||
speech = response.chat_message.content
|
||
if speech and isinstance(speech, str):
|
||
print(f">>>>>> 🔊 机器人说: {speech} <<<<<<\n")
|
||
if io_mode == "voice":
|
||
spoken_ok = await async_speak(speech)
|
||
if not spoken_ok:
|
||
print(">>>>>> ⚠️ TTS 不可用,当前仅文本输出。 <<<<<<\n")
|
||
|
||
history.append(response.chat_message)
|
||
finally:
|
||
model_client.close()
|
||
|
||
|
||
if __name__ == "__main__":
|
||
asyncio.run(start_simulated_head())
|
||
|