import asyncio

from autogen_agentchat.messages import TextMessage
from autogen_core import CancellationToken

from brain import create_brain
from config import MODEL_BASE_URL, MODEL_CALL_TIMEOUT_SECONDS, MODEL_NAME
from profile_store import load_user_profile
from voice_io import (
    async_console_input,
    async_speak,
    find_audio_player,
    get_user_input,
    has_asr,
    has_tts,
)

# Sliding-window size for the conversation history sent to the model.
# NOTE: trimming happens before the model call, so after appending the
# response the list may briefly hold one extra message — by design.
_MAX_HISTORY = 6


async def start_simulated_head() -> None:
    """Run the interactive "simulated head" chat loop on the console.

    Flow:
      1. Create the agent ("brain") and its model client.
      2. Ask for the user's name and preferred I/O mode (voice/text),
         degrading gracefully to text when ASR/TTS deps are missing.
      3. Loop: read input, merge it with the user profile and a mutable
         visual-context string, query the model (with a timeout), print
         and optionally speak the reply.

    Exits on 'quit'/'exit'/'退出', EOF, or Ctrl-C. Always closes the
    model client on the way out.
    """
    brain, model_client = await create_brain()

    print("=" * 50)
    print(" 机器人已上线!输入 'quit' 退出")
    print(f" 模型: {MODEL_NAME}")
    print(f" 服务: {MODEL_BASE_URL}")
    print("=" * 50)

    try:
        user_name = (await async_console_input("请输入你的名字: ")).strip() or "用户"
    except (EOFError, KeyboardInterrupt):
        print("\n机器人下线,再见!")
        return

    # Probe optional speech dependencies once; default mode follows them.
    asr_ready = has_asr()
    tts_ready = has_tts()
    mode_tip = "voice" if (asr_ready and tts_ready) else "text"
    try:
        io_mode = (
            await async_console_input(f"输入模式 voice/text(默认 {mode_tip}): ")
        ).strip().lower() or mode_tip
    except (EOFError, KeyboardInterrupt):
        print("\n机器人下线,再见!")
        return
    if io_mode not in ("voice", "text"):
        io_mode = mode_tip

    # Degrade voice mode when its prerequisites are missing: no ASR means
    # we cannot listen at all (fall back to text); no TTS or no audio
    # player means we can still listen but only print replies.
    if io_mode == "voice" and not asr_ready:
        print(">>>>>> ⚠️ 未安装 speech_recognition,已降级为文本输入。 <<<<<<")
        io_mode = "text"
    if io_mode == "voice" and not tts_ready:
        print(">>>>>> ⚠️ 未安装 edge-tts,将仅文本输出,不播报语音。 <<<<<<")
    if io_mode == "voice" and tts_ready and find_audio_player() is None:
        print(">>>>>> ⚠️ 未检测到播放器(ffplay/mpg123/afplay),将仅文本输出。 <<<<<<")

    print(
        "\n[语音依赖状态] "
        f"ASR={'ok' if asr_ready else 'missing'}, "
        f"TTS={'ok' if tts_ready else 'missing'}"
    )
    if not asr_ready or not tts_ready:
        print("可安装: pip install SpeechRecognition pyaudio edge-tts")

    # Mutable "what the robot sees" context, injected into every prompt;
    # the user can overwrite it at runtime via the 'v <desc>' command.
    visual_context = "视觉输入:用户坐在电脑前,表情平静,看着屏幕。"
    print(f"\n[当前视觉状态]: {visual_context}")
    print("提示:输入 'v <描述>' 可以更新视觉状态,例如: v 用户在笑\n")

    history: list[TextMessage] = []
    try:
        while True:
            try:
                user_input = await get_user_input(io_mode)
            except (EOFError, KeyboardInterrupt):
                print("\n机器人下线,再见!")
                break
            if not user_input:
                continue
            if user_input.lower() in ("quit", "exit", "退出"):
                print("机器人下线,再见!")
                break
            # 'v <desc>' updates the visual context instead of chatting.
            if user_input.lower().startswith("v "):
                visual_context = f"视觉输入:{user_input[2:].strip()}。"
                print(f"[视觉状态已更新]: {visual_context}\n")
                continue

            # Profile is reloaded every turn so external edits take effect.
            profile = load_user_profile(user_name)
            combined_input = (
                f"[用户档案]\n{profile}\n\n"
                f"[视觉状态] {visual_context}\n"
                f"[用户说] {user_input}"
            )
            history.append(TextMessage(content=combined_input, source="user"))
            if len(history) > _MAX_HISTORY:
                history = history[-_MAX_HISTORY:]

            try:
                response = await asyncio.wait_for(
                    brain.on_messages(history, CancellationToken()),
                    timeout=MODEL_CALL_TIMEOUT_SECONDS,
                )
            except asyncio.TimeoutError:
                print(">>>>>> ⚠️ 请求超时,请稍后重试或简化问题。 <<<<<<\n")
                continue
            except Exception as e:
                # Best-effort loop: report the failure and keep the session alive.
                print(f">>>>>> ⚠️ 本轮处理失败:{e} <<<<<<\n")
                continue

            speech = response.chat_message.content
            # Guard the type first: content may be a non-str payload
            # (e.g. multimodal), so check isinstance before truthiness.
            if isinstance(speech, str) and speech:
                print(f">>>>>> 🔊 机器人说: {speech} <<<<<<\n")
                if io_mode == "voice":
                    spoken_ok = await async_speak(speech)
                    if not spoken_ok:
                        print(">>>>>> ⚠️ TTS 不可用,当前仅文本输出。 <<<<<<\n")
            history.append(response.chat_message)
    finally:
        # BUG FIX: ChatCompletionClient.close() is a coroutine; the original
        # called it without await, leaking the client's HTTP session and
        # emitting a "coroutine was never awaited" warning.
        await model_client.close()


if __name__ == "__main__":
    asyncio.run(start_simulated_head())