refactor: 模块解耦

2026-03-04 15:35:57 +08:00
parent c97ff111fa
commit 85bcbe4529
5 changed files with 298 additions and 268 deletions
--- a/voice_io.py
+++ b/voice_io.py
@@ -0,0 +1,128 @@
+import asyncio
+import shutil
+import subprocess
+import tempfile
+from pathlib import Path
+
+from config import ASR_LANGUAGE, TTS_VOICE
+
+try:
+    import speech_recognition as sr
+except ImportError:
+    sr = None
+
+try:
+    import edge_tts
+except ImportError:
+    edge_tts = None
+
+
+async def async_console_input(prompt: str) -> str:
+    """在线程中执行阻塞 input，避免阻塞事件循环。"""
+    return await asyncio.to_thread(input, prompt)
+
+
+def has_asr() -> bool:
+    return sr is not None
+
+
+def has_tts() -> bool:
+    return edge_tts is not None
+
+
+def find_audio_player() -> list[str] | None:
+    """查找可用播放器，优先 ffplay。"""
+    if shutil.which("ffplay"):
+        return ["ffplay", "-nodisp", "-autoexit", "-loglevel", "error"]
+    if shutil.which("mpg123"):
+        return ["mpg123", "-q"]
+    if shutil.which("afplay"):
+        return ["afplay"]
+    return None
+
+
+def _play_audio_file_blocking(audio_path: str, player_cmd: list[str]) -> bool:
+    try:
+        subprocess.run(
+            [*player_cmd, audio_path],
+            check=True,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+        return True
+    except Exception:
+        return False
+
+
+async def async_speak(text: str) -> bool:
+    """使用 edge-tts 生成 Yunxi 语音并播放。"""
+    if not text or edge_tts is None:
+        return False
+
+    player_cmd = find_audio_player()
+    if player_cmd is None:
+        return False
+
+    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
+        audio_path = fp.name
+    try:
+        communicate = edge_tts.Communicate(text=text, voice=TTS_VOICE)
+        await communicate.save(audio_path)
+        return await asyncio.to_thread(_play_audio_file_blocking, audio_path, player_cmd)
+    except Exception:
+        return False
+    finally:
+        try:
+            Path(audio_path).unlink(missing_ok=True)
+        except Exception:
+            pass
+
+
+def _listen_once_blocking(
+    language: str = ASR_LANGUAGE,
+    timeout: int = 8,
+    phrase_time_limit: int = 20,
+) -> str:
+    """阻塞式麦克风识别，返回识别文本。"""
+    if sr is None:
+        raise RuntimeError("缺少 speech_recognition 依赖")
+
+    recognizer = sr.Recognizer()
+    with sr.Microphone(sample_rate=16000) as source:
+        print(">>>>>> 🎤 请说话... <<<<<<")
+        recognizer.adjust_for_ambient_noise(source, duration=0.4)
+        audio = recognizer.listen(
+            source,
+            timeout=timeout,
+            phrase_time_limit=phrase_time_limit,
+        )
+    return recognizer.recognize_google(audio, language=language).strip()
+
+
+async def _async_listen_once() -> str:
+    return await asyncio.to_thread(_listen_once_blocking)
+
+
+async def get_user_input(io_mode: str) -> str:
+    """
+    统一用户输入入口：
+    - text: 纯文本输入
+    - voice: 回车后语音输入，也允许直接键入文字
+    """
+    if io_mode == "text":
+        return (await async_console_input("你说: ")).strip()
+
+    typed = (await async_console_input("你说(回车=语音, 直接输入=文本): ")).strip()
+    if typed:
+        return typed
+
+    try:
+        spoken = await _async_listen_once()
+    except Exception as e:
+        print(f">>>>>> ⚠️ 语音识别失败：{e} <<<<<<\n")
+        return ""
+
+    if spoken:
+        print(f"[语音识别]: {spoken}")
+    return spoken
+