feat: 添加edge-tts

This commit is contained in:
JiajunLI
2026-03-04 15:29:18 +08:00
parent d879aa1b2e
commit c97ff111fa
2 changed files with 60 additions and 72 deletions

85
main.py
View File

@@ -1,8 +1,11 @@
import asyncio import asyncio
import json import json
import os import os
import shutil
import sqlite3 import sqlite3
import subprocess
import sys import sys
import tempfile
from pathlib import Path from pathlib import Path
from typing import Annotated from typing import Annotated
@@ -19,9 +22,9 @@ except ImportError:
sr = None sr = None
try: try:
import pyttsx3 import edge_tts
except ImportError: except ImportError:
pyttsx3 = None edge_tts = None
BASE_DIR = Path(__file__).resolve().parent BASE_DIR = Path(__file__).resolve().parent
USER_DB_PATH = BASE_DIR / "users.db" USER_DB_PATH = BASE_DIR / "users.db"
@@ -30,8 +33,7 @@ ASR_LANGUAGE = "zh-CN"
MODEL_NAME = os.getenv("VLM_MODEL", "Qwen/Qwen3-VL-8B-Instruct") MODEL_NAME = os.getenv("VLM_MODEL", "Qwen/Qwen3-VL-8B-Instruct")
MODEL_BASE_URL = os.getenv("VLM_BASE_URL", "http://220.248.114.28:8000/v1") MODEL_BASE_URL = os.getenv("VLM_BASE_URL", "http://220.248.114.28:8000/v1")
MODEL_API_KEY = os.getenv("VLM_API_KEY", "EMPTY") MODEL_API_KEY = os.getenv("VLM_API_KEY", "EMPTY")
TTS_VOICE = os.getenv("TTS_VOICE", "zh-CN-YunxiNeural")
_TTS_ENGINE = None
# --- 第一部分:本地工具(面部 + 语音,以后接硬件)--- # --- 第一部分:本地工具(面部 + 语音,以后接硬件)---
@@ -63,40 +65,53 @@ async def _async_console_input(prompt: str) -> str:
return await asyncio.to_thread(input, prompt) return await asyncio.to_thread(input, prompt)
def _init_tts_engine(): def _find_audio_player() -> list[str] | None:
"""初始化离线 TTSpyttsx3""" """查找可用播放器,优先 ffplay"""
global _TTS_ENGINE if shutil.which("ffplay"):
if _TTS_ENGINE is not None: return ["ffplay", "-nodisp", "-autoexit", "-loglevel", "error"]
return _TTS_ENGINE if shutil.which("mpg123"):
if pyttsx3 is None: return ["mpg123", "-q"]
if shutil.which("afplay"):
return ["afplay"]
return None return None
engine = pyttsx3.init()
# 优先选择中文语音(不同系统 voice id 不同,这里做模糊匹配)
for voice in engine.getProperty("voices"):
voice_blob = f"{voice.id} {voice.name}".lower()
if "zh" in voice_blob or "chinese" in voice_blob or "mandarin" in voice_blob:
engine.setProperty("voice", voice.id)
break
engine.setProperty("rate", 190)
_TTS_ENGINE = engine
return _TTS_ENGINE
def _play_audio_file_blocking(audio_path: str, player_cmd: list[str]) -> bool:
def _speak_blocking(text: str) -> bool: """阻塞播放音频文件。"""
"""阻塞式语音播报。成功返回 True。""" try:
if not text: subprocess.run(
return False [*player_cmd, audio_path],
engine = _init_tts_engine() check=True,
if engine is None: stdout=subprocess.DEVNULL,
return False stderr=subprocess.DEVNULL,
engine.say(text) )
engine.runAndWait()
return True return True
except Exception:
return False
async def _async_speak(text: str) -> bool: async def _async_speak(text: str) -> bool:
return await asyncio.to_thread(_speak_blocking, text) """使用 edge-tts 生成 Yunxi 语音并播放。"""
if not text or edge_tts is None:
return False
player_cmd = _find_audio_player()
if player_cmd is None:
return False
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
audio_path = fp.name
try:
communicate = edge_tts.Communicate(text=text, voice=TTS_VOICE)
await communicate.save(audio_path)
return await asyncio.to_thread(_play_audio_file_blocking, audio_path, player_cmd)
except Exception:
return False
finally:
try:
Path(audio_path).unlink(missing_ok=True)
except Exception:
pass
def _listen_once_blocking( def _listen_once_blocking(
@@ -243,7 +258,7 @@ async def start_simulated_head():
return return
has_asr = sr is not None has_asr = sr is not None
has_tts = pyttsx3 is not None has_tts = edge_tts is not None
if has_asr and has_tts: if has_asr and has_tts:
mode_tip = "voice" mode_tip = "voice"
else: else:
@@ -264,7 +279,9 @@ async def start_simulated_head():
print(">>>>>> ⚠️ 未安装 speech_recognition已降级为文本输入。 <<<<<<") print(">>>>>> ⚠️ 未安装 speech_recognition已降级为文本输入。 <<<<<<")
io_mode = "text" io_mode = "text"
if io_mode == "voice" and not has_tts: if io_mode == "voice" and not has_tts:
print(">>>>>> ⚠️ 未安装 pyttsx3,将仅文本输出,不播报语音。 <<<<<<") print(">>>>>> ⚠️ 未安装 edge-tts将仅文本输出不播报语音。 <<<<<<")
if io_mode == "voice" and has_tts and _find_audio_player() is None:
print(">>>>>> ⚠️ 未检测到播放器(ffplay/mpg123/afplay),将仅文本输出。 <<<<<<")
print( print(
"\n[语音依赖状态] " "\n[语音依赖状态] "
@@ -272,7 +289,7 @@ async def start_simulated_head():
f"TTS={'ok' if has_tts else 'missing'}" f"TTS={'ok' if has_tts else 'missing'}"
) )
if not has_asr or not has_tts: if not has_asr or not has_tts:
print("可安装: pip install SpeechRecognition pyaudio pyttsx3") print("可安装: pip install SpeechRecognition pyaudio edge-tts")
visual_context = "视觉输入:用户坐在电脑前,表情平静,看着屏幕。" visual_context = "视觉输入:用户坐在电脑前,表情平静,看着屏幕。"

View File

@@ -1,37 +1,8 @@
archspec @ file:///home/conda/feedstock_root/build_artifacts/archspec_1737352602016/work autogen-agentchat>=0.4
boltons @ file:///home/conda/feedstock_root/build_artifacts/boltons_1749686179973/work autogen-core>=0.4
Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1764016952863/work autogen-ext[openai,mcp]>=0.4
certifi @ file:///home/conda/feedstock_root/build_artifacts/certifi_1767500808759/work/certifi mcp>=1.0
cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1761202850602/work requests>=2.31
charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1760437218288/work SpeechRecognition>=3.10
colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1733218098505/work PyAudio>=0.2.14
conda @ file:///home/conda/feedstock_root/build_artifacts/conda_1770031335390/work/conda-src edge-tts>=6.1
conda-libmamba-solver @ file:///home/conda/feedstock_root/build_artifacts/conda-libmamba-solver_1764081326783/work/src
conda-package-handling @ file:///home/conda/feedstock_root/build_artifacts/conda-package-handling_1736345463896/work
conda_package_streaming @ file:///home/conda/feedstock_root/build_artifacts/conda-package-streaming_1751548120229/work
distro @ file:///home/conda/feedstock_root/build_artifacts/distro_1734729835256/work
frozendict @ file:///home/conda/feedstock_root/build_artifacts/frozendict_1763082802787/work
h2 @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_h2_1756364871/work
hpack @ file:///home/conda/feedstock_root/build_artifacts/hpack_1737618293087/work
hyperframe @ file:///home/conda/feedstock_root/build_artifacts/hyperframe_1737618333194/work
idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1760286409563/work
jsonpatch @ file:///home/conda/feedstock_root/build_artifacts/jsonpatch_1733814567314/work
jsonpointer @ file:///home/conda/feedstock_root/build_artifacts/jsonpointer_1756754132407/work
libmambapy @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_libmambapy_1764158555/work/libmambapy
menuinst @ file:///home/conda/feedstock_root/build_artifacts/menuinst_1761299738838/work
msgpack @ file:///home/conda/feedstock_root/build_artifacts/msgpack-python_1762503974934/work
packaging @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_packaging_1745345660/work
platformdirs @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_platformdirs_1759953252/work
pluggy @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pluggy_1764896838/work
pycosat @ file:///home/conda/feedstock_root/build_artifacts/pycosat_1757744639790/work
pycparser @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pycparser_1733195786/work
PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1733217236728/work
requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1755614211359/work
ruamel.yaml @ file:///home/conda/feedstock_root/build_artifacts/ruamel.yaml_1761160588389/work
ruamel.yaml.clib @ file:///home/conda/feedstock_root/build_artifacts/ruamel.yaml.clib_1760564169582/work
setuptools==80.9.0
tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1735661334605/work
truststore @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_truststore_1753886790/work
urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1750271362675/work
wheel==0.45.1
zstandard==0.25.0