feat: 添加edge-tts
This commit is contained in:
87
main.py
87
main.py
@@ -1,8 +1,11 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
@@ -19,9 +22,9 @@ except ImportError:
|
||||
sr = None
|
||||
|
||||
try:
|
||||
import pyttsx3
|
||||
import edge_tts
|
||||
except ImportError:
|
||||
pyttsx3 = None
|
||||
edge_tts = None
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
USER_DB_PATH = BASE_DIR / "users.db"
|
||||
@@ -30,8 +33,7 @@ ASR_LANGUAGE = "zh-CN"
|
||||
MODEL_NAME = os.getenv("VLM_MODEL", "Qwen/Qwen3-VL-8B-Instruct")
|
||||
MODEL_BASE_URL = os.getenv("VLM_BASE_URL", "http://220.248.114.28:8000/v1")
|
||||
MODEL_API_KEY = os.getenv("VLM_API_KEY", "EMPTY")
|
||||
|
||||
_TTS_ENGINE = None
|
||||
TTS_VOICE = os.getenv("TTS_VOICE", "zh-CN-YunxiNeural")
|
||||
|
||||
# --- 第一部分:本地工具(面部 + 语音,以后接硬件)---
|
||||
|
||||
@@ -63,40 +65,53 @@ async def _async_console_input(prompt: str) -> str:
|
||||
return await asyncio.to_thread(input, prompt)
|
||||
|
||||
|
||||
def _init_tts_engine():
    """Create the offline pyttsx3 TTS engine on first use and cache it.

    Returns the cached engine, or None when pyttsx3 could not be imported.
    """
    global _TTS_ENGINE
    if _TTS_ENGINE is None and pyttsx3 is not None:
        tts = pyttsx3.init()
        # Prefer a Chinese voice. Voice ids differ between systems, so match
        # loosely on the id and name instead of an exact identifier.
        chinese_markers = ("zh", "chinese", "mandarin")
        for candidate in tts.getProperty("voices"):
            label = f"{candidate.id} {candidate.name}".lower()
            if any(marker in label for marker in chinese_markers):
                tts.setProperty("voice", candidate.id)
                break
        tts.setProperty("rate", 190)
        _TTS_ENGINE = tts
    return _TTS_ENGINE
|
||||
def _find_audio_player() -> list[str] | None:
|
||||
"""查找可用播放器,优先 ffplay。"""
|
||||
if shutil.which("ffplay"):
|
||||
return ["ffplay", "-nodisp", "-autoexit", "-loglevel", "error"]
|
||||
if shutil.which("mpg123"):
|
||||
return ["mpg123", "-q"]
|
||||
if shutil.which("afplay"):
|
||||
return ["afplay"]
|
||||
return None
|
||||
|
||||
|
||||
def _speak_blocking(text: str) -> bool:
|
||||
"""阻塞式语音播报。成功返回 True。"""
|
||||
if not text:
|
||||
def _play_audio_file_blocking(audio_path: str, player_cmd: list[str]) -> bool:
    """Play an audio file with an external player and wait for it to finish.

    The file path is appended to ``player_cmd`` and the player's stdout and
    stderr are discarded. Returns True when the player exits with status 0,
    and False on any failure (player missing, non-zero exit, and so on).
    """
    try:
        argv = list(player_cmd)
        argv.append(audio_path)
        subprocess.run(
            argv,
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            check=True,
        )
    except Exception:
        # Playback is best-effort: every failure is reported as False.
        return False
    else:
        return True
|
||||
engine = _init_tts_engine()
|
||||
if engine is None:
|
||||
return False
|
||||
engine.say(text)
|
||||
engine.runAndWait()
|
||||
return True
|
||||
|
||||
|
||||
async def _async_speak(text: str) -> bool:
    """Synthesize ``text`` with edge-tts and play it through a local player.

    The voice is taken from ``TTS_VOICE``. The audio is written to a temporary
    .mp3 file, played in a worker thread so the event loop is not blocked,
    and the file is removed afterwards.

    Returns True only when synthesis and playback both succeed. Returns False
    when ``text`` is empty, edge-tts is not installed, no audio player is
    found, or synthesis/playback fails.
    """
    if not text or edge_tts is None:
        return False

    player_cmd = _find_audio_player()
    if player_cmd is None:
        return False

    # Reserve a temp file name and close the handle straight away, so that
    # edge-tts and the external player can each open the path themselves.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
        audio_path = fp.name

    try:
        communicate = edge_tts.Communicate(text=text, voice=TTS_VOICE)
        await communicate.save(audio_path)
        return await asyncio.to_thread(
            _play_audio_file_blocking, audio_path, player_cmd
        )
    except Exception:
        # Speech output is best-effort: a synthesis or playback failure must
        # not break the conversation loop, so report it as False.
        return False
    finally:
        # delete=False above means the temp file must be removed manually.
        try:
            Path(audio_path).unlink(missing_ok=True)
        except OSError:
            pass
|
||||
|
||||
|
||||
def _listen_once_blocking(
|
||||
@@ -243,7 +258,7 @@ async def start_simulated_head():
|
||||
return
|
||||
|
||||
has_asr = sr is not None
|
||||
has_tts = pyttsx3 is not None
|
||||
has_tts = edge_tts is not None
|
||||
if has_asr and has_tts:
|
||||
mode_tip = "voice"
|
||||
else:
|
||||
@@ -264,7 +279,9 @@ async def start_simulated_head():
|
||||
print(">>>>>> ⚠️ 未安装 speech_recognition,已降级为文本输入。 <<<<<<")
|
||||
io_mode = "text"
|
||||
if io_mode == "voice" and not has_tts:
|
||||
print(">>>>>> ⚠️ 未安装 pyttsx3,将仅文本输出,不播报语音。 <<<<<<")
|
||||
print(">>>>>> ⚠️ 未安装 edge-tts,将仅文本输出,不播报语音。 <<<<<<")
|
||||
if io_mode == "voice" and has_tts and _find_audio_player() is None:
|
||||
print(">>>>>> ⚠️ 未检测到播放器(ffplay/mpg123/afplay),将仅文本输出。 <<<<<<")
|
||||
|
||||
print(
|
||||
"\n[语音依赖状态] "
|
||||
@@ -272,7 +289,7 @@ async def start_simulated_head():
|
||||
f"TTS={'ok' if has_tts else 'missing'}"
|
||||
)
|
||||
if not has_asr or not has_tts:
|
||||
print("可安装: pip install SpeechRecognition pyaudio pyttsx3")
|
||||
print("可安装: pip install SpeechRecognition pyaudio edge-tts")
|
||||
|
||||
visual_context = "视觉输入:用户坐在电脑前,表情平静,看着屏幕。"
|
||||
|
||||
|
||||
@@ -1,37 +1,8 @@
|
||||
archspec @ file:///home/conda/feedstock_root/build_artifacts/archspec_1737352602016/work
|
||||
boltons @ file:///home/conda/feedstock_root/build_artifacts/boltons_1749686179973/work
|
||||
Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1764016952863/work
|
||||
certifi @ file:///home/conda/feedstock_root/build_artifacts/certifi_1767500808759/work/certifi
|
||||
cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1761202850602/work
|
||||
charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1760437218288/work
|
||||
colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1733218098505/work
|
||||
conda @ file:///home/conda/feedstock_root/build_artifacts/conda_1770031335390/work/conda-src
|
||||
conda-libmamba-solver @ file:///home/conda/feedstock_root/build_artifacts/conda-libmamba-solver_1764081326783/work/src
|
||||
conda-package-handling @ file:///home/conda/feedstock_root/build_artifacts/conda-package-handling_1736345463896/work
|
||||
conda_package_streaming @ file:///home/conda/feedstock_root/build_artifacts/conda-package-streaming_1751548120229/work
|
||||
distro @ file:///home/conda/feedstock_root/build_artifacts/distro_1734729835256/work
|
||||
frozendict @ file:///home/conda/feedstock_root/build_artifacts/frozendict_1763082802787/work
|
||||
h2 @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_h2_1756364871/work
|
||||
hpack @ file:///home/conda/feedstock_root/build_artifacts/hpack_1737618293087/work
|
||||
hyperframe @ file:///home/conda/feedstock_root/build_artifacts/hyperframe_1737618333194/work
|
||||
idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1760286409563/work
|
||||
jsonpatch @ file:///home/conda/feedstock_root/build_artifacts/jsonpatch_1733814567314/work
|
||||
jsonpointer @ file:///home/conda/feedstock_root/build_artifacts/jsonpointer_1756754132407/work
|
||||
libmambapy @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_libmambapy_1764158555/work/libmambapy
|
||||
menuinst @ file:///home/conda/feedstock_root/build_artifacts/menuinst_1761299738838/work
|
||||
msgpack @ file:///home/conda/feedstock_root/build_artifacts/msgpack-python_1762503974934/work
|
||||
packaging @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_packaging_1745345660/work
|
||||
platformdirs @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_platformdirs_1759953252/work
|
||||
pluggy @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pluggy_1764896838/work
|
||||
pycosat @ file:///home/conda/feedstock_root/build_artifacts/pycosat_1757744639790/work
|
||||
pycparser @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pycparser_1733195786/work
|
||||
PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1733217236728/work
|
||||
requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1755614211359/work
|
||||
ruamel.yaml @ file:///home/conda/feedstock_root/build_artifacts/ruamel.yaml_1761160588389/work
|
||||
ruamel.yaml.clib @ file:///home/conda/feedstock_root/build_artifacts/ruamel.yaml.clib_1760564169582/work
|
||||
setuptools==80.9.0
|
||||
tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1735661334605/work
|
||||
truststore @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_truststore_1753886790/work
|
||||
urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1750271362675/work
|
||||
wheel==0.45.1
|
||||
zstandard==0.25.0
|
||||
autogen-agentchat>=0.4
|
||||
autogen-core>=0.4
|
||||
autogen-ext[openai,mcp]>=0.4
|
||||
mcp>=1.0
|
||||
requests>=2.31
|
||||
SpeechRecognition>=3.10
|
||||
PyAudio>=0.2.14
|
||||
edge-tts>=6.1
|
||||
|
||||
Reference in New Issue
Block a user