初始化

This commit is contained in:
gouhanke
2026-03-05 18:26:08 +08:00
commit 5073036034
22 changed files with 504 additions and 0 deletions

35
server/speech.py Normal file
View File

@@ -0,0 +1,35 @@
import base64
import edge_tts
from faster_whisper import WhisperModel
from . import config
class SpeechService:
    """Speech-to-text (Whisper) and text-to-speech (edge-tts) helper.

    The Whisper model is loaded once when the service is constructed and
    reused by every ``transcribe`` call. All tunables (model name, device,
    compute type, beam size, language, TTS voice) are read from ``config``.
    """

    def __init__(self) -> None:
        """Load the Whisper model described by ``config``.

        Progress messages are printed before and after the model is built.
        """
        print("⏳ 正在加载本地语音识别模型 (首次启动可能需要下载)...")
        self._whisper_model = WhisperModel(
            config.WHISPER_MODEL_NAME,
            device=config.WHISPER_DEVICE,
            compute_type=config.WHISPER_COMPUTE_TYPE,
        )
        print("✅ 本地语音模型加载完毕!")

    def transcribe(self, audio_path: str) -> str:
        """Transcribe the audio file at *audio_path* and return its text.

        Args:
            audio_path: Path of the audio file handed to the Whisper model.

        Returns:
            The text of every recognized segment, concatenated in order with
            no separator.
        """
        # The second element of the result is not needed here.
        segments, _ = self._whisper_model.transcribe(
            audio_path,
            beam_size=config.WHISPER_BEAM_SIZE,
            language=config.WHISPER_LANGUAGE,
        )
        return "".join(segment.text for segment in segments)

    async def synthesize_audio_data_url(self, text: str) -> str:
        """Synthesize *text* to speech and return it as a base64 data URL.

        Args:
            text: The text to speak, using the voice in ``config.TTS_VOICE``.

        Returns:
            A ``data:audio/mp3;base64,...`` URL holding the synthesized audio.
        """
        communicate = edge_tts.Communicate(text, config.TTS_VOICE)
        # Collect the audio chunks and join once at the end: repeated
        # ``bytes +=`` re-copies the accumulated buffer on every chunk.
        # Non-audio stream events are skipped.
        audio_chunks = [
            chunk["data"]
            async for chunk in communicate.stream()
            if chunk["type"] == "audio"
        ]
        audio_b64 = base64.b64encode(b"".join(audio_chunks)).decode("utf-8")
        return f"data:audio/mp3;base64,{audio_b64}"