初始化

2026-03-05 18:26:08 +08:00
commit 5073036034
22 changed files with 504 additions and 0 deletions
--- a/server/speech.py
+++ b/server/speech.py
@@ -0,0 +1,35 @@
+import base64
+
+import edge_tts
+from faster_whisper import WhisperModel
+
+from . import config
+
+
+class SpeechService:
+    def __init__(self) -> None:
+        print("⏳ 正在加载本地语音识别模型 (首次启动可能需要下载)...")
+        self._whisper_model = WhisperModel(
+            config.WHISPER_MODEL_NAME,
+            device=config.WHISPER_DEVICE,
+            compute_type=config.WHISPER_COMPUTE_TYPE,
+        )
+        print("✅ 本地语音模型加载完毕！")
+
+    def transcribe(self, audio_path: str) -> str:
+        segments, _ = self._whisper_model.transcribe(
+            audio_path,
+            beam_size=config.WHISPER_BEAM_SIZE,
+            language=config.WHISPER_LANGUAGE,
+        )
+        return "".join(segment.text for segment in segments)
+
+    async def synthesize_audio_data_url(self, text: str) -> str:
+        communicate = edge_tts.Communicate(text, config.TTS_VOICE)
+        audio_data = b""
+        async for chunk in communicate.stream():
+            if chunk["type"] == "audio":
+                audio_data += chunk["data"]
+
+        audio_b64 = base64.b64encode(audio_data).decode("utf-8")
+        return f"data:audio/mp3;base64,{audio_b64}"