初始化
This commit is contained in:
35
server/speech.py
Normal file
35
server/speech.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import base64
|
||||
|
||||
import edge_tts
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
from . import config
|
||||
|
||||
|
||||
class SpeechService:
    """Speech recognition and speech synthesis helpers.

    Recognition goes through a ``WhisperModel`` instance created once in
    ``__init__``; synthesis goes through ``edge_tts.Communicate``. All
    tunables (model name, device, compute type, beam size, language and
    TTS voice) are read from the package's ``config`` module.
    """

    def __init__(self) -> None:
        """Create the Whisper model configured in ``config``.

        Progress is reported on stdout before and after the model is
        constructed.
        """
        print("⏳ 正在加载本地语音识别模型 (首次启动可能需要下载)...")
        self._whisper_model = WhisperModel(
            config.WHISPER_MODEL_NAME,
            device=config.WHISPER_DEVICE,
            compute_type=config.WHISPER_COMPUTE_TYPE,
        )
        print("✅ 本地语音模型加载完毕!")

    def transcribe(self, audio_path: str) -> str:
        """Transcribe the audio file at ``audio_path`` and return its text.

        Args:
            audio_path: Path of the audio file handed to the Whisper model.

        Returns:
            The ``text`` of every returned segment, concatenated in order
            with no separator (an empty string when there are no segments).
        """
        # The second element of the returned pair is not needed here.
        segments, _ = self._whisper_model.transcribe(
            audio_path,
            beam_size=config.WHISPER_BEAM_SIZE,
            language=config.WHISPER_LANGUAGE,
        )
        return "".join(segment.text for segment in segments)

    async def synthesize_audio_data_url(self, text: str) -> str:
        """Synthesize ``text`` to speech and return it as a base64 data URL.

        Args:
            text: The text to speak, using the voice ``config.TTS_VOICE``.

        Returns:
            A ``data:audio/mp3;base64,...`` URL holding all audio chunks
            produced by the stream, in the order they were received.
        """
        communicate = edge_tts.Communicate(text, config.TTS_VOICE)
        # Collect the chunks and join once: repeated ``bytes +=`` re-copies
        # the whole buffer for every chunk. Non-audio chunks are skipped.
        audio_chunks = [
            chunk["data"]
            async for chunk in communicate.stream()
            if chunk["type"] == "audio"
        ]
        return self._to_data_url(b"".join(audio_chunks))

    @staticmethod
    def _to_data_url(audio_data: bytes) -> str:
        """Wrap raw audio bytes in a base64 ``data:audio/mp3`` URL."""
        # NOTE(review): the MIME prefix is kept exactly as before because
        # consumers may depend on it; ``audio/mpeg`` is the registered
        # type for MP3 -- confirm before changing.
        audio_b64 = base64.b64encode(audio_data).decode("utf-8")
        return f"data:audio/mp3;base64,{audio_b64}"
|
||||
Reference in New Issue
Block a user