初始化

2026-03-05 18:26:08 +08:00
commit 5073036034
22 changed files with 504 additions and 0 deletions
--- a/server/ws.py
+++ b/server/ws.py
@@ -0,0 +1,67 @@
+import base64
+import html
+import json
+import os
+import tempfile
+
+from fastapi import APIRouter, WebSocket, WebSocketDisconnect
+
+from .agent_service import AvatarAgentService
+from .speech import SpeechService
+from .ws_messages import send_audio_message, send_text_message
+
+# Router on which the WebSocket endpoint in this module is registered.
+router = APIRouter()
+# Service objects are instantiated once, when this module is imported, and the
+# same instances are used by every WebSocket connection handled here.
+speech_service = SpeechService()
+agent_service = AvatarAgentService()
+
+
+def _save_audio_to_temp_file(audio_b64: str) -> str:
+    """Decode base64 audio and write it to a new ``.webm`` temporary file.
+
+    The file is created with ``delete=False``, so it outlives this call and
+    the caller is responsible for removing it.
+
+    Args:
+        audio_b64: Base64-encoded audio payload.
+
+    Returns:
+        Path of the temporary file holding the decoded bytes.
+
+    Raises:
+        binascii.Error: If ``audio_b64`` is not valid base64 (a ``ValueError``
+            subclass). No file is created in that case.
+    """
+    # Decode first: a bad payload must fail before the file exists, otherwise
+    # the delete=False file would be left behind with nobody to remove it.
+    audio_bytes = base64.b64decode(audio_b64)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as temp_file:
+        temp_file.write(audio_bytes)
+        return temp_file.name
+
+
+@router.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket) -> None:
+    """Serve one voice-chat session over a WebSocket connection.
+
+    For every JSON message whose ``type`` is ``"user_input"`` the handler:
+
+    1. writes the attached base64 audio to a temporary file and transcribes it,
+    2. asks the agent for a reply to the transcript and the attached image,
+    3. sends the reply text, then a synthesized-speech data URL, to the client.
+
+    Messages of any other type are ignored, and malformed messages are logged
+    and skipped. The loop ends when the client disconnects or an unexpected
+    error is raised; both cases are logged and the handler returns.
+
+    Args:
+        websocket: The incoming WebSocket connection to accept and serve.
+    """
+    await websocket.accept()
+    print("✅ WebSocket 连接成功！准备就绪。")
+
+    try:
+        while True:
+            message_text = await websocket.receive_text()
+
+            # Validate the frame on its own: one bad message (invalid JSON, a
+            # non-object payload, missing fields, broken base64) should be
+            # dropped instead of tearing down the whole session.
+            try:
+                data = json.loads(message_text)
+                if data.get("type") != "user_input":
+                    continue
+
+                # Keep only the part after the last comma, which drops a
+                # "data:...;base64," style prefix when one is present.
+                audio_b64 = data["audio"].split(",")[-1]
+                image_b64 = data["image"].split(",")[-1]
+
+                audio_path = _save_audio_to_temp_file(audio_b64)
+            except (ValueError, KeyError, AttributeError) as exc:
+                print(f"⚠️ 忽略无效消息: {exc}")
+                continue
+
+            try:
+                await send_text_message(websocket, "<i>[👂 正在辨识语音...]</i><br>")
+                # NOTE(review): transcribe() is called synchronously; if it
+                # blocks, it stalls the event loop for the duration - confirm.
+                user_text = speech_service.transcribe(audio_path)
+            finally:
+                # The temp file is only needed for transcription.
+                if os.path.exists(audio_path):
+                    os.remove(audio_path)
+
+            if not user_text.strip():
+                await send_text_message(websocket, "<i>[没听清你说什么...]</i><br>")
+                continue
+
+            # The text messages are HTML fragments, so text that originates
+            # from the user or the model is escaped before being embedded.
+            await send_text_message(
+                websocket, f"<b>你说：</b>{html.escape(user_text)}<br>"
+            )
+            await send_text_message(websocket, "<i>[🧠 正在看图思考...]</i><br>")
+
+            ai_response = await agent_service.reply(user_text, image_b64)
+            await send_text_message(
+                websocket, f"<b>AI主播：</b>{html.escape(ai_response)}<br><br>"
+            )
+
+            await send_text_message(websocket, "<i>[🗣️ 正在生成语音...]</i><br>")
+            # Speech is synthesized from the raw reply, not the escaped one.
+            audio_data_url = await speech_service.synthesize_audio_data_url(ai_response)
+            await send_audio_message(websocket, audio_data_url)
+
+    except WebSocketDisconnect:
+        print("❌ 前端页面已关闭或断开连接")
+    except Exception as exc:
+        # Top-level boundary: log the failure and let the handler return.
+        print(f"⚠️ 发生错误: {exc}")