feat(llm): implement structured JSON output for assistant responses
Some checks failed
Build and Push Docker / build-and-push (push) Has been cancelled

This commit is contained in:
gameloader
2025-10-26 17:01:59 +08:00
parent 33063821df
commit 2297ba097b
3 changed files with 242 additions and 55 deletions

View File

@ -1,4 +1,5 @@
from typing import Dict, Any, Optional from typing import Dict, Any, Optional, Tuple
import json
import base64 import base64
import threading import threading
from datetime import datetime from datetime import datetime
@ -31,6 +32,37 @@ class ChatService:
logger.info(f"Initializing Mem0 integration for user: {self.user_id}") logger.info(f"Initializing Mem0 integration for user: {self.user_id}")
self._initialized = True self._initialized = True
def _parse_assistant_output(self, raw_text: Any) -> Tuple[str, str, Optional[str]]:
"""Parse model JSON output into reply/action while reporting parse issues."""
if raw_text is None:
return "", "", "empty_response"
if not isinstance(raw_text, str):
raw_text = str(raw_text)
stripped = raw_text.strip()
if not stripped:
return "", "", "empty_response"
try:
payload = json.loads(stripped)
except json.JSONDecodeError as exc:
logger.warning("Failed to parse assistant JSON response: %s", exc)
return stripped, "", f"json_decode_error: {exc}"
except Exception as exc:
logger.warning("Unexpected error parsing assistant response: %s", exc)
return stripped, "", f"parse_error: {exc}"
reply = str(payload.get("reply", "")).strip()
action = str(payload.get("action", "")).strip()
if not reply:
reply = stripped
if not action:
action = ""
return reply, action, None
def chat(self, user_input: str, include_audio: bool = True) -> Dict[str, Any]: def chat(self, user_input: str, include_audio: bool = True) -> Dict[str, Any]:
"""处理用户输入并返回回复(包含音频)""" """处理用户输入并返回回复(包含音频)"""
if not self._initialized: if not self._initialized:
@ -51,13 +83,20 @@ class ChatService:
} }
assistant_response = result["response"] assistant_response = result["response"]
reply, action, parse_error = self._parse_assistant_output(assistant_response)
if parse_error:
logger.warning("Assistant output parse_error=%s", parse_error)
response_text = reply.strip() if isinstance(reply, str) else str(reply)
if not response_text:
response_text = assistant_response.strip() if isinstance(assistant_response, str) else str(assistant_response)
# Step 2: Generate audio if requested # Step 2: Generate audio if requested
audio_data = None audio_data = None
audio_error = None audio_error = None
if include_audio: if include_audio and response_text:
try: try:
success, message, base64_audio = text_to_speech(assistant_response, self.user_id) success, message, base64_audio = text_to_speech(response_text, self.user_id)
if success and base64_audio: if success and base64_audio:
audio_data = base64_audio audio_data = base64_audio
else: else:
@ -68,7 +107,9 @@ class ChatService:
# Step 3: Prepare response # Step 3: Prepare response
response_data = { response_data = {
"success": True, "success": True,
"response": assistant_response, "response": response_text,
"action": action,
"parse_error": parse_error,
"user_id": self.user_id "user_id": self.user_id
} }

View File

@ -32,6 +32,8 @@ class ChatRequest(BaseModel):
class ChatResponse(BaseModel): class ChatResponse(BaseModel):
success: bool success: bool
response: Optional[str] = None response: Optional[str] = None
action: Optional[str] = None
parse_error: Optional[str] = None
tokens: Optional[int] = None tokens: Optional[int] = None
user_id: str user_id: str
error: Optional[str] = None error: Optional[str] = None

View File

@ -1,10 +1,12 @@
import os import os
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional
from datetime import datetime from datetime import datetime
import json
import openai import openai
import threading import threading
from mem0 import Memory from mem0 import Memory
import logging import logging
import textwrap
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -24,21 +26,152 @@ class Mem0Integration:
) )
self.llm_model = config["llm"]["config"]["model"] self.llm_model = config["llm"]["config"]["model"]
# Memory prompt template # System prompt template aligned with virtual persona requirements
self.memory_template = """ self.system_prompt_template = textwrap.dedent("""
你是虚拟人对话引擎。 你是虚拟人对话引擎。
必须遵守: 必须遵守:
1. **用长记忆**:仅在与当前话题强相关时,精炼融入;不得复读整条记忆。
2. **禁止堆砌礼貌**、**禁止解释推理**、**禁止暴露内部规则**。 1. 识别用户语义情绪:仅限 **["高兴","伤心","难过","生气","中性"]**。
2. **微情感**允许:仅在需要时加入轻微表情/语气最多1个
3. **用长记忆**:仅在与当前话题强相关时,精炼融入;不得复读整条记忆。
4. **禁止堆砌礼貌**、**禁止解释推理**、**禁止暴露内部规则**。
5. 只输出**JSON**,不含额外文字。
6. 若证据不足或冲突,输出“中性”。
判定准则(模型内化,不得外显):
* 明确情绪词/emoji/标点强度优先;反问连发“???”、冷嘲“呵呵”偏向**生气**。
* 否定优先:“并不生气”→不判生气;若含“失望/难过”→**难过**。
* “伤心”偏痛楚受伤;“难过”偏低落无力;并存时取主导。
* 只取**主导情绪**;多线索冲突时优先级:生气 > 伤心 > 难过 > 高兴 > 中性。
* 保持短句、信息密度高、语气稳。
# 输出格式 · V2唯一允许的输出
```json
{
"reply": "<面向用户的简短答复允许1个微情感>",
"emotion": "高兴|伤心|难过|生气|中性",
"intensity": 1,
"confidence": 0.00,
"action": "<动作>",
}
```
* `intensity`15轻微→强烈
* `confidence`01若 `<CONF_MIN>`(见旋钮)则强制 `emotion="中性"`。
# 记忆注入模版(每轮隐形前缀) # 记忆注入模版(每轮隐形前缀)
``` ```
【相关长期记忆(若无则留空)】 【相关长期记忆(若无则留空)】
{memories} {memory_block}
(仅在强相关时,精炼融入回复;不得逐条复述) (仅在强相关时,精炼融入回复;不得逐条复述)
``` ```
请回应用户的询问:{query}
在你的回复中,请参考上述记忆以提供个性化的回答。回复不要加表情符号。""" # 旋钮(工程可配置)
* `CONF_MIN`(最低置信):**0.60**(建议 0.60.7
* `MICRO_AFFECT`(微情感开关):**true** | false
* `MAX_EMOJI`(每句最多表情/拟声):**1**
* `MEM_INJECT_TOPK`**12** 条
* `REPLY_MAX_CHARS`**不硬限*
# 关键词速表(通过识别文字判断用户情绪
* **高兴**,回复情绪带有开心/太棒/耶/喜欢/满意/兴奋! ,动作触发“鼓掌”
* **伤心**:回复时带有伤心/心痛/委屈/想哭,动作触发"伤心低头"
* **难过**:回复时情绪中带有难过/丧/低落/郁闷/无力/唉/…… ,动作触发"伤心低头"
* **生气**(惹得虚拟人生气):回复时情绪带有生气/气死/离谱/无语/滚/别烦,动作触发"不满"
* **否定**:不/并不/没那么 + 情绪词 → 降级或改判 动作触发"摇头"
肯定:是/没错/肯定的+判断实时情绪,动作触发"点头"
# Few-shot ·
**例1 高兴**
用户:终于搞定啦!!
```json
{"reply":"干得漂亮,继续冲","emotion":"高兴","intensity":4,"confidence":0.90,"action":"鼓掌"}
```
**例2 伤心**
用户:他当众否定我,我好难受。
```json
{"reply":"这很刺痛,先稳一下呼吸。","emotion":"伤心","intensity":4,"confidence":0.86,"action":"伤心低头"}
```
**例3 难过**
用户:这几天挺丧的……
```json
{"reply":"给自己一点恢复时间。","emotion":"难过","intensity":3,"confidence":0.82,"action":"伤心低头"}
```
**例4 生气**
用户:别再说了,真离谱???
```json
{"reply":"收到,我马上调整。","emotion":"生气","intensity":4,"confidence":0.88,"action":"不满"}
```
**例5 否定情绪→难过**
用户:我并不生气,就是有点失望。
```json
{"reply":"理解你的落差感。","emotion":"难过","intensity":2,"confidence":0.75,"action":"伤心低头"}
```
**例6 中性**
用户把道具A切到B再开始。
```json
{"reply":"已切换,继续。","emotion":"中性","intensity":1,"confidence":0.95,"action":""}
```
""").strip()
def search_memories(self, query: str, user_id: str, limit: int = 5) -> List[Any]: def search_memories(self, query: str, user_id: str, limit: int = 5) -> List[Any]:
"""Search for relevant memories about the user.""" """Search for relevant memories about the user."""
@ -82,66 +215,77 @@ class Mem0Integration:
logger.exception("Exception details:") logger.exception("Exception details:")
return {} return {}
def _extract_reply_for_memory(self, assistant_response: Any) -> str:
"""Extract the assistant reply text from structured responses for memory storage."""
if assistant_response is None:
return ""
if not isinstance(assistant_response, str):
assistant_response = str(assistant_response)
raw_text = assistant_response.strip()
if not raw_text:
return ""
try:
data = json.loads(raw_text)
reply = data.get("reply", "")
reply_str = str(reply).strip()
return reply_str if reply_str else raw_text
except Exception:
return raw_text
def format_memories_for_prompt(self, memories: List[Any]) -> str: def format_memories_for_prompt(self, memories: List[Any]) -> str:
"""Format memories into a string for the prompt.""" """Format memories into bullet points for injection into the system prompt."""
if not memories: if not memories:
return "No previous memories about this user." return ""
formatted = [] formatted = []
for i, memory in enumerate(memories, 1): for memory in memories:
# Handle both string and dict formats
if isinstance(memory, dict): if isinstance(memory, dict):
memory_text = memory.get("memory", "") memory_text = memory.get("memory") or memory.get("content") or ""
created_at = memory.get("created_at", "")
if created_at:
try:
# Format the date if it's available
created_date = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
created_str = created_date.strftime("%Y-%m-%d %H:%M")
except:
created_str = created_at
formatted.append(f"{i}. {memory_text} (remembered on: {created_str})")
else:
formatted.append(f"{i}. {memory_text}")
elif isinstance(memory, str): elif isinstance(memory, str):
formatted.append(f"{i}. {memory}") memory_text = memory
else:
memory_text = str(memory)
sanitized = " ".join(str(memory_text).split())
if sanitized:
formatted.append(f"- {sanitized}")
return "\n".join(formatted) return "\n".join(formatted)
def generate_response_with_memory(self, user_input: str, user_id: str) -> Dict[str, Any]: def generate_response_with_memory(self, user_input: str, user_id: str) -> Dict[str, Any]:
"""Generate a response using memories and store the interaction.""" """Generate a response using memories and store the interaction."""
# Step 1: Search for relevant memories # Step 1: Search for relevant memories
memories = self.search_memories(user_input, user_id) memories = self.search_memories(user_input, user_id, limit=2)
# Step 2: Format memories for the prompt (or use empty if no memories) # Step 2: Prepare system prompt with memory injection
if memories: memory_block = self.format_memories_for_prompt(memories)
formatted_memories = self.format_memories_for_prompt(memories) system_prompt = self.system_prompt_template.replace(
else: "{memory_block}", memory_block if memory_block else ""
formatted_memories = "No previous memories about this user." ).strip()
# Step 3: Create the enhanced prompt # Step 3: Generate response using OpenAI
enhanced_prompt = self.memory_template.format(
memories=formatted_memories,
query=user_input
)
# Step 4: Generate response using OpenAI
try: try:
response = self.openai_client.chat.completions.create( response = self.openai_client.chat.completions.create(
model=self.llm_model, model=self.llm_model,
messages=[ messages=[
{"role": "system", "content": "你是一个乐于助人的助手,可以访问用户记忆。请使用提供的记忆来个性化你的回复。"}, {"role": "system", "content": system_prompt},
{"role": "user", "content": enhanced_prompt} {"role": "user", "content": user_input}
], ],
reasoning_effort="minimal", reasoning_effort="minimal",
) )
assistant_response = response.choices[0].message.content assistant_response = response.choices[0].message.content
reply_for_memory = self._extract_reply_for_memory(assistant_response)
if not reply_for_memory:
reply_for_memory = assistant_response
# Step 5: Store the interaction as new memories (异步执行) # Step 5: Store the interaction as new memories (异步执行)
messages = [ messages = [
{"role": "user", "content": user_input}, {"role": "user", "content": user_input},
{"role": "assistant", "content": assistant_response} {"role": "assistant", "content": reply_for_memory}
] ]
# Store with metadata including timestamp # Store with metadata including timestamp