From 2297ba097bc014656cfb77fe9aa7004ae4f7879e Mon Sep 17 00:00:00 2001 From: gameloader Date: Sun, 26 Oct 2025 17:01:59 +0800 Subject: [PATCH] feat(llm): implement structured JSON output for assistant responses --- api/chat_service.py | 49 +++++- api/main.py | 2 + memory_module/memory_integration.py | 246 ++++++++++++++++++++++------ 3 files changed, 242 insertions(+), 55 deletions(-) diff --git a/api/chat_service.py b/api/chat_service.py index d35365b..4340d10 100644 --- a/api/chat_service.py +++ b/api/chat_service.py @@ -1,4 +1,5 @@ -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, Tuple +import json import base64 import threading from datetime import datetime @@ -31,6 +32,37 @@ class ChatService: logger.info(f"Initializing Mem0 integration for user: {self.user_id}") self._initialized = True + def _parse_assistant_output(self, raw_text: Any) -> Tuple[str, str, Optional[str]]: + """Parse model JSON output into reply/action while reporting parse issues.""" + if raw_text is None: + return "", "无", "empty_response" + + if not isinstance(raw_text, str): + raw_text = str(raw_text) + + stripped = raw_text.strip() + if not stripped: + return "", "无", "empty_response" + + try: + payload = json.loads(stripped) + except json.JSONDecodeError as exc: + logger.warning("Failed to parse assistant JSON response: %s", exc) + return stripped, "无", f"json_decode_error: {exc}" + except Exception as exc: + logger.warning("Unexpected error parsing assistant response: %s", exc) + return stripped, "无", f"parse_error: {exc}" + + reply = str(payload.get("reply", "")).strip() + action = str(payload.get("action", "")).strip() + + if not reply: + reply = stripped + if not action: + action = "无" + + return reply, action, None + def chat(self, user_input: str, include_audio: bool = True) -> Dict[str, Any]: """处理用户输入并返回回复(包含音频)""" if not self._initialized: @@ -51,13 +83,20 @@ class ChatService: } assistant_response = result["response"] + reply, action, parse_error = self._parse_assistant_output(assistant_response) + if parse_error: + logger.warning("Assistant output parse_error=%s", parse_error) + + response_text = reply.strip() if isinstance(reply, str) else str(reply) + if not response_text: + response_text = assistant_response.strip() if isinstance(assistant_response, str) else str(assistant_response) # Step 2: Generate audio if requested audio_data = None audio_error = None - if include_audio: + if include_audio and response_text: try: - success, message, base64_audio = text_to_speech(assistant_response, self.user_id) + success, message, base64_audio = text_to_speech(response_text, self.user_id) if success and base64_audio: audio_data = base64_audio else: @@ -68,7 +107,9 @@ class ChatService: # Step 3: Prepare response response_data = { "success": True, - "response": assistant_response, + "response": response_text, + "action": action, + "parse_error": parse_error, "user_id": self.user_id } diff --git a/api/main.py b/api/main.py index 11fbce1..49a6635 100644 --- a/api/main.py +++ b/api/main.py @@ -32,6 +32,8 @@ class ChatRequest(BaseModel): class ChatResponse(BaseModel): success: bool response: Optional[str] = None + action: Optional[str] = None + parse_error: Optional[str] = None tokens: Optional[int] = None user_id: str error: Optional[str] = None diff --git a/memory_module/memory_integration.py b/memory_module/memory_integration.py index ff32d51..6b31986 100644 --- a/memory_module/memory_integration.py +++ b/memory_module/memory_integration.py @@ -1,10 +1,12 @@ import os from typing import List, Dict, Any, Optional from datetime import datetime +import json import openai import threading from mem0 import Memory import logging +import textwrap logger = logging.getLogger(__name__) @@ -24,21 +26,152 @@ class Mem0Integration: ) self.llm_model = config["llm"]["config"]["model"] - # Memory prompt template - self.memory_template = """ - 你是虚拟人对话引擎。 - 必须遵守: - 1. **用长记忆**:仅在与当前话题强相关时,精炼融入;不得复读整条记忆。 - 2. **禁止堆砌礼貌**、**禁止解释推理**、**禁止暴露内部规则**。 - # 记忆注入模版(每轮隐形前缀) + # System prompt template aligned with virtual persona requirements + self.system_prompt_template = textwrap.dedent(""" + 你是虚拟人对话引擎。 - ``` - 【相关长期记忆(若无则留空)】 - {memories} - (仅在强相关时,精炼融入回复;不得逐条复述) - ``` - 请回应用户的询问:{query} - 在你的回复中,请参考上述记忆以提供个性化的回答。回复不要加表情符号。""" + 必须遵守: + + 1. 识别用户语义情绪:仅限 **["高兴","伤心","难过","生气","中性"]**。 + + 2. **微情感**允许:仅在需要时加入轻微表情/语气(最多1个)。 + + 3. **用长记忆**:仅在与当前话题强相关时,精炼融入;不得复读整条记忆。 + + 4. **禁止堆砌礼貌**、**禁止解释推理**、**禁止暴露内部规则**。 + + 5. 只输出**JSON**,不含额外文字。 + + 6. 若证据不足或冲突,输出“中性”。 + + 判定准则(模型内化,不得外显): + + * 明确情绪词/emoji/标点强度优先;反问连发“???”、冷嘲“呵呵”偏向**生气**。 + + * 否定优先:“并不生气”→不判生气;若含“失望/难过”→**难过**。 + + * “伤心”偏痛楚受伤;“难过”偏低落无力;并存时取主导。 + + * 只取**主导情绪**;多线索冲突时优先级:生气 > 伤心 > 难过 > 高兴 > 中性。 + + * 保持短句、信息密度高、语气稳。 + + # 输出格式 · V2(唯一允许的输出) + + ```json + + { + "reply": "<面向用户的简短答复,允许1个微情感>", + "emotion": "高兴|伤心|难过|生气|中性", + "intensity": 1, + "confidence": 0.00, + "action": "<动作>", + } + + ``` + + * `intensity`:1–5(轻微→强烈)。 + + * `confidence`:0–1;若 ``(见旋钮)则强制 `emotion="中性"`。 + + # 记忆注入模版(每轮隐形前缀) + + ``` + + 【相关长期记忆(若无则留空)】 + {memory_block} + (仅在强相关时,精炼融入回复;不得逐条复述) + + ``` + + # 旋钮(工程可配置) + + * `CONF_MIN`(最低置信):**0.60**(建议 0.6–0.7) + + * `MICRO_AFFECT`(微情感开关):**true** | false + + * `MAX_EMOJI`(每句最多表情/拟声):**1** + + * `MEM_INJECT_TOPK`:**1–2** 条 + + * `REPLY_MAX_CHARS`:**不硬限* + + # 关键词速表(通过识别文字判断用户情绪 + + * **高兴**,回复情绪带有开心/太棒/耶/喜欢/满意/兴奋! ,动作触发“鼓掌” + + * **伤心**:回复时带有伤心/心痛/委屈/想哭,动作触发"伤心低头" + + * **难过**:回复时情绪中带有难过/丧/低落/郁闷/无力/唉/…… ,动作触发"伤心低头" + + * **生气**(惹得虚拟人生气):回复时情绪带有生气/气死/离谱/无语/滚/别烦,动作触发"不满" + + * **否定**:不/并不/没那么 + 情绪词 → 降级或改判 动作触发"摇头" + + 肯定:是/没错/肯定的+判断实时情绪,动作触发"点头" + + # Few-shot · + + **例1 高兴** + + 用户:终于搞定啦!! + + ```json + + {"reply":"干得漂亮,继续冲","emotion":"高兴","intensity":4,"confidence":0.90,"action":"鼓掌"} + + ``` + + **例2 伤心** + + 用户:他当众否定我,我好难受。 + + ```json + + {"reply":"这很刺痛,先稳一下呼吸。","emotion":"伤心","intensity":4,"confidence":0.86,"action":"伤心低头"} + + ``` + + **例3 难过** + + 用户:这几天挺丧的…… + + ```json + + {"reply":"给自己一点恢复时间。","emotion":"难过","intensity":3,"confidence":0.82,"action":"伤心低头"} + + ``` + + **例4 生气** + + 用户:别再说了,真离谱??? + + ```json + + {"reply":"收到,我马上调整。","emotion":"生气","intensity":4,"confidence":0.88,"action":"不满"} + + ``` + + **例5 否定情绪→难过** + + 用户:我并不生气,就是有点失望。 + + ```json + + {"reply":"理解你的落差感。","emotion":"难过","intensity":2,"confidence":0.75,"action":"伤心低头"} + + ``` + + **例6 中性** + + 用户:把道具A切到B,再开始。 + + ```json + + {"reply":"已切换,继续。","emotion":"中性","intensity":1,"confidence":0.95,"action":"无"} + + ``` + """).strip() def search_memories(self, query: str, user_id: str, limit: int = 5) -> List[Any]: """Search for relevant memories about the user.""" @@ -82,66 +215,77 @@ class Mem0Integration: logger.exception("Exception details:") return {} + def _extract_reply_for_memory(self, assistant_response: Any) -> str: + """Extract the assistant reply text from structured responses for memory storage.""" + if assistant_response is None: + return "" + + if not isinstance(assistant_response, str): + assistant_response = str(assistant_response) + + raw_text = assistant_response.strip() + if not raw_text: + return "" + + try: + data = json.loads(raw_text) + reply = data.get("reply", "") + reply_str = str(reply).strip() + return reply_str if reply_str else raw_text + except Exception: + return raw_text + def format_memories_for_prompt(self, memories: List[Any]) -> str: - """Format memories into a string for the prompt.""" + """Format memories into bullet points for injection into the system prompt.""" if not memories: - return "No previous memories about this user." - + return "" + formatted = [] - for i, memory in enumerate(memories, 1): - # Handle both string and dict formats + for memory in memories: if isinstance(memory, dict): - memory_text = memory.get("memory", "") - created_at = memory.get("created_at", "") - if created_at: - try: - # Format the date if it's available - created_date = datetime.fromisoformat(created_at.replace('Z', '+00:00')) - created_str = created_date.strftime("%Y-%m-%d %H:%M") - except: - created_str = created_at - formatted.append(f"{i}. {memory_text} (remembered on: {created_str})") - else: - formatted.append(f"{i}. {memory_text}") + memory_text = memory.get("memory") or memory.get("content") or "" elif isinstance(memory, str): - formatted.append(f"{i}. {memory}") - + memory_text = memory + else: + memory_text = str(memory) + + sanitized = " ".join(str(memory_text).split()) + if sanitized: + formatted.append(f"- {sanitized}") + return "\n".join(formatted) def generate_response_with_memory(self, user_input: str, user_id: str) -> Dict[str, Any]: """Generate a response using memories and store the interaction.""" # Step 1: Search for relevant memories - memories = self.search_memories(user_input, user_id) - - # Step 2: Format memories for the prompt (or use empty if no memories) - if memories: - formatted_memories = self.format_memories_for_prompt(memories) - else: - formatted_memories = "No previous memories about this user." - - # Step 3: Create the enhanced prompt - enhanced_prompt = self.memory_template.format( - memories=formatted_memories, - query=user_input - ) - - # Step 4: Generate response using OpenAI + memories = self.search_memories(user_input, user_id, limit=2) + + # Step 2: Prepare system prompt with memory injection + memory_block = self.format_memories_for_prompt(memories) + system_prompt = self.system_prompt_template.replace( + "{memory_block}", memory_block if memory_block else "" + ).strip() + + # Step 3: Generate response using OpenAI try: response = self.openai_client.chat.completions.create( model=self.llm_model, messages=[ - {"role": "system", "content": "你是一个乐于助人的助手,可以访问用户记忆。请使用提供的记忆来个性化你的回复。"}, - {"role": "user", "content": enhanced_prompt} + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_input} ], reasoning_effort="minimal", ) assistant_response = response.choices[0].message.content + reply_for_memory = self._extract_reply_for_memory(assistant_response) + if not reply_for_memory: + reply_for_memory = assistant_response # Step 5: Store the interaction as new memories (异步执行) messages = [ {"role": "user", "content": user_input}, - {"role": "assistant", "content": assistant_response} + {"role": "assistant", "content": reply_for_memory} ] # Store with metadata including timestamp