diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py index f0833d716..555f1f110 100644 --- a/src/memos/mem_reader/simple_struct.py +++ b/src/memos/mem_reader/simple_struct.py @@ -453,7 +453,7 @@ def get_memory( @staticmethod def _parse_hallucination_filter_response(text: str) -> tuple[bool, dict[int, dict]]: """Parse index-keyed JSON from hallucination filter response. - Expected shape: { "0": {"need_rewrite": bool, "rewritten": str, "reason": str}, ... } + Expected shape: { "0": {"need_rewrite": bool, "rewritten_suffix": str, "reason": str}, ... } Returns (success, parsed_dict) with int keys. """ try: @@ -477,16 +477,16 @@ def _parse_hallucination_filter_response(text: str) -> tuple[bool, dict[int, dic if not isinstance(v, dict): continue need_rewrite = v.get("need_rewrite") - rewritten = v.get("rewritten", "") + rewritten_suffix = v.get("rewritten_suffix", "") reason = v.get("reason", "") if ( isinstance(need_rewrite, bool) - and isinstance(rewritten, str) + and isinstance(rewritten_suffix, str) and isinstance(reason, str) ): result[idx] = { "need_rewrite": need_rewrite, - "rewritten": rewritten, + "rewritten_suffix": rewritten_suffix, "reason": reason, } @@ -522,20 +522,26 @@ def filter_hallucination_in_memories( assert len(parsed) == len(memory_list) for mem_idx, content in parsed.items(): need_rewrite = content.get("need_rewrite", False) - rewritten = content.get("rewritten", "") + rewritten_suffix = content.get("rewritten_suffix", "") reason = content.get("reason", "") - # Apply rewriting if requested + # Append a new memory item instead of replacing the original if ( need_rewrite - and isinstance(rewritten, str) - and len(rewritten) > len(memory_list[mem_idx].memory) + and isinstance(rewritten_suffix, str) + and len(rewritten_suffix.strip()) > 0 ): - memory_list[mem_idx].memory = rewritten + original_text = memory_list[mem_idx].memory + logger.info( - f"[filter_hallucination_in_memories] index={mem_idx}, 
need_rewrite={need_rewrite}, rewritten='{rewritten}', reason='{reason}', original memory='{memory_list[mem_idx].memory}'" + f"[filter_hallucination_in_memories] index={mem_idx}, need_rewrite={need_rewrite}, rewritten_suffix='{rewritten_suffix}', reason='{reason}', original memory='{original_text}', action='append_suffix'" ) - new_mem_list.append(memory_list[mem_idx]) + + # Append only the suffix to the original memory text + memory_list[mem_idx].memory = original_text + rewritten_suffix + new_mem_list.append(memory_list[mem_idx]) + else: + new_mem_list.append(memory_list[mem_idx]) return new_mem_list else: logger.warning("Hallucination filter parsing failed or returned empty result.") diff --git a/src/memos/mem_scheduler/general_scheduler.py b/src/memos/mem_scheduler/general_scheduler.py index 4c7d51a7c..bbcb2c379 100644 --- a/src/memos/mem_scheduler/general_scheduler.py +++ b/src/memos/mem_scheduler/general_scheduler.py @@ -126,7 +126,10 @@ def long_memory_update_process( top_k=self.top_k, ) logger.info( - f"[long_memory_update_process] Processed {len(queries)} queries {queries} and retrieved {len(new_candidates)} new candidate memories for user_id={user_id}" + # Build the candidate preview outside the f-string: f-string expressions cannot contain backslashes before Python 3.12 + f"[long_memory_update_process] Processed {len(queries)} queries {queries} and retrieved {len(new_candidates)} " + f"new candidate memories for user_id={user_id}: " + + ("\n- " + "\n- ".join([f"{one.id}: {one.memory}" for one in new_candidates])) ) # rerank @@ -141,8 +144,12 @@ def long_memory_update_process( f"[long_memory_update_process] Final working memory size: {len(new_order_working_memory)} memories for user_id={user_id}" ) - old_memory_texts = [mem.memory for mem in cur_working_memory] - new_memory_texts = [mem.memory for mem in new_order_working_memory] + old_memory_texts = "\n- " + "\n- ".join( + [f"{one.id}: {one.memory}" for one in cur_working_memory] + ) + new_memory_texts = "\n- " + "\n- ".join( + 
[f"{one.id}: {one.memory}" for one in new_order_working_memory] + ) logger.info( f"[long_memory_update_process] For user_id='{user_id}', mem_cube_id='{mem_cube_id}': " @@ -1424,8 +1431,10 @@ def process_session_turn( method=self.search_method, search_args=search_args, ) + logger.info( - f"[process_session_turn] Search results for missing evidence '{item}': {[one.memory for one in results]}" + f"[process_session_turn] Search results for missing evidence '{item}': " + + ("\n- " + "\n- ".join([f"{one.id}: {one.memory}" for one in results])) ) new_candidates.extend(results) return cur_working_memory, new_candidates diff --git a/src/memos/mem_scheduler/task_schedule_modules/redis_queue.py b/src/memos/mem_scheduler/task_schedule_modules/redis_queue.py index d3268eda8..ae1b44a80 100644 --- a/src/memos/mem_scheduler/task_schedule_modules/redis_queue.py +++ b/src/memos/mem_scheduler/task_schedule_modules/redis_queue.py @@ -27,7 +27,6 @@ from memos.mem_scheduler.task_schedule_modules.orchestrator import SchedulerOrchestrator from memos.mem_scheduler.utils.status_tracker import TaskStatusTracker from memos.mem_scheduler.webservice_modules.redis_service import RedisSchedulerModule -from memos.utils import timed_with_status logger = get_logger(__name__) @@ -251,14 +250,6 @@ def _stop_stream_keys_refresh_thread(self) -> None: except Exception as e: logger.debug(f"Stopping stream keys refresh thread encountered: {e}") - @timed_with_status( - log_prefix="task_broker", - log_extra_args={ - "stream_prefix": os.getenv( - "MEMSCHEDULER_REDIS_STREAM_KEY_PREFIX", DEFAULT_STREAM_KEY_PREFIX - ) - }, - ) def task_broker( self, consume_batch_size: int, diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py index dfeb5d180..cf8456c80 100644 --- a/src/memos/templates/mem_reader_prompts.py +++ b/src/memos/templates/mem_reader_prompts.py @@ -422,45 +422,27 @@ SIMPLE_STRUCT_HALLUCINATION_FILTER_PROMPT = """ You are a strict memory validator. 
-# TASK -Review each memory object against the messages (ground truth). -Do NOT alter the original memory content. Instead, append a concise reference-resolution explanation after the original content. -If any part of the memory originates from assistant inference (i.e., not explicitly stated by the user), explicitly note this after the explanation. - -# RULENOTES (strictly enforced) -- NEVER change, delete, or paraphrase the original memory text. -- ALWAYS preserve the original language, structure, and factual phrasing of the memory. -- After the original text, add exactly one sentence starting with "[Ref] " that resolves ambiguous references (e.g., pronouns like 'she', 'it', or vague terms like 'the dog') using only information explicitly present in the user messages or prior memories. -- If the memory contains content that was inferred by the assistant (not directly stated by the user), append an additional sentence starting with "[Source:] Inference by assistant." after the [Ref:] sentence. -- Do NOT add any other commentary, formatting, or metadata beyond this. -- Keep all original timestamps and identifiers intact in the memory object; this rule applies only to the 'text' field. - -# INPUTS -messages (ground truth): +Task: +Check each memory against the user messages (ground truth). Do not modify the original text. Generate ONLY a suffix to append. + +Rules: +- Append " [Source:] Inference by assistant." if the memory contains assistant inference (not directly stated by the user). +- Otherwise output an empty suffix. +- No other commentary or formatting. + +Inputs: +messages: {messages_inline} -Extracted memory list to validate (indexed JSON objects with text and metadata): +memories: {memories_inline} -# OUTPUT FORMAT -Return a JSON object where: -- Keys are the same stringified indices as in the input memory list (e.g., "0", "1"). 
-- Each value is: {{"need_rewrite": boolean, "rewritten": string, "reason": string}} -- Set "need_rewrite" to true ONLY if the memory contains ambiguous references or assistant inference requiring clarification. -- If "need_rewrite" is true, "rewritten" = + " [Ref] ." -- If "need_rewrite" is false (i.e., memory is fully explicit and user-stated), "rewritten" is an empty string. -- "reason" must be brief: e.g., "resolved ambiguous reference with inference", "explicit user statement, no rewrite needed". - -# EXAMPLE -Input memory text: "She loves painting." -User messages include: "Caroline loves painting." -→ Rewritten: "She loves painting. [Ref] 'She' refers to Caroline." - -Input memory text: "The user is a developer." -User never stated this, but assistant inferred from context. -→ Rewritten: "The user is a developer. [Ref] 'The user' refers to the person interacting with the assistant; this statement is assistant inference." - -Final Output: +Output JSON: +- Keys: same indices as input ("0", "1", ...). +- Values: {{ "need_rewrite": boolean, "rewritten_suffix": string, "reason": string }} +- need_rewrite = true only when assistant inference is detected. +- rewritten_suffix = " [Source:] Inference by assistant." or "". +- reason: brief, e.g., "assistant inference detected" or "explicit user statement". """