diff --git a/src/memos/api/handlers/chat_handler.py b/src/memos/api/handlers/chat_handler.py
index c42157245..b0240985e 100644
--- a/src/memos/api/handlers/chat_handler.py
+++ b/src/memos/api/handlers/chat_handler.py
@@ -688,14 +688,24 @@ def generate_chat_response() -> Generator[str, None, None]:
     def _dedup_and_supplement_memories(
         self, first_filtered_memories: list, second_filtered_memories: list
     ) -> list:
-        """Remove memory from second_filtered_memories that already exists in first_filtered_memories, return remaining memories"""
-        # Create a set of IDs from first_filtered_memories for efficient lookup
-        first_memory_ids = {memory["id"] for memory in first_filtered_memories}
+        """
+        Remove memories from second_filtered_memories whose content already exists in
+        first_filtered_memories, return the remaining list.
+        """
+
+        def _norm(text: str) -> str:
+            # Use normalized text as the dedup key; keep original text in the payload.
+            return " ".join(text.split())
+
+        first_memory_texts = {_norm(memory.get("memory", "")) for memory in first_filtered_memories}
 
         remaining_memories = []
         for memory in second_filtered_memories:
-            if memory["id"] not in first_memory_ids:
-                remaining_memories.append(memory)
+            key = _norm(memory.get("memory", ""))
+            if key in first_memory_texts:
+                continue
+            first_memory_texts.add(key)
+            remaining_memories.append(memory)
         return remaining_memories
 
     def _get_internet_reference(
diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py
index a36f4ff3a..57f2cdba1 100644
--- a/src/memos/multi_mem_cube/single_cube.py
+++ b/src/memos/multi_mem_cube/single_cube.py
@@ -360,7 +360,20 @@ def _fine_search(
                 logger.info(
                     f"Added {len(additional_memories)} more memories. Total enhanced memories: {len(enhanced_memories)}"
                 )
-        formatted_memories = [format_memory_item(data) for data in enhanced_memories]
+
+        def _dedup_by_content(memories: list) -> list:
+            seen = set()
+            unique_memories = []
+            for mem in memories:
+                key = " ".join(mem.memory.split())
+                if key in seen:
+                    continue
+                seen.add(key)
+                unique_memories.append(mem)
+            return unique_memories
+
+        deduped_memories = _dedup_by_content(enhanced_memories)
+        formatted_memories = [format_memory_item(data) for data in deduped_memories]
 
         logger.info(f"Found {len(formatted_memories)} memories for user {search_req.user_id}")
 