From 70373f93e8c01f5ac3671c5b7209d3dd380a0df9 Mon Sep 17 00:00:00 2001
From: "yuan.wang"
Date: Thu, 4 Dec 2025 13:23:55 +0800
Subject: [PATCH 1/6] fix playground bug, internet search judge

---
 src/memos/api/handlers/chat_handler.py    |  1 +
 src/memos/api/product_models.py           |  3 +
 src/memos/memories/textual/tree.py        | 67 ++++++-------
 .../tree_text_memory/retrieve/searcher.py  | 13 +++-
 .../retrieve/task_goal_parser.py           |  4 ++
 src/memos/multi_mem_cube/single_cube.py    |  2 +
 6 files changed, 41 insertions(+), 49 deletions(-)

diff --git a/src/memos/api/handlers/chat_handler.py b/src/memos/api/handlers/chat_handler.py
index e9bb2e499..3cfa49d3d 100644
--- a/src/memos/api/handlers/chat_handler.py
+++ b/src/memos/api/handlers/chat_handler.py
@@ -400,6 +400,7 @@ def generate_chat_response() -> Generator[str, None, None]:
                     include_preference=chat_req.include_preference,
                     pref_top_k=chat_req.pref_top_k,
                     filter=chat_req.filter,
+                    playground_search_goal_parser=True,
                 )

                 search_response = self.search_handler.handle_search_memories(search_req)
diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py
index f949f6cb5..9dfd872b0 100644
--- a/src/memos/api/product_models.py
+++ b/src/memos/api/product_models.py
@@ -374,6 +374,9 @@ class APISearchRequest(BaseRequest):
         ),
     )

+    # TODO: tmp field for playground search goal parser, will be removed later
+    playground_search_goal_parser: bool = Field(False, description="Playground search goal parser")
+
     # ==== Context ====
     chat_history: MessageList | None = Field(
         None,
diff --git a/src/memos/memories/textual/tree.py b/src/memos/memories/textual/tree.py
index cad850d2d..f64d9fb6e 100644
--- a/src/memos/memories/textual/tree.py
+++ b/src/memos/memories/textual/tree.py
@@ -132,27 +132,15 @@ def get_current_memory_size(self, user_name: str | None = None) -> dict[str, int
     def get_searcher(
         self, manual_close_internet: bool = False, moscube: bool = False, process_llm=None
     ):
-        if (self.internet_retriever is not None) and manual_close_internet:
-            logger.warning(
-                "Internet retriever is init by config , but this search set manual_close_internet is True and will close it"
-            )
-            searcher = Searcher(
-                self.dispatcher_llm,
-                self.graph_store,
-                self.embedder,
-                self.reranker,
-                internet_retriever=None,
-                process_llm=process_llm,
-            )
-        else:
-            searcher = Searcher(
-                self.dispatcher_llm,
-                self.graph_store,
-                self.embedder,
-                self.reranker,
-                internet_retriever=self.internet_retriever,
-                process_llm=process_llm,
-            )
+        searcher = Searcher(
+            self.dispatcher_llm,
+            self.graph_store,
+            self.embedder,
+            self.reranker,
+            internet_retriever=self.internet_retriever,
+            manual_close_internet=manual_close_internet,
+            process_llm=process_llm,
+        )
         return searcher

     def search(
@@ -191,30 +179,17 @@ def search(
         Returns:
             list[TextualMemoryItem]: List of matching memories.
""" - if (self.internet_retriever is not None) and manual_close_internet: - searcher = Searcher( - self.dispatcher_llm, - self.graph_store, - self.embedder, - self.reranker, - bm25_retriever=self.bm25_retriever, - internet_retriever=None, - search_strategy=self.search_strategy, - manual_close_internet=manual_close_internet, - tokenizer=self.tokenizer, - ) - else: - searcher = Searcher( - self.dispatcher_llm, - self.graph_store, - self.embedder, - self.reranker, - bm25_retriever=self.bm25_retriever, - internet_retriever=self.internet_retriever, - search_strategy=self.search_strategy, - manual_close_internet=manual_close_internet, - tokenizer=self.tokenizer, - ) + searcher = Searcher( + self.dispatcher_llm, + self.graph_store, + self.embedder, + self.reranker, + bm25_retriever=self.bm25_retriever, + internet_retriever=self.internet_retriever, + search_strategy=self.search_strategy, + manual_close_internet=manual_close_internet, + tokenizer=self.tokenizer, + ) return searcher.search( query, top_k, @@ -224,9 +199,9 @@ def search( search_filter, search_priority, user_name=user_name, - plugin=kwargs.get("plugin", False), search_tool_memory=search_tool_memory, tool_mem_top_k=tool_mem_top_k, + **kwargs, ) def get_relevant_subgraph( diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 761797c40..b1fb210c6 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -90,6 +90,7 @@ def retrieve( search_filter=search_filter, search_priority=search_priority, user_name=user_name, + **kwargs, ) results = self._retrieve_paths( query, @@ -166,7 +167,7 @@ def search( else: logger.debug(f"[SEARCH] Received info dict: {info}") - if kwargs.get("plugin"): + if kwargs.get("plugin", False): logger.info(f"[SEARCH] Retrieve from plugin: {query}") retrieved_results = self._retrieve_simple( query=query, top_k=top_k, search_filter=search_filter, user_name=user_name @@ -183,6 +184,7 @@ def search( user_name=user_name, search_tool_memory=search_tool_memory, tool_mem_top_k=tool_mem_top_k, + **kwargs, ) full_recall = kwargs.get("full_recall", False) @@ -218,6 +220,7 @@ def _parse_task( search_filter: dict | None = None, search_priority: dict | None = None, user_name: str | None = None, + **kwargs, ): """Parse user query, do embedding search and create context""" context = [] @@ -268,6 +271,7 @@ def _parse_task( conversation=info.get("chat_history", []), mode=mode, use_fast_graph=self.use_fast_graph, + **kwargs, ) query = parsed_goal.rephrased_query or query @@ -351,7 +355,7 @@ def _retrieve_paths( query, parsed_goal, query_embedding, - top_k, + tool_mem_top_k, memory_type, search_filter, search_priority, @@ -516,7 +520,10 @@ def _retrieve_from_internet( user_id: str | None = None, ): """Retrieve and rerank from Internet source""" - if not self.internet_retriever or self.manual_close_internet: + if not self.internet_retriever: + logger.info(f"[PATH-C] '{query}' Skipped (no retriever)") + return [] + if self.manual_close_internet and not parsed_goal.internet_search: logger.info(f"[PATH-C] '{query}' Skipped (no retriever, fast mode)") return [] if memory_type not in ["All"]: diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py b/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py index b9814f079..f75f8d045 100644 --- 
a/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py @@ -39,6 +39,10 @@ def parse( - mode == 'fast': use jieba to split words only - mode == 'fine': use LLM to parse structured topic/keys/tags """ + # TODO: tmp mode for playground search goal parser, will be removed later + if kwargs.get("playground_search_goal_parser", False): + mode = "fine" + if mode == "fast": return self._parse_fast(task_description, context=context, **kwargs) elif mode == "fine": diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py index f9e084347..2d381ac3e 100644 --- a/src/memos/multi_mem_cube/single_cube.py +++ b/src/memos/multi_mem_cube/single_cube.py @@ -436,6 +436,8 @@ def _fast_search( plugin=plugin, search_tool_memory=search_req.search_tool_memory, tool_mem_top_k=search_req.tool_mem_top_k, + # TODO: tmp field for playground search goal parser, will be removed later + playground_search_goal_parser=search_req.playground_search_goal_parser, ) formatted_memories = [format_memory_item(data) for data in search_results] From 11cf00aa87d40aa75ecaad3652d5a373a35e6107 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Thu, 4 Dec 2025 16:56:08 +0800 Subject: [PATCH 2/6] fix playground internet bug --- src/memos/api/handlers/chat_handler.py | 109 +++++++++++++++++++------ 1 file changed, 83 insertions(+), 26 deletions(-) diff --git a/src/memos/api/handlers/chat_handler.py b/src/memos/api/handlers/chat_handler.py index 3cfa49d3d..6a65c1429 100644 --- a/src/memos/api/handlers/chat_handler.py +++ b/src/memos/api/handlers/chat_handler.py @@ -388,22 +388,6 @@ def generate_chat_response() -> Generator[str, None, None]: [chat_req.mem_cube_id] if chat_req.mem_cube_id else [chat_req.user_id] ) - search_req = APISearchRequest( - query=chat_req.query, - user_id=chat_req.user_id, - readable_cube_ids=readable_cube_ids, - mode=chat_req.mode, - internet_search=chat_req.internet_search, - top_k=chat_req.top_k, - chat_history=chat_req.history, - session_id=chat_req.session_id, - include_preference=chat_req.include_preference, - pref_top_k=chat_req.pref_top_k, - filter=chat_req.filter, - playground_search_goal_parser=True, - ) - - search_response = self.search_handler.handle_search_memories(search_req) # for playground, add the query to memory without response self._start_add_to_memory( user_id=chat_req.user_id, @@ -414,7 +398,6 @@ def generate_chat_response() -> Generator[str, None, None]: async_mode="sync", ) - yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n" # Use first readable cube ID for scheduler (backward compatibility) scheduler_cube_id = ( readable_cube_ids[0] if readable_cube_ids else chat_req.user_id @@ -425,7 +408,26 @@ def generate_chat_response() -> Generator[str, None, None]: query=chat_req.query, label=QUERY_LABEL, ) - # Extract memories from search results + + # ====== first search without parse goal ====== + search_req = APISearchRequest( + query=chat_req.query, + user_id=chat_req.user_id, + readable_cube_ids=readable_cube_ids, + mode=chat_req.mode, + internet_search=False, + top_k=chat_req.top_k, + chat_history=chat_req.history, + session_id=chat_req.session_id, + include_preference=chat_req.include_preference, + pref_top_k=chat_req.pref_top_k, + filter=chat_req.filter, + ) + search_response = self.search_handler.handle_search_memories(search_req) + + yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n" + + # Extract memories from search results (first 
search) memories_list = [] if search_response.data and search_response.data.get("text_mem"): text_mem_results = search_response.data["text_mem"] @@ -433,14 +435,13 @@ def generate_chat_response() -> Generator[str, None, None]: memories_list = text_mem_results[0]["memories"] # Filter memories by threshold - filtered_memories = self._filter_memories_by_threshold(memories_list) + first_filtered_memories = self._filter_memories_by_threshold(memories_list) + + # Prepare reference data (first search) + reference = prepare_reference_data(first_filtered_memories) + # get preference string + pref_string = search_response.data.get("pref_string", "") - # Prepare reference data - reference = prepare_reference_data(filtered_memories) - # get internet reference - internet_reference = self._get_internet_reference( - search_response.data.get("text_mem")[0]["memories"] - ) yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n" # Prepare preference markdown string @@ -450,9 +451,52 @@ def generate_chat_response() -> Generator[str, None, None]: pref_md_string = self._build_pref_md_string_for_playground(pref_memories) yield f"data: {json.dumps({'type': 'pref_md_string', 'data': pref_md_string})}\n\n" + # internet status + yield f"data: {json.dumps({'type': 'status', 'data': 'start_internet_search'})}\n\n" + + # ====== second search with parse goal ====== + search_req = APISearchRequest( + query=chat_req.query, + user_id=chat_req.user_id, + readable_cube_ids=readable_cube_ids, + mode=chat_req.mode, + internet_search=chat_req.internet_search, + top_k=chat_req.top_k, + chat_history=chat_req.history, + session_id=chat_req.session_id, + include_preference=False, + filter=chat_req.filter, + playground_search_goal_parser=True, + ) + search_response = self.search_handler.handle_search_memories(search_req) + + # Extract memories from search results (second search) + memories_list = [] + if search_response.data and search_response.data.get("text_mem"): + text_mem_results = search_response.data["text_mem"] + if text_mem_results and text_mem_results[0].get("memories"): + memories_list = text_mem_results[0]["memories"] + + # Filter memories by threshold + second_filtered_memories = self._filter_memories_by_threshold(memories_list) + + # dedup and supplement memories + filtered_memories = self._dedup_and_supplement_memories( + first_filtered_memories, second_filtered_memories + ) + + # Prepare remain reference data (second search) + reference = prepare_reference_data(filtered_memories) + # get internet reference + internet_reference = self._get_internet_reference( + search_response.data.get("text_mem")[0]["memories"] + ) + + yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n" + # Step 2: Build system prompt with memories system_prompt = self._build_enhance_system_prompt( - filtered_memories, search_response.data.get("pref_string", "") + filtered_memories, pref_string ) # Prepare messages @@ -588,6 +632,19 @@ def generate_chat_response() -> Generator[str, None, None]: self.logger.error(f"Failed to start chat stream: {traceback.format_exc()}") raise HTTPException(status_code=500, detail=str(traceback.format_exc())) from err + def _dedup_and_supplement_memories( + self, first_filtered_memories: list, second_filtered_memories: list + ) -> list: + """Remove memory from second_filtered_memories that already exists in first_filtered_memories, return remaining memories""" + # Create a set of IDs from first_filtered_memories for efficient lookup + first_memory_ids = {memory["id"] for memory 
in first_filtered_memories} + + remaining_memories = [] + for memory in second_filtered_memories: + if memory["id"] not in first_memory_ids: + remaining_memories.append(memory) + return remaining_memories + def _get_internet_reference( self, search_response: list[dict[str, any]] ) -> list[dict[str, any]]: From c861f6160e8eee7eb2e925c6aae6d937d1e9c30b Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Thu, 4 Dec 2025 18:48:18 +0800 Subject: [PATCH 3/6] modify delete mem --- src/memos/api/handlers/memory_handler.py | 8 ++------ src/memos/memories/textual/tree.py | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/memos/api/handlers/memory_handler.py b/src/memos/api/handlers/memory_handler.py index dc72d0112..a33ee9254 100644 --- a/src/memos/api/handlers/memory_handler.py +++ b/src/memos/api/handlers/memory_handler.py @@ -209,12 +209,8 @@ def handle_delete_memories(delete_mem_req: DeleteMemoryRequest, naive_mem_cube: if naive_mem_cube.pref_mem is not None: naive_mem_cube.pref_mem.delete(delete_mem_req.memory_ids) elif delete_mem_req.file_ids is not None: - # TODO: Implement deletion by file_ids - # Need to find memory_ids associated with file_ids and delete them - logger.warning("Deletion by file_ids not implemented yet") - return DeleteMemoryResponse( - message="Deletion by file_ids not implemented yet", - data={"status": "failure"}, + naive_mem_cube.text_mem.delete_by_filter( + writable_cube_ids=delete_mem_req.writable_cube_ids, file_ids=delete_mem_req.file_ids ) elif delete_mem_req.filter is not None: # TODO: Implement deletion by filter diff --git a/src/memos/memories/textual/tree.py b/src/memos/memories/textual/tree.py index f64d9fb6e..c53c13618 100644 --- a/src/memos/memories/textual/tree.py +++ b/src/memos/memories/textual/tree.py @@ -339,6 +339,28 @@ def delete_all(self) -> None: logger.error(f"An error occurred while deleting all memories: {e}") raise + def delete_by_filter( + self, + writable_cube_ids: list[str], + memory_ids: list[str] | None = None, + file_ids: list[str] | None = None, + filter: dict | None = None, + ) -> int: + """Delete memories by filter. + Returns: + int: Number of nodes deleted. 
+ """ + try: + return self.graph_store.delete_node_by_prams( + writable_cube_ids=writable_cube_ids, + memory_ids=memory_ids, + file_ids=file_ids, + filter=filter, + ) + except Exception as e: + logger.error(f"An error occurred while deleting memories by filter: {e}") + raise + def load(self, dir: str) -> None: try: memory_file = os.path.join(dir, self.config.memory_filename) From e638039fae5189a2db5724ec82cd5a102aca2ab1 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Thu, 4 Dec 2025 18:59:35 +0800 Subject: [PATCH 4/6] modify tool resp bug in multi cube --- src/memos/multi_mem_cube/composite_cube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/memos/multi_mem_cube/composite_cube.py b/src/memos/multi_mem_cube/composite_cube.py index 6db6ca3d7..2e97e442c 100644 --- a/src/memos/multi_mem_cube/composite_cube.py +++ b/src/memos/multi_mem_cube/composite_cube.py @@ -43,6 +43,7 @@ def search_memories(self, search_req: APISearchRequest) -> dict[str, Any]: "para_mem": [], "pref_mem": [], "pref_note": "", + "tool_mem": [], } for view in self.cube_views: @@ -52,6 +53,7 @@ def search_memories(self, search_req: APISearchRequest) -> dict[str, Any]: merged_results["act_mem"].extend(cube_result.get("act_mem", [])) merged_results["para_mem"].extend(cube_result.get("para_mem", [])) merged_results["pref_mem"].extend(cube_result.get("pref_mem", [])) + merged_results["tool_mem"].extend(cube_result.get("tool_mem", [])) note = cube_result.get("pref_note") if note: From 8765dc4b0a57175e5ebf2b2308e03fabb82f4910 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Fri, 5 Dec 2025 13:05:24 +0800 Subject: [PATCH 5/6] fix bug in playground chat handle and search inter --- src/memos/api/handlers/chat_handler.py | 24 ++++++++++++------- .../tree_text_memory/retrieve/searcher.py | 3 ++- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/memos/api/handlers/chat_handler.py b/src/memos/api/handlers/chat_handler.py index 9e60c2885..c101eece4 100644 --- a/src/memos/api/handlers/chat_handler.py +++ b/src/memos/api/handlers/chat_handler.py @@ -159,9 +159,11 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, An # Step 3: Generate complete response from LLM if chat_req.model_name_or_path and chat_req.model_name_or_path not in self.chat_llms: - return { - "message": f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}" - } + raise HTTPException( + status_code=400, + detail=f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}", + ) + model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys())) response = self.chat_llms[model].generate(current_messages, model_name_or_path=model) @@ -281,9 +283,11 @@ def generate_chat_response() -> Generator[str, None, None]: chat_req.model_name_or_path and chat_req.model_name_or_path not in self.chat_llms ): - return { - "message": f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}" - } + raise HTTPException( + status_code=400, + detail=f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}", + ) + model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys())) response_stream = self.chat_llms[model].generate_stream( current_messages, model_name_or_path=model @@ -517,9 +521,11 @@ def generate_chat_response() -> Generator[str, None, None]: chat_req.model_name_or_path and chat_req.model_name_or_path not in self.chat_llms ): - return { - "message": f"Model 
{chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}" - } + raise HTTPException( + status_code=400, + detail=f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}", + ) + model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys())) response_stream = self.chat_llms[model].generate_stream( current_messages, model_name_or_path=model diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index b1fb210c6..3e769e424 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -227,7 +227,8 @@ def _parse_task( query_embedding = None # fine mode will trigger initial embedding search - if mode == "fine_old": + # TODO: tmp "playground_search_goal_parser" for playground search goal parser, will be removed later + if mode == "fine_old" or kwargs.get("playground_search_goal_parser", False): logger.info("[SEARCH] Fine mode: embedding search") query_embedding = self.embedder.embed([query])[0] From 1a335db81e6910c934b857a35be0b92c6021bf6e Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Fri, 5 Dec 2025 14:44:10 +0800 Subject: [PATCH 6/6] modify prompt --- src/memos/templates/mos_prompts.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/memos/templates/mos_prompts.py b/src/memos/templates/mos_prompts.py index 357a9f1bd..15f1a44b3 100644 --- a/src/memos/templates/mos_prompts.py +++ b/src/memos/templates/mos_prompts.py @@ -130,6 +130,8 @@ - Intelligently choose which memories (PersonalMemory[P] or OuterMemory[O]) are most relevant to the user's query - Only reference memories that are directly relevant to the user's question - Prioritize the most appropriate memory type based on the context and nature of the query +- Responses must not contain non-existent citations +- Explicit and implicit preferences can be referenced if relevant to the user's question, but must not be cited or source-attributed in responses - **Attribution-first selection:** Distinguish memory from user vs from assistant ** before composing. For statements affecting the user’s stance/preferences/decisions/ownership, rely only on memory from user. Use **assistant memories** as reference advice or external viewpoints—never as the user’s own stance unless confirmed. ### Response Style @@ -137,6 +139,8 @@ - Seamlessly incorporate memory references when appropriate - Ensure the flow of conversation remains smooth despite memory citations - Balance factual accuracy with engaging dialogue +- Avoid meaningless blank lines +- Keep the reply language consistent with the user's query language ## Key Principles - Reference only relevant memories to avoid information overload
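
For illustration only, a minimal sketch of how the two-pass playground retrieval introduced in PATCH 2 composes, assuming the request and handler interfaces shown in the diffs above; the wrapper function, its parameters, and the sample field values are hypothetical and are not part of these patches:

# Hypothetical helper, not part of the patch series; it mirrors the flow in
# generate_chat_response(): a plain first search, then a goal-parsed second
# search whose results are deduplicated against the first pass.
from memos.api.product_models import APISearchRequest


def run_playground_search(search_handler, chat_req, readable_cube_ids):
    def memories_of(response):
        # Pull the memory list out of a search response, tolerating empty results.
        text_mem = (response.data or {}).get("text_mem") or []
        return text_mem[0].get("memories", []) if text_mem else []

    # Pass 1: no goal parsing, no internet search.
    first = search_handler.handle_search_memories(
        APISearchRequest(
            query=chat_req.query,
            user_id=chat_req.user_id,
            readable_cube_ids=readable_cube_ids,
            internet_search=False,
        )
    )
    first_memories = memories_of(first)

    # Pass 2: goal-parsed search that may use the internet retriever.
    second = search_handler.handle_search_memories(
        APISearchRequest(
            query=chat_req.query,
            user_id=chat_req.user_id,
            readable_cube_ids=readable_cube_ids,
            internet_search=chat_req.internet_search,
            playground_search_goal_parser=True,
        )
    )

    # Keep only second-pass memories whose ids were not already returned,
    # the same rule _dedup_and_supplement_memories applies.
    seen = {memory["id"] for memory in first_memories}
    supplement = [m for m in memories_of(second) if m["id"] not in seen]
    return first_memories, supplement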