diff --git a/src/memos/graph_dbs/polardb.py b/src/memos/graph_dbs/polardb.py
index d2d69c768..74dd38fc1 100644
--- a/src/memos/graph_dbs/polardb.py
+++ b/src/memos/graph_dbs/polardb.py
@@ -1553,7 +1553,7 @@ def search_by_fulltext(
         """
 
         params = [tsquery_string, tsquery_string]
-
+        logger.info(f"[search_by_fulltext] query: {query}, params: {params}")
         conn = self._get_connection()
         try:
             with conn.cursor() as cursor:
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py
index 7ac274a62..5dfbde704 100644
--- a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py
@@ -101,13 +101,27 @@ def retrieve(
                 user_name=user_name,
                 search_filter=id_filter,
             )
+            if use_fast_graph:
+                future_fulltext = executor.submit(
+                    self._fulltext_recall,
+                    query_words=parsed_goal.keys or [],
+                    memory_scope=memory_scope,
+                    top_k=top_k,
+                    search_filter=search_filter,
+                    search_priority=search_priority,
+                    user_name=user_name,
+                )
 
             graph_results = future_graph.result()
             vector_results = future_vector.result()
             bm25_results = future_bm25.result() if self.use_bm25 else []
+            fulltext_results = future_fulltext.result() if use_fast_graph else []
 
         # Merge and deduplicate by ID
-        combined = {item.id: item for item in graph_results + vector_results + bm25_results}
+        combined = {
+            item.id: item
+            for item in graph_results + vector_results + bm25_results + fulltext_results
+        }
 
         return list(combined.values())
 
@@ -404,3 +418,55 @@ def _bm25_recall(
         )
 
         return [TextualMemoryItem.from_dict(n) for n in bm25_results]
+
+    def _fulltext_recall(
+        self,
+        query_words: list[str],
+        memory_scope: str,
+        top_k: int = 20,
+        max_num: int = 5,
+        status: str = "activated",
+        cube_name: str | None = None,
+        search_filter: dict | None = None,
+        search_priority: dict | None = None,
+        user_name: str | None = None,
+    ):
+        """Perform fulltext-based retrieval.
+        Args:
+            query_words: list of query words
+            memory_scope: memory scope
+            top_k: top k results
+            max_num: max number of query words
+            status: status
+            cube_name: cube name
+            search_filter: search filter
+            search_priority: search priority
+            user_name: user name
+        Returns:
+            list of TextualMemoryItem
+        """
+        if not query_words:
+            return []
+        logger.info(f"[FULLTEXT] query_words: {query_words}")
+        all_hits = self.graph_store.search_by_fulltext(
+            query_words=query_words,
+            top_k=top_k,
+            status=status,
+            scope=memory_scope,
+            cube_name=cube_name,
+            search_filter=search_priority,
+            filter=search_filter,
+            user_name=user_name,
+        )
+        if not all_hits:
+            return []
+
+        # merge and deduplicate
+        unique_ids = {r["id"] for r in all_hits if r.get("id")}
+        node_dicts = (
+            self.graph_store.get_nodes(
+                list(unique_ids), include_embedding=False, cube_name=cube_name, user_name=user_name
+            )
+            or []
+        )
+        return [TextualMemoryItem.from_dict(n) for n in node_dicts]