From 84e856b434f1ee7ce2e656f140ad6b8cde09c2af Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Sat, 29 Nov 2025 14:57:00 +0800 Subject: [PATCH 01/28] function call support --- src/memos/mem_reader/multi_model_struct.py | 2 +- .../read_multi_model/system_parser.py | 64 +++++++++++++++---- .../read_multi_model/tool_parser.py | 14 +++- src/memos/memories/textual/item.py | 11 +++- 4 files changed, 74 insertions(+), 17 deletions(-) diff --git a/src/memos/mem_reader/multi_model_struct.py b/src/memos/mem_reader/multi_model_struct.py index 4520058b9..650587472 100644 --- a/src/memos/mem_reader/multi_model_struct.py +++ b/src/memos/mem_reader/multi_model_struct.py @@ -75,7 +75,7 @@ def _process_multi_model_data( fast_memory_items = self._concat_multi_model_memories(all_memory_items) else: - # Parse as single message + # Parse as single string fast_memory_items = self.multi_model_parser.parse( scene_data_info, info, mode="fast", **kwargs ) diff --git a/src/memos/mem_reader/read_multi_model/system_parser.py b/src/memos/mem_reader/read_multi_model/system_parser.py index 258b752cc..97d3f1efa 100644 --- a/src/memos/mem_reader/read_multi_model/system_parser.py +++ b/src/memos/mem_reader/read_multi_model/system_parser.py @@ -1,14 +1,22 @@ """Parser for system messages.""" +import json +import re +import uuid + from typing import Any from memos.embedders.base import BaseEmbedder from memos.llms.base import BaseLLM from memos.log import get_logger -from memos.memories.textual.item import SourceMessage, TextualMemoryItem +from memos.memories.textual.item import ( + SourceMessage, + TextualMemoryItem, + TreeNodeTextualMemoryMetadata, +) from memos.types.openai_chat_completion_types import ChatCompletionSystemMessageParam -from .base import BaseMessageParser, _extract_text_from_content +from .base import BaseMessageParser logger = get_logger(__name__) @@ -29,20 +37,19 @@ def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None): def create_source( self, - message: ChatCompletionSystemMessageParam, + message: str, info: dict[str, Any], ) -> SourceMessage: """Create SourceMessage from system message.""" - if not isinstance(message, dict): - return SourceMessage(type="chat", role="system") + tool_schema_match = re.search(r"<tool_schema>(.*?)</tool_schema>", message, re.DOTALL) + tool_schema_content = tool_schema_match.group(1) if tool_schema_match else "" - content = _extract_text_from_content(message.get("content", "")) return SourceMessage( type="chat", role="system", - chat_time=message.get("chat_time"), - message_id=message.get("message_id"), - content=content, + chat_time=message.get("chat_time", None), + message_id=message.get("message_id", None), + content=tool_schema_content, ) def rebuild_from_source( @@ -63,7 +70,25 @@ def parse_fast( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - return super().parse_fast(message, info, **kwargs) + content = message["content"] + if isinstance(content, dict): + content = content["text"] + + # Extract tool_schema content and remaining content + content_wo_tool_schema = re.sub( + r"<tool_schema>(.*?)</tool_schema>", + r"<tool_schema>omitted</tool_schema>", + content, + flags=re.DOTALL, + ) + + source = self.create_source(content, info) + return [ + TextualMemoryItem( + memory=content_wo_tool_schema, + metadata=TreeNodeTextualMemoryMetadata(sources=[source]), + ) + ] def parse_fine( self, @@ -71,4 +96,21 @@ def parse_fine( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - return [] + content = message["content"] + if isinstance(content, dict): + content = content["text"] + try: + tool_schema = 
json.loads(content) + assert isinstance(tool_schema, list), "Tool schema must be a list" + except json.JSONDecodeError: + return [] + + return [ + TextualMemoryItem( + id=str(uuid.uuid4()), + memory=json.dumps(tool_schema), + metadata=TreeNodeTextualMemoryMetadata( + memory_type="tool_schema", + ), + ) + ] diff --git a/src/memos/mem_reader/read_multi_model/tool_parser.py b/src/memos/mem_reader/read_multi_model/tool_parser.py index f7437312d..4aa17a93e 100644 --- a/src/memos/mem_reader/read_multi_model/tool_parser.py +++ b/src/memos/mem_reader/read_multi_model/tool_parser.py @@ -5,7 +5,11 @@ from memos.embedders.base import BaseEmbedder from memos.llms.base import BaseLLM from memos.log import get_logger -from memos.memories.textual.item import SourceMessage, TextualMemoryItem +from memos.memories.textual.item import ( + SourceMessage, + TextualMemoryItem, + TreeNodeTextualMemoryMetadata, +) from memos.types.openai_chat_completion_types import ChatCompletionToolMessageParam from .base import BaseMessageParser, _extract_text_from_content @@ -64,7 +68,13 @@ def parse_fast( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - return super().parse_fast(message, info, **kwargs) + memory = "" + source = self.create_source(message, info) + return [ + TextualMemoryItem( + memory=memory, metadata=TreeNodeTextualMemoryMetadata(sources=[source]) + ) + ] def parse_fine( self, diff --git a/src/memos/memories/textual/item.py b/src/memos/memories/textual/item.py index 12be08057..76b252f54 100644 --- a/src/memos/memories/textual/item.py +++ b/src/memos/memories/textual/item.py @@ -99,9 +99,14 @@ def __str__(self) -> str: class TreeNodeTextualMemoryMetadata(TextualMemoryMetadata): """Extended metadata for structured memory, layered retrieval, and lifecycle tracking.""" - memory_type: Literal["WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"] = Field( - default="WorkingMemory", description="Memory lifecycle type." - ) + memory_type: Literal[ + "WorkingMemory", + "LongTermMemory", + "UserMemory", + "OuterMemory", + "ToolSchema", + "ToolTrajectory", + ] = Field(default="WorkingMemory", description="Memory lifecycle type.") sources: list[SourceMessage] | None = Field( default=None, description="Multiple origins of the memory (e.g., URLs, notes)." 
) From ad86322912b51c69ff4c9d76b16e21158903a5f0 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Sun, 30 Nov 2025 20:17:53 +0800 Subject: [PATCH 02/28] add tool parser --- .../read_multi_model/tool_parser.py | 113 ++++++++++++++++-- 1 file changed, 101 insertions(+), 12 deletions(-) diff --git a/src/memos/mem_reader/read_multi_model/tool_parser.py b/src/memos/mem_reader/read_multi_model/tool_parser.py index 4aa17a93e..8d42b69a7 100644 --- a/src/memos/mem_reader/read_multi_model/tool_parser.py +++ b/src/memos/mem_reader/read_multi_model/tool_parser.py @@ -1,5 +1,7 @@ """Parser for tool messages.""" +import json + from typing import Any from memos.embedders.base import BaseEmbedder @@ -12,7 +14,7 @@ ) from memos.types.openai_chat_completion_types import ChatCompletionToolMessageParam -from .base import BaseMessageParser, _extract_text_from_content +from .base import BaseMessageParser logger = get_logger(__name__) @@ -35,18 +37,71 @@ def create_source( self, message: ChatCompletionToolMessageParam, info: dict[str, Any], - ) -> SourceMessage: + ) -> SourceMessage | list[SourceMessage]: """Create SourceMessage from tool message.""" + if not isinstance(message, dict): - return SourceMessage(type="chat", role="tool") - - content = _extract_text_from_content(message.get("content", "")) - return SourceMessage( - type="chat", - role="tool", - chat_time=message.get("chat_time"), - message_id=message.get("message_id"), - content=content, + return [] + + role = message.get("role", "tool") + raw_content = message.get("content", "") + tool_call_id = message.get("tool_call_id", "") + chat_time = message.get("chat_time") + message_id = message.get("message_id") + + sources = [] + + if isinstance(raw_content, list): + # Multimodal: create one SourceMessage per part + for part in raw_content: + if isinstance(part, dict): + part_type = part.get("type", "") + if part_type == "file": + file_info = part.get("file", {}) + sources.append( + SourceMessage( + type="file", + role=role, + chat_time=chat_time, + message_id=message_id, + doc_path=file_info.get("filename") or file_info.get("file_id", ""), + content=file_info.get("file_data", ""), + tool_call_id=tool_call_id, + original_part=part, + ) + ) + else: + # image_url, input_audio, etc. 
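+                        # non-file parts keep a `[<part_type>]` placeholder as content; the raw part is carried in `original_part` so rebuild_from_source can round-trip the original message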
+ sources.append( + SourceMessage( + type=part_type, + role=role, + chat_time=chat_time, + message_id=message_id, + content=f"[{part_type}]", + tool_call_id=tool_call_id, + original_part=part, + ) + ) + else: + # Simple string content message: single SourceMessage + content = raw_content + if content: + sources.append( + SourceMessage( + type="chat", + role=role, + chat_time=chat_time, + message_id=message_id, + content=content, + tool_call_id=tool_call_id, + ) + ) + + return ( + sources + if len(sources) > 1 + else (sources[0] if sources else SourceMessage(type="chat", role=role)) ) def rebuild_from_source( @@ -54,6 +109,40 @@ def rebuild_from_source( source: SourceMessage, ) -> ChatCompletionToolMessageParam: """Rebuild tool message from SourceMessage.""" + + # Priority 1: Use original_part if available + if hasattr(source, "original_part") and source.original_part: + original = source.original_part + # If it's a content part, wrap it in a message + if isinstance(original, dict) and "type" in original: + return { + "role": source.role or "user", + "content": [original], + "chat_time": source.chat_time, + "message_id": source.message_id, + } + # If it's already a full message, return it + if isinstance(original, dict) and "role" in original: + return original + + # Priority 2: Rebuild from source fields + if source.type == "file": + return { + "role": source.role or "user", + "content": [ + { + "type": "file", + "file": { + "filename": source.doc_path or "", + "file_data": source.content or "", + }, + } + ], + "chat_time": source.chat_time, + "message_id": source.message_id, + } + + # Simple text message return { "role": "tool", "content": source.content or "", @@ -68,7 +157,7 @@ def parse_fast( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - memory = "" + memory = json.dumps(message) source = self.create_source(message, info) return [ TextualMemoryItem( From e3aaf689f3b7269c232ee6fb02a801e16f0ad2a4 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Mon, 1 Dec 2025 13:59:31 +0800 Subject: [PATCH 03/28] rename multi model to modal --- .../mem_reader/{multi_model_struct.py => multi_modal_struct.py} | 0 .../mem_reader/{read_multi_model => read_multi_modal}/__init__.py | 0 .../{read_multi_model => read_multi_modal}/assistant_parser.py | 0 .../mem_reader/{read_multi_model => read_multi_modal}/base.py | 0 .../{read_multi_model => read_multi_modal}/file_content_parser.py | 0 .../{read_multi_model => read_multi_modal}/multi_model_parser.py | 0 .../{read_multi_model => read_multi_modal}/string_parser.py | 0 .../{read_multi_model => read_multi_modal}/system_parser.py | 0 .../{read_multi_model => read_multi_modal}/text_content_parser.py | 0 .../{read_multi_model => read_multi_modal}/tool_parser.py | 0 .../{read_multi_model => read_multi_modal}/user_parser.py | 0 .../mem_reader/{read_multi_model => read_multi_modal}/utils.py | 0 12 files changed, 0 insertions(+), 0 deletions(-) rename src/memos/mem_reader/{multi_model_struct.py => multi_modal_struct.py} (100%) rename src/memos/mem_reader/{read_multi_model => read_multi_modal}/__init__.py (100%) rename src/memos/mem_reader/{read_multi_model => read_multi_modal}/assistant_parser.py (100%) rename src/memos/mem_reader/{read_multi_model => read_multi_modal}/base.py (100%) rename src/memos/mem_reader/{read_multi_model => read_multi_modal}/file_content_parser.py (100%) rename src/memos/mem_reader/{read_multi_model => read_multi_modal}/multi_model_parser.py (100%) rename src/memos/mem_reader/{read_multi_model => 
read_multi_modal}/string_parser.py (100%) rename src/memos/mem_reader/{read_multi_model => read_multi_modal}/system_parser.py (100%) rename src/memos/mem_reader/{read_multi_model => read_multi_modal}/text_content_parser.py (100%) rename src/memos/mem_reader/{read_multi_model => read_multi_modal}/tool_parser.py (100%) rename src/memos/mem_reader/{read_multi_model => read_multi_modal}/user_parser.py (100%) rename src/memos/mem_reader/{read_multi_model => read_multi_modal}/utils.py (100%) diff --git a/src/memos/mem_reader/multi_model_struct.py b/src/memos/mem_reader/multi_modal_struct.py similarity index 100% rename from src/memos/mem_reader/multi_model_struct.py rename to src/memos/mem_reader/multi_modal_struct.py diff --git a/src/memos/mem_reader/read_multi_model/__init__.py b/src/memos/mem_reader/read_multi_modal/__init__.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/__init__.py rename to src/memos/mem_reader/read_multi_modal/__init__.py diff --git a/src/memos/mem_reader/read_multi_model/assistant_parser.py b/src/memos/mem_reader/read_multi_modal/assistant_parser.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/assistant_parser.py rename to src/memos/mem_reader/read_multi_modal/assistant_parser.py diff --git a/src/memos/mem_reader/read_multi_model/base.py b/src/memos/mem_reader/read_multi_modal/base.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/base.py rename to src/memos/mem_reader/read_multi_modal/base.py diff --git a/src/memos/mem_reader/read_multi_model/file_content_parser.py b/src/memos/mem_reader/read_multi_modal/file_content_parser.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/file_content_parser.py rename to src/memos/mem_reader/read_multi_modal/file_content_parser.py diff --git a/src/memos/mem_reader/read_multi_model/multi_model_parser.py b/src/memos/mem_reader/read_multi_modal/multi_model_parser.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/multi_model_parser.py rename to src/memos/mem_reader/read_multi_modal/multi_model_parser.py diff --git a/src/memos/mem_reader/read_multi_model/string_parser.py b/src/memos/mem_reader/read_multi_modal/string_parser.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/string_parser.py rename to src/memos/mem_reader/read_multi_modal/string_parser.py diff --git a/src/memos/mem_reader/read_multi_model/system_parser.py b/src/memos/mem_reader/read_multi_modal/system_parser.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/system_parser.py rename to src/memos/mem_reader/read_multi_modal/system_parser.py diff --git a/src/memos/mem_reader/read_multi_model/text_content_parser.py b/src/memos/mem_reader/read_multi_modal/text_content_parser.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/text_content_parser.py rename to src/memos/mem_reader/read_multi_modal/text_content_parser.py diff --git a/src/memos/mem_reader/read_multi_model/tool_parser.py b/src/memos/mem_reader/read_multi_modal/tool_parser.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/tool_parser.py rename to src/memos/mem_reader/read_multi_modal/tool_parser.py diff --git a/src/memos/mem_reader/read_multi_model/user_parser.py b/src/memos/mem_reader/read_multi_modal/user_parser.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/user_parser.py rename to src/memos/mem_reader/read_multi_modal/user_parser.py diff --git 
a/src/memos/mem_reader/read_multi_model/utils.py b/src/memos/mem_reader/read_multi_modal/utils.py similarity index 100% rename from src/memos/mem_reader/read_multi_model/utils.py rename to src/memos/mem_reader/read_multi_modal/utils.py From f62a010ecd68d23086dc269842c716725891ca11 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Mon, 1 Dec 2025 14:17:14 +0800 Subject: [PATCH 04/28] rename multi modal --- .../{multi_model_parser.py => multi_modal_parser.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/memos/mem_reader/read_multi_modal/{multi_model_parser.py => multi_modal_parser.py} (100%) diff --git a/src/memos/mem_reader/read_multi_modal/multi_model_parser.py b/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py similarity index 100% rename from src/memos/mem_reader/read_multi_modal/multi_model_parser.py rename to src/memos/mem_reader/read_multi_modal/multi_modal_parser.py From 87091d6409d77b29b171f5c9b997c6b7d9a2b4af Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Mon, 1 Dec 2025 21:12:48 +0800 Subject: [PATCH 05/28] tool mem support --- src/memos/mem_reader/multi_modal_struct.py | 70 +++++++- .../read_multi_modal/system_parser.py | 24 ++- .../read_multi_modal/tool_parser.py | 151 +++++++++++++++--- src/memos/mem_reader/simple_struct.py | 2 + src/memos/memories/textual/item.py | 8 +- src/memos/templates/tool_mem_prompts.py | 84 ++++++++++ 6 files changed, 307 insertions(+), 32 deletions(-) create mode 100644 src/memos/templates/tool_mem_prompts.py diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index 5a78208b9..7c7ec64de 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -1,4 +1,5 @@ import concurrent.futures +import json import traceback from typing import Any @@ -7,8 +8,9 @@ from memos.configs.mem_reader import MultiModalStructMemReaderConfig from memos.context.context import ContextThreadPoolExecutor from memos.mem_reader.read_multi_modal import MultiModalParser -from memos.mem_reader.simple_struct import SimpleStructMemReader +from memos.mem_reader.simple_struct import SimpleStructMemReader, detect_lang from memos.memories.textual.item import TextualMemoryItem +from memos.templates.tool_mem_prompts import TOOL_TRAJECTORY_PROMPT_EN, TOOL_TRAJECTORY_PROMPT_ZH from memos.types import MessagesType from memos.utils import timed @@ -222,6 +224,61 @@ def _process_string_fine( return fine_memory_items + def _get_llm_tool_trajectory_response(self, mem_str: str) -> list: + """ + Generate tool trajectory experience items via LLM. + """ + try: + lang = detect_lang(mem_str) + template = TOOL_TRAJECTORY_PROMPT_ZH if lang == "zh" else TOOL_TRAJECTORY_PROMPT_EN + prompt = template.replace("{messages}", mem_str) + rsp = self.llm.generate([{"role": "user", "content": prompt}]) + rsp = rsp.replace("```json", "").replace("```", "") + return json.loads(rsp) + except Exception as e: + logger.error(f"[MultiModalFine] Error calling LLM for tool trajectory: {e}") + return [] + + def _process_tool_trajectory_fine( + self, + fast_memory_items: list[TextualMemoryItem], + info: dict[str, Any], + ) -> list[TextualMemoryItem]: + """ + Process tool trajectory memory items through LLM to generate fine mode memories. 
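+        Only fast items whose text contains a tool turn ("tool:") are sent to the LLM; each extracted trajectory becomes one ToolTrajectoryMemory item.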
+ """ + if not fast_memory_items: + return [] + + fine_memory_items = [] + + for fast_item in fast_memory_items: + # Extract memory text (string content) + mem_str = fast_item.memory or "" + if not mem_str.strip() or "tool:" not in mem_str: + continue + try: + resp = self._get_llm_tool_trajectory_response(mem_str) + except Exception as e: + logger.error(f"[MultiModalFine] Error calling LLM for tool trajectory: {e}") + continue + for m in resp: + try: + # Normalize memory_type (same as simple_struct) + memory_type = "ToolTrajectoryMemory" + + node = self._make_memory_item( + value=m.get("trajectory", ""), + info=info, + memory_type=memory_type, + tool_used_status=m.get("tool_used_status", []), + ) + fine_memory_items.append(node) + except Exception as e: + logger.error(f"[MultiModalFine] parse error for tool trajectory: {e}") + + return fine_memory_items + @timed def _process_multi_modal_data( self, scene_data_info: MessagesType, info, mode: str = "fine", **kwargs @@ -266,6 +323,11 @@ def _process_multi_modal_data( ) fine_memory_items.extend(fine_memory_items_string_parser) + fine_memory_items_tool_trajectory_parser = self._process_tool_trajectory_fine( + fast_memory_items, info + ) + fine_memory_items.extend(fine_memory_items_tool_trajectory_parser) + # Part B: get fine multimodal items for fast_item in fast_memory_items: sources = fast_item.metadata.sources @@ -304,6 +366,12 @@ def _process_transfer_multi_modal_data( # Part A: call llm fine_memory_items_string_parser = self._process_string_fine([raw_node], info, custom_tags) fine_memory_items.extend(fine_memory_items_string_parser) + + fine_memory_items_tool_trajectory_parser = self._process_tool_trajectory_fine( + [raw_node], info + ) + fine_memory_items.extend(fine_memory_items_tool_trajectory_parser) + # Part B: get fine multimodal items for source in sources: items = self.multi_modal_parser.process_transfer( diff --git a/src/memos/mem_reader/read_multi_modal/system_parser.py b/src/memos/mem_reader/read_multi_modal/system_parser.py index 9a7b1b32f..9056a050c 100644 --- a/src/memos/mem_reader/read_multi_modal/system_parser.py +++ b/src/memos/mem_reader/read_multi_modal/system_parser.py @@ -41,6 +41,12 @@ def create_source( info: dict[str, Any], ) -> SourceMessage: """Create SourceMessage from system message.""" + content_wo_tool_schema = re.sub( + r"(.*?)", + r"omitted", + message, + flags=re.DOTALL, + ) tool_schema_match = re.search(r"(.*?)", message, re.DOTALL) tool_schema_content = tool_schema_match.group(1) if tool_schema_match else "" @@ -49,7 +55,8 @@ def create_source( role="system", chat_time=message.get("chat_time", None), message_id=message.get("message_id", None), - content=tool_schema_content, + content=content_wo_tool_schema, + tool_schema=tool_schema_content, ) def rebuild_from_source( @@ -57,9 +64,10 @@ def rebuild_from_source( source: SourceMessage, ) -> ChatCompletionSystemMessageParam: """Rebuild system message from SourceMessage.""" + # only rebuild tool schema content, content will be used in full chat content by llm return { "role": "system", - "content": source.content or "", + "content": source.tool_schema or "", "chat_time": source.chat_time, "message_id": source.message_id, } @@ -74,7 +82,7 @@ def parse_fast( if isinstance(content, dict): content = content["text"] - # Extract tool_schema content and remaining content + # Replace tool_schema content with "omitted" in remaining content content_wo_tool_schema = re.sub( r"(.*?)", r"omitted", @@ -87,7 +95,7 @@ def parse_fast( TextualMemoryItem( 
memory=content_wo_tool_schema, metadata=TreeNodeTextualMemoryMetadata( - memory_type="LongTermMemory", + memory_type="LongTermMemory",  # LongTermMemory is chosen only as a placeholder for system messages status="activated", tags=["mode:fast"], sources=[source], ), ) ] @@ -106,8 +114,12 @@ def parse_fine( content = content["text"] try: tool_schema = json.loads(content) - assert isinstance(tool_schema, list), "Tool schema must be a list" + assert isinstance(tool_schema, list), "Tool schema must be a list[dict]" except json.JSONDecodeError: + logger.warning(f"[SystemParser] Failed to parse tool schema: {content}") return [] + except AssertionError: + logger.warning(f"[SystemParser] Tool schema must be a list[dict]: {content}") return [] return [ TextualMemoryItem( id=str(uuid.uuid4()), memory=json.dumps(tool_schema), metadata=TreeNodeTextualMemoryMetadata( - memory_type="tool_schema", + memory_type="ToolSchemaMemory", ), ) ] diff --git a/src/memos/mem_reader/read_multi_modal/tool_parser.py b/src/memos/mem_reader/read_multi_modal/tool_parser.py index 8d42b69a7..8d5ac7c24 100644 --- a/src/memos/mem_reader/read_multi_modal/tool_parser.py +++ b/src/memos/mem_reader/read_multi_modal/tool_parser.py @@ -56,7 +56,18 @@ def create_source( for part in raw_content: if isinstance(part, dict): part_type = part.get("type", "") - if part_type == "text": + if part_type == "text": + sources.append( + SourceMessage( + type="text", + role=role, + chat_time=chat_time, + message_id=message_id, + content=part.get("text", ""), + tool_call_id=tool_call_id, + ) + ) + elif part_type == "file": file_info = part.get("file", {}) sources.append( SourceMessage( type="file", role=role, chat_time=chat_time, message_id=message_id, - doc_path=file_info.get("filename") or file_info.get("file_id", ""), content=file_info.get("file_data", ""), + filename=file_info.get("filename", ""), + file_id=file_info.get("file_id", ""), tool_call_id=tool_call_id, original_part=part, ) ) - else: - # image_url, input_audio, etc. 
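+                    # each supported modality keeps its real payload (text, file data, image URL, audio data) instead of a generic placeholder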
+ elif part_type == "image_url": + file_info = part.get("image_url", {}) sources.append( SourceMessage( - type=part_type, + type="image_url", role=role, chat_time=chat_time, message_id=message_id, - content=f"[{part_type}]", + content=file_info.get("url", ""), + detail=file_info.get("detail", "auto"), tool_call_id=tool_call_id, original_part=part, ) ) + elif part_type == "input_audio": + file_info = part.get("input_audio", {}) + sources.append( + SourceMessage( + type="input_audio", + role=role, + chat_time=chat_time, + message_id=message_id, + content=file_info.get("data", ""), + format=file_info.get("format", "wav"), + tool_call_id=tool_call_id, + original_part=part, + ) + ) + else: + logger.warning(f"[ToolParser] Unsupported part type: {part_type}") + continue else: # Simple string content message: single SourceMessage - content = raw_content - if content: + if raw_content: sources.append( SourceMessage( type="chat", role=role, chat_time=chat_time, message_id=message_id, - content=content, + content=raw_content, tool_call_id=tool_call_id, ) ) - return ( - sources - if len(sources) > 1 - else (sources[0] if sources else SourceMessage(type="chat", role=role)) - ) + return sources def rebuild_from_source( self, @@ -117,6 +142,7 @@ def rebuild_from_source( if isinstance(original, dict) and "type" in original: return { "role": source.role or "user", + "tool_call_id": source.tool_call_id or "", "content": [original], "chat_time": source.chat_time, "message_id": source.message_id, @@ -126,14 +152,27 @@ def rebuild_from_source( return original # Priority 2: Rebuild from source fields - if source.type == "file": + if source.type == "text": return { - "role": source.role or "user", + "role": source.role or "tool", + "content": [ + { + "type": "text", + "text": source.content or "", + } + ], + "chat_time": source.chat_time, + "message_id": source.message_id, + } + elif source.type == "file": + return { + "role": source.role or "tool", "content": [ { "type": "file", "file": { - "filename": source.doc_path or "", + "file_id": source.file_id or "", + "filename": source.filename or "", "file_data": source.content or "", }, } @@ -141,12 +180,42 @@ def rebuild_from_source( "chat_time": source.chat_time, "message_id": source.message_id, } + elif source.type == "image_url": + return { + "role": source.role or "tool", + "content": [ + { + "type": "image_url", + "image_url": { + "url": source.content or "", + "detail": source.detail or "auto", + }, + } + ], + "chat_time": source.chat_time, + "message_id": source.message_id, + } + elif source.type == "input_audio": + return { + "role": source.role or "tool", + "content": [ + { + "type": "input_audio", + "input_audio": { + "data": source.content or "", + "format": source.format or "wav", + }, + } + ], + "chat_time": source.chat_time, + "message_id": source.message_id, + } # Simple text message return { "role": "tool", "content": source.content or "", - "tool_call_id": source.message_id or "", # tool_call_id might be in message_id + "tool_call_id": source.message_id or "", "chat_time": source.chat_time, "message_id": source.message_id, } @@ -157,11 +226,30 @@ def parse_fast( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - memory = json.dumps(message) - source = self.create_source(message, info) + role = message.get("role", "") + content = message.get("content", "") + chat_time = message.get("chat_time", None) + + if role != "user": + logger.warning(f"[ToolParser] Expected role is `user`, got {role}") + return [] + parts = [f"{role}: "] + if 
chat_time: + parts.append(f"[{chat_time}]: ") + prefix = "".join(parts) + content = json.dumps(content) if isinstance(content, list) else content + line = f"{prefix}{content}\n" + if not line: + return [] + memory_type = ( + "LongTermMemory"  # LongTermMemory is chosen only as a placeholder for tool messages + ) + + sources = self.create_source(message, info) return [ TextualMemoryItem( - memory=memory, metadata=TreeNodeTextualMemoryMetadata(sources=[source]) + memory=line, + metadata=TreeNodeTextualMemoryMetadata(memory_type=memory_type, sources=sources), ) ] def parse_fine( self, @@ -171,4 +259,25 @@ def parse_fine( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: + content = message.get("content", "") + if isinstance(content, list): + part_type = content[0].get("type", "") + if part_type == "text": + # text will fine parse in full chat content, no need to parse specially + return [] + elif part_type == "file": + # TODO: use OCR to extract text from file and generate mem by llm + content = content[0].get("file", {}).get("file_data", "") + elif part_type == "image_url": + # TODO: use multi-modal llm to generate mem by image url + content = content[0].get("image_url", {}).get("url", "") + elif part_type == "input_audio": + # TODO: unsupport audio for now + return [] + else: + logger.warning(f"[ToolParser] Unsupported part type: {part_type}") + return [] + else: + # simple string content message, fine parse in full chat content, no need to parse specially + return [] return [] diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py index 53a7de035..7f7b16234 100644 --- a/src/memos/mem_reader/simple_struct.py +++ b/src/memos/mem_reader/simple_struct.py @@ -223,6 +223,7 @@ def _make_memory_item( background: str = "", type_: str = "fact", confidence: float = 0.99, + **kwargs, ) -> TextualMemoryItem: """construct memory item""" info_ = info.copy() @@ -245,6 +246,7 @@ ... confidence=confidence, type=type_, info=info_, + **kwargs, ), ) diff --git a/src/memos/memories/textual/item.py b/src/memos/memories/textual/item.py index 76b252f54..75a16bace 100644 --- a/src/memos/memories/textual/item.py +++ b/src/memos/memories/textual/item.py @@ -24,7 +24,7 @@ class SourceMessage(BaseModel): - type: Source kind (e.g., "chat", "doc", "web", "file", "system", ...). If not provided, upstream logic may infer it: presence of `role` ⇒ "chat"; otherwise ⇒ "doc". - - role: Conversation role ("user" | "assistant" | "system") when the + - role: Conversation role ("user" | "assistant" | "system" | "tool") when the source is a chat turn. - content: Minimal reproducible snippet from the source. If omitted, upstream may fall back to `doc_path` / `url` / `message_id`. @@ -35,7 +35,7 @@ """ type: str | None = "chat" - role: Literal["user", "assistant", "system"] | None = None + role: Literal["user", "assistant", "system", "tool"] | None = None chat_time: str | None = None message_id: str | None = None content: str | None = None @@ -104,8 +104,8 @@ class TreeNodeTextualMemoryMetadata(TextualMemoryMetadata): "LongTermMemory", "UserMemory", "OuterMemory", - "ToolSchema", - "ToolTrajectory", + "ToolSchemaMemory", + "ToolTrajectoryMemory", ] = Field(default="WorkingMemory", description="Memory lifecycle type.") sources: list[SourceMessage] | None = Field( default=None, description="Multiple origins of the memory (e.g., URLs, notes)." 
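        # tool messages contribute one SourceMessage per content part, so multimodal origins stay individually addressable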
diff --git a/src/memos/templates/tool_mem_prompts.py b/src/memos/templates/tool_mem_prompts.py new file mode 100644 index 000000000..7d5363956 --- /dev/null +++ b/src/memos/templates/tool_mem_prompts.py @@ -0,0 +1,84 @@ +TOOL_TRAJECTORY_PROMPT_ZH = """ +你是一个专业的工具调用轨迹提取专家。你的任务是从给定的对话消息中提取完整的工具调用轨迹经验。 + +## 提取规则: +1. 只有当对话中存在有价值的工具调用过程时才进行提取 +2. 有价值的轨迹至少包含以下元素: + - 用户的问题(user message) + - 助手的工具调用尝试(assistant message with tool_calls) + - 工具的执行结果(tool message with tool_call_id and content,无论成功或失败) + - 助手的响应(assistant message,无论是否给出最终答案) + +## 输出格式: +返回一个JSON数组,格式如下: +```json +[ + { + "trajectory": "自然语言输出包含'任务、使用的工具、工具观察、最终回答'的完整精炼的总结,体现顺序", + "tool_used_status": [ + { + "used_tool": "工具名1", + "success_rate": "0.0-1.0之间的数值,表示该工具在本次轨迹中的成功率", + "error_type": "调用失败时的错误类型和描述,成功时为空字符串", + "experience": "该工具的使用经验,比如常见的参数模式、执行特点、结果解读方式等" + } + ] + } +] +``` + +## 注意事项: +- 如果对话中没有完整的工具调用轨迹,返回空数组 +- 每个轨迹必须是独立的完整过程 +- 一个轨迹中可能涉及多个工具的使用,每个工具在tool_used_status中独立记录 +- 只提取事实内容,不要添加任何解释或额外信息 +- 确保返回的是有效的JSON格式 + +请分析以下对话消息并提取工具调用轨迹: + +{messages} + +""" + + +TOOL_TRAJECTORY_PROMPT_EN = """ +You are a professional tool call trajectory extraction expert. Your task is to extract valuable tool call trajectory experiences from given conversation messages. + +## Extraction Rules: +1. Only extract when there are valuable tool calling processes in the conversation +2. Valuable trajectories must contain at least the following elements: + - User's question (user message) + - Assistant's tool call attempt (assistant message with tool_calls) + - Tool execution results (tool message with tool_call_id and content, regardless of success or failure) + - Assistant's response (assistant message, whether or not a final answer is given) + +## Output Format: +Return a JSON array in the following format: +```json +[ + { + "trajectory": "Natural language summary containing 'task, tools used, tool observations, final answer' in a complete and refined manner, reflecting the sequence", + "tool_used_status": [ + { + "used_tool": "Tool Name 1", + "success_rate": "Numerical value between 0.0-1.0, indicating the success rate of this tool in the current trajectory", + "error_type": "Error type and description when call fails, empty string when successful", + "experience": "Usage experience of this tool, such as common parameter patterns, execution characteristics, result interpretation methods, etc." 
+                    } + ] + } +] +``` + +## Notes: +- If there are no complete tool call trajectories in the conversation, return an empty array +- Each trajectory must be an independent complete process +- Multiple tools may be used in one trajectory, each tool is recorded independently in tool_used_status +- Only extract factual content, do not add any additional explanations or information +- Ensure the returned content is valid JSON format + +Please analyze the following conversation messages and extract tool call trajectories: + +{messages} + +""" From b6efc875e05d7b50ae24e10e41ee0c1dd108d891 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Tue, 2 Dec 2025 18:58:22 +0800 Subject: [PATCH 06/28] modify multi-modal code --- .../mem_reader/read_multi_modal/system_parser.py | 15 ++++++++++----- .../mem_reader/read_multi_modal/tool_parser.py | 16 +++++++++------- .../textual/tree_text_memory/organize/manager.py | 7 ++++++- .../chat_completion_assistant_message_param.py | 7 ++++--- .../chat_completion_system_message_param.py | 5 +++-- .../chat_completion_tool_message_param.py | 3 +-- .../chat_completion_user_message_param.py | 3 +-- 7 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/memos/mem_reader/read_multi_modal/system_parser.py b/src/memos/mem_reader/read_multi_modal/system_parser.py index 9056a050c..e08279dc1 100644 --- a/src/memos/mem_reader/read_multi_modal/system_parser.py +++ b/src/memos/mem_reader/read_multi_modal/system_parser.py @@ -37,17 +37,21 @@ def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None): def create_source( self, - message: str, + message: ChatCompletionSystemMessageParam, info: dict[str, Any], ) -> SourceMessage: """Create SourceMessage from system message.""" + content = message["content"] + if isinstance(content, dict): + content = content["text"] + content_wo_tool_schema = re.sub( r"<tool_schema>(.*?)</tool_schema>", r"<tool_schema>omitted</tool_schema>", - message, + content, flags=re.DOTALL, ) - tool_schema_match = re.search(r"<tool_schema>(.*?)</tool_schema>", message, re.DOTALL) + tool_schema_match = re.search(r"<tool_schema>(.*?)</tool_schema>", content, re.DOTALL) tool_schema_content = tool_schema_match.group(1) if tool_schema_match else "" return SourceMessage( @@ -90,7 +94,7 @@ def parse_fast( flags=re.DOTALL, ) - source = self.create_source(content, info) + source = self.create_source(message, info) return [ TextualMemoryItem( memory=content_wo_tool_schema, @@ -125,9 +129,10 @@ def parse_fine( return [ TextualMemoryItem( id=str(uuid.uuid4()), - memory=json.dumps(tool_schema), + memory=json.dumps(schema), metadata=TreeNodeTextualMemoryMetadata( memory_type="ToolSchemaMemory", ), ) + for schema in tool_schema ] diff --git a/src/memos/mem_reader/read_multi_modal/tool_parser.py b/src/memos/mem_reader/read_multi_modal/tool_parser.py index 8d5ac7c24..72937cf9a 100644 --- a/src/memos/mem_reader/read_multi_modal/tool_parser.py +++ b/src/memos/mem_reader/read_multi_modal/tool_parser.py @@ -230,26 +230,28 @@ def parse_fast( content = message.get("content", "") chat_time = message.get("chat_time", None) - if role != "user": - logger.warning(f"[ToolParser] Expected role is `user`, got {role}") + if role != "tool": + logger.warning(f"[ToolParser] Expected role is `tool`, got {role}") return [] parts = [f"{role}: "] if chat_time: parts.append(f"[{chat_time}]: ") prefix = "".join(parts) - content = json.dumps(content) if isinstance(content, list) else content + content = json.dumps(content) if isinstance(content, list | dict) else content line = f"{prefix}{content}\n" if not line: return [] - memory_type = ( - "LongTermMemory"  # LongTermMemory is chosen only as a placeholder 
for tool messages - ) sources = self.create_source(message, info) return [ TextualMemoryItem( memory=line, - metadata=TreeNodeTextualMemoryMetadata(memory_type=memory_type, sources=sources), + metadata=TreeNodeTextualMemoryMetadata( + memory_type="LongTermMemory",  # LongTermMemory is chosen only as a placeholder for tool messages + status="activated", + tags=["mode:fast"], + sources=sources, + ), ) ] diff --git a/src/memos/memories/textual/tree_text_memory/organize/manager.py b/src/memos/memories/textual/tree_text_memory/organize/manager.py index a71fee02f..31cf89ae1 100644 --- a/src/memos/memories/textual/tree_text_memory/organize/manager.py +++ b/src/memos/memories/textual/tree_text_memory/organize/manager.py @@ -186,7 +186,12 @@ def _process_memory(self, memory: TextualMemoryItem, user_name: str | None = Non ) futures.append(("working", f_working)) - if memory.metadata.memory_type in ("LongTermMemory", "UserMemory"): + if memory.metadata.memory_type in ( + "LongTermMemory", + "UserMemory", + "ToolSchemaMemory", + "ToolTrajectoryMemory", + ): f_graph = ex.submit( self._add_to_graph_memory, memory=memory, diff --git a/src/memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py b/src/memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py index a742de3a9..3c5638788 100644 --- a/src/memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +++ b/src/memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py @@ -2,7 +2,6 @@ from __future__ import annotations -from collections.abc import Iterable from typing import Literal, TypeAlias from typing_extensions import Required, TypedDict @@ -35,7 +34,7 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False): [Learn more](https://platform.openai.com/docs/guides/audio). """ - content: str | Iterable[ContentArrayOfContentPart] | None + content: str | list[ContentArrayOfContentPart] | ContentArrayOfContentPart | None """The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified. 
@@ -44,7 +43,9 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False): refusal: str | None """The refusal message by the assistant.""" - tool_calls: Iterable[ChatCompletionMessageToolCallUnionParam] + tool_calls: ( + list[ChatCompletionMessageToolCallUnionParam] | ChatCompletionMessageToolCallUnionParam + ) """The tool calls generated by the model, such as function calls.""" chat_time: str | None diff --git a/src/memos/types/openai_chat_completion_types/chat_completion_system_message_param.py b/src/memos/types/openai_chat_completion_types/chat_completion_system_message_param.py index 7faa90e2e..ea2101229 100644 --- a/src/memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +++ b/src/memos/types/openai_chat_completion_types/chat_completion_system_message_param.py @@ -2,7 +2,6 @@ from __future__ import annotations -from collections.abc import Iterable from typing import Literal from typing_extensions import Required, TypedDict @@ -14,7 +13,9 @@ class ChatCompletionSystemMessageParam(TypedDict, total=False): - content: Required[str | Iterable[ChatCompletionContentPartTextParam]] + content: Required[ + str | list[ChatCompletionContentPartTextParam] | ChatCompletionContentPartTextParam + ] """The contents of the system message.""" role: Required[Literal["system"]] diff --git a/src/memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py b/src/memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py index c03220915..99c845d11 100644 --- a/src/memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +++ b/src/memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py @@ -2,7 +2,6 @@ from __future__ import annotations -from collections.abc import Iterable from typing import Literal from typing_extensions import Required, TypedDict @@ -14,7 +13,7 @@ class ChatCompletionToolMessageParam(TypedDict, total=False): - content: Required[str | Iterable[ChatCompletionContentPartParam]] + content: Required[str | list[ChatCompletionContentPartParam] | ChatCompletionContentPartParam] """The contents of the tool message.""" role: Required[Literal["tool"]] diff --git a/src/memos/types/openai_chat_completion_types/chat_completion_user_message_param.py b/src/memos/types/openai_chat_completion_types/chat_completion_user_message_param.py index 2c2a1f23f..8c004f340 100644 --- a/src/memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +++ b/src/memos/types/openai_chat_completion_types/chat_completion_user_message_param.py @@ -2,7 +2,6 @@ from __future__ import annotations -from collections.abc import Iterable from typing import Literal from typing_extensions import Required, TypedDict @@ -14,7 +13,7 @@ class ChatCompletionUserMessageParam(TypedDict, total=False): - content: Required[str | Iterable[ChatCompletionContentPartParam]] + content: Required[str | list[ChatCompletionContentPartParam] | ChatCompletionContentPartParam] """The contents of the user message.""" role: Required[Literal["user"]] From 492571bf8a78e010ae51afacad074830a064f19a Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Tue, 2 Dec 2025 20:43:13 +0800 Subject: [PATCH 07/28] pref support multi-modal messages --- .../textual/prefer_text_memory/spliter.py | 2 +- .../textual/prefer_text_memory/utils.py | 33 ++++++++++++++++--- src/memos/multi_mem_cube/single_cube.py | 7 ++++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/memos/memories/textual/prefer_text_memory/spliter.py 
b/src/memos/memories/textual/prefer_text_memory/spliter.py index 3059d611b..a54036778 100644 --- a/src/memos/memories/textual/prefer_text_memory/spliter.py +++ b/src/memos/memories/textual/prefer_text_memory/spliter.py @@ -87,7 +87,7 @@ def _split_with_overlap(self, data: MessageList) -> list[MessageList]: # overlap 1 turn (Q + A = 2) context = copy.deepcopy(chunk[-2:]) if i + 1 < len(data) else [] chunk = context - if chunk and len(chunk) % 2 == 0: + if chunk: chunks.append(chunk) return chunks diff --git a/src/memos/memories/textual/prefer_text_memory/utils.py b/src/memos/memories/textual/prefer_text_memory/utils.py index 76d4b4211..03d2ef923 100644 --- a/src/memos/memories/textual/prefer_text_memory/utils.py +++ b/src/memos/memories/textual/prefer_text_memory/utils.py @@ -1,3 +1,4 @@ +import json import re from memos.dependency import require_python_package @@ -9,12 +10,36 @@ def convert_messages_to_string(messages: MessageList) -> str: """Convert a list of messages to a string.""" message_text = "" for message in messages: + content = message.get("content", "") + content = ( + content.strip() + if isinstance(content, str) + else json.dumps(content, ensure_ascii=False).strip() + ) + if message["role"] == "system": + continue if message["role"] == "user": - message_text += f"Query: {message['content']}\n" if message["content"].strip() else "" + message_text += f"User: {content}\n" if content else "" elif message["role"] == "assistant": - message_text += f"Answer: {message['content']}\n" if message["content"].strip() else "" + tool_calls = message.get("tool_calls", []) + tool_calls_str = ( + f"[tool_calls]: {json.dumps(tool_calls, ensure_ascii=False)}" if tool_calls else "" + ) + line_str = ( + f"Assistant: {content} {tool_calls_str}".strip() + if content or tool_calls_str + else "" + ) + message_text += f"{line_str}\n" if line_str else "" + elif message["role"] == "tool": + tool_call_id = message.get("tool_call_id", "") + line_str = ( + f"Tool: {content} [tool_call_id]: {tool_call_id}".strip() + if tool_call_id + else f"Tool: {content}".strip() + ) + message_text += f"{line_str}\n" if line_str else "" - message_text = message_text.strip() - return message_text + return message_text.strip() @require_python_package( diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py index 880646939..6c1ff00c0 100644 --- a/src/memos/multi_mem_cube/single_cube.py +++ b/src/memos/multi_mem_cube/single_cube.py @@ -495,6 +495,13 @@ def _process_pref_mem( if os.getenv("ENABLE_PREFERENCE_MEMORY", "false").lower() != "true": return [] + if add_req.messages is None or isinstance(add_req.messages, str): + return [] + + for message in add_req.messages: + if message.get("role", None) is None: + return [] + target_session_id = add_req.session_id or "default_session" if sync_mode == "async": From 89f11aa524710e3a88029fe1359edfbad30e3eca Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Tue, 2 Dec 2025 20:58:24 +0800 Subject: [PATCH 08/28] fix bug in chat handler --- src/memos/api/handlers/chat_handler.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/memos/api/handlers/chat_handler.py b/src/memos/api/handlers/chat_handler.py index 1054644d2..968b9f5e6 100644 --- a/src/memos/api/handlers/chat_handler.py +++ b/src/memos/api/handlers/chat_handler.py @@ -142,7 +142,9 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, An # Step 2: Build system prompt system_prompt = self._build_system_prompt( - filtered_memories, 
search_response.data["pref_string"], chat_req.system_prompt + filtered_memories, + search_response.data.get("pref_string", ""), + chat_req.system_prompt, ) # Prepare message history @@ -257,7 +259,7 @@ def generate_chat_response() -> Generator[str, None, None]: # Step 2: Build system prompt with memories system_prompt = self._build_system_prompt( filtered_memories, - search_response.data["pref_string"], + search_response.data.get("pref_string", ""), chat_req.system_prompt, ) @@ -436,7 +438,7 @@ def generate_chat_response() -> Generator[str, None, None]: # Step 2: Build system prompt with memories system_prompt = self._build_enhance_system_prompt( - filtered_memories, search_response.data["pref_string"] + filtered_memories, search_response.data.get("pref_string", "") ) # Prepare messages From 152844a996229ca25ec45c5aa5acd6a9a0b7d58d Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Tue, 2 Dec 2025 21:14:27 +0800 Subject: [PATCH 09/28] fix pre commit --- poetry.lock | 42 ++++++++++---------- src/memos/api/product_models.py | 2 +- src/memos/mem_scheduler/general_scheduler.py | 4 +- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/poetry.lock b/poetry.lock index 940697b1c..c6c82cdbb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "absl-py" @@ -54,7 +54,7 @@ description = "Timeout context manager for asyncio programs" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "(python_version == \"3.10\" or python_version == \"3.11\") and python_full_version < \"3.11.3\" and (extra == \"mem-scheduler\" or extra == \"all\")" +markers = "python_full_version < \"3.11.3\" and (extra == \"mem-scheduler\" or extra == \"all\")" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -1080,7 +1080,7 @@ description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.9" groups = ["main", "eval"] -markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\" or python_version == \"3.13\") and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")" +markers = "(platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") and python_version < \"3.14\"" files = [ {file = "greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be"}, {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac"}, @@ -2623,7 +2623,7 @@ files = [ {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:235f728d6e2a409eddf1df58d5b0921cf80cfa9e72b9f2775ccb7b4a87984668"}, {file = "nvidia_cublas_cu12-12.6.4.1-py3-none-win_amd64.whl", hash = 
"sha256:9e4fa264f4d8a4eb0cdbd34beadc029f453b3bafae02401e999cf3d5a5af75f8"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [[package]] name = "nvidia-cuda-cupti-cu12" @@ -2639,7 +2639,7 @@ files = [ {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a3eff6cdfcc6a4c35db968a06fcadb061cbc7d6dde548609a941ff8701b98b73"}, {file = "nvidia_cuda_cupti_cu12-12.6.80-py3-none-win_amd64.whl", hash = "sha256:bbe6ae76e83ce5251b56e8c8e61a964f757175682bbad058b170b136266ab00a"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [[package]] name = "nvidia-cuda-nvrtc-cu12" @@ -2653,7 +2653,7 @@ files = [ {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53"}, {file = "nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:f7007dbd914c56bd80ea31bc43e8e149da38f68158f423ba845fc3292684e45a"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [[package]] name = "nvidia-cuda-runtime-cu12" @@ -2669,7 +2669,7 @@ files = [ {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a84d15d5e1da416dd4774cb42edf5e954a3e60cc945698dc1d5be02321c44dc8"}, {file = "nvidia_cuda_runtime_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:86c58044c824bf3c173c49a2dbc7a6c8b53cb4e4dca50068be0bf64e9dab3f7f"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [[package]] name = "nvidia-cudnn-cu12" @@ -2683,7 +2683,7 @@ files = [ {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2"}, {file = "nvidia_cudnn_cu12-9.5.1.17-py3-none-win_amd64.whl", hash = "sha256:d7af0f8a4f3b4b9dbb3122f2ef553b45694ed9c384d5a75bab197b8eefb79ab8"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [package.dependencies] nvidia-cublas-cu12 = "*" @@ -2702,7 +2702,7 @@ files = [ {file = 
"nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.whl", hash = "sha256:768160ac89f6f7b459bee747e8d175dbf53619cfe74b2a5636264163138013ca"}, {file = "nvidia_cufft_cu12-11.3.0.4-py3-none-win_amd64.whl", hash = "sha256:6048ebddfb90d09d2707efb1fd78d4e3a77cb3ae4dc60e19aab6be0ece2ae464"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [package.dependencies] nvidia-nvjitlink-cu12 = "*" @@ -2718,7 +2718,7 @@ files = [ {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159"}, {file = "nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8f57a0051dcf2543f6dc2b98a98cb2719c37d3cee1baba8965d57f3bbc90d4db"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [[package]] name = "nvidia-curand-cu12" @@ -2734,7 +2734,7 @@ files = [ {file = "nvidia_curand_cu12-10.3.7.77-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:7b2ed8e95595c3591d984ea3603dd66fe6ce6812b886d59049988a712ed06b6e"}, {file = "nvidia_curand_cu12-10.3.7.77-py3-none-win_amd64.whl", hash = "sha256:6d6d935ffba0f3d439b7cd968192ff068fafd9018dbf1b85b37261b13cfc9905"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [[package]] name = "nvidia-cusolver-cu12" @@ -2750,7 +2750,7 @@ files = [ {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dbbe4fc38ec1289c7e5230e16248365e375c3673c9c8bac5796e2e20db07f56e"}, {file = "nvidia_cusolver_cu12-11.7.1.2-py3-none-win_amd64.whl", hash = "sha256:6813f9d8073f555444a8705f3ab0296d3e1cb37a16d694c5fc8b862a0d8706d7"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [package.dependencies] nvidia-cublas-cu12 = "*" @@ -2771,7 +2771,7 @@ files = [ {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:23749a6571191a215cb74d1cdbff4a86e7b19f1200c071b3fcf844a5bea23a2f"}, {file = "nvidia_cusparse_cu12-12.5.4.2-py3-none-win_amd64.whl", hash = "sha256:4acb8c08855a26d737398cba8fb6f8f5045d93f82612b4cfd84645a2332ccf20"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra 
== \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [package.dependencies] nvidia-nvjitlink-cu12 = "*" @@ -2788,7 +2788,7 @@ files = [ {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46"}, {file = "nvidia_cusparselt_cu12-0.6.3-py3-none-win_amd64.whl", hash = "sha256:3b325bcbd9b754ba43df5a311488fca11a6b5dc3d11df4d190c000cf1a0765c7"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [[package]] name = "nvidia-nccl-cu12" @@ -2801,7 +2801,7 @@ files = [ {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c196e95e832ad30fbbb50381eb3cbd1fadd5675e587a548563993609af19522"}, {file = "nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [[package]] name = "nvidia-nvjitlink-cu12" @@ -2815,7 +2815,7 @@ files = [ {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41"}, {file = "nvidia_nvjitlink_cu12-12.6.85-py3-none-win_amd64.whl", hash = "sha256:e61120e52ed675747825cdd16febc6a0730537451d867ee58bee3853b1b13d1c"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [[package]] name = "nvidia-nvtx-cu12" @@ -2831,7 +2831,7 @@ files = [ {file = "nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1"}, {file = "nvidia_nvtx_cu12-12.6.77-py3-none-win_amd64.whl", hash = "sha256:2fb11a4af04a5e6c84073e6404d26588a34afd35379f0855a99797897efa75c0"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [[package]] name = "ollama" @@ -3919,7 +3919,7 @@ files = [ {file = "pywin32-311-cp39-cp39-win_amd64.whl", hash = "sha256:e0c4cfb0621281fe40387df582097fd796e80430597cb9944f0ae70447bacd91"}, {file = "pywin32-311-cp39-cp39-win_arm64.whl", hash = "sha256:62ea666235135fee79bb154e695f3ff67370afefd71bd7fea7512fc70ef31e3d"}, ] -markers = {main = "extra == \"all\" and platform_system == \"Windows\" or 
sys_platform == \"win32\"", eval = "platform_system == \"Windows\""} +markers = {main = "platform_system == \"Windows\" and extra == \"all\" or sys_platform == \"win32\"", eval = "platform_system == \"Windows\""} [[package]] name = "pyyaml" @@ -4917,7 +4917,7 @@ files = [ {file = "setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922"}, {file = "setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c"}, ] -markers = {main = "extra == \"all\" or extra == \"pref-mem\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\" or python_version >= \"3.12\""} +markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (extra == \"all\" or extra == \"pref-mem\") or extra == \"pref-mem\" or extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\" or python_version >= \"3.12\""} [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] @@ -5408,7 +5408,7 @@ files = [ {file = "triton-3.3.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3198adb9d78b77818a5388bff89fa72ff36f9da0bc689db2f0a651a67ce6a42"}, {file = "triton-3.3.1-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f6139aeb04a146b0b8e0fbbd89ad1e65861c57cfed881f21d62d3cb94a36bab7"}, ] -markers = {main = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"all\"", eval = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +markers = {main = "platform_machine == \"x86_64\" and extra == \"all\" and platform_system == \"Linux\"", eval = "platform_machine == \"x86_64\" and platform_system == \"Linux\""} [package.dependencies] setuptools = ">=40.8.0" @@ -5617,7 +5617,7 @@ description = "Fast implementation of asyncio event loop on top of libuv" optional = false python-versions = ">=3.8.0" groups = ["main"] -markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"" +markers = "platform_python_implementation != \"PyPy\" and sys_platform != \"win32\" and sys_platform != \"cygwin\"" files = [ {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f"}, {file = "uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d"}, diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py index 16ae86638..ffe736aa3 100644 --- a/src/memos/api/product_models.py +++ b/src/memos/api/product_models.py @@ -6,7 +6,7 @@ # Import message types from core types module from memos.log import get_logger -from memos.types import MessageDict, MessageList, MessagesType, PermissionDict, SearchMode +from memos.types import MessageDict, PermissionDict, SearchMode logger = get_logger(__name__) diff --git a/src/memos/mem_scheduler/general_scheduler.py b/src/memos/mem_scheduler/general_scheduler.py index 601c935a2..2145dee9f 100644 --- a/src/memos/mem_scheduler/general_scheduler.py +++ b/src/memos/mem_scheduler/general_scheduler.py @@ -309,7 +309,9 @@ def log_add_messages(self, msg: ScheduleMessageItem): if missing_ids: content_preview = ( - msg.content[:200] + "..." if isinstance(msg.content, str) and len(msg.content) > 200 else msg.content + msg.content[:200] + "..." 
+ if isinstance(msg.content, str) and len(msg.content) > 200
+ else msg.content
 )
 logger.warning(
 "Missing TextualMemoryItem(s) during add log preparation. "

From 2e68bac864446de8abfb5e77c24886d10c0828a3 Mon Sep 17 00:00:00 2001
From: "yuan.wang" 
Date: Wed, 3 Dec 2025 10:48:37 +0800
Subject: [PATCH 10/28] modify code

---
 src/memos/mem_reader/read_multi_modal/tool_parser.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/memos/mem_reader/read_multi_modal/tool_parser.py b/src/memos/mem_reader/read_multi_modal/tool_parser.py
index 72937cf9a..a3d9e6b91 100644
--- a/src/memos/mem_reader/read_multi_modal/tool_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/tool_parser.py
@@ -268,11 +268,12 @@ def parse_fine(
 # text will be fine-parsed in the full chat content, no need to parse specially
 return []
 elif part_type == "file":
- # TODO: use OCR to extract text from file and generate mem by llm
- content = content[0].get("file", {}).get("file_data", "")
+ # file content is handled by the file content parser, no need to parse it here
+ return []
 elif part_type == "image_url":
 # TODO: use multi-modal llm to generate mem by image url
 content = content[0].get("image_url", {}).get("url", "")
+ return []
 elif part_type == "input_audio":
 # TODO: audio is unsupported for now
 return []
@@ -282,4 +283,3 @@
 else:
 # simple string content message, fine-parsed in the full chat content, no need to parse specially
 return []
- return []

From a04b5f5fe83724eb3900d7069f21b45b4f50f6ef Mon Sep 17 00:00:00 2001
From: "yuan.wang" 
Date: Wed, 3 Dec 2025 15:56:37 +0800
Subject: [PATCH 11/28] add tool search

---
 src/memos/api/handlers/formatters_handler.py | 34 ++++
 src/memos/api/product_models.py | 15 ++
 .../read_multi_modal/tool_parser.py | 109 +----------
 .../mem_scheduler/optimized_scheduler.py | 4 +
 src/memos/memories/textual/tree.py | 4 +
 .../tree_text_memory/retrieve/recall.py | 8 +-
 .../tree_text_memory/retrieve/searcher.py | 160 +++++++++++++++++-
 src/memos/multi_mem_cube/single_cube.py | 18 +-
 8 files changed, 236 insertions(+), 116 deletions(-)

diff --git a/src/memos/api/handlers/formatters_handler.py b/src/memos/api/handlers/formatters_handler.py
index 976be87bb..88875cacc 100644
--- a/src/memos/api/handlers/formatters_handler.py
+++ b/src/memos/api/handlers/formatters_handler.py
@@ -90,3 +90,37 @@ def post_process_pref_mem(
 memories_result["pref_note"] = pref_note

 return memories_result
+
+
+def post_process_textual_mem(
+ memories_result: dict[str, Any],
+ text_formatted_mem: list[dict[str, Any]],
+ mem_cube_id: str,
+) -> dict[str, Any]:
+ """
+ Post-process text and tool memory results.
+ """
+ fact_mem = [
+ mem
+ for mem in text_formatted_mem
+ if mem["metadata"]["memory_type"] not in ["ToolSchemaMemory", "ToolTrajectoryMemory"]
+ ]
+ tool_mem = [
+ mem
+ for mem in text_formatted_mem
+ if mem["metadata"]["memory_type"] in ["ToolSchemaMemory", "ToolTrajectoryMemory"]
+ ]
+
+ memories_result["text_mem"].append(
+ {
+ "cube_id": mem_cube_id,
+ "memories": fact_mem,
+ }
+ )
+ memories_result["tool_mem"].append(
+ {
+ "cube_id": mem_cube_id,
+ "memories": tool_mem,
+ }
+ )
+ return memories_result
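For orientation, the routing this new helper performs is easiest to see on a concrete value. A minimal sketch of the post-processed result follows; every cube id and memory string here is invented for illustration, not taken from a real run:

```python
# Illustrative shape of memories_result after post_process_textual_mem runs.
# Tool-related items are routed into "tool_mem"; ordinary factual items stay
# in "text_mem". All values below are made up.
memories_result = {
    "text_mem": [
        {
            "cube_id": "cube_001",  # hypothetical cube id
            "memories": [
                {
                    "memory": "User prefers metric units.",
                    "metadata": {"memory_type": "UserMemory"},
                },
            ],
        },
    ],
    "tool_mem": [
        {
            "cube_id": "cube_001",
            "memories": [
                {
                    "memory": '{"name": "get_weather"}',
                    "metadata": {"memory_type": "ToolSchemaMemory"},
                },
            ],
        },
    ],
}
```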
" + "If enabled, the system will automatically recall tool memories " + "relevant to the query. Default: True." + ), + ) + + tool_mem_top_k: int = Field( + 6, + ge=0, + description="Number of tool memories to retrieve (top-K). Default: 6.", + ) + # ==== Filter conditions ==== # TODO: maybe add detailed description later filter: dict[str, Any] | None = Field( diff --git a/src/memos/mem_reader/read_multi_modal/tool_parser.py b/src/memos/mem_reader/read_multi_modal/tool_parser.py index a3d9e6b91..cdd3620cb 100644 --- a/src/memos/mem_reader/read_multi_modal/tool_parser.py +++ b/src/memos/mem_reader/read_multi_modal/tool_parser.py @@ -135,91 +135,6 @@ def rebuild_from_source( ) -> ChatCompletionToolMessageParam: """Rebuild tool message from SourceMessage.""" - # Priority 1: Use original_part if available - if hasattr(source, "original_part") and source.original_part: - original = source.original_part - # If it's a content part, wrap it in a message - if isinstance(original, dict) and "type" in original: - return { - "role": source.role or "user", - "tool_call_id": source.tool_call_id or "", - "content": [original], - "chat_time": source.chat_time, - "message_id": source.message_id, - } - # If it's already a full message, return it - if isinstance(original, dict) and "role" in original: - return original - - # Priority 2: Rebuild from source fields - if source.type == "text": - return { - "role": source.role or "tool", - "content": [ - { - "type": "text", - "text": source.content or "", - } - ], - "chat_time": source.chat_time, - "message_id": source.message_id, - } - elif source.type == "file": - return { - "role": source.role or "tool", - "content": [ - { - "type": "file", - "file": { - "file_id": source.file_id or "", - "filename": source.filename or "", - "file_data": source.content or "", - }, - } - ], - "chat_time": source.chat_time, - "message_id": source.message_id, - } - elif source.type == "image_url": - return { - "role": source.role or "tool", - "content": [ - { - "type": "image_url", - "image_url": { - "url": source.content or "", - "detail": source.detail or "auto", - }, - } - ], - "chat_time": source.chat_time, - "message_id": source.message_id, - } - elif source.type == "input_audio": - return { - "role": source.role or "tool", - "content": [ - { - "type": "input_audio", - "input_audio": { - "data": source.content or "", - "format": source.format or "wav", - }, - } - ], - "chat_time": source.chat_time, - "message_id": source.message_id, - } - - # Simple text message - return { - "role": "tool", - "content": source.content or "", - "tool_call_id": source.message_id or "", - "chat_time": source.chat_time, - "message_id": source.message_id, - } - def parse_fast( self, message: ChatCompletionToolMessageParam, @@ -261,25 +176,5 @@ def parse_fine( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - content = message.get("content", "") - if isinstance(content, list): - part_type = content[0].get("type", "") - if part_type == "text": - # text will fine parse in full chat content, no need to parse specially - return [] - elif part_type == "file": - # use file content parser to parse file content, no need to parse here - return [] - elif part_type == "image_url": - # TODO: use multi-modal llm to generate mem by image url - content = content[0].get("image_url", {}).get("url", "") - return [] - elif part_type == "input_audio": - # TODO: unsupport audio for now - return [] - else: - logger.warning(f"[ToolParser] Unsupported part type: {part_type}") - return [] - else: - # simple 
string content message, fine-parsed in the full chat content, no need to parse specially
- return []
+ # Tool messages need no special multimodal handling in fine mode.
+ return []
diff --git a/src/memos/mem_scheduler/optimized_scheduler.py b/src/memos/mem_scheduler/optimized_scheduler.py
index a85c533a0..f99360a86 100644
--- a/src/memos/mem_scheduler/optimized_scheduler.py
+++ b/src/memos/mem_scheduler/optimized_scheduler.py
@@ -159,6 +159,8 @@ def mix_search_memories(
 search_filter=search_filter,
 search_priority=search_priority,
 info=info,
+ search_tool_memory=search_req.search_tool_memory,
+ tool_mem_top_k=search_req.tool_mem_top_k,
 )

 # Try to get pre-computed memories if available
@@ -182,6 +184,8 @@ def mix_search_memories(
 top_k=search_req.top_k,
 user_name=user_context.mem_cube_id,
 info=info,
+ search_tool_memory=search_req.search_tool_memory,
+ tool_mem_top_k=search_req.tool_mem_top_k,
 )
 memories = merged_memories[: search_req.top_k]

diff --git a/src/memos/memories/textual/tree.py b/src/memos/memories/textual/tree.py
index ad2bcd9c4..cad850d2d 100644
--- a/src/memos/memories/textual/tree.py
+++ b/src/memos/memories/textual/tree.py
@@ -166,6 +166,8 @@ def search(
 search_priority: dict | None = None,
 search_filter: dict | None = None,
 user_name: str | None = None,
+ search_tool_memory: bool = False,
+ tool_mem_top_k: int = 6,
 **kwargs,
 ) -> list[TextualMemoryItem]:
 """Search for memories based on a query.
@@ -223,6 +225,8 @@ def search(
 search_priority,
 user_name=user_name,
 plugin=kwargs.get("plugin", False),
+ search_tool_memory=search_tool_memory,
+ tool_mem_top_k=tool_mem_top_k,
 )

 def get_relevant_subgraph(
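The two new keyword arguments are threaded from the scheduler down through `TreeTextMemory.search` into the retrieval layer below. A hedged usage sketch, assuming `tree_memory` is an already-configured `TreeTextMemory` instance and with illustrative argument values:

```python
# Hypothetical call into the extended search API; values are examples only.
results = tree_memory.search(
    query="How should the crop tool be invoked to resize an image?",
    top_k=10,
    search_tool_memory=True,  # also recall ToolSchemaMemory / ToolTrajectoryMemory items
    tool_mem_top_k=6,         # cap on the number of tool memories returned
)
for item in results:
    print(item.metadata.memory_type, item.memory[:80])
```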
""" - if memory_scope not in ["WorkingMemory", "LongTermMemory", "UserMemory"]: + if memory_scope not in [ + "WorkingMemory", + "LongTermMemory", + "UserMemory", + "ToolSchemaMemory", + "ToolTrajectoryMemory", + ]: raise ValueError(f"Unsupported memory scope: {memory_scope}") if memory_scope == "WorkingMemory": diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 830b915c1..0666f1d86 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -76,6 +76,8 @@ def retrieve( search_filter: dict | None = None, search_priority: dict | None = None, user_name: str | None = None, + search_tool_memory: bool = False, + tool_mem_top_k: int = 6, **kwargs, ) -> list[tuple[TextualMemoryItem, float]]: logger.info( @@ -100,6 +102,8 @@ def retrieve( search_filter, search_priority, user_name, + search_tool_memory, + tool_mem_top_k, ) return results @@ -109,10 +113,14 @@ def post_retrieve( top_k: int, user_name: str | None = None, info=None, + search_tool_memory: bool = False, + tool_mem_top_k: int = 6, plugin=False, ): deduped = self._deduplicate_results(retrieved_results) - final_results = self._sort_and_trim(deduped, top_k, plugin) + final_results = self._sort_and_trim( + deduped, top_k, plugin, search_tool_memory, tool_mem_top_k + ) self._update_usage_history(final_results, info, user_name) return final_results @@ -127,6 +135,8 @@ def search( search_filter: dict | None = None, search_priority: dict | None = None, user_name: str | None = None, + search_tool_memory: bool = False, + tool_mem_top_k: int = 6, **kwargs, ) -> list[TextualMemoryItem]: """ @@ -171,6 +181,8 @@ def search( search_filter=search_filter, search_priority=search_priority, user_name=user_name, + search_tool_memory=search_tool_memory, + tool_mem_top_k=tool_mem_top_k, ) final_results = self.post_retrieve( @@ -179,6 +191,8 @@ def search( user_name=user_name, info=None, plugin=kwargs.get("plugin", False), + search_tool_memory=search_tool_memory, + tool_mem_top_k=tool_mem_top_k, ) logger.info(f"[SEARCH] Done. 
Total {len(final_results)} results.") @@ -272,6 +286,8 @@ def _retrieve_paths( search_filter: dict | None = None, search_priority: dict | None = None, user_name: str | None = None, + search_tool_memory: bool = False, + tool_mem_top_k: int = 6, ): """Run A/B/C retrieval paths in parallel""" tasks = [] @@ -324,6 +340,22 @@ def _retrieve_paths( user_name, ) ) + if search_tool_memory: + tasks.append( + executor.submit( + self._retrieve_from_tool_memory, + query, + parsed_goal, + query_embedding, + top_k, + memory_type, + search_filter, + search_priority, + user_name, + id_filter, + mode=mode, + ) + ) results = [] for t in tasks: @@ -498,6 +530,98 @@ def _retrieve_from_internet( parsed_goal=parsed_goal, ) + # --- Path D + @timed + def _retrieve_from_tool_memory( + self, + query, + parsed_goal, + query_embedding, + top_k, + memory_type, + search_filter: dict | None = None, + search_priority: dict | None = None, + user_name: str | None = None, + id_filter: dict | None = None, + mode: str = "fast", + ): + """Retrieve and rerank from ToolMemory""" + results = { + "ToolSchemaMemory": [], + "ToolTrajectoryMemory": [], + } + tasks = [] + + # chain of thinking + cot_embeddings = [] + if self.vec_cot: + queries = self._cot_query(query, mode=mode, context=parsed_goal.context) + if len(queries) > 1: + cot_embeddings = self.embedder.embed(queries) + cot_embeddings.extend(query_embedding) + else: + cot_embeddings = query_embedding + + with ContextThreadPoolExecutor(max_workers=2) as executor: + if memory_type in ["All", "ToolSchemaMemory"]: + tasks.append( + executor.submit( + self.graph_retriever.retrieve, + query=query, + parsed_goal=parsed_goal, + query_embedding=cot_embeddings, + top_k=top_k * 2, + memory_scope="ToolSchemaMemory", + search_filter=search_filter, + search_priority=search_priority, + user_name=user_name, + id_filter=id_filter, + use_fast_graph=self.use_fast_graph, + ) + ) + if memory_type in ["All", "ToolTrajectoryMemory"]: + tasks.append( + executor.submit( + self.graph_retriever.retrieve, + query=query, + parsed_goal=parsed_goal, + query_embedding=cot_embeddings, + top_k=top_k * 2, + memory_scope="ToolTrajectoryMemory", + search_filter=search_filter, + search_priority=search_priority, + user_name=user_name, + id_filter=id_filter, + use_fast_graph=self.use_fast_graph, + ) + ) + + # Collect results from all tasks + for task in tasks: + rsp = task.result() + if rsp and rsp[0].metadata.memory_type == "ToolSchemaMemory": + results["ToolSchemaMemory"].extend(rsp) + elif rsp and rsp[0].metadata.memory_type == "ToolTrajectoryMemory": + results["ToolTrajectoryMemory"].extend(rsp) + + schema_reranked = self.reranker.rerank( + query=query, + query_embedding=query_embedding[0], + graph_results=results["ToolSchemaMemory"], + top_k=top_k, + parsed_goal=parsed_goal, + search_filter=search_filter, + ) + trajectory_reranked = self.reranker.rerank( + query=query, + query_embedding=query_embedding[0], + graph_results=results["ToolTrajectoryMemory"], + top_k=top_k, + parsed_goal=parsed_goal, + search_filter=search_filter, + ) + return schema_reranked + trajectory_reranked + @timed def _retrieve_simple( self, @@ -554,11 +678,41 @@ def _deduplicate_results(self, results): return list(deduped.values()) @timed - def _sort_and_trim(self, results, top_k, plugin=False): + def _sort_and_trim( + self, results, top_k, plugin=False, search_tool_memory=False, tool_mem_top_k=6 + ): """Sort results by score and trim to top_k""" + final_items = [] + if search_tool_memory: + tool_results = [ + (item, score) + for item, 
score in results + if item.metadata.memory_type in ["ToolSchemaMemory", "ToolTrajectoryMemory"] + ] + sorted_tool_results = sorted(tool_results, key=lambda pair: pair[1], reverse=True)[ + :tool_mem_top_k + ] + for item, score in sorted_tool_results: + if plugin and round(score, 2) == 0.00: + continue + meta_data = item.metadata.model_dump() + meta_data["relativity"] = score + final_items.append( + TextualMemoryItem( + id=item.id, + memory=item.memory, + metadata=SearchedTreeNodeTextualMemoryMetadata(**meta_data), + ) + ) + # separate textual results + results = [ + (item, score) + for item, score in results + if item.metadata.memory_type not in ["ToolSchemaMemory", "ToolTrajectoryMemory"] + ] sorted_results = sorted(results, key=lambda pair: pair[1], reverse=True)[:top_k] - final_items = [] + for item, score in sorted_results: if plugin and round(score, 2) == 0.00: continue diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py index c6fe588cb..1ddd2b1b7 100644 --- a/src/memos/multi_mem_cube/single_cube.py +++ b/src/memos/multi_mem_cube/single_cube.py @@ -11,6 +11,7 @@ from memos.api.handlers.formatters_handler import ( format_memory_item, post_process_pref_mem, + post_process_textual_mem, ) from memos.context.context import ContextThreadPoolExecutor from memos.log import get_logger @@ -109,6 +110,7 @@ def search_memories(self, search_req: APISearchRequest) -> dict[str, Any]: "para_mem": [], "pref_mem": [], "pref_note": "", + "tool_mem": [], } # Determine search mode @@ -123,11 +125,10 @@ def search_memories(self, search_req: APISearchRequest) -> dict[str, Any]: pref_formatted_memories = pref_future.result() # Build result - memories_result["text_mem"].append( - { - "cube_id": self.cube_id, - "memories": text_formatted_memories, - } + memories_result = post_process_textual_mem( + memories_result, + text_formatted_memories, + self.cube_id, ) memories_result = post_process_pref_mem( @@ -278,6 +279,8 @@ def _fine_search( Returns: List of enhanced search results """ + # TODO: support tool memory search in future + logger.info(f"Fine strategy: {FINE_STRATEGY}") if FINE_STRATEGY == FineStrategy.DEEP_SEARCH: return self._deep_search(search_req=search_req, user_context=user_context) @@ -375,6 +378,9 @@ def _search_pref( """ if os.getenv("ENABLE_PREFERENCE_MEMORY", "false").lower() != "true": return [] + if not search_req.include_preference: + return [] + logger.info(f"search_req.filter for preference memory: {search_req.filter}") logger.info(f"type of pref_mem: {type(self.naive_mem_cube.pref_mem)}") try: @@ -427,6 +433,8 @@ def _fast_search( "chat_history": search_req.chat_history, }, plugin=plugin, + search_tool_memory=search_req.search_tool_memory, + tool_mem_top_k=search_req.tool_mem_top_k, ) formatted_memories = [format_memory_item(data) for data in search_results] From da9d843382321c223ba891030477391289237989 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 3 Dec 2025 17:30:17 +0800 Subject: [PATCH 12/28] tool search --- src/memos/api/product_models.py | 22 +++++++++---------- .../read_multi_modal/system_parser.py | 9 ++++++++ .../tree_text_memory/organize/manager.py | 9 ++++---- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py index acdb773e9..4ad4016bc 100644 --- a/src/memos/api/product_models.py +++ b/src/memos/api/product_models.py @@ -6,7 +6,7 @@ # Import message types from core types module from memos.log import get_logger -from memos.types import MessageDict, 
PermissionDict, SearchMode +from memos.types import MessageList, MessagesType, PermissionDict, SearchMode logger = get_logger(__name__) @@ -56,7 +56,7 @@ class Message(BaseModel): class MemoryCreate(BaseRequest): user_id: str = Field(..., description="User ID") - messages: list | None = Field(None, description="List of messages to store.") + messages: MessageList | None = Field(None, description="List of messages to store.") memory_content: str | None = Field(None, description="Content to store as memory") doc_path: str | None = Field(None, description="Path to document to store") mem_cube_id: str | None = Field(None, description="ID of the memory cube") @@ -83,7 +83,7 @@ class ChatRequest(BaseRequest): writable_cube_ids: list[str] | None = Field( None, description="List of cube IDs user can write for multi-cube chat" ) - history: list | None = Field(None, description="Chat history") + history: MessageList | None = Field(None, description="Chat history") mode: SearchMode = Field(SearchMode.FAST, description="search mode: fast, fine, or mixture") system_prompt: str | None = Field(None, description="Base system prompt to use for chat") top_k: int = Field(10, description="Number of results to return") @@ -165,7 +165,7 @@ class ChatCompleteRequest(BaseRequest): user_id: str = Field(..., description="User ID") query: str = Field(..., description="Chat query message") mem_cube_id: str | None = Field(None, description="Cube ID to use for chat") - history: list | None = Field(None, description="Chat history") + history: MessageList | None = Field(None, description="Chat history") internet_search: bool = Field(False, description="Whether to use internet search") system_prompt: str | None = Field(None, description="Base prompt to use for chat") top_k: int = Field(10, description="Number of results to return") @@ -251,7 +251,7 @@ class MemoryCreateRequest(BaseRequest): """Request model for creating memories.""" user_id: str = Field(..., description="User ID") - messages: str | list | None = Field(None, description="List of messages to store.") + messages: str | MessagesType | None = Field(None, description="List of messages to store.") memory_content: str | None = Field(None, description="Memory content to store") doc_path: str | None = Field(None, description="Path to document to store") mem_cube_id: str | None = Field(None, description="Cube ID") @@ -375,7 +375,7 @@ class APISearchRequest(BaseRequest): ) # ==== Context ==== - chat_history: list | None = Field( + chat_history: MessageList | None = Field( None, description=( "Historical chat messages used internally by algorithms. " @@ -505,7 +505,7 @@ class APIADDRequest(BaseRequest): ) # ==== Input content ==== - messages: str | list | None = Field( + messages: MessagesType | None = Field( None, description=( "List of messages to store. Supports: " @@ -521,7 +521,7 @@ class APIADDRequest(BaseRequest): ) # ==== Chat history ==== - chat_history: list | None = Field( + chat_history: MessageList | None = Field( None, description=( "Historical chat messages used internally by algorithms. 
" @@ -651,7 +651,7 @@ class APIFeedbackRequest(BaseRequest): "default_session", description="Session ID for soft-filtering memories" ) task_id: str | None = Field(None, description="Task ID for monitering async tasks") - history: list[MessageDict] | None = Field(..., description="Chat history") + history: MessageList | None = Field(..., description="Chat history") retrieved_memory_ids: list[str] | None = Field( None, description="Retrieved memory ids at last turn" ) @@ -686,7 +686,7 @@ class APIChatCompleteRequest(BaseRequest): writable_cube_ids: list[str] | None = Field( None, description="List of cube IDs user can write for multi-cube chat" ) - history: list | None = Field(None, description="Chat history") + history: MessageList | None = Field(None, description="Chat history") mode: SearchMode = Field(SearchMode.FAST, description="search mode: fast, fine, or mixture") system_prompt: str | None = Field(None, description="Base system prompt to use for chat") top_k: int = Field(10, description="Number of results to return") @@ -755,7 +755,7 @@ class SuggestionRequest(BaseRequest): user_id: str = Field(..., description="User ID") mem_cube_id: str = Field(..., description="Cube ID") language: Literal["zh", "en"] = Field("zh", description="Language for suggestions") - message: list | None = Field(None, description="List of messages to store.") + message: MessagesType | None = Field(None, description="List of messages to store.") # ─── MemOS Client Response Models ────────────────────────────────────────────── diff --git a/src/memos/mem_reader/read_multi_modal/system_parser.py b/src/memos/mem_reader/read_multi_modal/system_parser.py index e08279dc1..c3961c73a 100644 --- a/src/memos/mem_reader/read_multi_modal/system_parser.py +++ b/src/memos/mem_reader/read_multi_modal/system_parser.py @@ -126,12 +126,21 @@ def parse_fine( logger.warning(f"[SystemParser] Tool schema must be a list[dict]: {content}") return [] + info_ = info.copy() + user_id = info_.pop("user_id", "") + session_id = info_.pop("session_id", "") + return [ TextualMemoryItem( id=str(uuid.uuid4()), memory=json.dumps(schema), metadata=TreeNodeTextualMemoryMetadata( + user_id=user_id, + session_id=session_id, memory_type="ToolSchemaMemory", + status="activated", + embedding=self.embedder.embed([json.dumps(schema)])[0], + info=info_, ), ) for schema in tool_schema diff --git a/src/memos/memories/textual/tree_text_memory/organize/manager.py b/src/memos/memories/textual/tree_text_memory/organize/manager.py index 31cf89ae1..3226f7ca0 100644 --- a/src/memos/memories/textual/tree_text_memory/organize/manager.py +++ b/src/memos/memories/textual/tree_text_memory/organize/manager.py @@ -181,10 +181,11 @@ def _process_memory(self, memory: TextualMemoryItem, user_name: str | None = Non working_id = str(uuid.uuid4()) with ContextThreadPoolExecutor(max_workers=2, thread_name_prefix="mem") as ex: - f_working = ex.submit( - self._add_memory_to_db, memory, "WorkingMemory", user_name, working_id - ) - futures.append(("working", f_working)) + if memory.metadata.memory_type not in ("ToolSchemaMemory", "ToolTrajectoryMemory"): + f_working = ex.submit( + self._add_memory_to_db, memory, "WorkingMemory", user_name, working_id + ) + futures.append(("working", f_working)) if memory.metadata.memory_type in ( "LongTermMemory", From 96a9dfdbc59dce8c4540c7fee785b0f026ba35d0 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 3 Dec 2025 19:37:32 +0800 Subject: [PATCH 13/28] add split chunck in system and tool --- docker/requirements.txt | 1 + 
From 96a9dfdbc59dce8c4540c7fee785b0f026ba35d0 Mon Sep 17 00:00:00 2001
From: "yuan.wang" 
Date: Wed, 3 Dec 2025 19:37:32 +0800
Subject: [PATCH 13/28] add split chunk in system and tool

---
 docker/requirements.txt | 1 +
 .../read_multi_modal/system_parser.py | 25 ++++++++++++++++---
 .../read_multi_modal/tool_parser.py | 23 ++++++++++++++---
 3 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/docker/requirements.txt b/docker/requirements.txt
index d3268edae..21f246599 100644
--- a/docker/requirements.txt
+++ b/docker/requirements.txt
@@ -160,3 +160,4 @@ xlrd==2.0.2
 xlsxwriter==3.2.5
 prometheus-client==0.23.1
 pymilvus==2.5.12
+langchain-text-splitters==1.0.0
diff --git a/src/memos/mem_reader/read_multi_modal/system_parser.py b/src/memos/mem_reader/read_multi_modal/system_parser.py
index c3961c73a..3f467d649 100644
--- a/src/memos/mem_reader/read_multi_modal/system_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/system_parser.py
@@ -95,17 +95,34 @@ def parse_fast(
 )

 source = self.create_source(message, info)
- return [
- TextualMemoryItem(
- memory=content_wo_tool_schema,
+
+ # Extract info fields
+ info_ = info.copy()
+ user_id = info_.pop("user_id", "")
+ session_id = info_.pop("session_id", "")
+
+ # Split parsed text into chunks
+ content_chunks = self._split_text(content_wo_tool_schema)
+
+ memory_items = []
+ for _chunk_idx, chunk_text in enumerate(content_chunks):
+ if not chunk_text.strip():
+ continue
+
+ memory_item = TextualMemoryItem(
+ memory=chunk_text,
 metadata=TreeNodeTextualMemoryMetadata(
+ user_id=user_id,
+ session_id=session_id,
 memory_type="LongTermMemory", # only chose long term memory for system messages as a placeholder
 status="activated",
 tags=["mode:fast"],
 sources=[source],
+ info=info_,
 ),
 )
- ]
+ memory_items.append(memory_item)
+ return memory_items

 def parse_fine(
 self,
diff --git a/src/memos/mem_reader/read_multi_modal/tool_parser.py b/src/memos/mem_reader/read_multi_modal/tool_parser.py
index cdd3620cb..09bd9e9d0 100644
--- a/src/memos/mem_reader/read_multi_modal/tool_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/tool_parser.py
@@ -158,17 +158,32 @@ def parse_fast(
 return []

 sources = self.create_source(message, info)
- return [
- TextualMemoryItem(
- memory=line,
+
+ # Extract info fields
+ info_ = info.copy()
+ user_id = info_.pop("user_id", "")
+ session_id = info_.pop("session_id", "")
+
+ content_chunks = self._split_text(line)
+ memory_items = []
+ for _chunk_idx, chunk_text in enumerate(content_chunks):
+ if not chunk_text.strip():
+ continue
+
+ memory_item = TextualMemoryItem(
+ memory=chunk_text,
 metadata=TreeNodeTextualMemoryMetadata(
+ user_id=user_id,
+ session_id=session_id,
 memory_type="LongTermMemory", # only chose long term memory for tool messages as a placeholder
 status="activated",
 tags=["mode:fast"],
 sources=sources,
+ info=info_,
 ),
 )
- ]
+ memory_items.append(memory_item)
+ return memory_items

 def parse_fine(
 self,

From 44570dfb8f250c720eecdffcc778b8faacf03bd2 Mon Sep 17 00:00:00 2001
From: "yuan.wang" 
Date: Thu, 4 Dec 2025 10:18:06 +0800
Subject: [PATCH 14/28] fix bug in plugin pref search

---
 src/memos/multi_mem_cube/single_cube.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py
index 1ddd2b1b7..a89b31ce7 100644
--- a/src/memos/multi_mem_cube/single_cube.py
+++ b/src/memos/multi_mem_cube/single_cube.py
@@ -389,6 +389,7 @@ def _search_pref(
 top_k=search_req.pref_top_k,
 info={
 "user_id": search_req.user_id,
+ "mem_cube_id": user_context.mem_cube_id,
 "session_id": search_req.session_id,
 "chat_history": search_req.chat_history,
 },
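One note on the split-chunk commit above: the `_split_text` helper that both fast parsers now call is not shown anywhere in this series. Given the langchain-text-splitters pin added to docker/requirements.txt, a plausible minimal implementation looks like the following; the chunk size and overlap are assumptions for illustration, not the project's actual settings:

```python
# Hypothetical sketch of the _split_text helper assumed by parse_fast above.
from langchain_text_splitters import RecursiveCharacterTextSplitter


def _split_text(self, text: str) -> list[str]:
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,    # assumed default
        chunk_overlap=100,  # assumed default
    )
    return splitter.split_text(text)
```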
From 4c24cfc06f5115bd3d68bb2107504c3ef0ee2619 Mon Sep 17 00:00:00 2001
From: "yuan.wang" 
Date: Thu, 4 Dec 2025 10:31:47 +0800
Subject: [PATCH 15/28] fix bug in pref add

---
 src/memos/multi_mem_cube/single_cube.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py
index a89b31ce7..f9e084347 100644
--- a/src/memos/multi_mem_cube/single_cube.py
+++ b/src/memos/multi_mem_cube/single_cube.py
@@ -567,7 +567,7 @@ def _process_pref_mem(
 message_item_pref = ScheduleMessageItem(
 user_id=add_req.user_id,
 session_id=target_session_id,
- mem_cube_id=self.cube_id,
+ mem_cube_id=user_context.mem_cube_id,
 mem_cube=self.naive_mem_cube,
 label=PREF_ADD_LABEL,
 content=json.dumps(messages_list),
@@ -592,7 +592,7 @@
 **(add_req.info or {}),
 "user_id": add_req.user_id,
 "session_id": target_session_id,
- "mem_cube_id": self.cube_id,
+ "mem_cube_id": user_context.mem_cube_id,
 },
 )
 pref_ids_local: list[str] = self.naive_mem_cube.pref_mem.add(pref_memories_local)

From 1fc9fab4c88bc3b3e7da68e29279a6c76f4a1296 Mon Sep 17 00:00:00 2001
From: "yuan.wang" 
Date: Mon, 15 Dec 2025 19:19:23 +0800
Subject: [PATCH 16/28] modify prompt and code

---
 src/memos/mem_reader/multi_modal_struct.py | 45 +++++++++++------
 src/memos/templates/tool_mem_prompts.py | 56 ++++++++++++++++++----
 2 files changed, 78 insertions(+), 23 deletions(-)

diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py
index 88ef56b7c..1681b1ed6 100644
--- a/src/memos/mem_reader/multi_modal_struct.py
+++ b/src/memos/mem_reader/multi_modal_struct.py
@@ -495,7 +495,11 @@ def _process_tool_trajectory_fine(
 for fast_item in fast_memory_items:
 # Extract memory text (string content)
 mem_str = fast_item.memory or ""
- if not mem_str.strip() or "tool:" not in mem_str:
+ if not mem_str.strip() or (
+ "tool:" not in mem_str
+ and "[tool_calls]:" not in mem_str
+ and "omitted" not in mem_str
+ ):
 continue
 try:
 resp = self._get_llm_tool_trajectory_response(mem_str)
@@ -554,16 +558,22 @@ def _process_multi_modal_data(
 if mode == "fast":
 return fast_memory_items
 else:
- # Part A: call llm
+ # Part A: call llm in parallel using thread pool
 fine_memory_items = []
- fine_memory_items_string_parser = self._process_string_fine(
- fast_memory_items, info, custom_tags
- )
- fine_memory_items.extend(fine_memory_items_string_parser)

- fine_memory_items_tool_trajectory_parser = self._process_tool_trajectory_fine(
- fast_memory_items, info
- )
+ with ContextThreadPoolExecutor(max_workers=2) as executor:
+ future_string = executor.submit(
+ self._process_string_fine, fast_memory_items, info, custom_tags
+ )
+ future_tool = executor.submit(
+ self._process_tool_trajectory_fine, fast_memory_items, info
+ )
+
+ # Collect results
+ fine_memory_items_string_parser = future_string.result()
+ fine_memory_items_tool_trajectory_parser = future_tool.result()
+
+ fine_memory_items.extend(fine_memory_items_string_parser)
 fine_memory_items.extend(fine_memory_items_tool_trajectory_parser)

 # Part B: get fine multimodal items
@@ -601,13 +611,18 @@ def _process_transfer_multi_modal_data(
 }

 fine_memory_items = []
- # Part A: call llm
- fine_memory_items_string_parser = self._process_string_fine([raw_node], info, custom_tags)
- fine_memory_items.extend(fine_memory_items_string_parser)
+ # Part A: call llm in parallel using thread pool
+ with ContextThreadPoolExecutor(max_workers=2) as executor:
+ future_string = executor.submit(
+ self._process_string_fine, [raw_node], info, custom_tags
+ )
+ future_tool = executor.submit(self._process_tool_trajectory_fine, 
[raw_node], info) - fine_memory_items_tool_trajectory_parser = self._process_tool_trajectory_fine( - [raw_node], info - ) + # Collect results + fine_memory_items_string_parser = future_string.result() + fine_memory_items_tool_trajectory_parser = future_tool.result() + + fine_memory_items.extend(fine_memory_items_string_parser) fine_memory_items.extend(fine_memory_items_tool_trajectory_parser) # Part B: get fine multimodal items diff --git a/src/memos/templates/tool_mem_prompts.py b/src/memos/templates/tool_mem_prompts.py index 7d5363956..fa765da34 100644 --- a/src/memos/templates/tool_mem_prompts.py +++ b/src/memos/templates/tool_mem_prompts.py @@ -2,15 +2,25 @@ 你是一个专业的工具调用轨迹提取专家。你的任务是从给定的对话消息中提取完整的工具调用轨迹经验。 ## 提取规则: -1. 只有当对话中存在有价值的工具调用过程时才进行提取 -2. 有价值的轨迹至少包含以下元素: +1. 只有当对话中存在有价值的工具调用经验时才进行提取 +2. 有价值的轨迹包含两种情况: + + **情况A - 标准工具调用轨迹**(包含以下完整流程): - 用户的问题(user message) - 助手的工具调用尝试(assistant message with tool_calls) - 工具的执行结果(tool message with tool_call_id and content,无论成功或失败) - - 助手的响应(assistant message,无论是否给出最终答案) + - 助手基于工具结果的响应(assistant message) + + **情况B - 无需工具调用的轨迹**(同时满足以下条件): + - 对话中提供了可用的工具列表 + - 助手没有进行任何工具调用 + - 直接给出了答案并获得正确反馈 + - 这种情况需要提取并标注"此问题无需工具调用即可回答" ## 输出格式: 返回一个JSON数组,格式如下: + +**情况A的输出格式:** ```json [ { @@ -27,8 +37,18 @@ ] ``` +**情况B的输出格式:** +```json +[ + { + "trajectory": "自然语言输出说明'任务内容、为什么不需要工具调用、最终回答'", + "tool_used_status": [] + } +] +``` + ## 注意事项: -- 如果对话中没有完整的工具调用轨迹,返回空数组 +- 如果对话中没有完整的工具调用轨迹(情况A)且不符合情况B的条件,返回空数组 - 每个轨迹必须是独立的完整过程 - 一个轨迹中可能涉及多个工具的使用,每个工具在tool_used_status中独立记录 - 只提取事实内容,不要添加任何解释或额外信息 @@ -45,15 +65,25 @@ You are a professional tool call trajectory extraction expert. Your task is to extract valuable tool call trajectory experiences from given conversation messages. ## Extraction Rules: -1. Only extract when there are valuable tool calling processes in the conversation -2. Valuable trajectories must contain at least the following elements: +1. Only extract when there are valuable tool calling experiences in the conversation +2. 
Valuable trajectories include two scenarios: + + **Scenario A - Standard Tool Call Trajectory** (contains the complete flow): - User's question (user message) - Assistant's tool call attempt (assistant message with tool_calls) - Tool execution results (tool message with tool_call_id and content, regardless of success or failure) - - Assistant's response (assistant message, whether or not a final answer is given) + - Assistant's response based on tool results (assistant message) + + **Scenario B - No Tool Call Needed Trajectory** (must meet all conditions): + - Tools are provided in the conversation + - Assistant made no tool calls + - Assistant directly provided an answer and received correct feedback + - This should be extracted with annotation "This question can be answered without tool calls" ## Output Format: Return a JSON array in the following format: + +**Format for Scenario A:** ```json [ { @@ -70,8 +100,18 @@ ] ``` +**Format for Scenario B:** +```json +[ + { + "trajectory": "Natural language description of 'task content, why tool calls are not needed, final answer'", + "tool_used_status": [] + } +] +``` + ## Notes: -- If there are no complete tool call trajectories in the conversation, return an empty array +- If there are no complete tool call trajectories (Scenario A) and Scenario B conditions are not met, return an empty array - Each trajectory must be an independent complete process - Multiple tools may be used in one trajectory, each tool is recorded independently in tool_used_status - Only extract factual content, do not add any additional explanations or information From 62d7253337d3030dbe8373af5db0e45703ae73b4 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 17 Dec 2025 15:09:50 +0800 Subject: [PATCH 17/28] modify prompt --- src/memos/templates/tool_mem_prompts.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/memos/templates/tool_mem_prompts.py b/src/memos/templates/tool_mem_prompts.py index fa765da34..ec4d1352a 100644 --- a/src/memos/templates/tool_mem_prompts.py +++ b/src/memos/templates/tool_mem_prompts.py @@ -25,12 +25,12 @@ [ { "trajectory": "自然语言输出包含'任务、使用的工具、工具观察、最终回答'的完整精炼的总结,体现顺序", + "experience": "成功时总结参数模式和最佳实践;失败时反思总结教训和注意事项,以及可能的正确解法" "tool_used_status": [ { "used_tool": "工具名1", "success_rate": "0.0-1.0之间的数值,表示该工具在本次轨迹中的成功率", "error_type": "调用失败时的错误类型和描述,成功时为空字符串", - "experience": "该工具的使用经验,比如常见的参数模式、执行特点、结果解读方式等" } ] } @@ -42,15 +42,18 @@ [ { "trajectory": "自然语言输出说明'任务内容、为什么不需要工具调用、最终回答'", + "experience": "成功时总结参数模式和最佳实践;失败时反思总结教训和注意事项,以及可能的正确解法" "tool_used_status": [] } ] ``` ## 注意事项: -- 如果对话中没有完整的工具调用轨迹(情况A)且不符合情况B的条件,返回空数组 +- **trajectory 必须精简**:用最少的文字清晰表达完整流程,避免冗长描述 +- 如果对话中没有完整的工具调用轨迹(情况A)且不符合(情况B)的条件,返回空数组 - 每个轨迹必须是独立的完整过程 - 一个轨迹中可能涉及多个工具的使用,每个工具在tool_used_status中独立记录 +- 如果多条轨迹存在顺序依赖关系,需要将它们视为一条轨迹 - 只提取事实内容,不要添加任何解释或额外信息 - 确保返回的是有效的JSON格式 @@ -88,12 +91,12 @@ [ { "trajectory": "Natural language summary containing 'task, tools used, tool observations, final answer' in a complete and refined manner, reflecting the sequence", + "experience": "When successful, summarize parameter patterns and best practices; when failed, reflect on lessons learned, precautions, and possible correct solutions", "tool_used_status": [ { "used_tool": "Tool Name 1", "success_rate": "Numerical value between 0.0-1.0, indicating the success rate of this tool in the current trajectory", - "error_type": "Error type and description when call fails, empty string when successful", - "experience": "Usage experience of this tool, such as 
common parameter patterns, execution characteristics, result interpretation methods, etc." + "error_type": "Error type and description when call fails, empty string when successful" } ] } @@ -105,15 +108,18 @@ [ { "trajectory": "Natural language description of 'task content, why tool calls are not needed, final answer'", + "experience": "When successful, summarize parameter patterns and best practices; when failed, reflect on lessons learned, precautions, and possible correct solutions", "tool_used_status": [] } ] ``` ## Notes: -- If there are no complete tool call trajectories (Scenario A) and Scenario B conditions are not met, return an empty array +- **trajectory must be concise**: Express the complete process clearly with minimal words, avoid lengthy descriptions +- If there are no complete tool call trajectories (Scenario A) and (Scenario B) conditions are not met, return an empty array - Each trajectory must be an independent complete process - Multiple tools may be used in one trajectory, each tool is recorded independently in tool_used_status +- If multiple trajectories have sequential dependencies, they should be considered as one trajectory - Only extract factual content, do not add any additional explanations or information - Ensure the returned content is valid JSON format From ca104aa69fc90b2e04a661ee4ef8a15a9c8bf846 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 17 Dec 2025 15:47:41 +0800 Subject: [PATCH 18/28] add experience in tool mem --- src/memos/mem_reader/multi_modal_struct.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index 1681b1ed6..8753c28d3 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -515,6 +515,7 @@ def _process_tool_trajectory_fine( value=m.get("trajectory", ""), info=info, memory_type=memory_type, + experience=m.get("experience", ""), tool_used_status=m.get("tool_used_status", []), ) fine_memory_items.append(node) From 3d5d094734b70c38772391f904add93bb83e76c9 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 17 Dec 2025 16:54:57 +0800 Subject: [PATCH 19/28] modify prompt --- src/memos/templates/tool_mem_prompts.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/memos/templates/tool_mem_prompts.py b/src/memos/templates/tool_mem_prompts.py index ec4d1352a..72238945f 100644 --- a/src/memos/templates/tool_mem_prompts.py +++ b/src/memos/templates/tool_mem_prompts.py @@ -1,5 +1,5 @@ TOOL_TRAJECTORY_PROMPT_ZH = """ -你是一个专业的工具调用轨迹提取专家。你的任务是从给定的对话消息中提取完整的工具调用轨迹经验。 +你是一个专业的工具经验提取取专家。你的任务是从给定的对话消息中提取完整的工具调用轨迹经验。 ## 提取规则: 1. 只有当对话中存在有价值的工具调用经验时才进行提取 @@ -25,7 +25,7 @@ [ { "trajectory": "自然语言输出包含'任务、使用的工具、工具观察、最终回答'的完整精炼的总结,体现顺序", - "experience": "成功时总结参数模式和最佳实践;失败时反思总结教训和注意事项,以及可能的正确解法" + "experience": "深入分析本次轨迹的经验教训:\n- 成功(完成用户任务):总结有效的参数模式、调用策略和最佳实践\n- 失败(未完成用户任务):必须深入分析真实错误原因,包括:\n 1. 结合system中给定的函数定义和说明,分析工具是否被正确理解和使用\n 2. 分析用户问题的真实需求,判断工具选择是否合理\n 3. 分析错误的根本原因(参数错误、逻辑错误、工具选择错误、幻觉调用等)\n 4. 提供可能的正确解法和避免该错误的策略\n- 不要只复述表面错误信息,要透过现象看本质" "tool_used_status": [ { "used_tool": "工具名1", @@ -42,7 +42,7 @@ [ { "trajectory": "自然语言输出说明'任务内容、为什么不需要工具调用、最终回答'", - "experience": "成功时总结参数模式和最佳实践;失败时反思总结教训和注意事项,以及可能的正确解法" + "experience": "深入分析本次轨迹的经验教训:\n- 成功(完成用户任务):总结有效的参数模式、调用策略和最佳实践\n- 失败(未完成用户任务):必须深入分析真实错误原因,包括:\n 1. 结合system中给定的函数定义和说明,分析工具是否被正确理解和使用\n 2. 分析用户问题的真实需求,判断工具选择是否合理\n 3. 分析错误的根本原因(参数错误、逻辑错误、工具选择错误、幻觉调用等)\n 4. 
提供可能的正确解法和避免该错误的策略\n- 不要只复述表面错误信息,要透过现象看本质" "tool_used_status": [] } ] @@ -50,7 +50,6 @@ ## 注意事项: - **trajectory 必须精简**:用最少的文字清晰表达完整流程,避免冗长描述 -- 如果对话中没有完整的工具调用轨迹(情况A)且不符合(情况B)的条件,返回空数组 - 每个轨迹必须是独立的完整过程 - 一个轨迹中可能涉及多个工具的使用,每个工具在tool_used_status中独立记录 - 如果多条轨迹存在顺序依赖关系,需要将它们视为一条轨迹 @@ -65,7 +64,7 @@ TOOL_TRAJECTORY_PROMPT_EN = """ -You are a professional tool call trajectory extraction expert. Your task is to extract valuable tool call trajectory experiences from given conversation messages. +You are a professional tool experience extraction expert. Your task is to extract valuable tool experience from given conversation messages. ## Extraction Rules: 1. Only extract when there are valuable tool calling experiences in the conversation @@ -91,7 +90,7 @@ [ { "trajectory": "Natural language summary containing 'task, tools used, tool observations, final answer' in a complete and refined manner, reflecting the sequence", - "experience": "When successful, summarize parameter patterns and best practices; when failed, reflect on lessons learned, precautions, and possible correct solutions", + "experience": "In-depth analysis of lessons learned from this trajectory:\n- Success (user task completed): Summarize effective parameter patterns, calling strategies, and best practices\n- Failure (user task not completed): Must deeply analyze the root cause of the error, including:\n 1. Analyze whether the tool was correctly understood and used based on the function definitions and descriptions in the system\n 2. Analyze the actual needs of the user's question to determine if the tool selection was appropriate\n 3. Analyze the fundamental cause of the error (parameter errors, logic errors, incorrect tool selection, hallucinated calls, etc.)\n 4. Provide possible correct solutions and strategies to avoid this error\n- Don't just repeat superficial error messages; look beyond the surface to understand the essence" "tool_used_status": [ { "used_tool": "Tool Name 1", @@ -108,7 +107,7 @@ [ { "trajectory": "Natural language description of 'task content, why tool calls are not needed, final answer'", - "experience": "When successful, summarize parameter patterns and best practices; when failed, reflect on lessons learned, precautions, and possible correct solutions", + "experience": "In-depth analysis of lessons learned from this trajectory:\n- Success (user task completed): Summarize effective parameter patterns, calling strategies, and best practices\n- Failure (user task not completed): Must deeply analyze the root cause of the error, including:\n 1. Analyze whether the tool was correctly understood and used based on the function definitions and descriptions in the system\n 2. Analyze the actual needs of the user's question to determine if the tool selection was appropriate\n 3. Analyze the fundamental cause of the error (parameter errors, logic errors, incorrect tool selection, hallucinated calls, etc.)\n 4. 
Provide possible correct solutions and strategies to avoid this error\n- Don't just repeat superficial error messages; look beyond the surface to understand the essence" "tool_used_status": [] } ] @@ -116,7 +115,6 @@ ## Notes: - **trajectory must be concise**: Express the complete process clearly with minimal words, avoid lengthy descriptions -- If there are no complete tool call trajectories (Scenario A) and (Scenario B) conditions are not met, return an empty array - Each trajectory must be an independent complete process - Multiple tools may be used in one trajectory, each tool is recorded independently in tool_used_status - If multiple trajectories have sequential dependencies, they should be considered as one trajectory From 3924ddd9c381c54727792c3aa37ba87946b8e627 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 17 Dec 2025 19:01:44 +0800 Subject: [PATCH 20/28] modify promtp --- src/memos/mem_reader/multi_modal_struct.py | 3 ++- src/memos/templates/tool_mem_prompts.py | 18 +++++++----------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index 8753c28d3..9b592a7ce 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -1,5 +1,6 @@ import concurrent.futures import json +import re import traceback from typing import Any @@ -498,7 +499,7 @@ def _process_tool_trajectory_fine( if not mem_str.strip() or ( "tool:" not in mem_str and "[tool_calls]:" not in mem_str - and "omitted" not in mem_str + and not re.search(r".*?", mem_str, re.DOTALL) ): continue try: diff --git a/src/memos/templates/tool_mem_prompts.py b/src/memos/templates/tool_mem_prompts.py index 72238945f..7bb34a5e7 100644 --- a/src/memos/templates/tool_mem_prompts.py +++ b/src/memos/templates/tool_mem_prompts.py @@ -11,11 +11,9 @@ - 工具的执行结果(tool message with tool_call_id and content,无论成功或失败) - 助手基于工具结果的响应(assistant message) - **情况B - 无需工具调用的轨迹**(同时满足以下条件): + **情况B - 无工具调用的轨迹**(同时满足以下条件): - 对话中提供了可用的工具列表 - - 助手没有进行任何工具调用 - - 直接给出了答案并获得正确反馈 - - 这种情况需要提取并标注"此问题无需工具调用即可回答" + - 助手没有进行任何工具调用,直接给出了答案 ## 输出格式: 返回一个JSON数组,格式如下: @@ -25,7 +23,7 @@ [ { "trajectory": "自然语言输出包含'任务、使用的工具、工具观察、最终回答'的完整精炼的总结,体现顺序", - "experience": "深入分析本次轨迹的经验教训:\n- 成功(完成用户任务):总结有效的参数模式、调用策略和最佳实践\n- 失败(未完成用户任务):必须深入分析真实错误原因,包括:\n 1. 结合system中给定的函数定义和说明,分析工具是否被正确理解和使用\n 2. 分析用户问题的真实需求,判断工具选择是否合理\n 3. 分析错误的根本原因(参数错误、逻辑错误、工具选择错误、幻觉调用等)\n 4. 提供可能的正确解法和避免该错误的策略\n- 不要只复述表面错误信息,要透过现象看本质" + "experience": "深入分析本次轨迹的经验教训,输出精简的结论:\n- 成功(完成用户任务):总结有效的参数模式、调用策略和最佳实践\n- 失败(未完成用户任务):按以下步骤分析后,输出精简的结论\n 分析步骤(不要在结论中输出这些步骤,仅作为分析指导):\n 步骤1:检查调用的工具是否在system中提供,即使问题需要调用工具,但system中没有提供,则不能强行调用工具\n 步骤2:如果有工具可用,结合函数定义和说明,分析工具是否被正确理解和使用\n 步骤3:分析用户问题的真实需求,判断工具选择是否合理,是否本身不需要不需要但调用了工具\n 步骤4:分析错误的根本原因(参数错误、逻辑错误、工具选择错误、幻觉调用等)\n 步骤5:提供可能的正确解法和避免该错误的策略\n 最终输出:基于以上分析,给出精简、准确的结论,不要复述分析步骤" "tool_used_status": [ { "used_tool": "工具名1", @@ -42,7 +40,7 @@ [ { "trajectory": "自然语言输出说明'任务内容、为什么不需要工具调用、最终回答'", - "experience": "深入分析本次轨迹的经验教训:\n- 成功(完成用户任务):总结有效的参数模式、调用策略和最佳实践\n- 失败(未完成用户任务):必须深入分析真实错误原因,包括:\n 1. 结合system中给定的函数定义和说明,分析工具是否被正确理解和使用\n 2. 分析用户问题的真实需求,判断工具选择是否合理\n 3. 分析错误的根本原因(参数错误、逻辑错误、工具选择错误、幻觉调用等)\n 4. 
提供可能的正确解法和避免该错误的策略\n- 不要只复述表面错误信息,要透过现象看本质" + "experience": "正确则输出'正确的执行轨迹',错误则分析原因并给出简短的结论", "tool_used_status": [] } ] @@ -78,9 +76,7 @@ **Scenario B - No Tool Call Needed Trajectory** (must meet all conditions): - Tools are provided in the conversation - - Assistant made no tool calls - - Assistant directly provided an answer and received correct feedback - - This should be extracted with annotation "This question can be answered without tool calls" + - Assistant made no tool calls and directly provided an answer ## Output Format: Return a JSON array in the following format: @@ -90,7 +86,7 @@ [ { "trajectory": "Natural language summary containing 'task, tools used, tool observations, final answer' in a complete and refined manner, reflecting the sequence", - "experience": "In-depth analysis of lessons learned from this trajectory:\n- Success (user task completed): Summarize effective parameter patterns, calling strategies, and best practices\n- Failure (user task not completed): Must deeply analyze the root cause of the error, including:\n 1. Analyze whether the tool was correctly understood and used based on the function definitions and descriptions in the system\n 2. Analyze the actual needs of the user's question to determine if the tool selection was appropriate\n 3. Analyze the fundamental cause of the error (parameter errors, logic errors, incorrect tool selection, hallucinated calls, etc.)\n 4. Provide possible correct solutions and strategies to avoid this error\n- Don't just repeat superficial error messages; look beyond the surface to understand the essence" + "experience": "In-depth analysis of lessons learned from this trajectory, output concise conclusions:\n- Success (user task completed): Summarize effective parameter patterns, calling strategies, and best practices\n- Failure (user task not completed): Analyze following these steps, then output concise conclusions\n Analysis steps (do not output these steps in the conclusion, only use as analysis guidance):\n Step 1: Check if the called tool is provided in the system. Even if the problem requires a tool call, if the system does not provide it, the tool cannot be forcibly called\n Step 2: If tools are available, analyze whether the tool was correctly understood and used based on the function definitions and descriptions\n Step 3: Analyze the actual needs of the user's question to determine if the tool selection was appropriate, or if tools were unnecessarily called when not needed\n Step 4: Analyze the fundamental cause of the error (parameter errors, logic errors, incorrect tool selection, hallucinated calls, etc.)\n Step 5: Provide possible correct solutions and strategies to avoid this error\n Final output: Based on the above analysis, provide concise and accurate conclusions without repeating the analysis steps" "tool_used_status": [ { "used_tool": "Tool Name 1", @@ -107,7 +103,7 @@ [ { "trajectory": "Natural language description of 'task content, why tool calls are not needed, final answer'", - "experience": "In-depth analysis of lessons learned from this trajectory:\n- Success (user task completed): Summarize effective parameter patterns, calling strategies, and best practices\n- Failure (user task not completed): Must deeply analyze the root cause of the error, including:\n 1. Analyze whether the tool was correctly understood and used based on the function definitions and descriptions in the system\n 2. Analyze the actual needs of the user's question to determine if the tool selection was appropriate\n 3. 
Analyze the fundamental cause of the error (parameter errors, logic errors, incorrect tool selection, hallucinated calls, etc.)\n 4. Provide possible correct solutions and strategies to avoid this error\n- Don't just repeat superficial error messages; look beyond the surface to understand the essence" + "experience": "Output 'Correct execution trajectory' if correct, otherwise analyze the reason and provide a brief conclusion", "tool_used_status": [] } ] From f432d20271cc6a2ef343d8dbeedccca273a184a9 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 17 Dec 2025 20:32:49 +0800 Subject: [PATCH 21/28] modify code --- .../read_multi_modal/system_parser.py | 43 ++++++++++++++++--- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/src/memos/mem_reader/read_multi_modal/system_parser.py b/src/memos/mem_reader/read_multi_modal/system_parser.py index 2e856365a..f9e6d5e93 100644 --- a/src/memos/mem_reader/read_multi_modal/system_parser.py +++ b/src/memos/mem_reader/read_multi_modal/system_parser.py @@ -87,13 +87,42 @@ def parse_fast( if isinstance(content, dict): content = content["text"] - # Replace tool_schema content with "omitted" in remaining content - content_wo_tool_schema = re.sub( - r"(.*?)", - r"omitted", - content, - flags=re.DOTALL, - ) + # Process tool_schema content + content_wo_tool_schema = content + + # Find first tool_schema block + tool_schema_pattern = r"(.*?)" + match = re.search(tool_schema_pattern, content, flags=re.DOTALL) + + if match: + original_text = match.group(0) # 完整的 ... + schema_content = match.group(1) # 标签之间的内容 + + # Parse tool schema + try: + tool_schema = json.loads(schema_content) + assert isinstance(tool_schema, list), "Tool schema must be a list[dict]" + except json.JSONDecodeError: + try: + tool_schema = ast.literal_eval(schema_content) + assert isinstance(tool_schema, list), "Tool schema must be a list[dict]" + except (ValueError, SyntaxError, AssertionError): + logger.warning( + f"[SystemParser] Failed to parse tool schema with both JSON and ast.literal_eval: {schema_content[:100]}..." + ) + tool_schema = None + except AssertionError: + logger.warning( + f"[SystemParser] Tool schema must be a list[dict]: {schema_content[:100]}..." + ) + tool_schema = None + + # Process and replace + if tool_schema is not None: + processed_text = f"{json.dumps(tool_schema)}" + content_wo_tool_schema = content_wo_tool_schema.replace( + original_text, processed_text, 1 + ) source = self.create_source(message, info) From a10c55b6f3c034268693b1e728b7b38d78fdf12a Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Thu, 18 Dec 2025 17:38:41 +0800 Subject: [PATCH 22/28] modify code --- .../read_multi_modal/system_parser.py | 109 ++++++++++++-- src/memos/templates/tool_mem_prompts.py | 135 +++++++++--------- 2 files changed, 164 insertions(+), 80 deletions(-) diff --git a/src/memos/mem_reader/read_multi_modal/system_parser.py b/src/memos/mem_reader/read_multi_modal/system_parser.py index f9e6d5e93..1b3409bcd 100644 --- a/src/memos/mem_reader/read_multi_modal/system_parser.py +++ b/src/memos/mem_reader/read_multi_modal/system_parser.py @@ -87,16 +87,13 @@ def parse_fast( if isinstance(content, dict): content = content["text"] - # Process tool_schema content - content_wo_tool_schema = content - # Find first tool_schema block tool_schema_pattern = r"(.*?)" match = re.search(tool_schema_pattern, content, flags=re.DOTALL) if match: - original_text = match.group(0) # 完整的 ... - schema_content = match.group(1) # 标签之间的内容 + original_text = match.group(0) # Complete ... 
block + schema_content = match.group(1) # Content between the tags # Parse tool schema try: @@ -119,10 +116,102 @@ def parse_fast( # Process and replace if tool_schema is not None: - processed_text = f"{json.dumps(tool_schema)}" - content_wo_tool_schema = content_wo_tool_schema.replace( - original_text, processed_text, 1 - ) + + def remove_descriptions(obj): + """Recursively remove all 'description' keys from a nested dict/list structure.""" + if isinstance(obj, dict): + return { + k: remove_descriptions(v) for k, v in obj.items() if k != "description" + } + elif isinstance(obj, list): + return [remove_descriptions(item) for item in obj] + else: + return obj + + def keep_first_layer_params(obj): + """Only keep first layer parameter information, remove nested parameters.""" + if isinstance(obj, list): + return [keep_first_layer_params(item) for item in obj] + elif isinstance(obj, dict): + result = {} + for k, v in obj.items(): + if k == "properties" and isinstance(v, dict): + # For properties, only keep first layer parameter names and types + first_layer_props = {} + for param_name, param_info in v.items(): + if isinstance(param_info, dict): + # Only keep type and basic info, remove nested properties + first_layer_props[param_name] = { + key: val + for key, val in param_info.items() + if key in ["type", "enum", "required"] + and key != "properties" + } + else: + first_layer_props[param_name] = param_info + result[k] = first_layer_props + elif k == "parameters" and isinstance(v, dict): + # Process parameters object but only keep first layer + result[k] = keep_first_layer_params(v) + elif isinstance(v, dict | list) and k != "properties": + result[k] = keep_first_layer_params(v) + else: + result[k] = v + return result + else: + return obj + + def format_tool_schema_readable(tool_schema): + """Convert tool schema to readable format: tool_name: [param1 (type1), ...](required: ...)""" + lines = [] + for tool in tool_schema: + # Handle both new format and old-style OpenAI function format + if tool.get("type") == "function" and "function" in tool: + tool_info = tool["function"] + else: + tool_info = tool + + tool_name = tool_info.get("name", "unknown") + params_obj = tool_info.get("parameters", {}) + properties = params_obj.get("properties", {}) + required = params_obj.get("required", []) + + # Format parameters + param_strs = [] + for param_name, param_info in properties.items(): + if isinstance(param_info, dict): + param_type = param_info.get("type", "any") + # Handle enum + if "enum" in param_info: + param_type = f"{param_type}[{', '.join(param_info['enum'])}]" + param_strs.append(f"{param_name} ({param_type})") + else: + param_strs.append(f"{param_name} (any)") + + # Format required parameters + required_str = f"(required: {', '.join(required)})" if required else "" + + # Construct the line + params_part = f"[{', '.join(param_strs)}]" if param_strs else "[]" + line = f"{tool_name}: {params_part}{required_str}" + lines.append(line) + + return "\n".join(lines) + + # First keep only first layer params, then remove descriptions + simple_tool_schema = keep_first_layer_params(tool_schema) + simple_tool_schema = remove_descriptions(simple_tool_schema) + # change to readable format + readable_schema = format_tool_schema_readable(simple_tool_schema) + + processed_text = f"{readable_schema}" + content = content.replace(original_text, processed_text, 1) + + parts = ["system: "] + if message.get("chat_time"): + parts.append(f"[{message.get('chat_time')}]: ") + prefix = "".join(parts) + line = 
f"{prefix}{content}\n" source = self.create_source(message, info) @@ -132,7 +221,7 @@ def parse_fast( session_id = info_.pop("session_id", "") # Split parsed text into chunks - content_chunks = self._split_text(content_wo_tool_schema) + content_chunks = self._split_text(line) memory_items = [] for _chunk_idx, chunk_text in enumerate(content_chunks): diff --git a/src/memos/templates/tool_mem_prompts.py b/src/memos/templates/tool_mem_prompts.py index 7bb34a5e7..7cfc3035a 100644 --- a/src/memos/templates/tool_mem_prompts.py +++ b/src/memos/templates/tool_mem_prompts.py @@ -1,96 +1,101 @@ TOOL_TRAJECTORY_PROMPT_ZH = """ -你是一个专业的工具经验提取取专家。你的任务是从给定的对话消息中提取完整的工具调用轨迹经验。 - -## 提取规则: -1. 只有当对话中存在有价值的工具调用经验时才进行提取 -2. 有价值的轨迹包含两种情况: - - **情况A - 标准工具调用轨迹**(包含以下完整流程): - - 用户的问题(user message) - - 助手的工具调用尝试(assistant message with tool_calls) - - 工具的执行结果(tool message with tool_call_id and content,无论成功或失败) - - 助手基于工具结果的响应(assistant message) - - **情况B - 无工具调用的轨迹**(同时满足以下条件): - - 对话中提供了可用的工具列表 - - 助手没有进行任何工具调用,直接给出了答案 +你是一个专业的工具经验提取专家。你的任务是从给定的对话消息中提取完整的工具调用轨迹经验。 + +## 分析判断步骤: +**步骤1:判断任务完成度** +根据用户反馈,判定correctness:success(成功)或 failed(失败),用户反馈决定权大于执行结果,用户反馈有误,则判定为failed +**步骤2:成功轨迹(success)** +总结:有效的参数模式、调用策略、最佳实践 +**步骤3:失败轨迹(failed)- 错误分析** +3.1 工具需求判断 + - 任务是否需要工具?(需要/直接回答/误调用) +3.2 工具调用检查 + - 工具存在性:是否在system中提供 + - 工具选择:是否选对工具 + - 参数正确性:是否符合类型定义 + - 幻觉检测:是否调用不存在的工具 +3.3 错误根因定位 + 结合消息中的错误反馈信息和上述分析,精准输出根本原因 +3.4 正确解法 + 给出避免错误的策略和正确调用方式,正确解法不单单是这个工具本身,而是整个轨迹的正确解法 ## 输出格式: 返回一个JSON数组,格式如下: -**情况A的输出格式:** ```json [ { - "trajectory": "自然语言输出包含'任务、使用的工具、工具观察、最终回答'的完整精炼的总结,体现顺序", - "experience": "深入分析本次轨迹的经验教训,输出精简的结论:\n- 成功(完成用户任务):总结有效的参数模式、调用策略和最佳实践\n- 失败(未完成用户任务):按以下步骤分析后,输出精简的结论\n 分析步骤(不要在结论中输出这些步骤,仅作为分析指导):\n 步骤1:检查调用的工具是否在system中提供,即使问题需要调用工具,但system中没有提供,则不能强行调用工具\n 步骤2:如果有工具可用,结合函数定义和说明,分析工具是否被正确理解和使用\n 步骤3:分析用户问题的真实需求,判断工具选择是否合理,是否本身不需要不需要但调用了工具\n 步骤4:分析错误的根本原因(参数错误、逻辑错误、工具选择错误、幻觉调用等)\n 步骤5:提供可能的正确解法和避免该错误的策略\n 最终输出:基于以上分析,给出精简、准确的结论,不要复述分析步骤" + "correctness": "success 或 failed", + "trajectory": "精炼完整的自然语言总结,包含:用户任务 -> 执行动作(调用的工具/直接回答) -> 执行结果 -> 最终回答", + "experience": "如果成功:总结有效的调用策略和最佳实践\n如果失败:按步骤3分析后,输出精简的结论,包含:错误原因 + 正确解法", "tool_used_status": [ { - "used_tool": "工具名1", + "used_tool": "工具名称(如果调用了工具)", "success_rate": "0.0-1.0之间的数值,表示该工具在本次轨迹中的成功率", - "error_type": "调用失败时的错误类型和描述,成功时为空字符串", + "error_type": "调用失败时的错误类型和描述,成功时为空字符串" } ] } ] ``` -**情况B的输出格式:** -```json -[ - { - "trajectory": "自然语言输出说明'任务内容、为什么不需要工具调用、最终回答'", - "experience": "正确则输出'正确的执行轨迹',错误则分析原因并给出简短的结论", - "tool_used_status": [] - } -] -``` - ## 注意事项: -- **trajectory 必须精简**:用最少的文字清晰表达完整流程,避免冗长描述 - 每个轨迹必须是独立的完整过程 - 一个轨迹中可能涉及多个工具的使用,每个工具在tool_used_status中独立记录 +- 如果没有调用工具,tool_used_status为空数组[] - 如果多条轨迹存在顺序依赖关系,需要将它们视为一条轨迹 - 只提取事实内容,不要添加任何解释或额外信息 - 确保返回的是有效的JSON格式 +- 输出的trajectory需要按照messages的发展顺序排列 -请分析以下对话消息并提取工具调用轨迹: - +请分析以下对话消息并提取工具调用轨迹,基于以下对话消息: + {messages} - + """ TOOL_TRAJECTORY_PROMPT_EN = """ -You are a professional tool experience extraction expert. Your task is to extract valuable tool experience from given conversation messages. +You are a professional tool experience extraction expert. Your task is to extract complete tool call trajectory experiences from given conversation messages. + +## Analysis and Judgment Steps: -## Extraction Rules: -1. Only extract when there are valuable tool calling experiences in the conversation -2. 
Valuable trajectories include two scenarios: +**Step 1: Assess Task Completion** +Determine correctness based on user feedback: success or failed, user feedback has higher priority than execution results, if user feedback is incorrect, then determine as failed - **Scenario A - Standard Tool Call Trajectory** (contains the complete flow): - - User's question (user message) - - Assistant's tool call attempt (assistant message with tool_calls) - - Tool execution results (tool message with tool_call_id and content, regardless of success or failure) - - Assistant's response based on tool results (assistant message) +**Step 2: Successful Trajectory (success)** +Summarize: effective parameter patterns, calling strategies, best practices - **Scenario B - No Tool Call Needed Trajectory** (must meet all conditions): - - Tools are provided in the conversation - - Assistant made no tool calls and directly provided an answer +**Step 3: Failed Trajectory (failed) - Error Analysis** + +3.1 Tool Requirement Assessment + - Does the task require tools? (required/direct answer/unnecessary call) + +3.2 Tool Call Verification + - Tool availability: provided in system? + - Tool selection: correct tool chosen? + - Parameter correctness: conform to type definitions? + - Hallucination detection: calling non-existent tools? + +3.3 Root Cause Identification + Combine error feedback from messages with above analysis to precisely output root cause + +3.4 Correct Solution + Provide strategies to avoid errors and correct calling approach. The solution should address the entire trajectory, not just the tool itself ## Output Format: Return a JSON array in the following format: -**Format for Scenario A:** ```json [ { - "trajectory": "Natural language summary containing 'task, tools used, tool observations, final answer' in a complete and refined manner, reflecting the sequence", - "experience": "In-depth analysis of lessons learned from this trajectory, output concise conclusions:\n- Success (user task completed): Summarize effective parameter patterns, calling strategies, and best practices\n- Failure (user task not completed): Analyze following these steps, then output concise conclusions\n Analysis steps (do not output these steps in the conclusion, only use as analysis guidance):\n Step 1: Check if the called tool is provided in the system. 
Even if the problem requires a tool call, if the system does not provide it, the tool cannot be forcibly called\n Step 2: If tools are available, analyze whether the tool was correctly understood and used based on the function definitions and descriptions\n Step 3: Analyze the actual needs of the user's question to determine if the tool selection was appropriate, or if tools were unnecessarily called when not needed\n Step 4: Analyze the fundamental cause of the error (parameter errors, logic errors, incorrect tool selection, hallucinated calls, etc.)\n Step 5: Provide possible correct solutions and strategies to avoid this error\n Final output: Based on the above analysis, provide concise and accurate conclusions without repeating the analysis steps" + "correctness": "success or failed", + "trajectory": "Concise and complete natural language summary including: user task -> execution action (tool called/direct answer) -> execution result -> final answer", + "experience": "If success: summarize effective calling strategies and best practices\nIf failed: after Step 3 analysis, output concise conclusion including: error cause + correct solution", "tool_used_status": [ { - "used_tool": "Tool Name 1", - "success_rate": "Numerical value between 0.0-1.0, indicating the success rate of this tool in the current trajectory", + "used_tool": "Tool name (if tool was called)", + "success_rate": "Numerical value between 0.0-1.0, indicating the success rate of this tool in current trajectory", "error_type": "Error type and description when call fails, empty string when successful" } ] @@ -98,27 +103,17 @@ ] ``` -**Format for Scenario B:** -```json -[ - { - "trajectory": "Natural language description of 'task content, why tool calls are not needed, final answer'", - "experience": "Output 'Correct execution trajectory' if correct, otherwise analyze the reason and provide a brief conclusion", - "tool_used_status": [] - } -] -``` - ## Notes: -- **trajectory must be concise**: Express the complete process clearly with minimal words, avoid lengthy descriptions - Each trajectory must be an independent complete process -- Multiple tools may be used in one trajectory, each tool is recorded independently in tool_used_status -- If multiple trajectories have sequential dependencies, they should be considered as one trajectory -- Only extract factual content, do not add any additional explanations or information +- A trajectory may involve multiple tools, each recorded independently in tool_used_status +- If no tool was called, tool_used_status is an empty array [] +- If multiple trajectories have sequential dependencies, treat them as one trajectory +- Only extract factual content, do not add any explanations or extra information - Ensure the returned content is valid JSON format +- The trajectory should be arranged according to the development order of messages -Please analyze the following conversation messages and extract tool call trajectories: - +Please analyze the following conversation messages and extract tool call trajectories based on: + {messages} - + """ From acc0bdf209494f164b7722d276609c99b25bb1ad Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Sat, 20 Dec 2025 11:25:08 +0800 Subject: [PATCH 23/28] modify bug --- src/memos/mem_reader/read_multi_modal/system_parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/memos/mem_reader/read_multi_modal/system_parser.py b/src/memos/mem_reader/read_multi_modal/system_parser.py index 066944485..e80e34ac8 100644 --- 
a/src/memos/mem_reader/read_multi_modal/system_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/system_parser.py
@@ -208,11 +208,11 @@ def format_tool_schema_readable(tool_schema):
             processed_text = f"<tool_schema>{readable_schema}</tool_schema>"
             content = content.replace(original_text, processed_text, 1)
 
-            parts = ["system: "]
-            if message.get("chat_time"):
-                parts.append(f"[{message.get('chat_time')}]: ")
-            prefix = "".join(parts)
-            line = f"{prefix}{content}\n"
+        parts = ["system: "]
+        if message.get("chat_time"):
+            parts.append(f"[{message.get('chat_time')}]: ")
+        prefix = "".join(parts)
+        line = f"{prefix}{content}\n"
 
         source = self.create_source(message, info)
 

From a98076ce932b2a8f58e0d48595ed3a70af83a2cf Mon Sep 17 00:00:00 2001
From: "yuan.wang" 
Date: Sat, 20 Dec 2025 13:37:59 +0800
Subject: [PATCH 24/28] modify code

---
 .../read_multi_modal/system_parser.py         | 36 ++++++++++++------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/memos/mem_reader/read_multi_modal/system_parser.py b/src/memos/mem_reader/read_multi_modal/system_parser.py
index e80e34ac8..dfffb4626 100644
--- a/src/memos/mem_reader/read_multi_modal/system_parser.py
+++ b/src/memos/mem_reader/read_multi_modal/system_parser.py
@@ -42,9 +42,10 @@ def create_source(
         info: dict[str, Any],
     ) -> SourceMessage:
         """Create SourceMessage from system message."""
-        content = message["content"]
+
+        content = message.get("content", "")
         if isinstance(content, dict):
-            content = content["text"]
+            content = content.get("text", "")
 
         content_wo_tool_schema = re.sub(
             r"<tool_schema>(.*?)</tool_schema>",
@@ -84,9 +85,9 @@ def parse_fast(
         info: dict[str, Any],
         **kwargs,
     ) -> list[TextualMemoryItem]:
-        content = message["content"]
+        content = message.get("content", "")
         if isinstance(content, dict):
-            content = content["text"]
+            content = content.get("text", "")
 
         # Find first tool_schema block
         tool_schema_pattern = r"<tool_schema>(.*?)</tool_schema>"
@@ -166,9 +167,14 @@ def format_tool_schema_readable(tool_schema):
                 """Convert tool schema to readable format: tool_name: [param1 (type1), ...](required: ...)"""
                 lines = []
                 for tool in tool_schema:
+                    if not tool:
+                        continue
+
                     # Handle both new format and old-style OpenAI function format
                     if tool.get("type") == "function" and "function" in tool:
-                        tool_info = tool["function"]
+                        tool_info = tool.get("function")
+                        if not tool_info:
+                            continue
                     else:
                         tool_info = tool
 
@@ -183,14 +189,20 @@
                         if isinstance(param_info, dict):
                             param_type = param_info.get("type", "any")
                             # Handle enum
-                            if "enum" in param_info:
-                                param_type = f"{param_type}[{', '.join(param_info['enum'])}]"
+                            if "enum" in param_info and param_info["enum"] is not None:
+                                # Ensure all enum values are strings
+                                enum_values = [str(v) for v in param_info["enum"]]
+                                param_type = f"{param_type}[{', '.join(enum_values)}]"
                             param_strs.append(f"{param_name} ({param_type})")
                         else:
                             param_strs.append(f"{param_name} (any)")
 
                     # Format required parameters
-                    required_str = f"(required: {', '.join(required)})" if required else ""
+                    # Ensure all required parameter names are strings
+                    required_strs = [str(r) for r in required] if required else []
+                    required_str = (
+                        f"(required: {', '.join(required_strs)})" if required_strs else ""
+                    )
 
                     # Construct the line
                     params_part = f"[{', '.join(param_strs)}]" if param_strs else "[]"
@@ -212,7 +224,7 @@ def format_tool_schema_readable(tool_schema):
         if message.get("chat_time"):
             parts.append(f"[{message.get('chat_time')}]: ")
         prefix = "".join(parts)
-        line = f"{prefix}{content}\n"
+        msg_line = f"{prefix}{content}\n"
 
         source =
self.create_source(message, info) @@ -222,7 +234,7 @@ def format_tool_schema_readable(tool_schema): session_id = info_.pop("session_id", "") # Split parsed text into chunks - content_chunks = self._split_text(line) + content_chunks = self._split_text(msg_line) memory_items = [] for _chunk_idx, chunk_text in enumerate(content_chunks): @@ -250,9 +262,9 @@ def parse_fine( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - content = message["content"] + content = message.get("content", "") if isinstance(content, dict): - content = content["text"] + content = content.get("text", "") try: tool_schema = json.loads(content) assert isinstance(tool_schema, list), "Tool schema must be a list[dict]" From 87cf9cc2b5828f2aa690a78e8a39f2b3397cfb06 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Mon, 22 Dec 2025 10:47:53 +0800 Subject: [PATCH 25/28] modify tool prompt --- src/memos/templates/tool_mem_prompts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/memos/templates/tool_mem_prompts.py b/src/memos/templates/tool_mem_prompts.py index 7cfc3035a..ef5c9ed02 100644 --- a/src/memos/templates/tool_mem_prompts.py +++ b/src/memos/templates/tool_mem_prompts.py @@ -26,7 +26,7 @@ [ { "correctness": "success 或 failed", - "trajectory": "精炼完整的自然语言总结,包含:用户任务 -> 执行动作(调用的工具/直接回答) -> 执行结果 -> 最终回答", + "trajectory": "精炼完整的自然语言总结,包含:[任务(用户任务) -> 执行动作(调用的工具/直接回答) -> 执行结果] (可能多轮) -> 最终回答", "experience": "如果成功:总结有效的调用策略和最佳实践\n如果失败:按步骤3分析后,输出精简的结论,包含:错误原因 + 正确解法", "tool_used_status": [ { @@ -90,7 +90,7 @@ [ { "correctness": "success or failed", - "trajectory": "Concise and complete natural language summary including: user task -> execution action (tool called/direct answer) -> execution result -> final answer", + "trajectory": "Concise and complete natural language summary including: [task (user task) -> execution action (tool called/direct answer) -> execution result] (possibly multiple rounds) -> final answer", "experience": "If success: summarize effective calling strategies and best practices\nIf failed: after Step 3 analysis, output concise conclusion including: error cause + correct solution", "tool_used_status": [ { From 9f24265b84667a05ebbe4872e4e3be9694b5ad59 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Tue, 23 Dec 2025 15:15:02 +0800 Subject: [PATCH 26/28] add new prompt --- src/memos/templates/tool_mem_prompts.py | 173 ++++++++++++++++++++++-- 1 file changed, 162 insertions(+), 11 deletions(-) diff --git a/src/memos/templates/tool_mem_prompts.py b/src/memos/templates/tool_mem_prompts.py index ef5c9ed02..f19a7c32a 100644 --- a/src/memos/templates/tool_mem_prompts.py +++ b/src/memos/templates/tool_mem_prompts.py @@ -4,9 +4,14 @@ ## 分析判断步骤: **步骤1:判断任务完成度** 根据用户反馈,判定correctness:success(成功)或 failed(失败),用户反馈决定权大于执行结果,用户反馈有误,则判定为failed -**步骤2:成功轨迹(success)** -总结:有效的参数模式、调用策略、最佳实践 -**步骤3:失败轨迹(failed)- 错误分析** + +**步骤2:成功轨迹(success)- 经验提炼** +从成功模式中提炼通用原则或规则,采用"when...then..."结构: +- when: 明确描述触发该经验的场景特征(任务类型、工具环境、参数特征等) +- then: 总结有效的参数模式、调用策略、最佳实践 +注意:经验是解决整个轨迹问题级别的,不仅仅针对单个工具 + +**步骤3:失败轨迹(failed)- 错误分析与经验提炼** 3.1 工具需求判断 - 任务是否需要工具?(需要/直接回答/误调用) 3.2 工具调用检查 @@ -16,8 +21,11 @@ - 幻觉检测:是否调用不存在的工具 3.3 错误根因定位 结合消息中的错误反馈信息和上述分析,精准输出根本原因 -3.4 正确解法 - 给出避免错误的策略和正确调用方式,正确解法不单单是这个工具本身,而是整个轨迹的正确解法 +3.4 经验提炼(核心) + 从失败模式中提炼通用原则或规则,采用"when...then..."结构: + - when: 明确描述触发该经验的场景特征(任务类型、工具环境、参数特征等) + - then: 给出避免错误的通用策略、正确调用方式或决策规则 + 注意:经验是解决整个轨迹问题级别的,不仅仅针对单个工具 ## 输出格式: 返回一个JSON数组,格式如下: @@ -27,7 +35,7 @@ { "correctness": "success 或 failed", "trajectory": "精炼完整的自然语言总结,包含:[任务(用户任务) -> 
执行动作(调用的工具/直接回答) -> 执行结果] (可能多轮) -> 最终回答", - "experience": "如果成功:总结有效的调用策略和最佳实践\n如果失败:按步骤3分析后,输出精简的结论,包含:错误原因 + 正确解法", + "experience": "采用when...then...格式,例如:'when 遇到XX的任务时,应该YY'", "tool_used_status": [ { "used_tool": "工具名称(如果调用了工具)", @@ -47,6 +55,8 @@ - 只提取事实内容,不要添加任何解释或额外信息 - 确保返回的是有效的JSON格式 - 输出的trajectory需要按照messages的发展顺序排列 +- experience必须是通用的、可复用的经验规则,而不是针对具体案例的描述 +- 无论成功或失败,都要提炼经验并使用when...then...格式 请分析以下对话消息并提取工具调用轨迹,基于以下对话消息: @@ -63,10 +73,146 @@ **Step 1: Assess Task Completion** Determine correctness based on user feedback: success or failed, user feedback has higher priority than execution results, if user feedback is incorrect, then determine as failed +**Step 2: Successful Trajectory (success) - Experience Extraction** +Extract general principles or rules from success patterns, using "when...then..." structure: +- when: clearly describe the scenario characteristics that trigger this experience (task type, tool environment, parameter characteristics, etc.) +- then: summarize effective parameter patterns, calling strategies, and best practices +Note: Experience is at the trajectory-level problem-solving, not just for a single tool + +**Step 3: Failed Trajectory (failed) - Error Analysis and Experience Extraction** + +3.1 Tool Requirement Assessment + - Does the task require tools? (required/direct answer/unnecessary call) + +3.2 Tool Call Verification + - Tool availability: provided in system? + - Tool selection: correct tool chosen? + - Parameter correctness: conform to type definitions? + - Hallucination detection: calling non-existent tools? + +3.3 Root Cause Identification + Combine error feedback from messages with above analysis to precisely output root cause + +3.4 Experience Extraction (Core) + Extract general principles or rules from failure patterns, using "when...then..." structure: + - when: clearly describe the scenario characteristics that trigger this experience (task type, tool environment, parameter characteristics, etc.) + - then: provide general strategies to avoid errors, correct calling approaches, or decision rules + Note: Experience is at the trajectory-level problem-solving, not just for a single tool + +## Output Format: +Return a JSON array in the following format: + +```json +[ + { + "correctness": "success or failed", + "trajectory": "Concise and complete natural language summary including: [task (user task) -> execution action (tool called/direct answer) -> execution result] (possibly multiple rounds) -> final answer", + "experience": "Use when...then... 
format, e.g., 'when encountering XX tasks, should do YY'", + "tool_used_status": [ + { + "used_tool": "Tool name (if tool was called)", + "success_rate": "Numerical value between 0.0-1.0, indicating the success rate of this tool in current trajectory", + "error_type": "Error type and description when call fails, empty string when successful" + } + ] + } +] +``` + +## Notes: +- Each trajectory must be an independent complete process +- A trajectory may involve multiple tools, each recorded independently in tool_used_status +- If no tool was called, tool_used_status is an empty array [] +- If multiple trajectories have sequential dependencies, treat them as one trajectory +- Only extract factual content, do not add any explanations or extra information +- Ensure the returned content is valid JSON format +- The trajectory should be arranged according to the development order of messages +- Experience must be general and reusable rules, not descriptions specific to concrete cases +- Whether success or failed, always extract experience using when...then... format + +Please analyze the following conversation messages and extract tool call trajectories based on: + +{messages} + +""" + + +TOOL_TRAJECTORY_PROMPT_ZH_BAK = """ +你是一个专业的工具经验提取专家。你的任务是从给定的对话消息中提取完整的工具调用轨迹经验。 + +## 分析判断步骤: +**步骤1:判断任务完成度** +根据用户反馈,判定correctness:success(成功)或 failed(失败),用户反馈决定权大于执行结果,用户反馈有误,则判定为failed + +**步骤2:成功轨迹(success)** +记录轨迹信息,但经验留空,不需要记录 + +**步骤3:失败轨迹(failed)- 错误分析与经验提炼** +3.1 工具需求判断 + - 任务是否需要工具?(需要/直接回答/误调用) +3.2 工具调用检查 + - 工具存在性:是否在system中提供 + - 工具选择:是否选对工具 + - 参数正确性:是否符合类型定义 + - 幻觉检测:是否调用不存在的工具 +3.3 错误根因定位 + 结合消息中的错误反馈信息和上述分析,精准输出根本原因 +3.4 经验提炼(核心) + 从失败模式中提炼通用原则或规则,采用"when...then..."结构: + - when: 明确描述触发该经验的场景特征(任务类型、工具环境、参数特征等) + - then: 给出避免错误的通用策略、正确调用方式或决策规则 + 注意:经验是解决整个轨迹问题级别的,不仅仅针对单个工具 + +## 输出格式: +返回一个JSON数组,格式如下: + +```json +[ + { + "correctness": "success 或 failed", + "trajectory": "精炼完整的自然语言总结,包含:[任务(用户任务) -> 执行动作(调用的工具/直接回答) -> 执行结果] (可能多轮) -> 最终回答", + "experience": "如果成功:留空\n如果失败:采用when...then...格式,例如:'when 遇到XX的任务时,应该YY'", + "tool_used_status": [ + { + "used_tool": "工具名称(如果调用了工具)", + "success_rate": "0.0-1.0之间的数值,表示该工具在本次轨迹中的成功率", + "error_type": "调用失败时的错误类型和描述,成功时为空字符串" + } + ] + } +] +``` + +## 注意事项: +- 每个轨迹必须是独立的完整过程 +- 一个轨迹中可能涉及多个工具的使用,每个工具在tool_used_status中独立记录 +- 如果没有调用工具,tool_used_status为空数组[] +- 如果多条轨迹存在顺序依赖关系,需要将它们视为一条轨迹 +- 只提取事实内容,不要添加任何解释或额外信息 +- 确保返回的是有效的JSON格式 +- 输出的trajectory需要按照messages的发展顺序排列 +- 当任务完成度为success时,experience字段应留空(空字符串),但trajectory字段仍需记录完整轨迹 +- experience必须是通用的、可复用的经验规则,而不是针对具体案例的描述 + +请分析以下对话消息并提取工具调用轨迹,基于以下对话消息: + +{messages} + +""" + + +TOOL_TRAJECTORY_PROMPT_EN_BAK = """ +You are a professional tool experience extraction expert. Your task is to extract complete tool call trajectory experiences from given conversation messages. + +## Analysis and Judgment Steps: + +**Step 1: Assess Task Completion** +Determine correctness based on user feedback: success or failed, user feedback has higher priority than execution results, if user feedback is incorrect, then determine as failed + **Step 2: Successful Trajectory (success)** -Summarize: effective parameter patterns, calling strategies, best practices +Record trajectory information, but leave experience empty, no need to record -**Step 3: Failed Trajectory (failed) - Error Analysis** +**Step 3: Failed Trajectory (failed) - Error Analysis and Experience Extraction** 3.1 Tool Requirement Assessment - Does the task require tools? 
(required/direct answer/unnecessary call) @@ -80,8 +226,11 @@ 3.3 Root Cause Identification Combine error feedback from messages with above analysis to precisely output root cause -3.4 Correct Solution - Provide strategies to avoid errors and correct calling approach. The solution should address the entire trajectory, not just the tool itself +3.4 Experience Extraction (Core) + Extract general principles or rules from failure patterns, using "when...then..." structure: + - when: clearly describe the scenario characteristics that trigger this experience (task type, tool environment, parameter characteristics, etc.) + - then: provide general strategies to avoid errors, correct calling approaches, or decision rules + Note: Experience is at the trajectory-level problem-solving, not just for a single tool ## Output Format: Return a JSON array in the following format: @@ -91,7 +240,7 @@ { "correctness": "success or failed", "trajectory": "Concise and complete natural language summary including: [task (user task) -> execution action (tool called/direct answer) -> execution result] (possibly multiple rounds) -> final answer", - "experience": "If success: summarize effective calling strategies and best practices\nIf failed: after Step 3 analysis, output concise conclusion including: error cause + correct solution", + "experience": "If success: leave empty\nIf failed: use when...then... format, e.g., 'when encountering XX tasks, should do YY'", "tool_used_status": [ { "used_tool": "Tool name (if tool was called)", @@ -111,6 +260,8 @@ - Only extract factual content, do not add any explanations or extra information - Ensure the returned content is valid JSON format - The trajectory should be arranged according to the development order of messages +- When task completion is success, the experience field should be left empty (empty string), but the trajectory field should still record the complete trajectory +- Experience must be general and reusable rules, not descriptions specific to concrete cases Please analyze the following conversation messages and extract tool call trajectories based on: From 28adf9f1e5f98e88d276f5a81f9282ae50772314 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Tue, 23 Dec 2025 19:31:24 +0800 Subject: [PATCH 27/28] add correct --- src/memos/mem_reader/multi_modal_struct.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index 2118422b0..c560984b6 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -567,6 +567,7 @@ def _process_tool_trajectory_fine( value=m.get("trajectory", ""), info=info, memory_type=memory_type, + correctness=m.get("correctness", ""), experience=m.get("experience", ""), tool_used_status=m.get("tool_used_status", []), ) From 51b382ea8eec3ca3c920eb08e8403faa7b4463a4 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Tue, 6 Jan 2026 16:08:12 +0800 Subject: [PATCH 28/28] modify bug --- src/memos/api/routers/server_router.py | 8 ++++++++ src/memos/memories/textual/preference.py | 2 +- src/memos/memories/textual/simple_preference.py | 8 ++++---- src/memos/vec_dbs/milvus.py | 6 ++---- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/memos/api/routers/server_router.py b/src/memos/api/routers/server_router.py index c60e84253..a4052d313 100644 --- a/src/memos/api/routers/server_router.py +++ b/src/memos/api/routers/server_router.py @@ -90,6 +90,7 @@ status_tracker = 
TaskStatusTracker(redis_client=redis_client)
 embedder = components["embedder"]
 graph_db = components["graph_db"]
+vector_db = components["vector_db"]
 
 
 # =============================================================================
@@ -359,6 +360,13 @@ def get_user_names_by_memory_ids(request: GetUserNamesByMemoryIdsRequest):
             ),
         )
     result = graph_db.get_user_names_by_memory_ids(memory_ids=request.memory_ids)
+    if vector_db:
+        prefs = []
+        for collection_name in ["explicit_preference", "implicit_preference"]:
+            prefs.extend(
+                vector_db.get_by_ids(collection_name=collection_name, ids=request.memory_ids)
+            )
+        result.update({pref.id: pref.payload.get("mem_cube_id", None) for pref in prefs})
     return GetUserNamesByMemoryIdsResponse(
         code=200,
         message="Successfully",
diff --git a/src/memos/memories/textual/preference.py b/src/memos/memories/textual/preference.py
index a34315918..78f4d6e28 100644
--- a/src/memos/memories/textual/preference.py
+++ b/src/memos/memories/textual/preference.py
@@ -248,7 +248,7 @@ def get_all(self) -> list[TextualMemoryItem]:
         Returns:
             list[TextualMemoryItem]: List of all memories.
         """
-        all_collections = self.vector_db.list_collections()
+        all_collections = ["explicit_preference", "implicit_preference"]
         all_memories = {}
         for collection_name in all_collections:
             items = self.vector_db.get_all(collection_name)
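The `get_all` hunk above, and the matching ones in `simple_preference.py` below, pin the scan to the two preference collections instead of calling `self.vector_db.list_collections()`, which can also return unrelated collections whose payloads do not fit `PreferenceTextualMemoryMetadata`. A minimal sketch of the pinned-collection pattern; the helper name `collect_preferences` is hypothetical and not part of the patch, only the two collection literals come from it:

```python
# Illustrative sketch: scan only the known preference collections.
PREFERENCE_COLLECTIONS = ["explicit_preference", "implicit_preference"]

def collect_preferences(vector_db) -> dict[str, list]:
    """Gather items per preference collection, skipping unrelated ones.

    list_collections() may include e.g. tool or chat-memory collections,
    whose payloads would fail preference metadata validation.
    """
    all_memories: dict[str, list] = {}
    for collection_name in PREFERENCE_COLLECTIONS:
        # Same call shape as the get_all methods in the patch.
        all_memories[collection_name] = vector_db.get_all(collection_name)
    return all_memories
```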
""" - all_collections = self.vector_db.list_collections() + all_collections = ["explicit_preference", "implicit_preference"] all_memories = {} for collection_name in all_collections: items = self.vector_db.get_all(collection_name) all_memories[collection_name] = [ TextualMemoryItem( id=memo.id, - memory=memo.payload.get("dialog_str", ""), + memory=memo.memory, metadata=PreferenceTextualMemoryMetadata(**memo.payload), ) for memo in items diff --git a/src/memos/vec_dbs/milvus.py b/src/memos/vec_dbs/milvus.py index 5dacf0499..cc8909d34 100644 --- a/src/memos/vec_dbs/milvus.py +++ b/src/memos/vec_dbs/milvus.py @@ -457,14 +457,13 @@ def get_by_id(self, collection_name: str, id: str) -> MilvusVecDBItem | None: return None entity = results[0] - payload = {k: v for k, v in entity.items() if k not in ["id", "vector", "score"]} return MilvusVecDBItem( id=entity["id"], memory=entity.get("memory"), original_text=entity.get("original_text"), vector=entity.get("vector"), - payload=payload, + payload=entity.get("payload", {}), ) def get_by_ids(self, collection_name: str, ids: list[str]) -> list[MilvusVecDBItem]: @@ -479,14 +478,13 @@ def get_by_ids(self, collection_name: str, ids: list[str]) -> list[MilvusVecDBIt items = [] for entity in results: - payload = {k: v for k, v in entity.items() if k not in ["id", "vector", "score"]} items.append( MilvusVecDBItem( id=entity["id"], memory=entity.get("memory"), original_text=entity.get("original_text"), vector=entity.get("vector"), - payload=payload, + payload=entity.get("payload", {}), ) )