diff --git a/src/memos/mem_reader/multi_model_struct.py b/src/memos/mem_reader/multi_model_struct.py index 13824f7d8..8c5fcdd14 100644 --- a/src/memos/mem_reader/multi_model_struct.py +++ b/src/memos/mem_reader/multi_model_struct.py @@ -39,8 +39,16 @@ def __init__(self, config: MultiModelStructMemReaderConfig): parser=None, ) + def _concat_multi_model_memories( + self, all_memory_items: list[TextualMemoryItem] + ) -> list[TextualMemoryItem]: + # TODO: concat multi_model_memories + return all_memory_items + @timed - def _process_multi_model_data(self, scene_data_info: MessagesType, info, **kwargs): + def _process_multi_model_data( + self, scene_data_info: MessagesType, info, **kwargs + ) -> list[TextualMemoryItem]: """ Process multi-model data using MultiModelParser. @@ -50,6 +58,9 @@ def _process_multi_model_data(self, scene_data_info: MessagesType, info, **kwarg **kwargs: Additional parameters (mode, etc.) """ mode = kwargs.get("mode", "fine") + # Pop custom_tags from info (same as simple_struct.py) + # must pop here, avoid add to info, only used in sync fine mode + custom_tags = info.pop("custom_tags", None) if isinstance(info, dict) else None # Use MultiModelParser to parse the scene data # If it's a list, parse each item; otherwise parse as single message @@ -57,16 +68,71 @@ def _process_multi_model_data(self, scene_data_info: MessagesType, info, **kwarg # Parse each message in the list all_memory_items = [] for msg in scene_data_info: - items = self.multi_model_parser.parse(msg, info, mode=mode, **kwargs) + items = self.multi_model_parser.parse(msg, info, mode="fast", **kwargs) all_memory_items.extend(items) - return all_memory_items + fast_memory_items = self._concat_multi_model_memories(all_memory_items) + else: # Parse as single message - return self.multi_model_parser.parse(scene_data_info, info, mode=mode, **kwargs) + fast_memory_items = self.multi_model_parser.parse( + scene_data_info, info, mode="fast", **kwargs + ) + + if mode == "fast": + return fast_memory_items + else: + # TODO: parallel call llm and get fine multi model items + # Part A: call llm + fine_memory_items = [] + fine_memory_items_string_parser = [] + fine_memory_items.extend(fine_memory_items_string_parser) + # Part B: get fine multi model items + + for fast_item in fast_memory_items: + sources = fast_item.metadata.sources + for source in sources: + items = self.multi_model_parser.process_transfer( + source, context_items=[fast_item], custom_tags=custom_tags + ) + fine_memory_items.extend(items) + logger.warning("Not Implemented Now!") + return fine_memory_items @timed - def _process_transfer_multi_model_data(self, raw_node: TextualMemoryItem): - raise NotImplementedError + def _process_transfer_multi_model_data( + self, + raw_node: TextualMemoryItem, + custom_tags: list[str] | None = None, + ) -> list[TextualMemoryItem]: + """ + Process transfer for multi-model data. + + Each source is processed independently by its corresponding parser, + which knows how to rebuild the original message and parse it in fine mode. + """ + sources = raw_node.metadata.sources or [] + if not sources: + logger.warning("[MultiModelStruct] No sources found in raw_node") + return [] + + # Extract info from raw_node (same as simple_struct.py) + info = { + "user_id": raw_node.metadata.user_id, + "session_id": raw_node.metadata.session_id, + **(raw_node.metadata.info or {}), + } + + fine_memory_items = [] + # Part A: call llm + fine_memory_items_string_parser = [] + fine_memory_items.extend(fine_memory_items_string_parser) + # Part B: get fine multi model items + for source in sources: + items = self.multi_model_parser.process_transfer( + source, context_items=[raw_node], info=info, custom_tags=custom_tags + ) + fine_memory_items.extend(items) + return fine_memory_items def get_scene_data_info(self, scene_data: list, type: str) -> list[list[Any]]: """ @@ -85,7 +151,7 @@ def get_scene_data_info(self, scene_data: list, type: str) -> list[list[Any]]: def _read_memory( self, messages: list[MessagesType], type: str, info: dict[str, Any], mode: str = "fine" - ): + ) -> list[list[TextualMemoryItem]]: list_scene_data_info = self.get_scene_data_info(messages, type) memory_list = [] @@ -106,7 +172,10 @@ def _read_memory( return memory_list def fine_transfer_simple_mem( - self, input_memories: list[TextualMemoryItem], type: str + self, + input_memories: list[TextualMemoryItem], + type: str, + custom_tags: list[str] | None = None, ) -> list[list[TextualMemoryItem]]: if not input_memories: return [] @@ -116,7 +185,9 @@ def fine_transfer_simple_mem( # Process Q&A pairs concurrently with context propagation with ContextThreadPoolExecutor() as executor: futures = [ - executor.submit(self._process_transfer_multi_model_data, scene_data_info) + executor.submit( + self._process_transfer_multi_model_data, scene_data_info, custom_tags + ) for scene_data_info in input_memories ] for future in concurrent.futures.as_completed(futures): diff --git a/src/memos/mem_reader/read_multi_model/assistant_parser.py b/src/memos/mem_reader/read_multi_model/assistant_parser.py index 2f2cbbc5d..726a954d3 100644 --- a/src/memos/mem_reader/read_multi_model/assistant_parser.py +++ b/src/memos/mem_reader/read_multi_model/assistant_parser.py @@ -5,10 +5,10 @@ from memos.embedders.base import BaseEmbedder from memos.llms.base import BaseLLM from memos.log import get_logger -from memos.memories.textual.item import TextualMemoryItem +from memos.memories.textual.item import SourceMessage, TextualMemoryItem from memos.types.openai_chat_completion_types import ChatCompletionAssistantMessageParam -from .base import BaseMessageParser +from .base import BaseMessageParser, _extract_text_from_content logger = get_logger(__name__) @@ -25,8 +25,37 @@ def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None): embedder: Embedder for generating embeddings llm: Optional LLM for fine mode processing """ - self.embedder = embedder - self.llm = llm + super().__init__(embedder, llm) + + def create_source( + self, + message: ChatCompletionAssistantMessageParam, + info: dict[str, Any], + ) -> SourceMessage: + """Create SourceMessage from assistant message.""" + if not isinstance(message, dict): + return SourceMessage(type="chat", role="assistant") + + content = _extract_text_from_content(message.get("content", "")) + return SourceMessage( + type="chat", + role="assistant", + chat_time=message.get("chat_time"), + message_id=message.get("message_id"), + content=content, + ) + + def rebuild_from_source( + self, + source: SourceMessage, + ) -> ChatCompletionAssistantMessageParam: + """Rebuild assistant message from SourceMessage.""" + return { + "role": "assistant", + "content": source.content or "", + "chat_time": source.chat_time, + "message_id": source.message_id, + } def parse_fast( self, @@ -34,7 +63,7 @@ def parse_fast( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - return [] + return super().parse_fast(message, info, **kwargs) def parse_fine( self, diff --git a/src/memos/mem_reader/read_multi_model/base.py b/src/memos/mem_reader/read_multi_model/base.py index 024a940b8..e59b6a6bc 100644 --- a/src/memos/mem_reader/read_multi_model/base.py +++ b/src/memos/mem_reader/read_multi_model/base.py @@ -4,16 +4,109 @@ in both fast and fine modes. """ +import re + from abc import ABC, abstractmethod from typing import Any -from memos.memories.textual.item import TextualMemoryItem +from memos import log +from memos.memories.textual.item import ( + SourceMessage, + TextualMemoryItem, + TreeNodeTextualMemoryMetadata, +) + + +logger = log.get_logger(__name__) + + +def _derive_key(text: str, max_len: int = 80) -> str: + """Default key when without LLM: first max_len words.""" + if not text: + return "" + sent = re.split(r"[。!?!?]\s*|\n", text.strip())[0] + return (sent[:max_len]).strip() + + +def _extract_text_from_content(content: Any) -> str: + """ + Extract text from message content. + Handles str, list of parts, or None. + """ + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + texts = [] + for part in content: + if isinstance(part, dict): + part_type = part.get("type", "") + if part_type == "text": + texts.append(part.get("text", "")) + elif part_type == "file": + file_info = part.get("file", {}) + texts.append(file_info.get("file_data") or file_info.get("filename", "[file]")) + else: + texts.append(f"[{part_type}]") + else: + texts.append(str(part)) + return " ".join(texts) + return str(content) class BaseMessageParser(ABC): """Base interface for message type parsers.""" + def __init__(self, embedder, llm=None): + """ + Initialize BaseMessageParser. + + Args: + embedder: Embedder for generating embeddings + llm: Optional LLM for fine mode processing + """ + self.embedder = embedder + self.llm = llm + + @abstractmethod + def create_source( + self, + message: Any, + info: dict[str, Any], + ) -> SourceMessage | list[SourceMessage]: + """ + Create SourceMessage(s) from the message. + + Each parser decides how to create sources: + - Simple messages: return single SourceMessage + - Multimodal messages: return list of SourceMessage (one per part) + + Args: + message: The message to create source from + info: Dictionary containing user_id and session_id + + Returns: + SourceMessage or list of SourceMessage + """ + @abstractmethod + def rebuild_from_source( + self, + source: SourceMessage, + ) -> Any: + """ + Rebuild original message from SourceMessage. + + Each parser knows how to reconstruct its own message type. + + Args: + source: SourceMessage to rebuild from + + Returns: + Rebuilt message in original format + """ + def parse_fast( self, message: Any, @@ -21,7 +114,15 @@ def parse_fast( **kwargs, ) -> list[TextualMemoryItem]: """ - Parse message in fast mode (no LLM calls, quick processing). + Default parse_fast implementation (equivalent to simple_struct fast mode). + + Fast mode logic: + - Extract text content from message + - Determine memory_type based on role (UserMemory for user, LongTermMemory otherwise) + - Create TextualMemoryItem with tags=["mode:fast"] + - No LLM calls, quick processing + + Subclasses can override this method for custom behavior. Args: message: The message to parse @@ -31,6 +132,52 @@ def parse_fast( Returns: List of TextualMemoryItem objects """ + if not isinstance(message, dict): + logger.warning(f"[BaseParser] Expected dict, got {type(message)}") + return [] + + # Extract text content + content = _extract_text_from_content(message.get("content")) + if not content: + return [] + + # Determine memory_type based on role (equivalent to simple_struct logic) + role = message.get("role", "").strip().lower() + memory_type = "UserMemory" if role == "user" else "LongTermMemory" + + # Create source(s) using parser's create_source method + sources = self.create_source(message, info) + if isinstance(sources, SourceMessage): + sources = [sources] + elif not sources: + return [] + + # Extract info fields + info_ = info.copy() + user_id = info_.pop("user_id", "") + session_id = info_.pop("session_id", "") + + # Create memory item (equivalent to _make_memory_item) + memory_item = TextualMemoryItem( + memory=content, + metadata=TreeNodeTextualMemoryMetadata( + user_id=user_id, + session_id=session_id, + memory_type=memory_type, + status="activated", + tags=["mode:fast"], + key=_derive_key(content), + embedding=self.embedder.embed([content])[0], + usage=[], + sources=sources, + background="", + confidence=0.99, + type="fact", + info=info_, + ), + ) + + return [memory_item] @abstractmethod def parse_fine( diff --git a/src/memos/mem_reader/read_multi_model/file_content_parser.py b/src/memos/mem_reader/read_multi_model/file_content_parser.py index 71af89d18..32769d764 100644 --- a/src/memos/mem_reader/read_multi_model/file_content_parser.py +++ b/src/memos/mem_reader/read_multi_model/file_content_parser.py @@ -5,7 +5,7 @@ from memos.embedders.base import BaseEmbedder from memos.llms.base import BaseLLM from memos.log import get_logger -from memos.memories.textual.item import TextualMemoryItem +from memos.memories.textual.item import SourceMessage, TextualMemoryItem from memos.parsers.factory import ParserFactory from memos.types.openai_chat_completion_types import File @@ -32,10 +32,43 @@ def __init__( llm: Optional LLM for fine mode processing parser: Optional parser for parsing file contents """ - self.embedder = embedder - self.llm = llm + super().__init__(embedder, llm) self.parser = parser + def create_source( + self, + message: File, + info: dict[str, Any], + ) -> SourceMessage: + """Create SourceMessage from file content part.""" + if isinstance(message, dict): + file_info = message.get("file", {}) + return SourceMessage( + type="file", + doc_path=file_info.get("filename") or file_info.get("file_id", ""), + content=file_info.get("file_data", ""), + original_part=message, + ) + return SourceMessage(type="file", doc_path=str(message)) + + def rebuild_from_source( + self, + source: SourceMessage, + ) -> File: + """Rebuild file content part from SourceMessage.""" + # Use original_part if available + if hasattr(source, "original_part") and source.original_part: + return source.original_part + + # Rebuild from source fields + return { + "type": "file", + "file": { + "filename": source.doc_path or "", + "file_data": source.content or "", + }, + } + def _parse_file(self, file_info: dict[str, Any]) -> str: """ Parse file content. diff --git a/src/memos/mem_reader/read_multi_model/multi_model_parser.py b/src/memos/mem_reader/read_multi_model/multi_model_parser.py index e16733468..083db67d4 100644 --- a/src/memos/mem_reader/read_multi_model/multi_model_parser.py +++ b/src/memos/mem_reader/read_multi_model/multi_model_parser.py @@ -9,7 +9,7 @@ from memos.embedders.base import BaseEmbedder from memos.llms.base import BaseLLM from memos.log import get_logger -from memos.memories.textual.item import TextualMemoryItem +from memos.memories.textual.item import SourceMessage, TextualMemoryItem from memos.types import MessagesType from .assistant_parser import AssistantParser @@ -168,3 +168,72 @@ def parse_batch( items = self.parse(message, info, mode, **kwargs) results.append(items) return results + + def process_transfer( + self, + source: SourceMessage, + context_items: list[TextualMemoryItem] | None = None, + **kwargs, + ) -> list[TextualMemoryItem]: + """ + Process transfer from SourceMessage to fine memory items. + + This method: + 1. Determines which parser to use based on source type + 2. Rebuilds message from source using parser's rebuild_from_source + 3. Calls parse_fine on the appropriate parser + + Args: + source: SourceMessage to process + context_items: Optional list of TextualMemoryItem for context + **kwargs: Additional parameters (e.g., info dict with user_id, session_id, custom_tags) + + Returns: + List of TextualMemoryItem objects from fine mode parsing + """ + if not self.llm: + logger.warning("[MultiModelParser] LLM not available for process_transfer") + return [] + + # Extract info from context_items if available + info = kwargs.get("info", {}) + if context_items and len(context_items) > 0: + first_item = context_items[0] + if not info: + info = { + "user_id": first_item.metadata.user_id, + "session_id": first_item.metadata.session_id, + } + + # Extract custom_tags from kwargs (same as simple_struct.py) + custom_tags = kwargs.get("custom_tags") + + # Try to determine parser from source.type + parser = None + if source.type == "file": + parser = self.file_content_parser + elif source.type == "text": + parser = self.text_content_parser + elif source.role: + # Chat message, use role parser + parser = self.role_parsers.get(source.role) + + if not parser: + logger.warning(f"[MultiModelParser] Could not determine parser for source: {source}") + return [] + + # Rebuild message from source using parser's method + try: + message = parser.rebuild_from_source(source) + except Exception as e: + logger.error(f"[MultiModelParser] Error rebuilding message from source: {e}") + return [] + + # Parse in fine mode (pass custom_tags to parse_fine) + try: + return parser.parse_fine( + message, info, context_items=context_items, custom_tags=custom_tags, **kwargs + ) + except Exception as e: + logger.error(f"[MultiModelParser] Error parsing in fine mode: {e}") + return [] diff --git a/src/memos/mem_reader/read_multi_model/string_parser.py b/src/memos/mem_reader/read_multi_model/string_parser.py index 5c5c829b3..8d65f5c8a 100644 --- a/src/memos/mem_reader/read_multi_model/string_parser.py +++ b/src/memos/mem_reader/read_multi_model/string_parser.py @@ -8,7 +8,7 @@ from memos.embedders.base import BaseEmbedder from memos.llms.base import BaseLLM from memos.log import get_logger -from memos.memories.textual.item import TextualMemoryItem +from memos.memories.textual.item import SourceMessage, TextualMemoryItem from .base import BaseMessageParser @@ -27,8 +27,25 @@ def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None): embedder: Embedder for generating embeddings llm: Optional LLM for fine mode processing """ - self.embedder = embedder - self.llm = llm + super().__init__(embedder, llm) + + def create_source( + self, + message: str, + info: dict[str, Any], + ) -> SourceMessage: + """Create SourceMessage from string message.""" + return SourceMessage( + type="doc", + content=str(message), + ) + + def rebuild_from_source( + self, + source: SourceMessage, + ) -> str: + """Rebuild string message from SourceMessage.""" + return source.content or "" def parse_fast( self, diff --git a/src/memos/mem_reader/read_multi_model/system_parser.py b/src/memos/mem_reader/read_multi_model/system_parser.py index 3024ef89c..258b752cc 100644 --- a/src/memos/mem_reader/read_multi_model/system_parser.py +++ b/src/memos/mem_reader/read_multi_model/system_parser.py @@ -5,10 +5,10 @@ from memos.embedders.base import BaseEmbedder from memos.llms.base import BaseLLM from memos.log import get_logger -from memos.memories.textual.item import TextualMemoryItem +from memos.memories.textual.item import SourceMessage, TextualMemoryItem from memos.types.openai_chat_completion_types import ChatCompletionSystemMessageParam -from .base import BaseMessageParser +from .base import BaseMessageParser, _extract_text_from_content logger = get_logger(__name__) @@ -25,8 +25,37 @@ def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None): embedder: Embedder for generating embeddings llm: Optional LLM for fine mode processing """ - self.embedder = embedder - self.llm = llm + super().__init__(embedder, llm) + + def create_source( + self, + message: ChatCompletionSystemMessageParam, + info: dict[str, Any], + ) -> SourceMessage: + """Create SourceMessage from system message.""" + if not isinstance(message, dict): + return SourceMessage(type="chat", role="system") + + content = _extract_text_from_content(message.get("content", "")) + return SourceMessage( + type="chat", + role="system", + chat_time=message.get("chat_time"), + message_id=message.get("message_id"), + content=content, + ) + + def rebuild_from_source( + self, + source: SourceMessage, + ) -> ChatCompletionSystemMessageParam: + """Rebuild system message from SourceMessage.""" + return { + "role": "system", + "content": source.content or "", + "chat_time": source.chat_time, + "message_id": source.message_id, + } def parse_fast( self, @@ -34,7 +63,7 @@ def parse_fast( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - return [] + return super().parse_fast(message, info, **kwargs) def parse_fine( self, diff --git a/src/memos/mem_reader/read_multi_model/text_content_parser.py b/src/memos/mem_reader/read_multi_model/text_content_parser.py index d9a9700d4..051d5ec47 100644 --- a/src/memos/mem_reader/read_multi_model/text_content_parser.py +++ b/src/memos/mem_reader/read_multi_model/text_content_parser.py @@ -5,7 +5,7 @@ from memos.embedders.base import BaseEmbedder from memos.llms.base import BaseLLM from memos.log import get_logger -from memos.memories.textual.item import TextualMemoryItem +from memos.memories.textual.item import SourceMessage, TextualMemoryItem from memos.types.openai_chat_completion_types import ChatCompletionContentPartTextParam from .base import BaseMessageParser @@ -25,8 +25,37 @@ def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None): embedder: Embedder for generating embeddings llm: Optional LLM for fine mode processing """ - self.embedder = embedder - self.llm = llm + super().__init__(embedder, llm) + + def create_source( + self, + message: ChatCompletionContentPartTextParam, + info: dict[str, Any], + ) -> SourceMessage: + """Create SourceMessage from text content part.""" + if isinstance(message, dict): + text = message.get("text", "") + return SourceMessage( + type="text", + content=text, + original_part=message, + ) + return SourceMessage(type="text", content=str(message)) + + def rebuild_from_source( + self, + source: SourceMessage, + ) -> ChatCompletionContentPartTextParam: + """Rebuild text content part from SourceMessage.""" + # Use original_part if available + if hasattr(source, "original_part") and source.original_part: + return source.original_part + + # Rebuild from source fields + return { + "type": "text", + "text": source.content or "", + } def parse_fast( self, diff --git a/src/memos/mem_reader/read_multi_model/tool_parser.py b/src/memos/mem_reader/read_multi_model/tool_parser.py index abf705eaa..f7437312d 100644 --- a/src/memos/mem_reader/read_multi_model/tool_parser.py +++ b/src/memos/mem_reader/read_multi_model/tool_parser.py @@ -5,10 +5,10 @@ from memos.embedders.base import BaseEmbedder from memos.llms.base import BaseLLM from memos.log import get_logger -from memos.memories.textual.item import TextualMemoryItem +from memos.memories.textual.item import SourceMessage, TextualMemoryItem from memos.types.openai_chat_completion_types import ChatCompletionToolMessageParam -from .base import BaseMessageParser +from .base import BaseMessageParser, _extract_text_from_content logger = get_logger(__name__) @@ -25,8 +25,38 @@ def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None): embedder: Embedder for generating embeddings llm: Optional LLM for fine mode processing """ - self.embedder = embedder - self.llm = llm + super().__init__(embedder, llm) + + def create_source( + self, + message: ChatCompletionToolMessageParam, + info: dict[str, Any], + ) -> SourceMessage: + """Create SourceMessage from tool message.""" + if not isinstance(message, dict): + return SourceMessage(type="chat", role="tool") + + content = _extract_text_from_content(message.get("content", "")) + return SourceMessage( + type="chat", + role="tool", + chat_time=message.get("chat_time"), + message_id=message.get("message_id"), + content=content, + ) + + def rebuild_from_source( + self, + source: SourceMessage, + ) -> ChatCompletionToolMessageParam: + """Rebuild tool message from SourceMessage.""" + return { + "role": "tool", + "content": source.content or "", + "tool_call_id": source.message_id or "", # tool_call_id might be in message_id + "chat_time": source.chat_time, + "message_id": source.message_id, + } def parse_fast( self, @@ -34,7 +64,7 @@ def parse_fast( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - return [] + return super().parse_fast(message, info, **kwargs) def parse_fine( self, diff --git a/src/memos/mem_reader/read_multi_model/user_parser.py b/src/memos/mem_reader/read_multi_model/user_parser.py index 78f9d0057..7dc505167 100644 --- a/src/memos/mem_reader/read_multi_model/user_parser.py +++ b/src/memos/mem_reader/read_multi_model/user_parser.py @@ -5,17 +5,20 @@ from memos.embedders.base import BaseEmbedder from memos.llms.base import BaseLLM from memos.log import get_logger -from memos.memories.textual.item import TextualMemoryItem +from memos.memories.textual.item import SourceMessage, TextualMemoryItem from memos.types.openai_chat_completion_types import ChatCompletionUserMessageParam -from .base import BaseMessageParser +from .base import BaseMessageParser, _extract_text_from_content logger = get_logger(__name__) class UserParser(BaseMessageParser): - """Parser for user messages.""" + """Parser for user messages. + + Handles multimodal user messages by creating one SourceMessage per content part. + """ def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None): """ @@ -25,8 +28,140 @@ def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None): embedder: Embedder for generating embeddings llm: Optional LLM for fine mode processing """ - self.embedder = embedder - self.llm = llm + super().__init__(embedder, llm) + + def create_source( + self, + message: ChatCompletionUserMessageParam, + info: dict[str, Any], + ) -> SourceMessage | list[SourceMessage]: + """ + Create SourceMessage(s) from user message. + + For multimodal messages (content is a list), creates one SourceMessage per part. + For simple messages (content is str), creates a single SourceMessage. + """ + if not isinstance(message, dict): + return [] + + role = message.get("role", "user") + raw_content = message.get("content", "") + chat_time = message.get("chat_time") + message_id = message.get("message_id") + + sources = [] + + if isinstance(raw_content, list): + # Multimodal: create one SourceMessage per part + for part in raw_content: + if isinstance(part, dict): + part_type = part.get("type", "") + if part_type == "text": + sources.append( + SourceMessage( + type="chat", + role=role, + chat_time=chat_time, + message_id=message_id, + content=part.get("text", ""), + # Save original part for reconstruction + original_part=part, + ) + ) + elif part_type == "file": + file_info = part.get("file", {}) + sources.append( + SourceMessage( + type="file", + role=role, + chat_time=chat_time, + message_id=message_id, + doc_path=file_info.get("filename") or file_info.get("file_id", ""), + content=file_info.get("file_data", ""), + original_part=part, + ) + ) + else: + # image_url, input_audio, etc. + sources.append( + SourceMessage( + type=part_type, + role=role, + chat_time=chat_time, + message_id=message_id, + content=f"[{part_type}]", + original_part=part, + ) + ) + else: + # Simple message: single SourceMessage + content = _extract_text_from_content(raw_content) + if content: + sources.append( + SourceMessage( + type="chat", + role=role, + chat_time=chat_time, + message_id=message_id, + content=content, + ) + ) + + return ( + sources + if len(sources) > 1 + else (sources[0] if sources else SourceMessage(type="chat", role=role)) + ) + + def rebuild_from_source( + self, + source: SourceMessage, + ) -> ChatCompletionUserMessageParam: + """ + Rebuild user message from SourceMessage. + + If source has original_part, use it directly. + Otherwise, reconstruct from source fields. + """ + # Priority 1: Use original_part if available + if hasattr(source, "original_part") and source.original_part: + original = source.original_part + # If it's a content part, wrap it in a message + if isinstance(original, dict) and "type" in original: + return { + "role": source.role or "user", + "content": [original], + "chat_time": source.chat_time, + "message_id": source.message_id, + } + # If it's already a full message, return it + if isinstance(original, dict) and "role" in original: + return original + + # Priority 2: Rebuild from source fields + if source.type == "file": + return { + "role": source.role or "user", + "content": [ + { + "type": "file", + "file": { + "filename": source.doc_path or "", + "file_data": source.content or "", + }, + } + ], + "chat_time": source.chat_time, + "message_id": source.message_id, + } + + # Simple text message + return { + "role": source.role or "user", + "content": source.content or "", + "chat_time": source.chat_time, + "message_id": source.message_id, + } def parse_fast( self, @@ -34,7 +169,7 @@ def parse_fast( info: dict[str, Any], **kwargs, ) -> list[TextualMemoryItem]: - return [] + return super().parse_fast(message, info, **kwargs) def parse_fine( self, diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py index 94b0929f6..627a5793b 100644 --- a/src/memos/mem_reader/simple_struct.py +++ b/src/memos/mem_reader/simple_struct.py @@ -331,7 +331,7 @@ def _process_chat_data(self, scene_data_info, info, **kwargs): windows = list(self._iter_chat_windows(scene_data_info)) custom_tags = info.pop( "custom_tags", None - ) # msut pop here, avoid add to info, only used in sync fine mode + ) # must pop here, avoid add to info, only used in sync fine mode if mode == "fast": logger.debug("Using unified Fast Mode") @@ -470,7 +470,7 @@ def get_memory( def _read_memory( self, messages: list[MessagesType], type: str, info: dict[str, Any], mode: str = "fine" - ): + ) -> list[list[TextualMemoryItem]]: """ 1. raw file: [