diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index 10bac319e..48be9b72c 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -8,8 +8,9 @@ from memos.configs.mem_reader import MultiModalStructMemReaderConfig from memos.context.context import ContextThreadPoolExecutor from memos.mem_reader.read_multi_modal import MultiModalParser, detect_lang +from memos.mem_reader.read_multi_modal.base import _derive_key from memos.mem_reader.simple_struct import PROMPT_DICT, SimpleStructMemReader -from memos.memories.textual.item import TextualMemoryItem +from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata from memos.templates.tool_mem_prompts import TOOL_TRAJECTORY_PROMPT_EN, TOOL_TRAJECTORY_PROMPT_ZH from memos.types import MessagesType from memos.utils import timed @@ -184,6 +185,33 @@ def _concat_multi_modal_memories( if window: windows.append(window) + # Batch compute embeddings for all windows + if windows: + # Collect all valid windows that need embedding + valid_windows = [w for w in windows if w and w.memory] + + if valid_windows: + # Collect all texts that need embedding + texts_to_embed = [w.memory for w in valid_windows] + + # Batch compute all embeddings at once + try: + embeddings = self.embedder.embed(texts_to_embed) + # Fill embeddings back into memory items + for window, embedding in zip(valid_windows, embeddings, strict=True): + window.metadata.embedding = embedding + except Exception as e: + logger.error(f"[MultiModalStruct] Error batch computing embeddings: {e}") + # Fallback: compute embeddings individually + for window in valid_windows: + if window.memory: + try: + window.metadata.embedding = self.embedder.embed([window.memory])[0] + except Exception as e2: + logger.error( + f"[MultiModalStruct] Error computing embedding for item: {e2}" + ) + return windows def _build_window_from_items( @@ -247,17 +275,35 @@ def _build_window_from_items( # If no text content, return None return None - # Create aggregated memory item (similar to _build_fast_node in simple_struct) + # Create aggregated memory item without embedding (will be computed in batch later) extra_kwargs: dict[str, Any] = {} if aggregated_file_ids: extra_kwargs["file_ids"] = aggregated_file_ids - aggregated_item = self._make_memory_item( - value=merged_text, - info=info, - memory_type=memory_type, - tags=["mode:fast"], - sources=all_sources, - **extra_kwargs, + + # Extract info fields + info_ = info.copy() + user_id = info_.pop("user_id", "") + session_id = info_.pop("session_id", "") + + # Create memory item without embedding (set to None, will be filled in batch) + aggregated_item = TextualMemoryItem( + memory=merged_text, + metadata=TreeNodeTextualMemoryMetadata( + user_id=user_id, + session_id=session_id, + memory_type=memory_type, + status="activated", + tags=["mode:fast"], + key=_derive_key(merged_text), + embedding=None, # Will be computed in batch + usage=[], + sources=all_sources, + background="", + confidence=0.99, + type="fact", + info=info_, + **extra_kwargs, + ), ) return aggregated_item @@ -282,22 +328,23 @@ def _get_llm_response( Returns: LLM response dictionary """ - # Try to extract actual text content from sources for better language detection - text_for_lang_detection = mem_str + # Determine language: prioritize lang from sources (set in fast mode), + # fallback to detecting from mem_str if sources don't have lang + lang = None + + # First, try to get lang from sources (fast mode already set this) if sources: - source_texts = [] for source in sources: - if hasattr(source, "content") and source.content: - source_texts.append(source.content) - elif isinstance(source, dict) and source.get("content"): - source_texts.append(source.get("content")) - - # If we have text content from sources, use it for language detection - if source_texts: - text_for_lang_detection = " ".join(source_texts) - - # Use the extracted text for language detection - lang = detect_lang(text_for_lang_detection) + if hasattr(source, "lang") and source.lang: + lang = source.lang + break + elif isinstance(source, dict) and source.get("lang"): + lang = source.get("lang") + break + + # Fallback: detect language from mem_str if no lang from sources + if lang is None: + lang = detect_lang(mem_str) # Select prompt template based on prompt_type if prompt_type == "doc": @@ -574,8 +621,13 @@ def _process_multi_modal_data( for fast_item in fast_memory_items: sources = fast_item.metadata.sources for source in sources: + lang = getattr(source, "lang", "en") items = self.multi_modal_parser.process_transfer( - source, context_items=[fast_item], custom_tags=custom_tags, info=info + source, + context_items=[fast_item], + custom_tags=custom_tags, + info=info, + lang=lang, ) fine_memory_items.extend(items) return fine_memory_items @@ -616,8 +668,9 @@ def _process_transfer_multi_modal_data( # Part B: get fine multimodal items for source in sources: + lang = getattr(source, "lang", "en") items = self.multi_modal_parser.process_transfer( - source, context_items=[raw_node], info=info, custom_tags=custom_tags + source, context_items=[raw_node], info=info, custom_tags=custom_tags, lang=lang ) fine_memory_items.extend(items) return fine_memory_items diff --git a/src/memos/mem_reader/read_multi_modal/assistant_parser.py b/src/memos/mem_reader/read_multi_modal/assistant_parser.py index 6ab74cbbb..3519216d2 100644 --- a/src/memos/mem_reader/read_multi_modal/assistant_parser.py +++ b/src/memos/mem_reader/read_multi_modal/assistant_parser.py @@ -14,7 +14,8 @@ ) from memos.types.openai_chat_completion_types import ChatCompletionAssistantMessageParam -from .base import BaseMessageParser, _derive_key, _extract_text_from_content +from .base import BaseMessageParser, _add_lang_to_source, _derive_key, _extract_text_from_content +from .utils import detect_lang logger = get_logger(__name__) @@ -68,71 +69,90 @@ def create_source( sources = [] if isinstance(raw_content, list): - # Multimodal: create one SourceMessage per part + # Multimodal: first collect all text content to detect overall language + text_contents = [] + for part in raw_content: + if isinstance(part, dict): + part_type = part.get("type", "") + if part_type == "text": + text_contents.append(part.get("text", "")) + elif part_type == "refusal": + text_contents.append(part.get("refusal", "")) + + # Detect overall language from all text content + overall_lang = "en" # default + if text_contents: + combined_text = " ".join(text_contents) + overall_lang = detect_lang(combined_text) # Note: Assistant messages only support "text" and "refusal" part types for part in raw_content: if isinstance(part, dict): part_type = part.get("type", "") if part_type == "text": - sources.append( - SourceMessage( - type="chat", - role=role, - chat_time=chat_time, - message_id=message_id, - content=part.get("text", ""), - ) + text_content = part.get("text", "") + source = SourceMessage( + type="chat", + role=role, + chat_time=chat_time, + message_id=message_id, + content=text_content, ) + source.lang = overall_lang + sources.append(source) elif part_type == "refusal": - sources.append( - SourceMessage( - type="refusal", - role=role, - chat_time=chat_time, - message_id=message_id, - content=part.get("refusal", ""), - ) + refusal_content = part.get("refusal", "") + source = SourceMessage( + type="refusal", + role=role, + chat_time=chat_time, + message_id=message_id, + content=refusal_content, ) + source.lang = overall_lang + sources.append(source) else: # Unknown part type - log warning but still create SourceMessage logger.warning( f"[AssistantParser] Unknown part type `{part_type}`. " f"Expected `text` or `refusal`. Creating SourceMessage with placeholder content." ) - sources.append( - SourceMessage( - type="chat", - role=role, - chat_time=chat_time, - message_id=message_id, - content=f"[{part_type}]", - ) + source = SourceMessage( + type="chat", + role=role, + chat_time=chat_time, + message_id=message_id, + content=f"[{part_type}]", ) + source.lang = overall_lang + sources.append(source) elif raw_content is not None: # Simple message: single SourceMessage content = _extract_text_from_content(raw_content) if content: - sources.append( - SourceMessage( - type="chat", - role=role, - chat_time=chat_time, - message_id=message_id, - content=content, - ) - ) - - # Handle top-level refusal field - if refusal: - sources.append( - SourceMessage( - type="refusal", + source = SourceMessage( + type="chat", role=role, chat_time=chat_time, message_id=message_id, - content=refusal, + content=content, ) + sources.append(_add_lang_to_source(source, content)) + + # Handle top-level refusal field + if refusal: + source = SourceMessage( + type="refusal", + role=role, + chat_time=chat_time, + message_id=message_id, + content=refusal, ) + # Use overall_lang if we have sources from multimodal content, otherwise detect + if sources and hasattr(sources[0], "lang"): + source.lang = sources[0].lang + else: + source = _add_lang_to_source(source, refusal) + sources.append(source) # Handle tool_calls (when content is None or empty) if tool_calls: @@ -141,34 +161,42 @@ def create_source( if isinstance(tool_calls, list | dict) else str(tool_calls) ) - sources.append( - SourceMessage( - type="tool_calls", - role=role, - chat_time=chat_time, - message_id=message_id, - content=f"[tool_calls]: {tool_calls_str}", - ) + source = SourceMessage( + type="tool_calls", + role=role, + chat_time=chat_time, + message_id=message_id, + content=f"[tool_calls]: {tool_calls_str}", ) + # Use overall_lang if we have sources from multimodal content, otherwise default + if sources and hasattr(sources[0], "lang"): + source.lang = sources[0].lang + else: + source = _add_lang_to_source(source, None) + sources.append(source) # Handle audio (optional) if audio: audio_id = audio.get("id", "") if isinstance(audio, dict) else str(audio) - sources.append( - SourceMessage( - type="audio", - role=role, - chat_time=chat_time, - message_id=message_id, - content=f"[audio]: {audio_id}", - ) + source = SourceMessage( + type="audio", + role=role, + chat_time=chat_time, + message_id=message_id, + content=f"[audio]: {audio_id}", ) - - return ( - sources - if len(sources) > 1 - else (sources[0] if sources else SourceMessage(type="chat", role=role)) - ) + # Use overall_lang if we have sources from multimodal content, otherwise default + if sources and hasattr(sources[0], "lang"): + source.lang = sources[0].lang + else: + source = _add_lang_to_source(source, None) + sources.append(source) + + if not sources: + return _add_lang_to_source(SourceMessage(type="chat", role=role), None) + if len(sources) > 1: + return sources + return sources[0] def rebuild_from_source( self, diff --git a/src/memos/mem_reader/read_multi_modal/base.py b/src/memos/mem_reader/read_multi_modal/base.py index a3992a1f1..7664f4d7f 100644 --- a/src/memos/mem_reader/read_multi_modal/base.py +++ b/src/memos/mem_reader/read_multi_modal/base.py @@ -16,7 +16,7 @@ TreeNodeTextualMemoryMetadata, ) -from .utils import get_text_splitter +from .utils import detect_lang, get_text_splitter logger = log.get_logger(__name__) @@ -57,6 +57,25 @@ def _extract_text_from_content(content: Any) -> str: return str(content) +def _add_lang_to_source(source: SourceMessage, content: str | None = None) -> SourceMessage: + """ + Add lang field to SourceMessage based on content. + + Args: + source: SourceMessage to add lang field to + content: Optional content text for language detection. + If None, uses source.content + + Returns: + SourceMessage with lang field added + """ + if not hasattr(source, "lang") or getattr(source, "lang", None) is None: + text_for_detection = content or getattr(source, "content", None) or "" + lang = detect_lang(text_for_detection) + source.lang = lang + return source + + class BaseMessageParser(ABC): """Base interface for message type parsers.""" diff --git a/src/memos/mem_reader/read_multi_modal/image_parser.py b/src/memos/mem_reader/read_multi_modal/image_parser.py index 741295089..b8cc9ae84 100644 --- a/src/memos/mem_reader/read_multi_modal/image_parser.py +++ b/src/memos/mem_reader/read_multi_modal/image_parser.py @@ -133,13 +133,18 @@ def parse_fine( # Get context items if available context_items = kwargs.get("context_items") - # Determine language from context if available - lang = "en" - if context_items: + # Determine language: prioritize lang from source (passed via kwargs), + # fallback to detecting from context_items if lang not provided + lang = kwargs.get("lang") + if lang is None and context_items: for item in context_items: if hasattr(item, "memory") and item.memory: lang = detect_lang(item.memory) break + if not lang: + lang = "en" + if not hasattr(source, "lang") or source.lang is None: + source.lang = lang # Select prompt based on language image_analysis_prompt = ( diff --git a/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py b/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py index a135d7fd2..2c8140419 100644 --- a/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py +++ b/src/memos/mem_reader/read_multi_modal/multi_modal_parser.py @@ -217,9 +217,6 @@ def process_transfer( "session_id": first_item.metadata.session_id, } - # Extract custom_tags from kwargs (same as simple_struct.py) - custom_tags = kwargs.get("custom_tags") - # Try to determine parser from source.type parser = None if source.type == "file": @@ -243,9 +240,8 @@ def process_transfer( logger.error(f"[MultiModalParser] Error rebuilding message from source: {e}") return [] - # Parse in fine mode (pass custom_tags to parse_fine) + # Parse in fine mode (pass context_items and custom_tags to parse_fine) try: - context_items = kwargs.pop("custom_tags", None) custom_tags = kwargs.pop("custom_tags", None) info = kwargs.pop("info", None) return parser.parse_fine( diff --git a/src/memos/mem_reader/read_multi_modal/string_parser.py b/src/memos/mem_reader/read_multi_modal/string_parser.py index b5a58d68c..b6e18fda3 100644 --- a/src/memos/mem_reader/read_multi_modal/string_parser.py +++ b/src/memos/mem_reader/read_multi_modal/string_parser.py @@ -14,7 +14,7 @@ TreeNodeTextualMemoryMetadata, ) -from .base import BaseMessageParser, _derive_key +from .base import BaseMessageParser, _add_lang_to_source, _derive_key logger = get_logger(__name__) @@ -44,10 +44,11 @@ def create_source( info: dict[str, Any], ) -> SourceMessage: """Create SourceMessage from string message.""" - return SourceMessage( + source = SourceMessage( type="doc", content=str(message), ) + return _add_lang_to_source(source, str(message)) def rebuild_from_source( self, diff --git a/src/memos/mem_reader/read_multi_modal/system_parser.py b/src/memos/mem_reader/read_multi_modal/system_parser.py index 2e856365a..deb2a9832 100644 --- a/src/memos/mem_reader/read_multi_modal/system_parser.py +++ b/src/memos/mem_reader/read_multi_modal/system_parser.py @@ -17,7 +17,7 @@ ) from memos.types.openai_chat_completion_types import ChatCompletionSystemMessageParam -from .base import BaseMessageParser +from .base import BaseMessageParser, _add_lang_to_source logger = get_logger(__name__) @@ -55,7 +55,7 @@ def create_source( tool_schema_match = re.search(r"(.*?)", content, re.DOTALL) tool_schema_content = tool_schema_match.group(1) if tool_schema_match else "" - return SourceMessage( + source = SourceMessage( type="chat", role="system", chat_time=message.get("chat_time", None), @@ -63,6 +63,7 @@ def create_source( content=content_wo_tool_schema, tool_schema=tool_schema_content, ) + return _add_lang_to_source(source, content_wo_tool_schema) def rebuild_from_source( self, @@ -157,13 +158,13 @@ def parse_fine( return [ TextualMemoryItem( id=str(uuid.uuid4()), - memory=json.dumps(schema), + memory=json.dumps(schema, ensure_ascii=False), metadata=TreeNodeTextualMemoryMetadata( user_id=user_id, session_id=session_id, memory_type="ToolSchemaMemory", status="activated", - embedding=self.embedder.embed([json.dumps(schema)])[0], + embedding=self.embedder.embed([json.dumps(schema, ensure_ascii=False)])[0], info=info_, ), ) diff --git a/src/memos/mem_reader/read_multi_modal/text_content_parser.py b/src/memos/mem_reader/read_multi_modal/text_content_parser.py index febc166ec..549f74852 100644 --- a/src/memos/mem_reader/read_multi_modal/text_content_parser.py +++ b/src/memos/mem_reader/read_multi_modal/text_content_parser.py @@ -16,7 +16,7 @@ ) from memos.types.openai_chat_completion_types import ChatCompletionContentPartTextParam -from .base import BaseMessageParser, _derive_key +from .base import BaseMessageParser, _add_lang_to_source, _derive_key logger = get_logger(__name__) @@ -48,11 +48,13 @@ def create_source( """Create SourceMessage from text content part.""" if isinstance(message, dict): text = message.get("text", "") - return SourceMessage( + source = SourceMessage( type="text", content=text, ) - return SourceMessage(type="text", content=str(message)) + return _add_lang_to_source(source, text) + source = SourceMessage(type="text", content=str(message)) + return _add_lang_to_source(source, str(message)) def rebuild_from_source( self, diff --git a/src/memos/mem_reader/read_multi_modal/tool_parser.py b/src/memos/mem_reader/read_multi_modal/tool_parser.py index 705896489..caf5ffaa6 100644 --- a/src/memos/mem_reader/read_multi_modal/tool_parser.py +++ b/src/memos/mem_reader/read_multi_modal/tool_parser.py @@ -14,7 +14,8 @@ ) from memos.types.openai_chat_completion_types import ChatCompletionToolMessageParam -from .base import BaseMessageParser +from .base import BaseMessageParser, _add_lang_to_source +from .utils import detect_lang logger = get_logger(__name__) @@ -52,78 +53,92 @@ def create_source( sources = [] if isinstance(raw_content, list): - # Multimodal: create one SourceMessage per part + text_contents = [] for part in raw_content: if isinstance(part, dict): part_type = part.get("type", "") if part_type == "text": - sources.append( - SourceMessage( - type="text", - role=role, - chat_time=chat_time, - message_id=message_id, - content=part.get("text", ""), - tool_call_id=tool_call_id, - ) + text_contents.append(part.get("text", "")) + + # Detect overall language from all text content + overall_lang = "en" + if text_contents: + combined_text = " ".join(text_contents) + overall_lang = detect_lang(combined_text) + + # Create one SourceMessage per part, all with the same detected language + for part in raw_content: + if isinstance(part, dict): + part_type = part.get("type", "") + if part_type == "text": + text_content = part.get("text", "") + source = SourceMessage( + type="text", + role=role, + chat_time=chat_time, + message_id=message_id, + content=text_content, + tool_call_id=tool_call_id, ) + source.lang = overall_lang + sources.append(source) elif part_type == "file": file_info = part.get("file", {}) - sources.append( - SourceMessage( - type="file", - role=role, - chat_time=chat_time, - message_id=message_id, - content=file_info.get("file_data", ""), - filename=file_info.get("filename", ""), - file_id=file_info.get("file_id", ""), - tool_call_id=tool_call_id, - file_info=file_info, - ) + file_content = file_info.get("file_data", "") + source = SourceMessage( + type="file", + role=role, + chat_time=chat_time, + message_id=message_id, + content=file_content, + filename=file_info.get("filename", ""), + file_id=file_info.get("file_id", ""), + tool_call_id=tool_call_id, + file_info=file_info, ) + source.lang = overall_lang + sources.append(source) elif part_type == "image_url": file_info = part.get("image_url", {}) - sources.append( - SourceMessage( - type="image_url", - role=role, - chat_time=chat_time, - message_id=message_id, - content=file_info.get("url", ""), - detail=file_info.get("detail", "auto"), - tool_call_id=tool_call_id, - ) + source = SourceMessage( + type="image_url", + role=role, + chat_time=chat_time, + message_id=message_id, + content=file_info.get("url", ""), + detail=file_info.get("detail", "auto"), + tool_call_id=tool_call_id, ) + source.lang = overall_lang + sources.append(source) elif part_type == "input_audio": file_info = part.get("input_audio", {}) - sources.append( - SourceMessage( - type="input_audio", - role=role, - chat_time=chat_time, - message_id=message_id, - content=file_info.get("data", ""), - format=file_info.get("format", "wav"), - tool_call_id=tool_call_id, - ) + source = SourceMessage( + type="input_audio", + role=role, + chat_time=chat_time, + message_id=message_id, + content=file_info.get("data", ""), + format=file_info.get("format", "wav"), + tool_call_id=tool_call_id, ) + source.lang = overall_lang + sources.append(source) else: logger.warning(f"[ToolParser] Unsupported part type: {part_type}") continue else: # Simple string content message: single SourceMessage if raw_content: - sources.append( - SourceMessage( - type="chat", - role=role, - chat_time=chat_time, - message_id=message_id, - content=raw_content, - tool_call_id=tool_call_id, - ) + source = SourceMessage( + type="chat", + role=role, + chat_time=chat_time, + message_id=message_id, + content=raw_content, + tool_call_id=tool_call_id, ) + sources.append(_add_lang_to_source(source, raw_content)) return sources @@ -150,7 +165,9 @@ def parse_fast( if chat_time: parts.append(f"[{chat_time}]: ") prefix = "".join(parts) - content = json.dumps(content) if isinstance(content, list | dict) else content + content = ( + json.dumps(content, ensure_ascii=False) if isinstance(content, list | dict) else content + ) line = f"{prefix}{content}\n" if not line: return [] diff --git a/src/memos/mem_reader/read_multi_modal/user_parser.py b/src/memos/mem_reader/read_multi_modal/user_parser.py index e62d9369d..1c9afab65 100644 --- a/src/memos/mem_reader/read_multi_modal/user_parser.py +++ b/src/memos/mem_reader/read_multi_modal/user_parser.py @@ -12,7 +12,8 @@ ) from memos.types.openai_chat_completion_types import ChatCompletionUserMessageParam -from .base import BaseMessageParser, _derive_key, _extract_text_from_content +from .base import BaseMessageParser, _add_lang_to_source, _derive_key, _extract_text_from_content +from .utils import detect_lang logger = get_logger(__name__) @@ -56,74 +57,87 @@ def create_source( sources = [] if isinstance(raw_content, list): - # Multimodal: create one SourceMessage per part + # Multimodal: first collect all text content to detect overall language + text_contents = [] for part in raw_content: if isinstance(part, dict): part_type = part.get("type", "") if part_type == "text": - sources.append( - SourceMessage( - type="chat", - role=role, - chat_time=chat_time, - message_id=message_id, - content=part.get("text", ""), - ) + text_contents.append(part.get("text", "")) + + # Detect overall language from all text content + overall_lang = "en" + if text_contents: + combined_text = " ".join(text_contents) + overall_lang = detect_lang(combined_text) + + # Create one SourceMessage per part, all with the same detected language + for part in raw_content: + if isinstance(part, dict): + part_type = part.get("type", "") + if part_type == "text": + source = SourceMessage( + type="chat", + role=role, + chat_time=chat_time, + message_id=message_id, + content=part.get("text", ""), ) + source.lang = overall_lang + sources.append(source) elif part_type == "file": file_info = part.get("file", {}) - sources.append( - SourceMessage( - type="file", - role=role, - chat_time=chat_time, - message_id=message_id, - doc_path=file_info.get("filename") or file_info.get("file_id", ""), - content=file_info.get("file_data", ""), - file_info=file_info, - ) + source = SourceMessage( + type="file", + role=role, + chat_time=chat_time, + message_id=message_id, + doc_path=file_info.get("filename") or file_info.get("file_id", ""), + content=file_info.get("file_data", ""), + file_info=file_info, ) + source.lang = overall_lang + sources.append(source) elif part_type == "image_url": image_info = part.get("image_url", {}) - sources.append( - SourceMessage( - type="image", - role=role, - chat_time=chat_time, - message_id=message_id, - image_path=image_info.get("url"), - ) + source = SourceMessage( + type="image", + role=role, + chat_time=chat_time, + message_id=message_id, + image_path=image_info.get("url"), ) + source.lang = overall_lang + sources.append(source) else: # input_audio, etc. - sources.append( - SourceMessage( - type=part_type, - role=role, - chat_time=chat_time, - message_id=message_id, - content=f"[{part_type}]", - ) + source = SourceMessage( + type=part_type, + role=role, + chat_time=chat_time, + message_id=message_id, + content=f"[{part_type}]", ) + source.lang = overall_lang + sources.append(source) else: # Simple message: single SourceMessage content = _extract_text_from_content(raw_content) if content: - sources.append( - SourceMessage( - type="chat", - role=role, - chat_time=chat_time, - message_id=message_id, - content=content, - ) + source = SourceMessage( + type="chat", + role=role, + chat_time=chat_time, + message_id=message_id, + content=content, ) + sources.append(_add_lang_to_source(source, content)) - return ( - sources - if len(sources) > 1 - else (sources[0] if sources else SourceMessage(type="chat", role=role)) - ) + if not sources: + return _add_lang_to_source(SourceMessage(type="chat", role=role), None) + if len(sources) > 1: + return sources + return sources[0] def rebuild_from_source( self, @@ -142,8 +156,6 @@ def parse_fast( return [] role = message.get("role", "") - # TODO: if file/url/audio etc in content, how to transfer them into a - # readable string? content = message.get("content", "") chat_time = message.get("chat_time", None) if role != "user":