From bbe6119f56396d2e5a423d6952cf598e8a6900f4 Mon Sep 17 00:00:00 2001 From: "glin1993@outlook.com" <> Date: Sun, 7 Dec 2025 18:20:11 +0800 Subject: [PATCH 1/3] Fallback source_doc_id to file_ids in KB logs --- src/memos/mem_scheduler/general_scheduler.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/memos/mem_scheduler/general_scheduler.py b/src/memos/mem_scheduler/general_scheduler.py index dc64f5a45..691883cc0 100644 --- a/src/memos/mem_scheduler/general_scheduler.py +++ b/src/memos/mem_scheduler/general_scheduler.py @@ -516,8 +516,22 @@ def send_add_log_messages_to_cloud_env( """ kb_log_content: list[dict] = [] info = msg.info or {} + + def _get_source_doc_id_from_metadata(metadata): + """Prefer explicit source_doc_id; fallback to first file_id if available.""" + if metadata is None: + return None + sid = getattr(metadata, "source_doc_id", None) + if sid: + return sid + file_ids = getattr(metadata, "file_ids", None) + if isinstance(file_ids, list) and file_ids: + return file_ids[0] + return None + # Process added items for item in prepared_add_items: + source_doc_id = _get_source_doc_id_from_metadata(item.metadata) kb_log_content.append( { "log_source": "KNOWLEDGE_BASE_LOG", @@ -526,13 +540,14 @@ def send_add_log_messages_to_cloud_env( "memory_id": item.id, "content": item.memory, "original_content": None, - "source_doc_id": getattr(item.metadata, "source_doc_id", None), + "source_doc_id": source_doc_id, } ) # Process updated items for item_data in prepared_update_items_with_original: item = item_data["new_item"] + source_doc_id = _get_source_doc_id_from_metadata(item.metadata) kb_log_content.append( { "log_source": "KNOWLEDGE_BASE_LOG", @@ -541,7 +556,7 @@ def send_add_log_messages_to_cloud_env( "memory_id": item.id, "content": item.memory, "original_content": item_data.get("original_content"), - "source_doc_id": getattr(item.metadata, "source_doc_id", None), + "source_doc_id": source_doc_id, } ) From 24b512dfb5bbd3c741e6efdc9497c10c1a635548 Mon Sep 17 00:00:00 2001 From: "glin1993@outlook.com" <> Date: Sun, 7 Dec 2025 18:26:37 +0800 Subject: [PATCH 2/3] Refactor(scheduler): Use file_ids directly for source_doc_id in KB logs --- src/memos/mem_scheduler/general_scheduler.py | 22 +++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/memos/mem_scheduler/general_scheduler.py b/src/memos/mem_scheduler/general_scheduler.py index 691883cc0..25de6c2ea 100644 --- a/src/memos/mem_scheduler/general_scheduler.py +++ b/src/memos/mem_scheduler/general_scheduler.py @@ -517,21 +517,10 @@ def send_add_log_messages_to_cloud_env( kb_log_content: list[dict] = [] info = msg.info or {} - def _get_source_doc_id_from_metadata(metadata): - """Prefer explicit source_doc_id; fallback to first file_id if available.""" - if metadata is None: - return None - sid = getattr(metadata, "source_doc_id", None) - if sid: - return sid - file_ids = getattr(metadata, "file_ids", None) - if isinstance(file_ids, list) and file_ids: - return file_ids[0] - return None - # Process added items for item in prepared_add_items: - source_doc_id = _get_source_doc_id_from_metadata(item.metadata) + file_ids = getattr(item.metadata, "file_ids", None) + source_doc_id = file_ids[0] if isinstance(file_ids, list) and file_ids else None kb_log_content.append( { "log_source": "KNOWLEDGE_BASE_LOG", @@ -547,7 +536,8 @@ def _get_source_doc_id_from_metadata(metadata): # Process updated items for item_data in prepared_update_items_with_original: item = item_data["new_item"] - source_doc_id = _get_source_doc_id_from_metadata(item.metadata) + file_ids = getattr(item.metadata, "file_ids", None) + source_doc_id = file_ids[0] if isinstance(file_ids, list) and file_ids else None kb_log_content.append( { "log_source": "KNOWLEDGE_BASE_LOG", @@ -903,6 +893,8 @@ def _process_memories_with_reader( # New: Knowledge Base Logging (Cloud Service) kb_log_content = [] for item in flattened_memories: + file_ids = getattr(item.metadata, "file_ids", None) + source_doc_id = file_ids[0] if isinstance(file_ids, list) and file_ids else None kb_log_content.append( { "log_source": "KNOWLEDGE_BASE_LOG", @@ -913,7 +905,7 @@ def _process_memories_with_reader( "memory_id": item.id, "content": item.memory, "original_content": None, - "source_doc_id": getattr(item.metadata, "source_doc_id", None), + "source_doc_id": source_doc_id, } ) if kb_log_content: From d6f6100015f6d550f249591349fc8c7c73d66f93 Mon Sep 17 00:00:00 2001 From: "glin1993@outlook.com" <> Date: Sun, 7 Dec 2025 18:32:07 +0800 Subject: [PATCH 3/3] Refactor(scheduler): Safely access file_ids for KB log source_doc_id --- src/memos/mem_scheduler/general_scheduler.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/memos/mem_scheduler/general_scheduler.py b/src/memos/mem_scheduler/general_scheduler.py index 25de6c2ea..8f3eccecf 100644 --- a/src/memos/mem_scheduler/general_scheduler.py +++ b/src/memos/mem_scheduler/general_scheduler.py @@ -519,7 +519,8 @@ def send_add_log_messages_to_cloud_env( # Process added items for item in prepared_add_items: - file_ids = getattr(item.metadata, "file_ids", None) + metadata = getattr(item, "metadata", None) + file_ids = getattr(metadata, "file_ids", None) if metadata else None source_doc_id = file_ids[0] if isinstance(file_ids, list) and file_ids else None kb_log_content.append( { @@ -536,7 +537,8 @@ def send_add_log_messages_to_cloud_env( # Process updated items for item_data in prepared_update_items_with_original: item = item_data["new_item"] - file_ids = getattr(item.metadata, "file_ids", None) + metadata = getattr(item, "metadata", None) + file_ids = getattr(metadata, "file_ids", None) if metadata else None source_doc_id = file_ids[0] if isinstance(file_ids, list) and file_ids else None kb_log_content.append( { @@ -893,8 +895,11 @@ def _process_memories_with_reader( # New: Knowledge Base Logging (Cloud Service) kb_log_content = [] for item in flattened_memories: - file_ids = getattr(item.metadata, "file_ids", None) - source_doc_id = file_ids[0] if isinstance(file_ids, list) and file_ids else None + metadata = getattr(item, "metadata", None) + file_ids = getattr(metadata, "file_ids", None) if metadata else None + source_doc_id = ( + file_ids[0] if isinstance(file_ids, list) and file_ids else None + ) kb_log_content.append( { "log_source": "KNOWLEDGE_BASE_LOG",