diff --git a/src/memos/api/handlers/chat_handler.py b/src/memos/api/handlers/chat_handler.py index 2a97f1934..bcc3669b6 100644 --- a/src/memos/api/handlers/chat_handler.py +++ b/src/memos/api/handlers/chat_handler.py @@ -8,6 +8,7 @@ import asyncio import json import re +import time import traceback from collections.abc import Generator @@ -170,12 +171,18 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, An ) model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys())) + + self.logger.info(f"[Cloud Service Chat Complete Model]: {model}") + start = time.time() response = self.chat_llms[model].generate(current_messages, model_name_or_path=model) + end = time.time() + self.logger.info(f"[Cloud Service Chat Complete Time]: {end - start} seconds") # Step 4: start add after chat asynchronously if chat_req.add_message_on_answer: # Resolve writable cube IDs (for add) writable_cube_ids = chat_req.writable_cube_ids or [chat_req.user_id] + start = time.time() self._start_add_to_memory( user_id=chat_req.user_id, writable_cube_ids=writable_cube_ids, @@ -184,6 +191,8 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, An full_response=response, async_mode="async", ) + end = time.time() + self.logger.info(f"[Cloud Service Chat Add Time]: {end - start} seconds") match = re.search(r"([\s\S]*?)", response) reasoning_text = match.group(1) if match else None @@ -295,9 +304,14 @@ def generate_chat_response() -> Generator[str, None, None]: ) model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys())) + self.logger.info(f"[Cloud Service Chat Stream Model]: {model}") + + start = time.time() response_stream = self.chat_llms[model].generate_stream( current_messages, model_name_or_path=model ) + end = time.time() + self.logger.info(f"[Cloud Service Chat Stream Time]: {end - start} seconds") # Stream the response buffer = "" @@ -329,6 +343,7 @@ def generate_chat_response() -> Generator[str, None, None]: 
writable_cube_ids = chat_req.writable_cube_ids or ( [chat_req.mem_cube_id] if chat_req.mem_cube_id else [chat_req.user_id] ) + start = time.time() self._start_add_to_memory( user_id=chat_req.user_id, writable_cube_ids=writable_cube_ids, @@ -337,7 +352,10 @@ def generate_chat_response() -> Generator[str, None, None]: full_response=full_response, async_mode="async", ) - + end = time.time() + self.logger.info( + f"[Cloud Service Chat Stream Add Time]: {end - start} seconds" + ) except Exception as e: self.logger.error(f"Error in chat stream: {e}", exc_info=True) error_data = f"data: {json.dumps({'type': 'error', 'content': str(traceback.format_exc())})}\n\n"