diff --git a/src/memos/api/handlers/chat_handler.py b/src/memos/api/handlers/chat_handler.py index 9e60c2885..c101eece4 100644 --- a/src/memos/api/handlers/chat_handler.py +++ b/src/memos/api/handlers/chat_handler.py @@ -159,9 +159,11 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, An # Step 3: Generate complete response from LLM if chat_req.model_name_or_path and chat_req.model_name_or_path not in self.chat_llms: - return { - "message": f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}" - } + raise HTTPException( + status_code=400, + detail=f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}", + ) + model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys())) response = self.chat_llms[model].generate(current_messages, model_name_or_path=model) @@ -281,9 +283,11 @@ def generate_chat_response() -> Generator[str, None, None]: chat_req.model_name_or_path and chat_req.model_name_or_path not in self.chat_llms ): - return { - "message": f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}" - } + raise HTTPException( + status_code=400, + detail=f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}", + ) + model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys())) response_stream = self.chat_llms[model].generate_stream( current_messages, model_name_or_path=model @@ -517,9 +521,11 @@ def generate_chat_response() -> Generator[str, None, None]: chat_req.model_name_or_path and chat_req.model_name_or_path not in self.chat_llms ): - return { - "message": f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}" - } + raise HTTPException( + status_code=400, + detail=f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}", + ) + model = chat_req.model_name_or_path or 
next(iter(self.chat_llms.keys())) response_stream = self.chat_llms[model].generate_stream( current_messages, model_name_or_path=model diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index b1fb210c6..3e769e424 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -227,7 +227,8 @@ def _parse_task( query_embedding = None # fine mode will trigger initial embedding search - if mode == "fine_old": + # TODO: temporary "playground_search_goal_parser" kwarg also triggers the embedding search for the playground search goal parser; remove later + if mode == "fine_old" or kwargs.get("playground_search_goal_parser", False): logger.info("[SEARCH] Fine mode: embedding search") query_embedding = self.embedder.embed([query])[0] diff --git a/src/memos/templates/mos_prompts.py b/src/memos/templates/mos_prompts.py index 357a9f1bd..15f1a44b3 100644 --- a/src/memos/templates/mos_prompts.py +++ b/src/memos/templates/mos_prompts.py @@ -130,6 +130,8 @@ - Intelligently choose which memories (PersonalMemory[P] or OuterMemory[O]) are most relevant to the user's query - Only reference memories that are directly relevant to the user's question - Prioritize the most appropriate memory type based on the context and nature of the query +- Responses must not contain non-existent citations +- Explicit and implicit preferences can be referenced if relevant to the user's question, but must not be cited or source-attributed in responses - **Attribution-first selection:** Distinguish memory from user vs from assistant ** before composing. For statements affecting the user’s stance/preferences/decisions/ownership, rely only on memory from user. Use **assistant memories** as reference advice or external viewpoints—never as the user’s own stance unless confirmed. 
### Response Style @@ -137,6 +139,8 @@ - Seamlessly incorporate memory references when appropriate - Ensure the flow of conversation remains smooth despite memory citations - Balance factual accuracy with engaging dialogue +- Avoid meaningless blank lines +- Keep the reply language consistent with the user's query language ## Key Principles - Reference only relevant memories to avoid information overload