diff --git a/src/memos/api/handlers/chat_handler.py b/src/memos/api/handlers/chat_handler.py
index 9e60c2885..c101eece4 100644
--- a/src/memos/api/handlers/chat_handler.py
+++ b/src/memos/api/handlers/chat_handler.py
@@ -159,9 +159,11 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, An
 
             # Step 3: Generate complete response from LLM
             if chat_req.model_name_or_path and chat_req.model_name_or_path not in self.chat_llms:
-                return {
-                    "message": f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}"
-                }
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}",
+                )
+
             model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))
             response = self.chat_llms[model].generate(current_messages, model_name_or_path=model)
 
@@ -281,9 +283,11 @@ def generate_chat_response() -> Generator[str, None, None]:
                         chat_req.model_name_or_path
                         and chat_req.model_name_or_path not in self.chat_llms
                     ):
-                        return {
-                            "message": f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}"
-                        }
+                        raise HTTPException(
+                            status_code=400,
+                            detail=f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}",
+                        )
+
                     model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))
                     response_stream = self.chat_llms[model].generate_stream(
                         current_messages, model_name_or_path=model
@@ -517,9 +521,11 @@ def generate_chat_response() -> Generator[str, None, None]:
                         chat_req.model_name_or_path
                         and chat_req.model_name_or_path not in self.chat_llms
                     ):
-                        return {
-                            "message": f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}"
-                        }
+                        raise HTTPException(
+                            status_code=400,
+                            detail=f"Model {chat_req.model_name_or_path} not suport, choose from {list(self.chat_llms.keys())}",
+                        )
+
                     model = chat_req.model_name_or_path or next(iter(self.chat_llms.keys()))
                     response_stream = self.chat_llms[model].generate_stream(
                         current_messages, model_name_or_path=model
diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
index b1fb210c6..3e769e424 100644
--- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
@@ -227,7 +227,8 @@ def _parse_task(
         query_embedding = None
 
         # fine mode will trigger initial embedding search
-        if mode == "fine_old":
+        # TODO: temporary "playground_search_goal_parser" kwarg also triggers the embedding search; remove later
+        if mode == "fine_old" or kwargs.get("playground_search_goal_parser", False):
             logger.info("[SEARCH] Fine mode: embedding search")
             query_embedding = self.embedder.embed([query])[0]
 
diff --git a/src/memos/templates/mos_prompts.py b/src/memos/templates/mos_prompts.py
index 357a9f1bd..15f1a44b3 100644
--- a/src/memos/templates/mos_prompts.py
+++ b/src/memos/templates/mos_prompts.py
@@ -130,6 +130,8 @@
 - Intelligently choose which memories (PersonalMemory[P] or OuterMemory[O]) are most relevant to the user's query
 - Only reference memories that are directly relevant to the user's question
 - Prioritize the most appropriate memory type based on the context and nature of the query
+- Responses must not contain non-existent citations
+- Explicit and implicit preferences can be referenced if relevant to the user's question, but must not be cited or source-attributed in responses
 - **Attribution-first selection:** Distinguish memory from user vs from assistant ** before composing. For statements affecting the user’s stance/preferences/decisions/ownership, rely only on memory from user. Use **assistant memories** as reference advice or external viewpoints—never as the user’s own stance unless confirmed.
 
 ### Response Style
@@ -137,6 +139,8 @@
 - Seamlessly incorporate memory references when appropriate
 - Ensure the flow of conversation remains smooth despite memory citations
 - Balance factual accuracy with engaging dialogue
+- Avoid meaningless blank lines
+- Keep the reply language consistent with the user's query language
 
 ## Key Principles
 - Reference only relevant memories to avoid information overload