src/google/adk/models/lite_llm.py: 16 changes (8 additions, 8 deletions)
@@ -1352,13 +1352,7 @@ def _model_response_to_generate_content_response(
   if finish_reason:
     # If LiteLLM already provides a FinishReason enum (e.g., for Gemini), use
     # it directly. Otherwise, map the finish_reason string to the enum.
-    if isinstance(finish_reason, types.FinishReason):
-      llm_response.finish_reason = finish_reason
-    else:
-      finish_reason_str = str(finish_reason).lower()
-      llm_response.finish_reason = _FINISH_REASON_MAPPING.get(
-          finish_reason_str, types.FinishReason.OTHER
-      )
+    llm_response.finish_reason = _map_finish_reason(finish_reason)
   if response.get("usage", None):
     llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata(
         prompt_token_count=response["usage"].get("prompt_tokens", 0),
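The replacement line calls a `_map_finish_reason` helper whose body is not part of this hunk. As a rough sketch only, assuming the helper simply factors out the inline logic removed above (the actual function in `lite_llm.py` and the real contents of `_FINISH_REASON_MAPPING` may differ), it could look like this:

```python
from google.genai import types

# Illustrative entries only; the real _FINISH_REASON_MAPPING in lite_llm.py
# is not shown in this diff and may contain different keys and values.
_FINISH_REASON_MAPPING = {
    "stop": types.FinishReason.STOP,
    "length": types.FinishReason.MAX_TOKENS,
    "content_filter": types.FinishReason.SAFETY,
}


def _map_finish_reason(finish_reason) -> types.FinishReason:
  """Maps a LiteLLM finish_reason (enum or string) to a FinishReason enum."""
  if isinstance(finish_reason, types.FinishReason):
    # LiteLLM already returned the Gemini enum; pass it through unchanged.
    return finish_reason
  # Otherwise normalize the string form and fall back to OTHER for unknown values.
  return _FINISH_REASON_MAPPING.get(
      str(finish_reason).lower(), types.FinishReason.OTHER
  )
```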
@@ -1917,7 +1911,13 @@ async def generate_content_async(
                 _message_to_generate_content_response(
                     ChatCompletionAssistantMessage(
                         role="assistant",
-                        content=text,
+                        # FIX: Set content=None for tool-only messages to avoid duplication
+                        # and follow OpenAI/LiteLLM conventions. Planning/reasoning text is
+                        # already streamed (lines 1288-1296) and preserved in thought_parts
+                        # (line 1357). Including it again in content causes duplication and
+                        # violates API specifications for tool-call messages.
+                        # See: https://github.com/google/adk-python/issues/3697
+                        content=None,
                         tool_calls=tool_calls,
                     ),
                     model_version=part.model,
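To make the convention in the FIX comment concrete, here is a minimal, self-contained sketch (not taken from the PR) of the before/after shape of a tool-call assistant message. It assumes the `ChatCompletionAssistantMessage` and `ChatCompletionAssistantToolCall` typed dicts that `lite_llm.py` imports from `litellm`; the tool name, arguments, and `text` value are made-up placeholders for what `generate_content_async` actually builds.

```python
from litellm import ChatCompletionAssistantMessage
from litellm import ChatCompletionAssistantToolCall

# Placeholder values standing in for what generate_content_async builds.
text = "Let me check the weather for you."  # planning/reasoning text
tool_calls = [
    ChatCompletionAssistantToolCall(
        id="call_0",
        type="function",
        function={"name": "get_weather", "arguments": '{"city": "Paris"}'},
    )
]

# Before this change: the reasoning text was duplicated into the tool-call
# message even though it is already streamed and preserved in thought_parts.
before = ChatCompletionAssistantMessage(
    role="assistant", content=text, tool_calls=tool_calls
)

# After this change: a tool-only message carries no content, matching the
# OpenAI/LiteLLM convention for assistant messages whose purpose is the
# tool calls themselves.
after = ChatCompletionAssistantMessage(
    role="assistant", content=None, tool_calls=tool_calls
)
```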