src/google/adk/models/lite_llm.py: 16 changes (8 additions, 8 deletions)
@@ -1352,13 +1352,7 @@ def _model_response_to_generate_content_response(
   if finish_reason:
     # If LiteLLM already provides a FinishReason enum (e.g., for Gemini), use
     # it directly. Otherwise, map the finish_reason string to the enum.
-    if isinstance(finish_reason, types.FinishReason):
-      llm_response.finish_reason = finish_reason
-    else:
-      finish_reason_str = str(finish_reason).lower()
-      llm_response.finish_reason = _FINISH_REASON_MAPPING.get(
-          finish_reason_str, types.FinishReason.OTHER
-      )
+    llm_response.finish_reason = _map_finish_reason(finish_reason)
   if response.get("usage", None):
     llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata(
         prompt_token_count=response["usage"].get("prompt_tokens", 0),
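The replacement line calls a `_map_finish_reason` helper whose body is not part of this hunk. As a rough sketch only, assuming the helper simply factors out the inline logic removed above (the actual function in `lite_llm.py` and the real contents of `_FINISH_REASON_MAPPING` may differ), it could look like this:

```python
from google.genai import types

# Illustrative entries only; the real _FINISH_REASON_MAPPING in lite_llm.py
# is not shown in this diff and may contain different keys and values.
_FINISH_REASON_MAPPING = {
    "stop": types.FinishReason.STOP,
    "length": types.FinishReason.MAX_TOKENS,
    "content_filter": types.FinishReason.SAFETY,
}


def _map_finish_reason(finish_reason) -> types.FinishReason:
  """Maps a LiteLLM finish_reason (enum or string) to a FinishReason enum."""
  if isinstance(finish_reason, types.FinishReason):
    # LiteLLM already returned the Gemini enum; pass it through unchanged.
    return finish_reason
  # Otherwise normalize the string form and fall back to OTHER for unknown values.
  return _FINISH_REASON_MAPPING.get(
      str(finish_reason).lower(), types.FinishReason.OTHER
  )
```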
@@ -1917,7 +1911,13 @@ async def generate_content_async(
                 _message_to_generate_content_response(
                     ChatCompletionAssistantMessage(
                         role="assistant",
-                        content=text,
+                        # FIX: Set content=None for tool-only messages to avoid duplication
+                        # and follow OpenAI/LiteLLM conventions. Planning/reasoning text is
+                        # already streamed (lines 1288-1296) and preserved in thought_parts
+                        # (line 1357). Including it again in content causes duplication and
+                        # violates API specifications for tool-call messages.
+                        # See: https://github.com/google/adk-python/issues/3697
+                        content=None,
                         tool_calls=tool_calls,
                     ),
                     model_version=part.model,
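To make the convention in the FIX comment concrete, here is a minimal, self-contained sketch (not taken from the PR) of the before/after shape of a tool-call assistant message. It assumes the `ChatCompletionAssistantMessage` and `ChatCompletionAssistantToolCall` typed dicts that `lite_llm.py` imports from `litellm`; the tool name, arguments, and `text` value are made-up placeholders for what `generate_content_async` actually builds.

```python
from litellm import ChatCompletionAssistantMessage
from litellm import ChatCompletionAssistantToolCall

# Placeholder values standing in for what generate_content_async builds.
text = "Let me check the weather for you."  # planning/reasoning text
tool_calls = [
    ChatCompletionAssistantToolCall(
        id="call_0",
        type="function",
        function={"name": "get_weather", "arguments": '{"city": "Paris"}'},
    )
]

# Before this change: the reasoning text was duplicated into the tool-call
# message even though it is already streamed and preserved in thought_parts.
before = ChatCompletionAssistantMessage(
    role="assistant", content=text, tool_calls=tool_calls
)

# After this change: a tool-only message carries no content, matching the
# OpenAI/LiteLLM convention for assistant messages whose purpose is the
# tool calls themselves.
after = ChatCompletionAssistantMessage(
    role="assistant", content=None, tool_calls=tool_calls
)
```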