Skip to content

Commit 59e6e04

Browse files
committed
fix: preserve Anthropic thinking blocks and signatures in LiteLLM round-trip
When using Claude models through LiteLLM, extended thinking blocks (with signatures) were lost after the first turn because:

1. _extract_reasoning_value() only read reasoning_content (a flattened string without signatures), ignoring thinking_blocks.
2. _content_to_message_param() set reasoning_content on the outgoing message, which LiteLLM's anthropic_messages_pt() template silently drops.

This fix:
- Adds an _is_anthropic_provider() helper to detect the anthropic/bedrock/vertex_ai providers.
- Updates _extract_reasoning_value() to prefer thinking_blocks (with per-block signatures) over reasoning_content.
- Updates _convert_reasoning_value_to_parts() to handle ChatCompletionThinkingBlock dicts, preserving thought_signature.
- Updates _content_to_message_param() to embed thinking blocks directly in the message content list for Anthropic providers, bypassing the broken reasoning_content path.

Fixes #4801
1 parent f8270c8 commit 59e6e04

File tree

2 files changed

+191
-4
lines changed

2 files changed

+191
-4
lines changed

src/google/adk/models/lite_llm.py

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,16 @@ def _get_provider_from_model(model: str) -> str:
233233
return ""
234234

235235

236+
# Providers that route to Anthropic's API and require thinking blocks
237+
# embedded directly in the message content list.
238+
_ANTHROPIC_PROVIDERS = frozenset({"anthropic", "bedrock", "vertex_ai"})
239+
240+
241+
def _is_anthropic_provider(provider: str) -> bool:
242+
"""Returns True if the provider routes to an Anthropic model endpoint."""
243+
return provider.lower() in _ANTHROPIC_PROVIDERS if provider else False
244+
245+
236246
# Default MIME type when none can be inferred
237247
_DEFAULT_MIME_TYPE = "application/octet-stream"
238248

@@ -385,7 +395,34 @@ def _iter_reasoning_texts(reasoning_value: Any) -> Iterable[str]:
385395

386396

387397
def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
  """Converts provider reasoning payloads into Gemini thought parts.

  Two payload shapes are supported:
  - A list of ChatCompletionThinkingBlock dicts (Anthropic), each carrying
    'thinking' text plus an optional 'signature'.
  - A plain string or nested structure (OpenAI/Azure/Ollama), flattened
    through _iter_reasoning_texts.
  """
  # Non-list payloads are the generic (string / nested) reasoning format.
  if not isinstance(reasoning_value, list):
    return [
        types.Part(text=text, thought=True)
        for text in _iter_reasoning_texts(reasoning_value)
    ]
  parts: List[types.Part] = []
  for item in reasoning_value:
    if isinstance(item, dict) and item.get("type") == "thinking":
      thinking_text = item.get("thinking", "")
      if thinking_text:
        # Keep the per-block signature; Anthropic requires it to accept
        # the thinking block back in multi-turn requests.
        parts.append(
            types.Part(
                text=thinking_text,
                thought=True,
                thought_signature=item.get("signature"),
            )
        )
    else:
      # Fall back to text extraction for non-thinking-block items.
      parts.extend(
          types.Part(text=text, thought=True)
          for text in _iter_reasoning_texts(item)
          if text
      )
  return parts
@@ -396,12 +433,19 @@ def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
396433
def _extract_reasoning_value(message: Message | Delta | None) -> Any:
397434
"""Fetches the reasoning payload from a LiteLLM message.
398435
399-
Checks for both 'reasoning_content' (LiteLLM standard, used by Azure/Foundry,
400-
Ollama via LiteLLM) and 'reasoning' (used by LM Studio, vLLM).
401-
Prioritizes 'reasoning_content' when both are present.
436+
Checks for 'thinking_blocks' (Anthropic thinking with signatures),
437+
'reasoning_content' (LiteLLM standard, used by Azure/Foundry,
438+
Ollama via LiteLLM), and 'reasoning' (used by LM Studio, vLLM).
439+
Prioritizes 'thinking_blocks' when present, as they contain
440+
the signature required for Anthropic's extended thinking API.
402441
"""
403442
if message is None:
404443
return None
444+
# Prefer thinking_blocks (Anthropic) — they carry per-block signatures
445+
# needed for multi-turn conversations with extended thinking.
446+
thinking_blocks = message.get("thinking_blocks")
447+
if thinking_blocks:
448+
return thinking_blocks
405449
reasoning_content = message.get("reasoning_content")
406450
if reasoning_content is not None:
407451
return reasoning_content
@@ -847,6 +891,33 @@ async def _content_to_message_param(
847891
):
848892
reasoning_texts.append(_decode_inline_text_data(part.inline_data.data))
849893

894+
# Anthropic/Bedrock providers require thinking blocks to be embedded
895+
# directly in the message content list. LiteLLM's prompt template for
896+
# Anthropic drops the top-level reasoning_content field, so thinking
897+
# blocks disappear from multi-turn histories and the model stops
898+
# producing them after the first turn. Signatures are required by the
899+
# Anthropic API for thinking blocks in multi-turn conversations.
900+
if reasoning_parts and _is_anthropic_provider(provider):
901+
content_list = []
902+
for part in reasoning_parts:
903+
if part.text:
904+
block = {"type": "thinking", "thinking": part.text}
905+
if part.thought_signature:
906+
sig = part.thought_signature
907+
if isinstance(sig, bytes):
908+
sig = base64.b64encode(sig).decode("utf-8")
909+
block["signature"] = sig
910+
content_list.append(block)
911+
if isinstance(final_content, list):
912+
content_list.extend(final_content)
913+
elif final_content:
914+
content_list.append({"type": "text", "text": final_content})
915+
return ChatCompletionAssistantMessage(
916+
role=role,
917+
content=content_list or None,
918+
tool_calls=tool_calls or None,
919+
)
920+
850921
reasoning_content = _NEW_LINE.join(text for text in reasoning_texts if text)
851922
return ChatCompletionAssistantMessage(
852923
role=role,

tests/unittests/models/test_litellm.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4675,3 +4675,119 @@ def test_handles_litellm_logger_names(logger_name):
46754675
finally:
46764676
# Clean up
46774677
test_logger.removeHandler(handler)
4678+
4679+
4680+
# ---------- Anthropic thinking-block round-trip tests ----------
4681+
4682+
from google.adk.models.lite_llm import _is_anthropic_provider
4683+
from google.adk.models.lite_llm import _convert_reasoning_value_to_parts
4684+
4685+
4686+
def test_is_anthropic_provider():
  """Verify _is_anthropic_provider matches known Claude provider prefixes."""
  # Prefixes that route to Anthropic endpoints, in any letter case.
  for claude_provider in ("anthropic", "bedrock", "vertex_ai", "ANTHROPIC"):
    assert _is_anthropic_provider(claude_provider)
  # Anything else — including falsy values — must be rejected.
  for other_provider in ("openai", "", None):
    assert not _is_anthropic_provider(other_provider)
4696+
4697+
def test_extract_reasoning_value_prefers_thinking_blocks():
  """thinking_blocks (Anthropic format with signatures) take priority."""
  blocks = [
      {"type": "thinking", "thinking": f"step {i}", "signature": f"sig{i}"}
      for i in (1, 2)
  ]
  message = {
      "role": "assistant",
      "content": "Answer",
      "reasoning_content": "flat string",
      "thinking_blocks": blocks,
  }
  # The exact list object is returned — not a copy, not the flat string.
  assert _extract_reasoning_value(message) is blocks
4712+
4713+
def test_convert_reasoning_value_preserves_signatures():
  """_convert_reasoning_value_to_parts keeps thought_signature from blocks."""
  parts = _convert_reasoning_value_to_parts([
      {"type": "thinking", "thinking": "I should greet", "signature": "c2lnX2E="},
      {"type": "thinking", "thinking": "Let me respond", "signature": "c2lnX2I="},
  ])
  assert len(parts) == 2
  first, second = parts[0], parts[1]
  assert first.text == "I should greet"
  assert first.thought is True
  # Base64 signature strings are decoded to raw bytes by the Part model.
  assert first.thought_signature == b"sig_a"
  assert second.text == "Let me respond"
  assert second.thought_signature == b"sig_b"
4727+
4728+
def test_convert_reasoning_value_plain_string_no_signature():
  """Plain strings (non-Anthropic) produce thought=True with no signature."""
  parts = _convert_reasoning_value_to_parts("Plain reasoning")
  assert len(parts) == 1
  only_part = parts[0]
  assert only_part.text == "Plain reasoning"
  assert only_part.thought is True
  # No signature exists for plain-string reasoning payloads.
  assert only_part.thought_signature is None
4736+
4737+
@pytest.mark.asyncio
async def test_content_to_message_param_anthropic_embeds_thinking_blocks():
  """For anthropic provider, thinking parts become content-list blocks."""
  content = types.Content(
      role="model",
      parts=[
          types.Part(
              text="I need to think", thought=True, thought_signature="c2lnX3g="
          ),
          types.Part.from_text(text="Hello!"),
      ],
  )

  msg = await _content_to_message_param(content, provider="anthropic")

  blocks = msg["content"]
  assert isinstance(blocks, list)
  thinking_block = blocks[0]
  assert thinking_block["type"] == "thinking"
  assert thinking_block["thinking"] == "I need to think"
  assert thinking_block["signature"] == "c2lnX3g="
  text_block = blocks[1]
  assert text_block["type"] == "text"
  assert text_block["text"] == "Hello!"
  # reasoning_content must stay unset — the thinking lives in the content
  # list, since LiteLLM's Anthropic template drops the top-level field.
  assert msg.get("reasoning_content") is None
4758+
4759+
@pytest.mark.asyncio
async def test_content_to_message_param_openai_uses_reasoning_content():
  """For non-anthropic provider, thinking parts use reasoning_content field."""
  content = types.Content(
      role="model",
      parts=[
          types.Part(text="I need to think", thought=True),
          types.Part.from_text(text="Hello!"),
      ],
  )

  msg = await _content_to_message_param(content, provider="openai")

  # Thinking text travels in the dedicated reasoning_content field...
  assert msg.get("reasoning_content") == "I need to think"
  # ...while content stays a plain string rather than a block list.
  final_content = msg["content"]
  assert isinstance(final_content, str)
  assert final_content == "Hello!"
4776+
4777+
@pytest.mark.asyncio
async def test_content_to_message_param_anthropic_thinking_with_tool_calls():
  """Anthropic thinking + tool calls: thinking in content, tool_calls separate."""
  content = types.Content(
      role="model",
      parts=[
          types.Part(
              text="Let me calculate",
              thought=True,
              thought_signature="c2lnX2NhbGM=",
          ),
          types.Part.from_function_call(name="add", args={"a": 1, "b": 2}),
      ],
  )

  msg = await _content_to_message_param(content, provider="anthropic")

  assert isinstance(msg["content"], list)
  leading_block = msg["content"][0]
  assert leading_block["type"] == "thinking"
  assert leading_block["signature"] == "c2lnX2NhbGM="
  tool_calls = msg["tool_calls"]
  assert tool_calls is not None
  assert len(tool_calls) == 1
  assert tool_calls[0]["function"]["name"] == "add"

0 commit comments

Comments
 (0)