microsoft · giles17 · Feb 26, 2026 · Feb 26, 2026 · Feb 26, 2026 · eavanvalkenburg
diff --git a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py
@@ -659,12 +659,44 @@ def _prepare_message_for_anthropic(self, message: Message) -> dict[str, Any]:
                         "input": content.parse_arguments(),
                     })
                 case "function_result":
-                    a_content.append({
-                        "type": "tool_result",
-                        "tool_use_id": content.call_id,
-                        "content": content.result if content.result is not None else "",
-                        "is_error": content.exception is not None,
-                    })
+                    if content.items:
+                        # Rich content: build a list of text + image blocks; non-image items are skipped (debug-logged)
+                        tool_content: list[dict[str, Any]] = []
+                        if content.result:
+                            tool_content.append({"type": "text", "text": content.result})
+                        for item in content.items:
+                            if item.type == "data" and item.has_top_level_media_type("image"):
+                                tool_content.append({
+                                    "type": "image",
+                                    "source": {
+                                        "data": _get_data_bytes_as_str(item),  # type: ignore[attr-defined]
+                                        "media_type": item.media_type,
+                                        "type": "base64",
+                                    },
+                                })
+                            elif item.type == "uri" and item.has_top_level_media_type("image"):
+                                tool_content.append({
+                                    "type": "image",
+                                    "source": {"type": "url", "url": item.uri},
+                                })
+                            else:
+                                logger.debug(
+                                    "Ignoring unsupported rich content media type in tool result: %s",
+                                    item.media_type,
+                                )
+                        a_content.append({
+                            "type": "tool_result",
+                            "tool_use_id": content.call_id,
+                            "content": tool_content,
+                            "is_error": content.exception is not None,
+                        })
+                    else:
+                        a_content.append({
+                            "type": "tool_result",
+                            "tool_use_id": content.call_id,
+                            "content": content.result if content.result is not None else "",
+                            "is_error": content.exception is not None,
+                        })
                 case "mcp_server_tool_call":
                     mcp_call: dict[str, Any] = {
                         "type": "mcp_tool_use",

diff --git a/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py b/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py
@@ -1391,6 +1391,11 @@ def _prepare_tool_outputs_for_azure_ai(
                 call_id = run_and_call_ids[1]
 
                 if content.type == "function_result":
+                    if content.items:
+                        logger.warning(
+                            "Azure AI Agents does not support rich content (images, audio) in tool results. "
+                            "Rich content items will be omitted."
+                        )
                     if tool_outputs is None:
                         tool_outputs = []
                     tool_outputs.append(

diff --git a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py
@@ -503,10 +503,16 @@ def _convert_content_to_bedrock_block(self, content: Content) -> dict[str, Any]
                     }
                 }
             case "function_result":
+                tool_result_blocks = self._convert_tool_result_to_blocks(content.result)
+                if content.items:
+                    logger.warning(
+                        "Bedrock does not support rich content (images, audio) in tool results. "
+                        "Rich content items will be omitted."
+                    )
                 tool_result_block = {
                     "toolResult": {
                         "toolUseId": content.call_id,
-                        "content": self._convert_tool_result_to_blocks(content.result),
+                        "content": tool_result_blocks,
                         "status": "error" if content.exception else "success",
                     }
                 }
@@ -528,6 +534,8 @@ def _convert_content_to_bedrock_block(self, content: Content) -> dict[str, Any]
 
     def _convert_tool_result_to_blocks(self, result: Any) -> list[dict[str, Any]]:
         prepared_result = result if isinstance(result, str) else FunctionTool.parse_result(result)
+        if not isinstance(prepared_result, str):
+            return [{"text": str(prepared_result)}]
         try:
             parsed_result = json.loads(prepared_result)
         except json.JSONDecodeError:

diff --git a/python/packages/core/agent_framework/_mcp.py b/python/packages/core/agent_framework/_mcp.py
@@ -142,38 +142,44 @@ def _parse_message_from_mcp(
 
 def _parse_tool_result_from_mcp(
     mcp_type: types.CallToolResult,
-) -> str:
-    """Parse an MCP CallToolResult directly into a string representation.
+) -> str | list[Content]:
+    """Parse an MCP CallToolResult into a string or rich content list.
 
-    Converts each content item in the MCP result to its string form and combines them.
-    This skips the intermediate Content object step for tool results.
+    Converts each content item in the MCP result to its appropriate form.
+    Results without image or audio content are returned as one string (several
+    parts are serialized as a JSON list). When image or audio content is present,
+    returns a list of Content (text parts first, then the media) to forward to the model.
 
     Args:
         mcp_type: The MCP CallToolResult object to convert.
 
     Returns:
-        A string representation of the tool result — either plain text or serialized JSON.
+        A string for text-only results, or a list of Content for rich media results.
     """
     import json
 
-    parts: list[str] = []
+    text_parts: list[str] = []
+    rich_items: list[Content] = []
     for item in mcp_type.content:
         match item:
             case types.TextContent():
-                parts.append(item.text)
-            case types.ImageContent() | types.AudioContent():
-                parts.append(
-                    json.dumps(
-                        {
-                            "type": "image" if isinstance(item, types.ImageContent) else "audio",
-                            "data": item.data,
-                            "mimeType": item.mimeType,
-                        },
-                        default=str,
+                text_parts.append(item.text)
+            case types.ImageContent():
+                rich_items.append(
+                    Content.from_uri(
+                        uri=f"data:{item.mimeType};base64,{item.data}",
+                        media_type=item.mimeType,
+                    )
+                )
+            case types.AudioContent():
+                rich_items.append(
+                    Content.from_uri(
+                        uri=f"data:{item.mimeType};base64,{item.data}",
+                        media_type=item.mimeType,
                     )
                 )
             case types.ResourceLink():
-                parts.append(
+                text_parts.append(
                     json.dumps(
                         {
                             "type": "resource_link",
@@ -186,9 +192,9 @@ def _parse_tool_result_from_mcp(
             case types.EmbeddedResource():
                 match item.resource:
                     case types.TextResourceContents():
-                        parts.append(item.resource.text)
+                        text_parts.append(item.resource.text)
                     case types.BlobResourceContents():
-                        parts.append(
+                        text_parts.append(
                             json.dumps(
                                 {
                                     "type": "blob",
@@ -199,12 +205,21 @@ def _parse_tool_result_from_mcp(
                             )
                         )
             case _:
-                parts.append(str(item))
-    if not parts:
+                text_parts.append(str(item))
+
+    if rich_items:
+        # Rich media present: return all text parts (as text Content) first, then the media items
+        result: list[Content] = []
+        for text in text_parts:
+            result.append(Content.from_text(text))
+        result.extend(rich_items)
+        return result
+
+    if not text_parts:
         return ""
-    if len(parts) == 1:
-        return parts[0]
-    return json.dumps(parts, default=str)
+    if len(text_parts) == 1:
+        return text_parts[0]
+    return json.dumps(text_parts, default=str)
 
 
 def _parse_content_from_mcp(
@@ -425,7 +440,7 @@ def __init__(
         approval_mode: (Literal["always_require", "never_require"] | MCPSpecificApproval | None) = None,
         allowed_tools: Collection[str] | None = None,
         load_tools: bool = True,
-        parse_tool_results: Callable[[types.CallToolResult], str] | None = None,
+        parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None,
         load_prompts: bool = True,
         parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None,
         session: ClientSession | None = None,
@@ -850,7 +865,7 @@ async def _ensure_connected(self) -> None:
                     inner_exception=ex,
                 ) from ex
 
-    async def call_tool(self, tool_name: str, **kwargs: Any) -> str:
+    async def call_tool(self, tool_name: str, **kwargs: Any) -> str | list[Content]:
         """Call a tool with the given arguments.
 
         Args:
@@ -860,7 +875,7 @@ async def call_tool(self, tool_name: str, **kwargs: Any) -> str:
             kwargs: Arguments to pass to the tool.
 
         Returns:
-            A string representation of the tool result — either plain text or serialized JSON.
+            A string for text-only results, or a list of Content for rich media results.
 
         Raises:
             ToolExecutionException: If the MCP server is not connected, tools are not loaded,
@@ -1053,7 +1068,7 @@ def __init__(
         command: str,
         *,
         load_tools: bool = True,
-        parse_tool_results: Callable[[types.CallToolResult], str] | None = None,
+        parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None,
         load_prompts: bool = True,
         parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None,
         request_timeout: int | None = None,
@@ -1178,7 +1193,7 @@ def __init__(
         url: str,
         *,
         load_tools: bool = True,
-        parse_tool_results: Callable[[types.CallToolResult], str] | None = None,
+        parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None,
         load_prompts: bool = True,
         parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None,
         request_timeout: int | None = None,
@@ -1297,7 +1312,7 @@ def __init__(
         url: str,
         *,
         load_tools: bool = True,
-        parse_tool_results: Callable[[types.CallToolResult], str] | None = None,
+        parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None,
         load_prompts: bool = True,
         parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None,
         request_timeout: int | None = None,