diff --git a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py index acfc1b0180..f8e08c6982 100644 --- a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py +++ b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py @@ -659,12 +659,44 @@ def _prepare_message_for_anthropic(self, message: Message) -> dict[str, Any]: "input": content.parse_arguments(), }) case "function_result": - a_content.append({ - "type": "tool_result", - "tool_use_id": content.call_id, - "content": content.result if content.result is not None else "", - "is_error": content.exception is not None, - }) + if content.items: + # Rich content: build array with text + image blocks + tool_content: list[dict[str, Any]] = [] + if content.result: + tool_content.append({"type": "text", "text": content.result}) + for item in content.items: + if item.type == "data" and item.has_top_level_media_type("image"): + tool_content.append({ + "type": "image", + "source": { + "data": _get_data_bytes_as_str(item), # type: ignore[attr-defined] + "media_type": item.media_type, + "type": "base64", + }, + }) + elif item.type == "uri" and item.has_top_level_media_type("image"): + tool_content.append({ + "type": "image", + "source": {"type": "url", "url": item.uri}, + }) + else: + logger.debug( + "Ignoring unsupported rich content media type in tool result: %s", + item.media_type, + ) + a_content.append({ + "type": "tool_result", + "tool_use_id": content.call_id, + "content": tool_content, + "is_error": content.exception is not None, + }) + else: + a_content.append({ + "type": "tool_result", + "tool_use_id": content.call_id, + "content": content.result if content.result is not None else "", + "is_error": content.exception is not None, + }) case "mcp_server_tool_call": mcp_call: dict[str, Any] = { "type": "mcp_tool_use", diff --git a/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py b/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py index 7590111bac..cccae17aa2 100644 --- a/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py +++ b/python/packages/azure-ai/agent_framework_azure_ai/_chat_client.py @@ -1391,6 +1391,11 @@ def _prepare_tool_outputs_for_azure_ai( call_id = run_and_call_ids[1] if content.type == "function_result": + if content.items: + logger.warning( + "Azure AI Agents does not support rich content (images, audio) in tool results. " + "Rich content items will be omitted." + ) if tool_outputs is None: tool_outputs = [] tool_outputs.append( diff --git a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py index b0d87fe8cc..f4c3b35b76 100644 --- a/python/packages/bedrock/agent_framework_bedrock/_chat_client.py +++ b/python/packages/bedrock/agent_framework_bedrock/_chat_client.py @@ -503,10 +503,16 @@ def _convert_content_to_bedrock_block(self, content: Content) -> dict[str, Any] } } case "function_result": + tool_result_blocks = self._convert_tool_result_to_blocks(content.result) + if content.items: + logger.warning( + "Bedrock does not support rich content (images, audio) in tool results. " + "Rich content items will be omitted." + ) tool_result_block = { "toolResult": { "toolUseId": content.call_id, - "content": self._convert_tool_result_to_blocks(content.result), + "content": tool_result_blocks, "status": "error" if content.exception else "success", } } @@ -528,6 +534,8 @@ def _convert_content_to_bedrock_block(self, content: Content) -> dict[str, Any] def _convert_tool_result_to_blocks(self, result: Any) -> list[dict[str, Any]]: prepared_result = result if isinstance(result, str) else FunctionTool.parse_result(result) + if not isinstance(prepared_result, str): + return [{"text": str(prepared_result)}] try: parsed_result = json.loads(prepared_result) except json.JSONDecodeError: diff --git a/python/packages/core/agent_framework/_mcp.py b/python/packages/core/agent_framework/_mcp.py index 0c241cb89a..b455f1b696 100644 --- a/python/packages/core/agent_framework/_mcp.py +++ b/python/packages/core/agent_framework/_mcp.py @@ -142,38 +142,44 @@ def _parse_message_from_mcp( def _parse_tool_result_from_mcp( mcp_type: types.CallToolResult, -) -> str: - """Parse an MCP CallToolResult directly into a string representation. +) -> str | list[Content]: + """Parse an MCP CallToolResult into a string or rich content list. - Converts each content item in the MCP result to its string form and combines them. - This skips the intermediate Content object step for tool results. + Converts each content item in the MCP result to its appropriate form. + Text-only results are returned as strings. When the result contains + image or audio content, returns a list of Content objects so the + framework can forward the rich media to the model. Args: mcp_type: The MCP CallToolResult object to convert. Returns: - A string representation of the tool result — either plain text or serialized JSON. + A string for text-only results, or a list of Content for rich media results. """ import json - parts: list[str] = [] + text_parts: list[str] = [] + rich_items: list[Content] = [] for item in mcp_type.content: match item: case types.TextContent(): - parts.append(item.text) - case types.ImageContent() | types.AudioContent(): - parts.append( - json.dumps( - { - "type": "image" if isinstance(item, types.ImageContent) else "audio", - "data": item.data, - "mimeType": item.mimeType, - }, - default=str, + text_parts.append(item.text) + case types.ImageContent(): + rich_items.append( + Content.from_uri( + uri=f"data:{item.mimeType};base64,{item.data}", + media_type=item.mimeType, + ) + ) + case types.AudioContent(): + rich_items.append( + Content.from_uri( + uri=f"data:{item.mimeType};base64,{item.data}", + media_type=item.mimeType, ) ) case types.ResourceLink(): - parts.append( + text_parts.append( json.dumps( { "type": "resource_link", @@ -186,9 +192,9 @@ def _parse_tool_result_from_mcp( case types.EmbeddedResource(): match item.resource: case types.TextResourceContents(): - parts.append(item.resource.text) + text_parts.append(item.resource.text) case types.BlobResourceContents(): - parts.append( + text_parts.append( json.dumps( { "type": "blob", @@ -199,12 +205,21 @@ def _parse_tool_result_from_mcp( ) ) case _: - parts.append(str(item)) - if not parts: + text_parts.append(str(item)) + + if rich_items: + # Return rich content list with text items included + result: list[Content] = [] + for text in text_parts: + result.append(Content.from_text(text)) + result.extend(rich_items) + return result + + if not text_parts: return "" - if len(parts) == 1: - return parts[0] - return json.dumps(parts, default=str) + if len(text_parts) == 1: + return text_parts[0] + return json.dumps(text_parts, default=str) def _parse_content_from_mcp( @@ -425,7 +440,7 @@ def __init__( approval_mode: (Literal["always_require", "never_require"] | MCPSpecificApproval | None) = None, allowed_tools: Collection[str] | None = None, load_tools: bool = True, - parse_tool_results: Callable[[types.CallToolResult], str] | None = None, + parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None, load_prompts: bool = True, parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None, session: ClientSession | None = None, @@ -850,7 +865,7 @@ async def _ensure_connected(self) -> None: inner_exception=ex, ) from ex - async def call_tool(self, tool_name: str, **kwargs: Any) -> str: + async def call_tool(self, tool_name: str, **kwargs: Any) -> str | list[Content]: """Call a tool with the given arguments. Args: @@ -860,7 +875,7 @@ async def call_tool(self, tool_name: str, **kwargs: Any) -> str: kwargs: Arguments to pass to the tool. Returns: - A string representation of the tool result — either plain text or serialized JSON. + A string for text-only results, or a list of Content for rich media results. Raises: ToolExecutionException: If the MCP server is not connected, tools are not loaded, @@ -1053,7 +1068,7 @@ def __init__( command: str, *, load_tools: bool = True, - parse_tool_results: Callable[[types.CallToolResult], str] | None = None, + parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None, load_prompts: bool = True, parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None, request_timeout: int | None = None, @@ -1178,7 +1193,7 @@ def __init__( url: str, *, load_tools: bool = True, - parse_tool_results: Callable[[types.CallToolResult], str] | None = None, + parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None, load_prompts: bool = True, parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None, request_timeout: int | None = None, @@ -1297,7 +1312,7 @@ def __init__( url: str, *, load_tools: bool = True, - parse_tool_results: Callable[[types.CallToolResult], str] | None = None, + parse_tool_results: Callable[[types.CallToolResult], str | list[Content]] | None = None, load_prompts: bool = True, parse_prompt_results: Callable[[types.GetPromptResult], str] | None = None, request_timeout: int | None = None, diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 3ec167d4f7..bc9bddd5e4 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -242,7 +242,7 @@ def __init__( additional_properties: dict[str, Any] | None = None, func: Callable[..., Any] | None = None, input_model: type[BaseModel] | Mapping[str, Any] | None = None, - result_parser: Callable[[Any], str] | None = None, + result_parser: Callable[[Any], str | list[Content]] | None = None, **kwargs: Any, ) -> None: """Initialize the FunctionTool. @@ -438,19 +438,19 @@ async def invoke( *, arguments: BaseModel | Mapping[str, Any] | None = None, **kwargs: Any, - ) -> str: + ) -> str | list[Content]: """Run the AI function with the provided arguments as a Pydantic model. The raw return value of the wrapped function is automatically parsed into a ``str`` - (either plain text or serialized JSON) using :meth:`parse_result` or the custom - ``result_parser`` if one was provided. + (either plain text or serialized JSON) or a ``list[Content]`` (for rich content like + images) using :meth:`parse_result` or the custom ``result_parser`` if one was provided. Keyword Args: arguments: A mapping or model instance containing the arguments for the function. kwargs: Keyword arguments to pass to the function, will not be used if ``arguments`` is provided. Returns: - The parsed result as a string — either plain text or serialized JSON. + The parsed result as a string, or a list of Content items for rich results. Raises: TypeError: If arguments is not mapping-like or fails schema checks. @@ -556,8 +556,9 @@ async def invoke( parsed = str(result) logger.info(f"Function {self.name} succeeded.") if OBSERVABILITY_SETTINGS.SENSITIVE_DATA_ENABLED: # type: ignore[name-defined] - span.set_attribute(OtelAttr.TOOL_RESULT, parsed) - logger.debug(f"Function result: {parsed}") + result_str = parsed if isinstance(parsed, str) else str(parsed) + span.set_attribute(OtelAttr.TOOL_RESULT, result_str) + logger.debug(f"Function result: {result_str}") return parsed finally: duration = (end_time_stamp or perf_counter()) - start_time_stamp @@ -609,10 +610,13 @@ def _make_dumpable(value: Any) -> Any: return value @staticmethod - def parse_result(result: Any) -> str: - """Convert a raw function return value to a string representation. + def parse_result(result: Any) -> str | list[Content]: + """Convert a raw function return value to a string or rich content list. + + Returns a ``str`` for text-only results, or a ``list[Content]`` when the + function produced rich content (images, audio, files) that should be + forwarded to the model as visual/multi-modal input. - The return value is always a ``str`` — either plain text or serialized JSON. This is called automatically by :meth:`invoke` before returning the result, ensuring that the result stored in ``Content.from_function_result`` is already in a form that can be passed directly to LLM APIs. @@ -621,12 +625,22 @@ def parse_result(result: Any) -> str: result: The raw return value from the wrapped function. Returns: - A string representation of the result, either plain text or serialized JSON. + A string representation, or a list of Content items for rich results. """ + from ._types import Content + if result is None: return "" if isinstance(result, str): return result + # Preserve rich Content (images, audio, files) instead of serializing to JSON + if isinstance(result, Content): + if result.type in ("data", "uri"): + return [result] + if result.type == "text" and result.text: + return result.text + if isinstance(result, list) and any(isinstance(item, Content) for item in result): + return [item if isinstance(item, Content) else Content.from_text(str(item)) for item in result] dumpable = FunctionTool._make_dumpable(result) if isinstance(dumpable, str): return dumpable @@ -1080,7 +1094,7 @@ def tool( max_invocations: int | None = None, max_invocation_exceptions: int | None = None, additional_properties: dict[str, Any] | None = None, - result_parser: Callable[[Any], str] | None = None, + result_parser: Callable[[Any], str | list[Content]] | None = None, ) -> FunctionTool: ... @@ -1095,7 +1109,7 @@ def tool( max_invocations: int | None = None, max_invocation_exceptions: int | None = None, additional_properties: dict[str, Any] | None = None, - result_parser: Callable[[Any], str] | None = None, + result_parser: Callable[[Any], str | list[Content]] | None = None, ) -> Callable[[Callable[..., Any]], FunctionTool]: ... @@ -1109,7 +1123,7 @@ def tool( max_invocations: int | None = None, max_invocation_exceptions: int | None = None, additional_properties: dict[str, Any] | None = None, - result_parser: Callable[[Any], str] | None = None, + result_parser: Callable[[Any], str | list[Content]] | None = None, ) -> FunctionTool | Callable[[Callable[..., Any]], FunctionTool]: """Decorate a function to turn it into a FunctionTool that can be passed to models and executed automatically. @@ -1343,6 +1357,33 @@ def normalize_function_invocation_configuration( return normalized +def _build_function_result(call_id: str, function_result: str | list[Content]) -> Content: + """Build a function_result Content from a parsed tool result. + + When the tool returned rich content (list of Content items), the text + items are concatenated as the text result and media items are stored + in the ``items`` field so providers can forward them to the model. + + Args: + call_id: The function call ID this result corresponds to. + function_result: The parsed result from FunctionTool.invoke. + + Returns: + A Content with type ``function_result``. + """ + from ._types import Content + + if isinstance(function_result, list): + text_parts = [c.text for c in function_result if c.type == "text" and c.text] + rich_items = [c for c in function_result if c.type in ("data", "uri")] + return Content.from_function_result( + call_id=call_id, + result="\n".join(text_parts) if text_parts else "", + items=rich_items or None, + ) + return Content.from_function_result(call_id=call_id, result=function_result) + + async def _auto_invoke_function( function_call_content: Content, custom_args: dict[str, Any] | None = None, @@ -1440,9 +1481,9 @@ async def _auto_invoke_function( tool_call_id=function_call_content.call_id, **runtime_kwargs if getattr(tool, "_forward_runtime_kwargs", False) else {}, ) - return Content.from_function_result( + return _build_function_result( call_id=function_call_content.call_id, # type: ignore[arg-type] - result=function_result, + function_result=function_result, ) except Exception as exc: message = "Error: Function failed." @@ -1474,9 +1515,9 @@ async def final_function_handler(context_obj: Any) -> Any: # MiddlewareTermination bubbles up to signal loop termination try: function_result = await middleware_pipeline.execute(middleware_context, final_function_handler) - return Content.from_function_result( + return _build_function_result( call_id=function_call_content.call_id, # type: ignore[arg-type] - result=function_result, + function_result=function_result, ) except MiddlewareTermination as term_exc: # Re-raise to signal loop termination, but first capture any result set by middleware diff --git a/python/packages/core/agent_framework/_types.py b/python/packages/core/agent_framework/_types.py index 37ee9f1138..3943004c6e 100644 --- a/python/packages/core/agent_framework/_types.py +++ b/python/packages/core/agent_framework/_types.py @@ -468,6 +468,7 @@ def __init__( arguments: str | Mapping[str, Any] | None = None, exception: str | None = None, result: Any = None, + items: Sequence[Content] | None = None, # Hosted file/vector store fields file_id: str | None = None, vector_store_id: str | None = None, @@ -513,6 +514,7 @@ def __init__( self.arguments = arguments self.exception = exception self.result = result + self.items = items self.file_id = file_id self.vector_store_id = vector_store_id self.inputs = inputs @@ -756,16 +758,30 @@ def from_function_result( call_id: str, *, result: Any = None, + items: Sequence[Content] | None = None, exception: str | None = None, annotations: Sequence[Annotation] | None = None, additional_properties: MutableMapping[str, Any] | None = None, raw_representation: Any = None, ) -> ContentT: - """Create function result content.""" + """Create function result content. + + Args: + call_id: The ID of the function call this result corresponds to. + + Keyword Args: + result: The text result of the function call. + items: Optional rich content items (e.g. images, audio) produced by the tool. + exception: The exception message if the function call failed. + annotations: Optional annotations for the content. + additional_properties: Optional additional properties. + raw_representation: Optional raw representation from the provider. + """ return cls( "function_result", call_id=call_id, result=result, + items=list(items) if items else None, exception=exception, annotations=annotations, additional_properties=additional_properties, @@ -1029,6 +1045,7 @@ def to_dict(self, *, exclude_none: bool = True, exclude: set[str] | None = None) "arguments", "exception", "result", + "items", "file_id", "vector_store_id", "inputs", diff --git a/python/packages/core/agent_framework/openai/_chat_client.py b/python/packages/core/agent_framework/openai/_chat_client.py index f08d80e990..205aa6075c 100644 --- a/python/packages/core/agent_framework/openai/_chat_client.py +++ b/python/packages/core/agent_framework/openai/_chat_client.py @@ -571,9 +571,21 @@ def _prepare_message_for_openai(self, message: Message) -> list[dict[str, Any]]: args["tool_calls"] = [self._prepare_content_for_openai(content)] # type: ignore case "function_result": args["tool_call_id"] = content.call_id - # Always include content for tool results - API requires it even if empty - # Functions returning None should still have a tool result message - args["content"] = content.result if content.result is not None else "" + if content.items: + # Multi-part: text result + rich content (images, audio, files) + content_parts: list[dict[str, Any]] = [] + text_result = content.result if content.result else "" + if text_result: + content_parts.append({"type": "text", "text": text_result}) + for item in content.items: + prepared = self._prepare_content_for_openai(item) + if prepared: + content_parts.append(prepared) + args["content"] = content_parts + else: + # Always include content for tool results - API requires it even if empty + # Functions returning None should still have a tool result message + args["content"] = content.result if content.result is not None else "" case "text_reasoning" if (protected_data := content.protected_data) is not None: all_messages[-1]["reasoning_details"] = json.loads(protected_data) case _: diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index fa140ee0b7..661648e46d 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -926,6 +926,19 @@ def _prepare_message_for_openai( new_args.update(self._prepare_content_for_openai(message.role, content, call_id_to_id)) # type: ignore[arg-type] if new_args: all_messages.append(new_args) + # Forward rich content items (images, audio, files) as a user message + if content.items: + rich_parts = [ + self._prepare_content_for_openai("user", item, call_id_to_id) # type: ignore[arg-type] + for item in content.items + ] + rich_parts = [p for p in rich_parts if p] + if rich_parts: + all_messages.append({ + "type": "message", + "role": "user", + "content": rich_parts, + }) case "function_call": function_call = self._prepare_content_for_openai(message.role, content, call_id_to_id) # type: ignore[arg-type] if function_call: diff --git a/python/packages/core/tests/core/test_mcp.py b/python/packages/core/tests/core/test_mcp.py index 65b4015093..50d8e4e4ec 100644 --- a/python/packages/core/tests/core/test_mcp.py +++ b/python/packages/core/tests/core/test_mcp.py @@ -64,7 +64,7 @@ def test_mcp_prompt_message_to_ai_content(): def test_parse_tool_result_from_mcp(): - """Test conversion from MCP tool result to string representation.""" + """Test conversion from MCP tool result with images returns rich content list.""" mcp_result = types.CallToolResult( content=[ types.TextContent(type="text", text="Result text"), @@ -74,20 +74,19 @@ def test_parse_tool_result_from_mcp(): ) result = _parse_tool_result_from_mcp(mcp_result) - # Multiple items produce a JSON array of strings - assert isinstance(result, str) - import json - - parsed = json.loads(result) - assert len(parsed) == 3 - assert parsed[0] == "Result text" - # Image items are JSON-encoded strings within the array - img1 = json.loads(parsed[1]) - assert img1["type"] == "image" - assert img1["data"] == "eHl6" - img2 = json.loads(parsed[2]) - assert img2["type"] == "image" - assert img2["data"] == "YWJj" + # Results with images return a list of Content objects + assert isinstance(result, list) + assert len(result) == 3 + # First item is the text content + assert result[0].type == "text" + assert result[0].text == "Result text" + # Image items are preserved as data Content objects (data URI) + assert result[1].type == "data" + assert result[1].media_type == "image/png" + assert "eHl6" in result[1].uri + assert result[2].type == "data" + assert result[2].media_type == "image/webp" + assert "YWJj" in result[2].uri def test_parse_tool_result_from_mcp_single_text(): @@ -117,6 +116,22 @@ def test_parse_tool_result_from_mcp_empty_content(): assert result == "" +def test_parse_tool_result_from_mcp_audio_content(): + """Test conversion from MCP tool result with audio returns rich content list.""" + mcp_result = types.CallToolResult( + content=[ + types.AudioContent(type="audio", data="YXVkaW8=", mimeType="audio/wav"), + ] + ) + result = _parse_tool_result_from_mcp(mcp_result) + + assert isinstance(result, list) + assert len(result) == 1 + assert result[0].type == "data" + assert result[0].media_type == "audio/wav" + assert "YXVkaW8=" in result[0].uri + + def test_mcp_content_types_to_ai_content_text(): """Test conversion of MCP text content to AI content.""" mcp_content = types.TextContent(type="text", text="Sample text") diff --git a/python/packages/core/tests/core/test_types.py b/python/packages/core/tests/core/test_types.py index 8a8885b919..8a250ea981 100644 --- a/python/packages/core/tests/core/test_types.py +++ b/python/packages/core/tests/core/test_types.py @@ -2210,12 +2210,103 @@ def test_parse_result_content_object(): def test_parse_result_list_of_content(): - """Test that list[Content] is serialized to JSON.""" + """Test that list[Content] with text-only items is returned as list[Content].""" contents = [Content.from_text("hello"), Content.from_text("world")] result = FunctionTool.parse_result(contents) + assert isinstance(result, list) + assert len(result) == 2 + assert result[0].text == "hello" + assert result[1].text == "world" + + +def test_parse_result_single_image_content(): + """Test that a single image Content is preserved as list[Content].""" + image_content = Content.from_data(data=b"fake_png_bytes", media_type="image/png") + result = FunctionTool.parse_result(image_content) + assert isinstance(result, list) + assert len(result) == 1 + assert result[0].type == "data" + assert result[0].media_type == "image/png" + + +def test_parse_result_single_text_content(): + """Test that a single text Content returns its text string.""" + text_content = Content.from_text("just text") + result = FunctionTool.parse_result(text_content) assert isinstance(result, str) - assert "hello" in result - assert "world" in result + assert result == "just text" + + +def test_parse_result_mixed_content_list(): + """Test that list with text and image Content is preserved.""" + contents = [ + Content.from_text("Chart rendered."), + Content.from_data(data=b"image_bytes", media_type="image/png"), + ] + result = FunctionTool.parse_result(contents) + assert isinstance(result, list) + assert len(result) == 2 + assert result[0].type == "text" + assert result[1].type == "data" + + +def test_build_function_result_with_rich_content(): + """Test _build_function_result separates text and rich items.""" + from agent_framework._tools import _build_function_result + + content_list = [ + Content.from_text("Chart rendered."), + Content.from_data(data=b"image_bytes", media_type="image/png"), + ] + result = _build_function_result(call_id="test-123", function_result=content_list) + assert result.type == "function_result" + assert result.call_id == "test-123" + assert result.result == "Chart rendered." + assert result.items is not None + assert len(result.items) == 1 + assert result.items[0].type == "data" + assert result.items[0].media_type == "image/png" + + +def test_build_function_result_with_string(): + """Test _build_function_result with plain string result.""" + from agent_framework._tools import _build_function_result + + result = _build_function_result(call_id="test-123", function_result="just text") + assert result.type == "function_result" + assert result.call_id == "test-123" + assert result.result == "just text" + assert result.items is None + + +def test_content_from_function_result_with_items(): + """Test Content.from_function_result with items parameter.""" + image = Content.from_data(data=b"png_data", media_type="image/png") + result = Content.from_function_result( + call_id="call-1", + result="Screenshot captured.", + items=[image], + ) + assert result.type == "function_result" + assert result.call_id == "call-1" + assert result.result == "Screenshot captured." + assert result.items is not None + assert len(result.items) == 1 + assert result.items[0].media_type == "image/png" + + +def test_content_from_function_result_items_in_to_dict(): + """Test that items are included in to_dict serialization.""" + image = Content.from_data(data=b"png_data", media_type="image/png") + result = Content.from_function_result( + call_id="call-1", + result="done", + items=[image], + ) + d = result.to_dict() + assert "items" in d + assert len(d["items"]) == 1 + assert d["items"][0]["type"] == "data" # endregion diff --git a/python/packages/ollama/agent_framework_ollama/_chat_client.py b/python/packages/ollama/agent_framework_ollama/_chat_client.py index cc7fc0c9a7..1d4d7bf307 100644 --- a/python/packages/ollama/agent_framework_ollama/_chat_client.py +++ b/python/packages/ollama/agent_framework_ollama/_chat_client.py @@ -500,11 +500,16 @@ def _format_assistant_message(self, message: Message) -> list[OllamaMessage]: def _format_tool_message(self, message: Message) -> list[OllamaMessage]: # Ollama does not support multiple tool results in a single message, so we create a separate - return [ - OllamaMessage(role="tool", content=str(item.result), tool_name=item.call_id) - for item in message.contents - if item.type == "function_result" - ] + messages: list[OllamaMessage] = [] + for item in message.contents: + if item.type == "function_result": + if item.items: + logger.warning( + "Ollama does not support rich content (images, audio) in tool results. " + "Rich content items will be omitted." + ) + messages.append(OllamaMessage(role="tool", content=str(item.result), tool_name=item.call_id)) + return messages def _parse_contents_from_ollama(self, response: OllamaChatResponse) -> list[Content]: contents: list[Content] = []