From bb488fe30f325422f1a8d8c9cc547eb26f66440d Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Wed, 21 Jan 2026 19:56:46 +0000
Subject: [PATCH] fix: add image content support to MCP tool responses

Fixes #10872

The processToolContent method in UseMcpToolTool.ts now handles image
content types from MCP protocol responses (e.g., Figma's get_screenshot).

Changes:
- Modified processToolContent to return both text and images
- Updated executeToolAndProcessResult to pass images to task.say() and
  pushToolResult()
- Added tests for image handling scenarios
---
 src/core/tools/UseMcpToolTool.ts              |  37 ++-
 .../tools/__tests__/useMcpToolTool.spec.ts    | 247 +++++++++++++++++-
 2 files changed, 272 insertions(+), 12 deletions(-)

diff --git a/src/core/tools/UseMcpToolTool.ts b/src/core/tools/UseMcpToolTool.ts
index 34763e24af..7546606fd7 100644
--- a/src/core/tools/UseMcpToolTool.ts
+++ b/src/core/tools/UseMcpToolTool.ts
@@ -250,12 +250,14 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> {
 		})
 	}
 
-	private processToolContent(toolResult: any): string {
+	private processToolContent(toolResult: any): { text: string; images: string[] } {
 		if (!toolResult?.content || toolResult.content.length === 0) {
-			return ""
+			return { text: "", images: [] }
 		}
 
-		return toolResult.content
+		const images: string[] = []
+
+		const textContent = toolResult.content
 			.map((item: any) => {
 				if (item.type === "text") {
 					return item.text
@@ -264,10 +266,23 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> {
 					const { blob: _, ...rest } = item.resource
 					return JSON.stringify(rest, null, 2)
 				}
+				if (item.type === "image") {
+					// Handle image content (MCP image content has mimeType and data properties)
+					if (item.mimeType && item.data) {
+						if (item.data.startsWith("data:")) {
+							images.push(item.data)
+						} else {
+							images.push(`data:${item.mimeType};base64,${item.data}`)
+						}
+					}
+					return ""
+				}
 				return ""
 			})
 			.filter(Boolean)
 			.join("\n\n")
+
+		return { text: textContent, images }
 	}
 
 	private async executeToolAndProcessResult(
@@ -291,18 +306,22 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> {
 		const toolResult = await task.providerRef.deref()?.getMcpHub()?.callTool(serverName, toolName, parsedArguments)
 
 		let toolResultPretty = "(No response)"
+		let images: string[] = []
 
 		if (toolResult) {
-			const outputText = this.processToolContent(toolResult)
+			const { text: outputText, images: extractedImages } = this.processToolContent(toolResult)
+			images = extractedImages
 
-			if (outputText) {
+			if (outputText || images.length > 0) {
 				await this.sendExecutionStatus(task, {
 					executionId,
 					status: "output",
-					response: outputText,
+					response: outputText || (images.length > 0 ? `[${images.length} image(s)]` : ""),
 				})
 
-				toolResultPretty = (toolResult.isError ? "Error:\n" : "") + outputText
+				toolResultPretty =
+					(toolResult.isError ? "Error:\n" : "") +
+					(outputText || (images.length > 0 ? `[${images.length} image(s) received]` : ""))
 			}
 
 			// Send completion status
@@ -321,8 +340,8 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> {
 			})
 		}
 
-		await task.say("mcp_server_response", toolResultPretty)
-		pushToolResult(formatResponse.toolResult(toolResultPretty))
+		await task.say("mcp_server_response", toolResultPretty, images)
+		pushToolResult(formatResponse.toolResult(toolResultPretty, images))
 	}
 }
 
diff --git a/src/core/tools/__tests__/useMcpToolTool.spec.ts b/src/core/tools/__tests__/useMcpToolTool.spec.ts
index e6d1e13e3f..3a575e6218 100644
--- a/src/core/tools/__tests__/useMcpToolTool.spec.ts
+++ b/src/core/tools/__tests__/useMcpToolTool.spec.ts
@@ -7,7 +7,12 @@ import { ToolUse } from "../../../shared/tools"
 // Mock dependencies
 vi.mock("../../prompts/responses", () => ({
 	formatResponse: {
-		toolResult: vi.fn((result: string) => `Tool result: ${result}`),
+		toolResult: vi.fn((result: string, images?: string[]) => {
+			if (images && images.length > 0) {
+				return `Tool result: ${result} [with ${images.length} image(s)]`
+			}
+			return `Tool result: ${result}`
+		}),
 		toolError: vi.fn((error: string) => `Tool error: ${error}`),
 		invalidMcpToolArgumentError: vi.fn((server: string, tool: string) => `Invalid args for ${server}:${tool}`),
 		unknownMcpToolError: vi.fn((server: string, tool: string, availableTools: string[]) => {
@@ -245,7 +250,7 @@ describe("useMcpToolTool", () => {
 			expect(mockTask.consecutiveMistakeCount).toBe(0)
 			expect(mockAskApproval).toHaveBeenCalled()
 			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started")
-			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully")
+			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully", [])
 			expect(mockPushToolResult).toHaveBeenCalledWith("Tool result: Tool executed successfully")
 		})
 
@@ -483,7 +488,7 @@ describe("useMcpToolTool", () => {
 			expect(mockTask.consecutiveMistakeCount).toBe(0)
 			expect(mockTask.recordToolError).not.toHaveBeenCalled()
 			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started")
-			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully")
+			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully", [])
 		})
 
 		it("should reject unknown server names with available servers listed", async () => {
@@ -578,4 +583,240 @@ describe("useMcpToolTool", () => {
 			expect(mockAskApproval).not.toHaveBeenCalled()
 		})
 	})
+
+	describe("image handling", () => {
+		it("should handle tool response with image content", async () => {
+			const block: ToolUse = {
+				type: "tool_use",
+				name: "use_mcp_tool",
+				params: {
+					server_name: "figma-server",
+					tool_name: "get_screenshot",
+					arguments: '{"nodeId": "123"}',
+				},
+				nativeArgs: {
+					server_name: "figma-server",
+					tool_name: "get_screenshot",
+					arguments: { nodeId: "123" },
+				},
+				partial: false,
+			}
+
+			mockAskApproval.mockResolvedValue(true)
+
+			const mockToolResult = {
+				content: [
+					{
+						type: "image",
+						mimeType: "image/png",
+						data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ",
+					},
+				],
+				isError: false,
+			}
+
+			mockProviderRef.deref.mockReturnValue({
+				getMcpHub: () => ({
+					callTool: vi.fn().mockResolvedValue(mockToolResult),
+					getAllServers: vi
+						.fn()
+						.mockReturnValue([
+							{
+								name: "figma-server",
+								tools: [{ name: "get_screenshot", description: "Get screenshot" }],
+							},
+						]),
+				}),
+				postMessageToWebview: vi.fn(),
+			})
+
+			await useMcpToolTool.handle(mockTask as Task, block as any, {
+				askApproval: mockAskApproval,
+				handleError: mockHandleError,
+				pushToolResult: mockPushToolResult,
+			})
+
+			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started")
+			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "[1 image(s) received]", [
+				"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ",
+			])
+			expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 1 image(s)"))
+		})
+
+		it("should handle tool response with both text and image content", async () => {
+			const block: ToolUse = {
+				type: "tool_use",
+				name: "use_mcp_tool",
+				params: {
+					server_name: "figma-server",
+					tool_name: "get_node_info",
+					arguments: '{"nodeId": "123"}',
+				},
+				nativeArgs: {
+					server_name: "figma-server",
+					tool_name: "get_node_info",
+					arguments: { nodeId: "123" },
+				},
+				partial: false,
+			}
+
+			mockAskApproval.mockResolvedValue(true)
+
+			const mockToolResult = {
+				content: [
+					{ type: "text", text: "Node name: Button" },
+					{
+						type: "image",
+						mimeType: "image/png",
+						data: "base64imagedata",
+					},
+				],
+				isError: false,
+			}
+
+			mockProviderRef.deref.mockReturnValue({
+				getMcpHub: () => ({
+					callTool: vi.fn().mockResolvedValue(mockToolResult),
+					getAllServers: vi
+						.fn()
+						.mockReturnValue([
+							{ name: "figma-server", tools: [{ name: "get_node_info", description: "Get node info" }] },
+						]),
+				}),
+				postMessageToWebview: vi.fn(),
+			})
+
+			await useMcpToolTool.handle(mockTask as Task, block as any, {
+				askApproval: mockAskApproval,
+				handleError: mockHandleError,
+				pushToolResult: mockPushToolResult,
+			})
+
+			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started")
+			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Node name: Button", [
+				"data:image/png;base64,base64imagedata",
+			])
+			expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 1 image(s)"))
+		})
+
+		it("should handle image with data URL already formatted", async () => {
+			const block: ToolUse = {
+				type: "tool_use",
+				name: "use_mcp_tool",
+				params: {
+					server_name: "figma-server",
+					tool_name: "get_screenshot",
+					arguments: '{"nodeId": "123"}',
+				},
+				nativeArgs: {
+					server_name: "figma-server",
+					tool_name: "get_screenshot",
+					arguments: { nodeId: "123" },
+				},
+				partial: false,
+			}
+
+			mockAskApproval.mockResolvedValue(true)
+
+			const mockToolResult = {
+				content: [
+					{
+						type: "image",
+						mimeType: "image/jpeg",
+						data: "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
+					},
+				],
+				isError: false,
+			}
+
+			mockProviderRef.deref.mockReturnValue({
+				getMcpHub: () => ({
+					callTool: vi.fn().mockResolvedValue(mockToolResult),
+					getAllServers: vi
+						.fn()
+						.mockReturnValue([
+							{
+								name: "figma-server",
+								tools: [{ name: "get_screenshot", description: "Get screenshot" }],
+							},
+						]),
+				}),
+				postMessageToWebview: vi.fn(),
+			})
+
+			await useMcpToolTool.handle(mockTask as Task, block as any, {
+				askApproval: mockAskApproval,
+				handleError: mockHandleError,
+				pushToolResult: mockPushToolResult,
+			})
+
+			// Should not double-prefix the data URL
+			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "[1 image(s) received]", [
+				"data:image/jpeg;base64,/9j/4AAQSkZJRg==",
+			])
+		})
+
+		it("should handle multiple images in response", async () => {
+			const block: ToolUse = {
+				type: "tool_use",
+				name: "use_mcp_tool",
+				params: {
+					server_name: "figma-server",
+					tool_name: "get_screenshots",
+					arguments: '{"nodeIds": ["1", "2"]}',
+				},
+				nativeArgs: {
+					server_name: "figma-server",
+					tool_name: "get_screenshots",
+					arguments: { nodeIds: ["1", "2"] },
+				},
+				partial: false,
+			}
+
+			mockAskApproval.mockResolvedValue(true)
+
+			const mockToolResult = {
+				content: [
+					{
+						type: "image",
+						mimeType: "image/png",
+						data: "image1data",
+					},
+					{
+						type: "image",
+						mimeType: "image/png",
+						data: "image2data",
+					},
+				],
+				isError: false,
+			}
+
+			mockProviderRef.deref.mockReturnValue({
+				getMcpHub: () => ({
+					callTool: vi.fn().mockResolvedValue(mockToolResult),
+					getAllServers: vi
+						.fn()
+						.mockReturnValue([
+							{
+								name: "figma-server",
+								tools: [{ name: "get_screenshots", description: "Get screenshots" }],
+							},
+						]),
+				}),
+				postMessageToWebview: vi.fn(),
+			})
+
+			await useMcpToolTool.handle(mockTask as Task, block as any, {
+				askApproval: mockAskApproval,
+				handleError: mockHandleError,
+				pushToolResult: mockPushToolResult,
+			})
+
+			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "[2 image(s) received]", [
+				"data:image/png;base64,image1data",
+				"data:image/png;base64,image2data",
+			])
+			expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 2 image(s)"))
+		})
+	})
 })