From bb488fe30f325422f1a8d8c9cc547eb26f66440d Mon Sep 17 00:00:00 2001 From: Roo Code Date: Wed, 21 Jan 2026 19:56:46 +0000 Subject: [PATCH] fix: add image content support to MCP tool responses Fixes #10872 The processToolContent method in UseMcpToolTool.ts now handles image content types from MCP protocol responses (e.g., Figma's get_screenshot). Changes: - Modified processToolContent to return both text and images - Updated executeToolAndProcessResult to pass images to task.say() and pushToolResult() - Added tests for image handling scenarios --- src/core/tools/UseMcpToolTool.ts | 37 ++- .../tools/__tests__/useMcpToolTool.spec.ts | 247 +++++++++++++++++- 2 files changed, 272 insertions(+), 12 deletions(-) diff --git a/src/core/tools/UseMcpToolTool.ts b/src/core/tools/UseMcpToolTool.ts index 34763e24af..7546606fd7 100644 --- a/src/core/tools/UseMcpToolTool.ts +++ b/src/core/tools/UseMcpToolTool.ts @@ -250,12 +250,14 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> { }) } - private processToolContent(toolResult: any): string { + private processToolContent(toolResult: any): { text: string; images: string[] } { if (!toolResult?.content || toolResult.content.length === 0) { - return "" + return { text: "", images: [] } } - return toolResult.content + const images: string[] = [] + + const textContent = toolResult.content .map((item: any) => { if (item.type === "text") { return item.text @@ -264,10 +266,23 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> { const { blob: _, ...rest } = item.resource return JSON.stringify(rest, null, 2) } + if (item.type === "image") { + // Handle image content (MCP image content has mimeType and data properties) + if (item.mimeType && item.data) { + if (item.data.startsWith("data:")) { + images.push(item.data) + } else { + images.push(`data:${item.mimeType};base64,${item.data}`) + } + } + return "" + } return "" }) .filter(Boolean) .join("\n\n") + + return { text: textContent, images } } private async executeToolAndProcessResult( @@ -291,18 +306,22 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> { const toolResult = await task.providerRef.deref()?.getMcpHub()?.callTool(serverName, toolName, parsedArguments) let toolResultPretty = "(No response)" + let images: string[] = [] if (toolResult) { - const outputText = this.processToolContent(toolResult) + const { text: outputText, images: extractedImages } = this.processToolContent(toolResult) + images = extractedImages - if (outputText) { + if (outputText || images.length > 0) { await this.sendExecutionStatus(task, { executionId, status: "output", - response: outputText, + response: outputText || (images.length > 0 ? `[${images.length} image(s)]` : ""), }) - toolResultPretty = (toolResult.isError ? "Error:\n" : "") + outputText + toolResultPretty = + (toolResult.isError ? "Error:\n" : "") + + (outputText || (images.length > 0 ? `[${images.length} image(s) received]` : "")) } // Send completion status @@ -321,8 +340,8 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> { }) } - await task.say("mcp_server_response", toolResultPretty) - pushToolResult(formatResponse.toolResult(toolResultPretty)) + await task.say("mcp_server_response", toolResultPretty, images) + pushToolResult(formatResponse.toolResult(toolResultPretty, images)) } } diff --git a/src/core/tools/__tests__/useMcpToolTool.spec.ts b/src/core/tools/__tests__/useMcpToolTool.spec.ts index e6d1e13e3f..3a575e6218 100644 --- a/src/core/tools/__tests__/useMcpToolTool.spec.ts +++ b/src/core/tools/__tests__/useMcpToolTool.spec.ts @@ -7,7 +7,12 @@ import { ToolUse } from "../../../shared/tools" // Mock dependencies vi.mock("../../prompts/responses", () => ({ formatResponse: { - toolResult: vi.fn((result: string) => `Tool result: ${result}`), + toolResult: vi.fn((result: string, images?: string[]) => { + if (images && images.length > 0) { + return `Tool result: ${result} [with ${images.length} image(s)]` + } + return `Tool result: ${result}` + }), toolError: vi.fn((error: string) => `Tool error: ${error}`), invalidMcpToolArgumentError: vi.fn((server: string, tool: string) => `Invalid args for ${server}:${tool}`), unknownMcpToolError: vi.fn((server: string, tool: string, availableTools: string[]) => { @@ -245,7 +250,7 @@ describe("useMcpToolTool", () => { expect(mockTask.consecutiveMistakeCount).toBe(0) expect(mockAskApproval).toHaveBeenCalled() expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started") - expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully") + expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully", []) expect(mockPushToolResult).toHaveBeenCalledWith("Tool result: Tool executed successfully") }) @@ -483,7 +488,7 @@ describe("useMcpToolTool", () => { expect(mockTask.consecutiveMistakeCount).toBe(0) expect(mockTask.recordToolError).not.toHaveBeenCalled() expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started") - expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully") + expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully", []) }) it("should reject unknown server names with available servers listed", async () => { @@ -578,4 +583,240 @@ describe("useMcpToolTool", () => { expect(mockAskApproval).not.toHaveBeenCalled() }) }) + + describe("image handling", () => { + it("should handle tool response with image content", async () => { + const block: ToolUse = { + type: "tool_use", + name: "use_mcp_tool", + params: { + server_name: "figma-server", + tool_name: "get_screenshot", + arguments: '{"nodeId": "123"}', + }, + nativeArgs: { + server_name: "figma-server", + tool_name: "get_screenshot", + arguments: { nodeId: "123" }, + }, + partial: false, + } + + mockAskApproval.mockResolvedValue(true) + + const mockToolResult = { + content: [ + { + type: "image", + mimeType: "image/png", + data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ", + }, + ], + isError: false, + } + + mockProviderRef.deref.mockReturnValue({ + getMcpHub: () => ({ + callTool: vi.fn().mockResolvedValue(mockToolResult), + getAllServers: vi + .fn() + .mockReturnValue([ + { + name: "figma-server", + tools: [{ name: "get_screenshot", description: "Get screenshot" }], + }, + ]), + }), + postMessageToWebview: vi.fn(), + }) + + await useMcpToolTool.handle(mockTask as Task, block as any, { + askApproval: mockAskApproval, + handleError: mockHandleError, + pushToolResult: mockPushToolResult, + }) + + expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started") + expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "[1 image(s) received]", [ + "", + ]) + expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 1 image(s)")) + }) + + it("should handle tool response with both text and image content", async () => { + const block: ToolUse = { + type: "tool_use", + name: "use_mcp_tool", + params: { + server_name: "figma-server", + tool_name: "get_node_info", + arguments: '{"nodeId": "123"}', + }, + nativeArgs: { + server_name: "figma-server", + tool_name: "get_node_info", + arguments: { nodeId: "123" }, + }, + partial: false, + } + + mockAskApproval.mockResolvedValue(true) + + const mockToolResult = { + content: [ + { type: "text", text: "Node name: Button" }, + { + type: "image", + mimeType: "image/png", + data: "base64imagedata", + }, + ], + isError: false, + } + + mockProviderRef.deref.mockReturnValue({ + getMcpHub: () => ({ + callTool: vi.fn().mockResolvedValue(mockToolResult), + getAllServers: vi + .fn() + .mockReturnValue([ + { name: "figma-server", tools: [{ name: "get_node_info", description: "Get node info" }] }, + ]), + }), + postMessageToWebview: vi.fn(), + }) + + await useMcpToolTool.handle(mockTask as Task, block as any, { + askApproval: mockAskApproval, + handleError: mockHandleError, + pushToolResult: mockPushToolResult, + }) + + expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started") + expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Node name: Button", [ + "", + ]) + expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 1 image(s)")) + }) + + it("should handle image with data URL already formatted", async () => { + const block: ToolUse = { + type: "tool_use", + name: "use_mcp_tool", + params: { + server_name: "figma-server", + tool_name: "get_screenshot", + arguments: '{"nodeId": "123"}', + }, + nativeArgs: { + server_name: "figma-server", + tool_name: "get_screenshot", + arguments: { nodeId: "123" }, + }, + partial: false, + } + + mockAskApproval.mockResolvedValue(true) + + const mockToolResult = { + content: [ + { + type: "image", + mimeType: "image/jpeg", + data: "", + }, + ], + isError: false, + } + + mockProviderRef.deref.mockReturnValue({ + getMcpHub: () => ({ + callTool: vi.fn().mockResolvedValue(mockToolResult), + getAllServers: vi + .fn() + .mockReturnValue([ + { + name: "figma-server", + tools: [{ name: "get_screenshot", description: "Get screenshot" }], + }, + ]), + }), + postMessageToWebview: vi.fn(), + }) + + await useMcpToolTool.handle(mockTask as Task, block as any, { + askApproval: mockAskApproval, + handleError: mockHandleError, + pushToolResult: mockPushToolResult, + }) + + // Should not double-prefix the data URL + expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "[1 image(s) received]", [ + "", + ]) + }) + + it("should handle multiple images in response", async () => { + const block: ToolUse = { + type: "tool_use", + name: "use_mcp_tool", + params: { + server_name: "figma-server", + tool_name: "get_screenshots", + arguments: '{"nodeIds": ["1", "2"]}', + }, + nativeArgs: { + server_name: "figma-server", + tool_name: "get_screenshots", + arguments: { nodeIds: ["1", "2"] }, + }, + partial: false, + } + + mockAskApproval.mockResolvedValue(true) + + const mockToolResult = { + content: [ + { + type: "image", + mimeType: "image/png", + data: "image1data", + }, + { + type: "image", + mimeType: "image/png", + data: "image2data", + }, + ], + isError: false, + } + + mockProviderRef.deref.mockReturnValue({ + getMcpHub: () => ({ + callTool: vi.fn().mockResolvedValue(mockToolResult), + getAllServers: vi + .fn() + .mockReturnValue([ + { + name: "figma-server", + tools: [{ name: "get_screenshots", description: "Get screenshots" }], + }, + ]), + }), + postMessageToWebview: vi.fn(), + }) + + await useMcpToolTool.handle(mockTask as Task, block as any, { + askApproval: mockAskApproval, + handleError: mockHandleError, + pushToolResult: mockPushToolResult, + }) + + expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "[2 image(s) received]", [ + "", + "", + ]) + expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 2 image(s)")) + }) + }) })