From 1632659c029e956b9a0f36ca4fd0df1cd2ea6736 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Wed, 21 Jan 2026 23:02:54 +0000 Subject: [PATCH 1/4] feat: add MCP image preview thumbnails and save_image tool - Add image thumbnails to McpExecution component (Feature 1) - Import Thumbnails component - Add images prop to McpExecutionProps interface - Render thumbnails when images are present (click to open in VSCode) - Pass message.images from ChatRow to McpExecution - Add save_image tool for agent to save images (Feature 2) - Create SaveImageTool.ts with base64 data URL support - Add save_image to toolNames in types/tool.ts - Add tool definitions in shared/tools.ts - Create native tool description in prompts/tools/native-tools - Register tool in presentAssistantMessage.ts - Add to edit tool group for file write operations Addresses Issue #10877 --- packages/types/src/tool.ts | 1 + .../presentAssistantMessage.ts | 12 ++ src/core/prompts/tools/native-tools/index.ts | 2 + .../prompts/tools/native-tools/save_image.ts | 41 ++++++ src/core/tools/SaveImageTool.ts | 137 ++++++++++++++++++ src/shared/tools.ts | 5 +- webview-ui/src/components/chat/ChatRow.tsx | 1 + .../src/components/chat/McpExecution.tsx | 10 ++ 8 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 src/core/prompts/tools/native-tools/save_image.ts create mode 100644 src/core/tools/SaveImageTool.ts diff --git a/packages/types/src/tool.ts b/packages/types/src/tool.ts index 147eb24b6cc..ed4dd60f85c 100644 --- a/packages/types/src/tool.ts +++ b/packages/types/src/tool.ts @@ -37,6 +37,7 @@ export const toolNames = [ "update_todo_list", "run_slash_command", "generate_image", + "save_image", "custom_tool", ] as const diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts index 6469ba8a5cc..95d67712761 100644 --- a/src/core/assistant-message/presentAssistantMessage.ts +++ b/src/core/assistant-message/presentAssistantMessage.ts @@ -36,6 
+36,7 @@ import { newTaskTool } from "../tools/NewTaskTool" import { updateTodoListTool } from "../tools/UpdateTodoListTool" import { runSlashCommandTool } from "../tools/RunSlashCommandTool" import { generateImageTool } from "../tools/GenerateImageTool" +import { saveImageTool } from "../tools/SaveImageTool" import { applyDiffTool as applyDiffToolClass } from "../tools/ApplyDiffTool" import { isValidToolName, validateToolUse } from "../tools/validateToolUse" import { codebaseSearchTool } from "../tools/CodebaseSearchTool" @@ -411,6 +412,8 @@ export async function presentAssistantMessage(cline: Task) { return `[${block.name} for '${block.params.command}'${block.params.args ? ` with args: ${block.params.args}` : ""}]` case "generate_image": return `[${block.name} for '${block.params.path}']` + case "save_image": + return `[${block.name} for '${block.params.path}']` default: return `[${block.name}]` } @@ -919,6 +922,14 @@ export async function presentAssistantMessage(cline: Task) { pushToolResult, }) break + case "save_image": + await checkpointSaveAndMark(cline) + await saveImageTool.handle(cline, block as ToolUse<"save_image">, { + askApproval, + handleError, + pushToolResult, + }) + break default: { // Handle unknown/invalid tool names OR custom tools // This is critical for native tool calling where every tool_use MUST have a tool_result @@ -1095,6 +1106,7 @@ function containsXmlToolMarkup(text: string): boolean { "list_files", "new_task", "read_file", + "save_image", "search_and_replace", "search_files", "search_replace", diff --git a/src/core/prompts/tools/native-tools/index.ts b/src/core/prompts/tools/native-tools/index.ts index 4f78729cdc8..d171d839262 100644 --- a/src/core/prompts/tools/native-tools/index.ts +++ b/src/core/prompts/tools/native-tools/index.ts @@ -9,6 +9,7 @@ import codebaseSearch from "./codebase_search" import executeCommand from "./execute_command" import fetchInstructions from "./fetch_instructions" import generateImage from 
"./generate_image" +import saveImage from "./save_image" import listFiles from "./list_files" import newTask from "./new_task" import { createReadFileTool, type ReadFileToolOptions } from "./read_file" @@ -63,6 +64,7 @@ export function getNativeTools(options: NativeToolsOptions = {}): OpenAI.Chat.Ch executeCommand, fetchInstructions, generateImage, + saveImage, listFiles, newTask, createReadFileTool(readFileOptions), diff --git a/src/core/prompts/tools/native-tools/save_image.ts b/src/core/prompts/tools/native-tools/save_image.ts new file mode 100644 index 00000000000..314c8ad0b22 --- /dev/null +++ b/src/core/prompts/tools/native-tools/save_image.ts @@ -0,0 +1,41 @@ +import type OpenAI from "openai" + +const SAVE_IMAGE_DESCRIPTION = `Request to save a base64-encoded image to a file. This tool is useful for saving images that were received from MCP tools or other sources. The image data must be provided as a base64 data URL. + +Parameters: +- path: (required) The file path where the image should be saved (relative to the current workspace directory). The tool will automatically add the appropriate image extension based on the image format if not provided. +- data: (required) The base64-encoded image data URL (e.g., 'data:image/png;base64,...'). Supported formats: PNG, JPG, JPEG, GIF, WEBP, SVG. + +Example: Saving a PNG image +{ "path": "images/screenshot.png", "data": "data:image/png;base64,iVBORw0KGgoAAAANSUhEU..." } + +Example: Saving a JPEG image to a specific location +{ "path": "assets/captured-image", "data": "data:image/jpeg;base64,/9j/4AAQSkZJRg..." 
}` + +const PATH_PARAMETER_DESCRIPTION = `Filesystem path (relative to the workspace) where the image should be saved` + +const DATA_PARAMETER_DESCRIPTION = `Base64-encoded image data URL (e.g., 'data:image/png;base64,...')` + +export default { + type: "function", + function: { + name: "save_image", + description: SAVE_IMAGE_DESCRIPTION, + strict: true, + parameters: { + type: "object", + properties: { + path: { + type: "string", + description: PATH_PARAMETER_DESCRIPTION, + }, + data: { + type: "string", + description: DATA_PARAMETER_DESCRIPTION, + }, + }, + required: ["path", "data"], + additionalProperties: false, + }, + }, +} satisfies OpenAI.Chat.ChatCompletionTool diff --git a/src/core/tools/SaveImageTool.ts b/src/core/tools/SaveImageTool.ts new file mode 100644 index 00000000000..ecf9fbf5b9f --- /dev/null +++ b/src/core/tools/SaveImageTool.ts @@ -0,0 +1,137 @@ +import path from "path" +import fs from "fs/promises" +import * as vscode from "vscode" +import { Task } from "../task/Task" +import { formatResponse } from "../prompts/responses" +import { getReadablePath } from "../../utils/path" +import { isPathOutsideWorkspace } from "../../utils/pathUtils" +import { BaseTool, ToolCallbacks } from "./BaseTool" +import type { ToolUse } from "../../shared/tools" +import { t } from "../../i18n" + +interface SaveImageParams { + path: string + data: string +} + +export class SaveImageTool extends BaseTool<"save_image"> { + readonly name = "save_image" as const + + async execute(params: SaveImageParams, task: Task, callbacks: ToolCallbacks): Promise { + const { path: relPath, data } = params + const { handleError, pushToolResult, askApproval } = callbacks + + // Validate required parameters + if (!relPath) { + task.consecutiveMistakeCount++ + task.recordToolError("save_image") + pushToolResult(await task.sayAndCreateMissingParamError("save_image", "path")) + return + } + + if (!data) { + task.consecutiveMistakeCount++ + task.recordToolError("save_image") + 
pushToolResult(await task.sayAndCreateMissingParamError("save_image", "data")) + return + } + + // Validate access via .rooignore + const accessAllowed = task.rooIgnoreController?.validateAccess(relPath) + if (!accessAllowed) { + await task.say("rooignore_error", relPath) + pushToolResult(formatResponse.rooIgnoreError(relPath)) + return + } + + // Check write protection + const isWriteProtected = task.rooProtectedController?.isWriteProtected(relPath) || false + + const fullPath = path.resolve(task.cwd, relPath) + const isOutsideWorkspace = isPathOutsideWorkspace(fullPath) + + // Validate the image data format + const base64Match = data.match(/^data:image\/(png|jpeg|jpg|gif|webp|svg\+xml);base64,(.+)$/) + if (!base64Match) { + await task.say("error", t("tools:saveImage.invalidDataFormat")) + task.didToolFailInCurrentTurn = true + pushToolResult( + formatResponse.toolError( + "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...').", + ), + ) + return + } + + const imageFormat = base64Match[1] + const base64Data = base64Match[2] + + // Ensure the path has a valid image extension + let finalPath = relPath + if (!finalPath.match(/\.(png|jpg|jpeg|gif|webp|svg)$/i)) { + // Add extension based on the data format + const ext = imageFormat === "jpeg" ? "jpg" : imageFormat === "svg+xml" ? 
"svg" : imageFormat + finalPath = `${finalPath}.${ext}` + } + + const sharedMessageProps = { + tool: "saveImage" as const, + path: getReadablePath(task.cwd, finalPath), + isOutsideWorkspace, + isProtected: isWriteProtected, + } + + try { + task.consecutiveMistakeCount = 0 + + const approvalMessage = JSON.stringify({ + ...sharedMessageProps, + content: `Save image to ${getReadablePath(task.cwd, finalPath)}`, + }) + + const didApprove = await askApproval("tool", approvalMessage, undefined, isWriteProtected) + + if (!didApprove) { + return + } + + // Convert base64 to buffer and save + const imageBuffer = Buffer.from(base64Data, "base64") + + const absolutePath = path.resolve(task.cwd, finalPath) + const directory = path.dirname(absolutePath) + await fs.mkdir(directory, { recursive: true }) + + await fs.writeFile(absolutePath, imageBuffer) + + // Track the file context + if (finalPath) { + await task.fileContextTracker.trackFileContext(finalPath, "roo_edited") + } + + task.didEditFile = true + + task.recordToolUsage("save_image") + + const provider = task.providerRef.deref() + const fullImagePath = path.join(task.cwd, finalPath) + + let imageUri = provider?.convertToWebviewUri?.(fullImagePath) ?? vscode.Uri.file(fullImagePath).toString() + + // Add cache buster to force refresh + const cacheBuster = Date.now() + imageUri = imageUri.includes("?") ? 
`${imageUri}&t=${cacheBuster}` : `${imageUri}?t=${cacheBuster}` + + await task.say("image", JSON.stringify({ imageUri, imagePath: fullImagePath })) + pushToolResult(formatResponse.toolResult(`Image saved to ${getReadablePath(task.cwd, finalPath)}`)) + } catch (error) { + await handleError("saving image", error as Error) + } + } + + override async handlePartial(task: Task, block: ToolUse<"save_image">): Promise { + return + } +} + +export const saveImageTool = new SaveImageTool() diff --git a/src/shared/tools.ts b/src/shared/tools.ts index 01632b27460..4209d1cb608 100644 --- a/src/shared/tools.ts +++ b/src/shared/tools.ts @@ -67,6 +67,7 @@ export const toolParamNames = [ "todos", "prompt", "image", + "data", // save_image parameter for base64 image data "files", // Native protocol parameter for read_file "operations", // search_and_replace parameter for multiple operations "patch", // apply_patch parameter @@ -108,6 +109,7 @@ export type NativeToolArgs = { update_todo_list: { todos: string } use_mcp_tool: { server_name: string; tool_name: string; arguments?: Record } write_to_file: { path: string; content: string } + save_image: { path: string; data: string } // Add more tools as they are migrated to native protocol } @@ -264,6 +266,7 @@ export const TOOL_DISPLAY_NAMES: Record = { update_todo_list: "update todo list", run_slash_command: "run slash command", generate_image: "generate images", + save_image: "save images", custom_tool: "use custom tools", } as const @@ -273,7 +276,7 @@ export const TOOL_GROUPS: Record = { tools: ["read_file", "fetch_instructions", "search_files", "list_files", "codebase_search"], }, edit: { - tools: ["apply_diff", "write_to_file", "generate_image"], + tools: ["apply_diff", "write_to_file", "generate_image", "save_image"], customTools: ["search_and_replace", "search_replace", "edit_file", "apply_patch"], }, browser: { diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 
e71f92dc415..c15d760cb5f 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -1627,6 +1627,7 @@ export const ChatRowContent = ({ server={server} useMcpServer={useMcpServer} alwaysAllowMcp={alwaysAllowMcp} + images={message.images} /> )} diff --git a/webview-ui/src/components/chat/McpExecution.tsx b/webview-ui/src/components/chat/McpExecution.tsx index 9e48552fdc8..c9b62ec3513 100644 --- a/webview-ui/src/components/chat/McpExecution.tsx +++ b/webview-ui/src/components/chat/McpExecution.tsx @@ -16,6 +16,7 @@ import { cn } from "@src/lib/utils" import { Button } from "@src/components/ui" import CodeBlock from "../common/CodeBlock" +import Thumbnails from "../common/Thumbnails" import McpToolRow from "../mcp/McpToolRow" import { Markdown } from "./Markdown" @@ -36,6 +37,7 @@ interface McpExecutionProps { } useMcpServer?: ClineAskUseMcpServer alwaysAllowMcp?: boolean + images?: string[] } export const McpExecution = ({ @@ -47,6 +49,7 @@ export const McpExecution = ({ server, useMcpServer, alwaysAllowMcp = false, + images, }: McpExecutionProps) => { const { t } = useTranslation("mcp") @@ -289,6 +292,13 @@ export const McpExecution = ({ hasArguments={!!(isArguments || useMcpServer?.arguments || argumentsText)} isPartial={status ? status.status !== "completed" : false} /> + + {/* Images section - show thumbnails of returned images */} + {images && images.length > 0 && ( +
+ +
+ )} ) From 7f1cab989fa1ea95f0442cd10474b9baae7f6a43 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Thu, 22 Jan 2026 06:52:52 +0000 Subject: [PATCH 2/4] fix: add i18n translation key and fix security validation path in SaveImageTool --- src/core/tools/SaveImageTool.ts | 30 +++++++++++++++--------------- src/i18n/locales/ca/tools.json | 3 +++ src/i18n/locales/de/tools.json | 3 +++ src/i18n/locales/en/tools.json | 3 +++ src/i18n/locales/es/tools.json | 3 +++ src/i18n/locales/fr/tools.json | 3 +++ src/i18n/locales/hi/tools.json | 3 +++ src/i18n/locales/id/tools.json | 3 +++ src/i18n/locales/it/tools.json | 3 +++ src/i18n/locales/ja/tools.json | 3 +++ src/i18n/locales/ko/tools.json | 3 +++ src/i18n/locales/nl/tools.json | 3 +++ src/i18n/locales/pl/tools.json | 3 +++ src/i18n/locales/pt-BR/tools.json | 3 +++ src/i18n/locales/ru/tools.json | 3 +++ src/i18n/locales/tr/tools.json | 3 +++ src/i18n/locales/vi/tools.json | 3 +++ src/i18n/locales/zh-CN/tools.json | 3 +++ src/i18n/locales/zh-TW/tools.json | 3 +++ 19 files changed, 69 insertions(+), 15 deletions(-) diff --git a/src/core/tools/SaveImageTool.ts b/src/core/tools/SaveImageTool.ts index ecf9fbf5b9f..aac282b5670 100644 --- a/src/core/tools/SaveImageTool.ts +++ b/src/core/tools/SaveImageTool.ts @@ -36,21 +36,7 @@ export class SaveImageTool extends BaseTool<"save_image"> { return } - // Validate access via .rooignore - const accessAllowed = task.rooIgnoreController?.validateAccess(relPath) - if (!accessAllowed) { - await task.say("rooignore_error", relPath) - pushToolResult(formatResponse.rooIgnoreError(relPath)) - return - } - - // Check write protection - const isWriteProtected = task.rooProtectedController?.isWriteProtected(relPath) || false - - const fullPath = path.resolve(task.cwd, relPath) - const isOutsideWorkspace = isPathOutsideWorkspace(fullPath) - - // Validate the image data format + // Validate the image data format first (to determine finalPath) const base64Match = 
data.match(/^data:image\/(png|jpeg|jpg|gif|webp|svg\+xml);base64,(.+)$/) if (!base64Match) { await task.say("error", t("tools:saveImage.invalidDataFormat")) @@ -74,6 +60,20 @@ export class SaveImageTool extends BaseTool<"save_image"> { finalPath = `${finalPath}.${ext}` } + // Validate access via .rooignore (using finalPath after extension is added) + const accessAllowed = task.rooIgnoreController?.validateAccess(finalPath) + if (!accessAllowed) { + await task.say("rooignore_error", finalPath) + pushToolResult(formatResponse.rooIgnoreError(finalPath)) + return + } + + // Check write protection (using finalPath after extension is added) + const isWriteProtected = task.rooProtectedController?.isWriteProtected(finalPath) || false + + const fullPath = path.resolve(task.cwd, finalPath) + const isOutsideWorkspace = isPathOutsideWorkspace(fullPath) + const sharedMessageProps = { tool: "saveImage" as const, path: getReadablePath(task.cwd, finalPath), diff --git a/src/i18n/locales/ca/tools.json b/src/i18n/locales/ca/tools.json index 7e9385abf0b..d0f505b8e2d 100644 --- a/src/i18n/locales/ca/tools.json +++ b/src/i18n/locales/ca/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "Es requereix autenticació de Roo Code Cloud per a la generació d'imatges. Inicia sessió a Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Format de dades d'imatge no vàlid. S'esperava una URL de dades base64 (p. ex., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/de/tools.json b/src/i18n/locales/de/tools.json index 8dc5e93e702..4043722d9ad 100644 --- a/src/i18n/locales/de/tools.json +++ b/src/i18n/locales/de/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "Roo Code Cloud-Authentifizierung ist für die Bildgenerierung erforderlich. Bitte melde dich bei Roo Code Cloud an." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." 
} } diff --git a/src/i18n/locales/en/tools.json b/src/i18n/locales/en/tools.json index 94e1820249b..4f2fc578491 100644 --- a/src/i18n/locales/en/tools.json +++ b/src/i18n/locales/en/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "Roo Code Cloud authentication is required for image generation. Please sign in to Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/es/tools.json b/src/i18n/locales/es/tools.json index 9103643cfc6..077704db9d3 100644 --- a/src/i18n/locales/es/tools.json +++ b/src/i18n/locales/es/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "Se requiere autenticación de Roo Code Cloud para la generación de imágenes. Por favor, inicia sesión en Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/fr/tools.json b/src/i18n/locales/fr/tools.json index df0a1136cbf..6023db8663a 100644 --- a/src/i18n/locales/fr/tools.json +++ b/src/i18n/locales/fr/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "L'authentification Roo Code Cloud est requise pour la génération d'images. Veuillez vous connecter à Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/hi/tools.json b/src/i18n/locales/hi/tools.json index 73c7729a933..7bd5a26c8de 100644 --- a/src/i18n/locales/hi/tools.json +++ b/src/i18n/locales/hi/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "छवि निर्माण के लिए Roo Code Cloud प्रमाणीकरण आवश्यक है। कृपया Roo Code Cloud में साइन इन करें।" } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." 
} } diff --git a/src/i18n/locales/id/tools.json b/src/i18n/locales/id/tools.json index 412711104b8..cf4c5167a1d 100644 --- a/src/i18n/locales/id/tools.json +++ b/src/i18n/locales/id/tools.json @@ -30,5 +30,8 @@ "roo": { "authRequired": "Autentikasi Roo Code Cloud diperlukan untuk menghasilkan gambar. Silakan masuk ke Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/it/tools.json b/src/i18n/locales/it/tools.json index 24022f52ae4..f50de5bb977 100644 --- a/src/i18n/locales/it/tools.json +++ b/src/i18n/locales/it/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "L'autenticazione Roo Code Cloud è richiesta per la generazione di immagini. Accedi a Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/ja/tools.json b/src/i18n/locales/ja/tools.json index f79ce9ac2f5..c135355f844 100644 --- a/src/i18n/locales/ja/tools.json +++ b/src/i18n/locales/ja/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "画像生成にはRoo Code Cloud認証が必要です。Roo Code Cloudにサインインしてください。" } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/ko/tools.json b/src/i18n/locales/ko/tools.json index 88cf28cdcc1..0eff7b6967f 100644 --- a/src/i18n/locales/ko/tools.json +++ b/src/i18n/locales/ko/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "이미지 생성에는 Roo Code Cloud 인증이 필요합니다. Roo Code Cloud에 로그인하세요." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." 
} } diff --git a/src/i18n/locales/nl/tools.json b/src/i18n/locales/nl/tools.json index d696a19937e..9108cde9967 100644 --- a/src/i18n/locales/nl/tools.json +++ b/src/i18n/locales/nl/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "Roo Code Cloud-authenticatie is vereist voor het genereren van afbeeldingen. Log in bij Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/pl/tools.json b/src/i18n/locales/pl/tools.json index d7c0d18c148..3999986ea90 100644 --- a/src/i18n/locales/pl/tools.json +++ b/src/i18n/locales/pl/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "Uwierzytelnienie Roo Code Cloud jest wymagane do generowania obrazów. Zaloguj się do Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/pt-BR/tools.json b/src/i18n/locales/pt-BR/tools.json index e8da6dae7f6..20d72a6c117 100644 --- a/src/i18n/locales/pt-BR/tools.json +++ b/src/i18n/locales/pt-BR/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "A autenticação do Roo Code Cloud é necessária para geração de imagens. Faça login no Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/ru/tools.json b/src/i18n/locales/ru/tools.json index 0e4e1466055..d13c3801950 100644 --- a/src/i18n/locales/ru/tools.json +++ b/src/i18n/locales/ru/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "Для генерации изображений требуется аутентификация Roo Code Cloud. Войдите в Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." 
} } diff --git a/src/i18n/locales/tr/tools.json b/src/i18n/locales/tr/tools.json index 2f4263b20da..1a09c8e9528 100644 --- a/src/i18n/locales/tr/tools.json +++ b/src/i18n/locales/tr/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "Görüntü oluşturma için Roo Code Cloud kimlik doğrulaması gereklidir. Lütfen Roo Code Cloud'da oturum açın." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/vi/tools.json b/src/i18n/locales/vi/tools.json index 4bccf1ff48c..2855519da37 100644 --- a/src/i18n/locales/vi/tools.json +++ b/src/i18n/locales/vi/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "Yêu cầu xác thực Roo Code Cloud để tạo hình ảnh. Vui lòng đăng nhập vào Roo Code Cloud." } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/zh-CN/tools.json b/src/i18n/locales/zh-CN/tools.json index 886706ff85f..88b60f0a428 100644 --- a/src/i18n/locales/zh-CN/tools.json +++ b/src/i18n/locales/zh-CN/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "图像生成需要 Roo Code Cloud 认证。请登录 Roo Code Cloud。" } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." } } diff --git a/src/i18n/locales/zh-TW/tools.json b/src/i18n/locales/zh-TW/tools.json index ebd0f104ad1..618f5e6a9b9 100644 --- a/src/i18n/locales/zh-TW/tools.json +++ b/src/i18n/locales/zh-TW/tools.json @@ -27,5 +27,8 @@ "roo": { "authRequired": "圖像生成需要 Roo Code Cloud 認證。請登入 Roo Code Cloud。" } + }, + "saveImage": { + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...')." 
} } From 953f037c7ffe07fc76a1cebe33f3f3a76fa8ef4a Mon Sep 17 00:00:00 2001 From: Roo Code Date: Thu, 22 Jan 2026 09:22:53 +0000 Subject: [PATCH 3/4] fix: MCP image preview thumbnails and save_image tool functionality - Feature 1: Fix image thumbnails not displaying in MCP tool responses - Add mcpResponseImages prop to ChatRow to pass images from mcp_server_response - Find corresponding mcp_server_response message in ChatView and pass its images - McpExecution now receives images from the response message instead of ask message - Feature 2: Fix save_image tool not receiving image data - Include base64 data URLs in MCP tool result text response - Agent now receives image data in a format usable with save_image tool - Images are wrapped in tags for easy parsing - Update tests to match new behavior --- src/core/tools/UseMcpToolTool.ts | 16 +++- .../tools/__tests__/useMcpToolTool.spec.ts | 81 ++++++++++--------- webview-ui/src/components/chat/ChatRow.tsx | 4 +- webview-ui/src/components/chat/ChatView.tsx | 10 +++ 4 files changed, 70 insertions(+), 41 deletions(-) diff --git a/src/core/tools/UseMcpToolTool.ts b/src/core/tools/UseMcpToolTool.ts index 7546606fd77..6e819825afc 100644 --- a/src/core/tools/UseMcpToolTool.ts +++ b/src/core/tools/UseMcpToolTool.ts @@ -319,9 +319,19 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> { response: outputText || (images.length > 0 ? `[${images.length} image(s)]` : ""), }) - toolResultPretty = - (toolResult.isError ? "Error:\n" : "") + - (outputText || (images.length > 0 ? 
`[${images.length} image(s) received]` : "")) + // Build the result text + let resultText = outputText || "" + + // Include image data URLs in the text response so the agent can use them with save_image tool + if (images.length > 0) { + const imageDataSection = images + .map((img, index) => `\n${img}\n`) + .join("\n\n") + const imageInfo = `\n\n[${images.length} image(s) received - data URLs provided below for use with save_image tool]\n\n${imageDataSection}` + resultText = resultText ? resultText + imageInfo : imageInfo.trim() + } + + toolResultPretty = (toolResult.isError ? "Error:\n" : "") + resultText } // Send completion status diff --git a/src/core/tools/__tests__/useMcpToolTool.spec.ts b/src/core/tools/__tests__/useMcpToolTool.spec.ts index 3a575e62186..f44266c7632 100644 --- a/src/core/tools/__tests__/useMcpToolTool.spec.ts +++ b/src/core/tools/__tests__/useMcpToolTool.spec.ts @@ -618,14 +618,12 @@ describe("useMcpToolTool", () => { mockProviderRef.deref.mockReturnValue({ getMcpHub: () => ({ callTool: vi.fn().mockResolvedValue(mockToolResult), - getAllServers: vi - .fn() - .mockReturnValue([ - { - name: "figma-server", - tools: [{ name: "get_screenshot", description: "Get screenshot" }], - }, - ]), + getAllServers: vi.fn().mockReturnValue([ + { + name: "figma-server", + tools: [{ name: "get_screenshot", description: "Get screenshot" }], + }, + ]), }), postMessageToWebview: vi.fn(), }) @@ -637,9 +635,13 @@ describe("useMcpToolTool", () => { }) expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started") - expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "[1 image(s) received]", [ - "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ", - ]) + expect(mockTask.say).toHaveBeenCalledWith( + "mcp_server_response", + expect.stringContaining( + "[1 image(s) received - data URLs provided below for use with save_image tool]", + ), + ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ"], + ) 
expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 1 image(s)")) }) @@ -693,9 +695,11 @@ describe("useMcpToolTool", () => { }) expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started") - expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Node name: Button", [ - "data:image/png;base64,base64imagedata", - ]) + expect(mockTask.say).toHaveBeenCalledWith( + "mcp_server_response", + expect.stringContaining("Node name: Button"), + ["data:image/png;base64,base64imagedata"], + ) expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 1 image(s)")) }) @@ -732,14 +736,12 @@ describe("useMcpToolTool", () => { mockProviderRef.deref.mockReturnValue({ getMcpHub: () => ({ callTool: vi.fn().mockResolvedValue(mockToolResult), - getAllServers: vi - .fn() - .mockReturnValue([ - { - name: "figma-server", - tools: [{ name: "get_screenshot", description: "Get screenshot" }], - }, - ]), + getAllServers: vi.fn().mockReturnValue([ + { + name: "figma-server", + tools: [{ name: "get_screenshot", description: "Get screenshot" }], + }, + ]), }), postMessageToWebview: vi.fn(), }) @@ -751,9 +753,13 @@ describe("useMcpToolTool", () => { }) // Should not double-prefix the data URL - expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "[1 image(s) received]", [ - "data:image/jpeg;base64,/9j/4AAQSkZJRg==", - ]) + expect(mockTask.say).toHaveBeenCalledWith( + "mcp_server_response", + expect.stringContaining( + "[1 image(s) received - data URLs provided below for use with save_image tool]", + ), + ["data:image/jpeg;base64,/9j/4AAQSkZJRg=="], + ) }) it("should handle multiple images in response", async () => { @@ -794,14 +800,12 @@ describe("useMcpToolTool", () => { mockProviderRef.deref.mockReturnValue({ getMcpHub: () => ({ callTool: vi.fn().mockResolvedValue(mockToolResult), - getAllServers: vi - .fn() - .mockReturnValue([ - { - name: "figma-server", - tools: [{ name: "get_screenshots", description: "Get 
screenshots" }], - }, - ]), + getAllServers: vi.fn().mockReturnValue([ + { + name: "figma-server", + tools: [{ name: "get_screenshots", description: "Get screenshots" }], + }, + ]), }), postMessageToWebview: vi.fn(), }) @@ -812,10 +816,13 @@ describe("useMcpToolTool", () => { pushToolResult: mockPushToolResult, }) - expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "[2 image(s) received]", [ - "data:image/png;base64,image1data", - "data:image/png;base64,image2data", - ]) + expect(mockTask.say).toHaveBeenCalledWith( + "mcp_server_response", + expect.stringContaining( + "[2 image(s) received - data URLs provided below for use with save_image tool]", + ), + ["data:image/png;base64,image1data", "data:image/png;base64,image2data"], + ) expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 2 image(s)")) }) }) diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index c15d760cb5f..66f9ba75f5f 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -119,6 +119,7 @@ interface ChatRowProps { onFollowUpUnmount?: () => void isFollowUpAnswered?: boolean isFollowUpAutoApprovalPaused?: boolean + mcpResponseImages?: string[] editable?: boolean hasCheckpoint?: boolean } @@ -173,6 +174,7 @@ export const ChatRowContent = ({ onBatchFileResponse, isFollowUpAnswered, isFollowUpAutoApprovalPaused, + mcpResponseImages, }: ChatRowContentProps) => { const { t, i18n } = useTranslation() @@ -1627,7 +1629,7 @@ export const ChatRowContent = ({ server={server} useMcpServer={useMcpServer} alwaysAllowMcp={alwaysAllowMcp} - images={message.images} + images={mcpResponseImages ?? 
message.images} /> )} diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index 81f6cbebf66..fdf0ff648f9 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -1327,6 +1327,15 @@ const ChatViewComponent: React.ForwardRefRenderFunction } + // For use_mcp_server ask messages, find the corresponding mcp_server_response to get images + let mcpResponseImages: string[] | undefined + if (messageOrGroup.type === "ask" && messageOrGroup.ask === "use_mcp_server") { + const mcpResponse = modifiedMessages.find( + (m) => m.ts > messageOrGroup.ts && m.say === "mcp_server_response", + ) + mcpResponseImages = mcpResponse?.images + } + // regular message return ( Date: Thu, 22 Jan 2026 11:27:05 +0000 Subject: [PATCH 4/4] fix: implement intermediate file persistence for MCP images - Save MCP tool images to temp storage instead of passing raw base64 to LLM - Add source_path parameter to save_image tool for copying from temp storage - This prevents data corruption and reduces token costs significantly - Images are still stored as data URLs in message.images for UI thumbnails - Update tests to match new behavior with file paths instead of base64 in text --- .../prompts/tools/native-tools/save_image.ts | 36 +++-- src/core/tools/SaveImageTool.ts | 151 +++++++++++++++++- src/core/tools/UseMcpToolTool.ts | 120 +++++++++++++- .../tools/__tests__/useMcpToolTool.spec.ts | 38 ++++- src/i18n/locales/en/tools.json | 5 +- src/shared/tools.ts | 3 +- 6 files changed, 327 insertions(+), 26 deletions(-) diff --git a/src/core/prompts/tools/native-tools/save_image.ts b/src/core/prompts/tools/native-tools/save_image.ts index 314c8ad0b22..49165e5e24b 100644 --- a/src/core/prompts/tools/native-tools/save_image.ts +++ b/src/core/prompts/tools/native-tools/save_image.ts @@ -1,27 +1,36 @@ import type OpenAI from "openai" -const SAVE_IMAGE_DESCRIPTION = `Request to save a base64-encoded image to a file. 
This tool is useful for saving images that were received from MCP tools or other sources. The image data must be provided as a base64 data URL. +const SAVE_IMAGE_DESCRIPTION = `Request to save an image to a file. This tool supports two methods: + +1. **Using source_path (PREFERRED for MCP tools)**: When you receive images from MCP tools like Figma, the images are automatically saved to temporary storage and you receive file paths. Use the source_path parameter to copy the image to your desired location. This is efficient and avoids data corruption. + +2. **Using data (for base64 data URLs)**: For images provided as base64 data URLs from other sources. Parameters: -- path: (required) The file path where the image should be saved (relative to the current workspace directory). The tool will automatically add the appropriate image extension based on the image format if not provided. -- data: (required) The base64-encoded image data URL (e.g., 'data:image/png;base64,...'). Supported formats: PNG, JPG, JPEG, GIF, WEBP, SVG. +- path: (required) The destination file path where the image should be saved (relative to the current workspace directory). The tool will automatically add the appropriate image extension based on the source image format if not provided. +- source_path: (optional) The absolute path to a source image file (typically from MCP tool temporary storage). Use this for images received from MCP tools - the path is provided in the tool response. PREFERRED over data. +- data: (optional) Base64-encoded image data URL (e.g., 'data:image/png;base64,...'). Supported formats: PNG, JPG, JPEG, GIF, WEBP, SVG. Only use if source_path is not available. + +NOTE: Either source_path OR data must be provided. -Example: Saving a PNG image -{ "path": "images/screenshot.png", "data": "data:image/png;base64,iVBORw0KGgoAAAANSUhEU..." 
} +Example: Saving an image from MCP tool (PREFERRED) +{ "path": "images/figma-screenshot.png", "source_path": "/path/to/temp/figma_get_screenshot_123.png" } -Example: Saving a JPEG image to a specific location -{ "path": "assets/captured-image", "data": "data:image/jpeg;base64,/9j/4AAQSkZJRg..." }` +Example: Saving a base64 image (fallback) +{ "path": "images/screenshot.png", "data": "data:image/png;base64,iVBORw0KGgoAAAANSUhEU..." }` -const PATH_PARAMETER_DESCRIPTION = `Filesystem path (relative to the workspace) where the image should be saved` +const PATH_PARAMETER_DESCRIPTION = `Destination filesystem path (relative to the workspace) where the image should be saved` -const DATA_PARAMETER_DESCRIPTION = `Base64-encoded image data URL (e.g., 'data:image/png;base64,...')` +const SOURCE_PATH_PARAMETER_DESCRIPTION = `Absolute path to a source image file (from MCP tool temporary storage). PREFERRED method for saving images from MCP tools.` + +const DATA_PARAMETER_DESCRIPTION = `Base64-encoded image data URL (e.g., 'data:image/png;base64,...'). 
Only use if source_path is not available.` export default { type: "function", function: { name: "save_image", description: SAVE_IMAGE_DESCRIPTION, - strict: true, + strict: false, // Changed to non-strict to allow optional parameters parameters: { type: "object", properties: { @@ -29,13 +38,16 @@ export default { type: "string", description: PATH_PARAMETER_DESCRIPTION, }, + source_path: { + type: "string", + description: SOURCE_PATH_PARAMETER_DESCRIPTION, + }, data: { type: "string", description: DATA_PARAMETER_DESCRIPTION, }, }, - required: ["path", "data"], - additionalProperties: false, + required: ["path"], }, }, } satisfies OpenAI.Chat.ChatCompletionTool diff --git a/src/core/tools/SaveImageTool.ts b/src/core/tools/SaveImageTool.ts index aac282b5670..fa6c8501f17 100644 --- a/src/core/tools/SaveImageTool.ts +++ b/src/core/tools/SaveImageTool.ts @@ -5,20 +5,22 @@ import { Task } from "../task/Task" import { formatResponse } from "../prompts/responses" import { getReadablePath } from "../../utils/path" import { isPathOutsideWorkspace } from "../../utils/pathUtils" +import { fileExistsAtPath } from "../../utils/fs" import { BaseTool, ToolCallbacks } from "./BaseTool" import type { ToolUse } from "../../shared/tools" import { t } from "../../i18n" interface SaveImageParams { path: string - data: string + data?: string + source_path?: string } export class SaveImageTool extends BaseTool<"save_image"> { readonly name = "save_image" as const async execute(params: SaveImageParams, task: Task, callbacks: ToolCallbacks): Promise<void> { - const { path: relPath, data } = params + const { path: relPath, data, source_path: sourcePath } = params const { handleError, pushToolResult, askApproval } = callbacks // Validate required parameters @@ -29,15 +31,35 @@ export class SaveImageTool extends BaseTool<"save_image"> { return } - if (!data) { + // Need either source_path or data + if (!sourcePath && !data) { task.consecutiveMistakeCount++ 
task.recordToolError("save_image") - 
pushToolResult(await task.sayAndCreateMissingParamError("save_image", "data")) + await task.say( + "error", + t("tools:saveImage.missingSourceOrData", { + defaultValue: + "Either 'source_path' or 'data' parameter is required. Use 'source_path' for images from MCP tools, or 'data' for base64 data URLs.", + }), + ) + task.didToolFailInCurrentTurn = true + pushToolResult( + formatResponse.toolError( + "Either 'source_path' or 'data' parameter is required. Use 'source_path' for images from MCP tools, or 'data' for base64 data URLs.", + ), + ) + return + } + + // If source_path is provided, use it to copy the file + if (sourcePath) { + await this.copyFromSourcePath(task, sourcePath, relPath, callbacks) return } + // Otherwise, use the data parameter (base64 data URL) // Validate the image data format first (to determine finalPath) - const base64Match = data.match(/^data:image\/(png|jpeg|jpg|gif|webp|svg\+xml);base64,(.+)$/) + const base64Match = data!.match(/^data:image\/(png|jpeg|jpg|gif|webp|svg\+xml);base64,(.+)$/) if (!base64Match) { await task.say("error", t("tools:saveImage.invalidDataFormat")) task.didToolFailInCurrentTurn = true @@ -129,6 +151,125 @@ export class SaveImageTool extends BaseTool<"save_image"> { } } + /** + * Copy an image from a source path (typically from MCP temp storage) to the destination path. + * This is the preferred method for saving images from MCP tools as it avoids passing + * raw base64 through LLM context. 
+ */ + private async copyFromSourcePath( + task: Task, + sourcePath: string, + destRelPath: string, + callbacks: ToolCallbacks, + ): Promise<void> { + const { handleError, pushToolResult, askApproval } = callbacks + + try { + // Check if source file exists + const sourceExists = await fileExistsAtPath(sourcePath) + if (!sourceExists) { + task.consecutiveMistakeCount++ + task.recordToolError("save_image") + await task.say( + "error", + t("tools:saveImage.sourceNotFound", { + defaultValue: `Source image not found at path: ${sourcePath}`, + path: sourcePath, + }), + ) + task.didToolFailInCurrentTurn = true + pushToolResult(formatResponse.toolError(`Source image not found at path: ${sourcePath}`)) + return + } + + // Get extension from source file + const sourceExt = path.extname(sourcePath).toLowerCase() + const validExtensions = [".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg"] + + if (!validExtensions.includes(sourceExt)) { + task.consecutiveMistakeCount++ + task.recordToolError("save_image") + await task.say("error", t("tools:saveImage.invalidSourceFormat")) + task.didToolFailInCurrentTurn = true + pushToolResult( + formatResponse.toolError( + `Invalid source image format. 
Supported formats: ${validExtensions.join(", ")}`, + ), + ) + return + } + + // Ensure the destination path has the correct extension + let finalPath = destRelPath + if (!finalPath.match(/\.(png|jpg|jpeg|gif|webp|svg)$/i)) { + finalPath = `${finalPath}${sourceExt}` + } + + // Validate access via .rooignore + const accessAllowed = task.rooIgnoreController?.validateAccess(finalPath) + if (!accessAllowed) { + await task.say("rooignore_error", finalPath) + pushToolResult(formatResponse.rooIgnoreError(finalPath)) + return + } + + // Check write protection + const isWriteProtected = task.rooProtectedController?.isWriteProtected(finalPath) || false + + const fullPath = path.resolve(task.cwd, finalPath) + const isOutsideWorkspace = isPathOutsideWorkspace(fullPath) + + const sharedMessageProps = { + tool: "saveImage" as const, + path: getReadablePath(task.cwd, finalPath), + isOutsideWorkspace, + isProtected: isWriteProtected, + } + + task.consecutiveMistakeCount = 0 + + const approvalMessage = JSON.stringify({ + ...sharedMessageProps, + content: `Save image from ${sourcePath} to ${getReadablePath(task.cwd, finalPath)}`, + }) + + const didApprove = await askApproval("tool", approvalMessage, undefined, isWriteProtected) + + if (!didApprove) { + return + } + + // Create destination directory and copy file + const absolutePath = path.resolve(task.cwd, finalPath) + const directory = path.dirname(absolutePath) + await fs.mkdir(directory, { recursive: true }) + + await fs.copyFile(sourcePath, absolutePath) + + // Track the file context + if (finalPath) { + await task.fileContextTracker.trackFileContext(finalPath, "roo_edited") + } + + task.didEditFile = true + task.recordToolUsage("save_image") + + const provider = task.providerRef.deref() + const fullImagePath = path.join(task.cwd, finalPath) + + let imageUri = provider?.convertToWebviewUri?.(fullImagePath) ?? 
vscode.Uri.file(fullImagePath).toString() + + // Add cache buster to force refresh + const cacheBuster = Date.now() + imageUri = imageUri.includes("?") ? `${imageUri}&t=${cacheBuster}` : `${imageUri}?t=${cacheBuster}` + + await task.say("image", JSON.stringify({ imageUri, imagePath: fullImagePath })) + pushToolResult(formatResponse.toolResult(`Image saved to ${getReadablePath(task.cwd, finalPath)}`)) + } catch (error) { + await handleError("saving image", error as Error) + } + } + override async handlePartial(task: Task, block: ToolUse<"save_image">): Promise<void> { return } diff --git a/src/core/tools/UseMcpToolTool.ts b/src/core/tools/UseMcpToolTool.ts index 6e819825afc..4534a8fed89 100644 --- a/src/core/tools/UseMcpToolTool.ts +++ b/src/core/tools/UseMcpToolTool.ts @@ -1,9 +1,12 @@ +import path from "path" +import fs from "fs/promises" import type { ClineAskUseMcpServer, McpExecutionStatus } from "@roo-code/types" import { Task } from "../task/Task" import { formatResponse } from "../prompts/responses" import { t } from "../../i18n" import type { ToolUse } from "../../shared/tools" +import { getTaskDirectoryPath } from "../../utils/storage" import { BaseTool, ToolCallbacks } from "./BaseTool" @@ -322,12 +325,17 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> { // Build the result text let resultText = outputText || "" - // Include image data URLs in the text response so the agent can use them with save_image tool + // If there are images, save them to temp storage and provide file paths to the LLM + // This avoids passing raw base64 through LLM context which causes corruption and high costs if (images.length > 0) { - const imageDataSection = images - .map((img, index) => `\n${img}\n`) + const savedImagePaths = await this.saveImagesToTempStorage(task, images, serverName, toolName) + const imagePathsSection = savedImagePaths + .map( + (imgPath, index) => + `\n ${imgPath}\n`, + ) .join("\n\n") - const imageInfo = `\n\n[${images.length} 
image(s) received 
- data URLs provided below for use with save_image tool]\n\n${imageDataSection}` + const imageInfo = `\n\n[${images.length} image(s) received and saved to temporary storage. Use save_image tool with source_path to save to your desired location.]\n\n${imagePathsSection}` resultText = resultText ? resultText + imageInfo : imageInfo.trim() } @@ -353,6 +361,110 @@ export class UseMcpToolTool extends BaseTool<"use_mcp_tool"> { await task.say("mcp_server_response", toolResultPretty, images) pushToolResult(formatResponse.toolResult(toolResultPretty, images)) } + + /** + * Save images to task-specific temp storage and return file paths. + * This allows passing file paths to the LLM instead of raw base64 data, + * which prevents data corruption and reduces token costs. + */ + private async saveImagesToTempStorage( + task: Task, + images: string[], + serverName: string, + toolName: string, + ): Promise<string[]> { + const savedPaths: string[] = [] + + try { + const provider = task.providerRef.deref() + if (!provider) { + // Fall back to using task.cwd as temp location + return this.saveImagesToFallbackLocation(task, images, serverName, toolName) + } + + const globalStoragePath = provider.context?.globalStorageUri?.fsPath + if (!globalStoragePath) { + return this.saveImagesToFallbackLocation(task, images, serverName, toolName) + } + + // Create a temp directory for MCP images within the task directory + const taskDir = await getTaskDirectoryPath(globalStoragePath, task.taskId) + const mcpImagesDir = path.join(taskDir, "mcp_images") + await fs.mkdir(mcpImagesDir, { recursive: true }) + + const timestamp = Date.now() + + for (let i = 0; i < images.length; i++) { + const imageDataUrl = images[i] + const { format, data } = this.parseImageDataUrl(imageDataUrl) + + if (data) { + const filename = `${serverName}_${toolName}_${timestamp}_${i + 1}.${format}` + const filePath = path.join(mcpImagesDir, filename) + + const imageBuffer = Buffer.from(data, "base64") + await 
fs.writeFile(filePath, 
imageBuffer) + + savedPaths.push(filePath) + } + } + } catch (error) { + console.error("Error saving images to temp storage:", error) + // Best effort: the error is only logged; return the paths saved so far (possibly none) + } + + return savedPaths + } + + /** + * Fallback method to save images to workspace .roo/temp directory + */ + private async saveImagesToFallbackLocation( + task: Task, + images: string[], + serverName: string, + toolName: string, + ): Promise<string[]> { + const savedPaths: string[] = [] + + try { + const tempDir = path.join(task.cwd, ".roo", "temp", "mcp_images") + await fs.mkdir(tempDir, { recursive: true }) + + const timestamp = Date.now() + + for (let i = 0; i < images.length; i++) { + const imageDataUrl = images[i] + const { format, data } = this.parseImageDataUrl(imageDataUrl) + + if (data) { + const filename = `${serverName}_${toolName}_${timestamp}_${i + 1}.${format}` + const filePath = path.join(tempDir, filename) + + const imageBuffer = Buffer.from(data, "base64") + await fs.writeFile(filePath, imageBuffer) + + savedPaths.push(filePath) + } + } + } catch (error) { + console.error("Error saving images to fallback location:", error) + } + + return savedPaths + } + + /** + * Parse a data URL to extract format and base64 data + */ + private parseImageDataUrl(dataUrl: string): { format: string; data: string | null } { + const match = dataUrl.match(/^data:image\/(png|jpeg|jpg|gif|webp|svg\+xml);base64,(.+)$/) + if (match) { + const format = match[1] === "jpeg" ? "jpg" : match[1] === "svg+xml" ? 
"svg" : match[1] + return { format, data: match[2] } + } + return { format: "png", data: null } + } } export const useMcpToolTool = new UseMcpToolTool() diff --git a/src/core/tools/__tests__/useMcpToolTool.spec.ts b/src/core/tools/__tests__/useMcpToolTool.spec.ts index f44266c7632..fc81b17978a 100644 --- a/src/core/tools/__tests__/useMcpToolTool.spec.ts +++ b/src/core/tools/__tests__/useMcpToolTool.spec.ts @@ -4,6 +4,19 @@ import { useMcpToolTool } from "../UseMcpToolTool" import { Task } from "../../task/Task" import { ToolUse } from "../../../shared/tools" +// Mock fs/promises +vi.mock("fs/promises", () => ({ + default: { + mkdir: vi.fn().mockResolvedValue(undefined), + writeFile: vi.fn().mockResolvedValue(undefined), + }, +})) + +// Mock storage utils +vi.mock("../../../utils/storage", () => ({ + getTaskDirectoryPath: vi.fn().mockResolvedValue("/mock/storage/tasks/test-task"), +})) + // Mock dependencies vi.mock("../../prompts/responses", () => ({ formatResponse: { @@ -62,6 +75,11 @@ describe("useMcpToolTool", () => { getAllServers: vi.fn().mockReturnValue([]), }), postMessageToWebview: vi.fn(), + context: { + globalStorageUri: { + fsPath: "/mock/global/storage", + }, + }, }), } @@ -73,6 +91,8 @@ describe("useMcpToolTool", () => { ask: vi.fn(), lastMessageTs: 123456789, providerRef: mockProviderRef, + taskId: "test-task-123", + cwd: "/test/workspace", } }) @@ -638,10 +658,16 @@ describe("useMcpToolTool", () => { expect(mockTask.say).toHaveBeenCalledWith( "mcp_server_response", expect.stringContaining( - "[1 image(s) received - data URLs provided below for use with save_image tool]", + "[1 image(s) received and saved to temporary storage. 
Use save_image tool with source_path to save to your desired location.]", ), ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ"], ) + // Text response should contain source_path XML tags, not raw base64 + expect(mockTask.say).toHaveBeenCalledWith( + "mcp_server_response", + expect.stringContaining("<source_path>"), + expect.anything(), + ) expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 1 image(s)")) }) @@ -700,6 +726,12 @@ describe("useMcpToolTool", () => { expect.stringContaining("Node name: Button"), ["data:image/png;base64,base64imagedata"], ) + // Text response should contain source_path, not raw base64 + expect(mockTask.say).toHaveBeenCalledWith( + "mcp_server_response", + expect.stringContaining("<source_path>"), + expect.anything(), + ) expect(mockPushToolResult).toHaveBeenCalledWith(expect.stringContaining("with 1 image(s)")) }) @@ -756,7 +788,7 @@ describe("useMcpToolTool", () => { expect(mockTask.say).toHaveBeenCalledWith( "mcp_server_response", expect.stringContaining( - "[1 image(s) received - data URLs provided below for use with save_image tool]", + "[1 image(s) received and saved to temporary storage. Use save_image tool with source_path to save to your desired location.]", ), ["data:image/jpeg;base64,/9j/4AAQSkZJRg=="], ) @@ -819,7 +851,7 @@ describe("useMcpToolTool", () => { expect(mockTask.say).toHaveBeenCalledWith( "mcp_server_response", expect.stringContaining( - "[2 image(s) received - data URLs provided below for use with save_image tool]", + "[2 image(s) received and saved to temporary storage. Use save_image tool with source_path to save to your desired location.]", ), ["data:image/png;base64,image1data", "data:image/png;base64,image2data"], ) diff --git a/src/i18n/locales/en/tools.json b/src/i18n/locales/en/tools.json index 4f2fc578491..75ba162dd4d 100644 --- a/src/i18n/locales/en/tools.json +++ b/src/i18n/locales/en/tools.json @@ -29,6 +29,9 @@ } }, "saveImage": { - "invalidDataFormat": "Invalid image data format. 
Expected a base64 data URL (e.g., 'data:image/png;base64,...')." + "invalidDataFormat": "Invalid image data format. Expected a base64 data URL (e.g., 'data:image/png;base64,...').", + "missingSourceOrData": "Either 'source_path' or 'data' parameter is required. Use 'source_path' for images from MCP tools, or 'data' for base64 data URLs.", + "sourceNotFound": "Source image not found at path: {{path}}", + "invalidSourceFormat": "Invalid source image format. Supported formats: PNG, JPG, JPEG, GIF, WEBP, SVG." } } diff --git a/src/shared/tools.ts b/src/shared/tools.ts index 4209d1cb608..ed5b3ef6c8e 100644 --- a/src/shared/tools.ts +++ b/src/shared/tools.ts @@ -68,6 +68,7 @@ export const toolParamNames = [ "prompt", "image", "data", // save_image parameter for base64 image data + "source_path", // save_image parameter for copying from temp storage "files", // Native protocol parameter for read_file "operations", // search_and_replace parameter for multiple operations "patch", // apply_patch parameter @@ -109,7 +110,7 @@ export type NativeToolArgs = { update_todo_list: { todos: string } use_mcp_tool: { server_name: string; tool_name: string; arguments?: Record } write_to_file: { path: string; content: string } - save_image: { path: string; data: string } + save_image: { path: string; data?: string; source_path?: string } // Add more tools as they are migrated to native protocol }