diff --git a/src/core/webview/__tests__/diagnosticsHandler.spec.ts b/src/core/webview/__tests__/diagnosticsHandler.spec.ts index 27593b56bf0..75acd68f21f 100644 --- a/src/core/webview/__tests__/diagnosticsHandler.spec.ts +++ b/src/core/webview/__tests__/diagnosticsHandler.spec.ts @@ -48,6 +48,12 @@ import * as vscode from "vscode" import * as fs from "fs/promises" import * as fsUtils from "../../../utils/fs" import { generateErrorDiagnostics } from "../diagnosticsHandler" +import { redactDiagnosticsData } from "../../../utils/redact" + +// Mock the redact module +vi.mock("../../../utils/redact", () => ({ + redactDiagnosticsData: vi.fn((data) => data), +})) describe("generateErrorDiagnostics", () => { const mockLog = vi.fn() @@ -85,7 +91,7 @@ describe("generateErrorDiagnostics", () => { // taskId.slice(0, 8) = "test-tas" from "test-task-id" expect(String(writtenPath)).toContain("roo-diagnostics-test-tas") expect(String(writtenContent)).toContain( - "// Please share this file with Roo Code Support (support@roocode.com) to diagnose the issue faster", + "// Sensitive data (API keys, tokens, secrets) has been automatically redacted.", ) expect(String(writtenContent)).toContain('"error":') expect(String(writtenContent)).toContain('"history":') @@ -170,6 +176,36 @@ describe("generateErrorDiagnostics", () => { expect(String(writtenContent)).toContain('"history": []') }) + it("calls redactDiagnosticsData before writing the file", async () => { + vi.mocked(fsUtils.fileExistsAtPath).mockResolvedValue(true as any) + vi.mocked(fs.readFile).mockResolvedValue('[{"role": "user", "content": "test"}]' as any) + + await generateErrorDiagnostics({ + taskId: "test-task-id", + globalStoragePath: "/mock/global/storage", + values: { + timestamp: "2025-01-01T00:00:00.000Z", + version: "1.0.0", + provider: "test", + model: "test", + details: "error", + }, + log: mockLog, + }) + + // Verify redactDiagnosticsData was called with the diagnostics object + 
expect(redactDiagnosticsData).toHaveBeenCalledTimes(1) + expect(redactDiagnosticsData).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.objectContaining({ + version: "1.0.0", + provider: "test", + }), + history: expect.any(Array), + }), + ) + }) + it("returns error result when file write fails", async () => { vi.mocked(fsUtils.fileExistsAtPath).mockResolvedValue(false as any) vi.mocked(fs.writeFile).mockRejectedValue(new Error("Write failed")) diff --git a/src/core/webview/diagnosticsHandler.ts b/src/core/webview/diagnosticsHandler.ts index 212ddbc5dee..f65e16cb0c7 100644 --- a/src/core/webview/diagnosticsHandler.ts +++ b/src/core/webview/diagnosticsHandler.ts @@ -5,6 +5,7 @@ import * as vscode from "vscode" import { getTaskDirectoryPath } from "../../utils/storage" import { fileExistsAtPath } from "../../utils/fs" +import { redactDiagnosticsData } from "../../utils/redact" export interface ErrorDiagnosticsValues { timestamp?: string @@ -63,11 +64,16 @@ export async function generateErrorDiagnostics(params: GenerateDiagnosticsParams history, } + // Redact sensitive information (API keys, tokens, secrets) from the + // diagnostics payload before writing it to a file users may share. 
+ const redactedDiagnostics = redactDiagnosticsData(diagnostics) + // Prepend human-readable guidance comments before the JSON payload const headerComment = "// Please share this file with Roo Code Support (support@roocode.com) to diagnose the issue faster\n" + - "// Just make sure you're OK sharing the contents of the conversation below.\n\n" - const jsonContent = JSON.stringify(diagnostics, null, 2) + "// Sensitive data (API keys, tokens, secrets) has been automatically redacted.\n" + + "// However, please review the contents below before sharing to ensure no private information remains.\n\n" + const jsonContent = JSON.stringify(redactedDiagnostics, null, 2) const fullContent = headerComment + jsonContent // Create a temporary diagnostics file diff --git a/src/utils/__tests__/redact.spec.ts b/src/utils/__tests__/redact.spec.ts new file mode 100644 index 00000000000..2d4da5eb965 --- /dev/null +++ b/src/utils/__tests__/redact.spec.ts @@ -0,0 +1,215 @@ +import { redactSensitiveInfo, redactDiagnosticsData } from "../redact" + +describe("redactSensitiveInfo", () => { + it("should return non-string inputs unchanged", () => { + expect(redactSensitiveInfo(null as any)).toBe(null) + expect(redactSensitiveInfo(undefined as any)).toBe(undefined) + expect(redactSensitiveInfo("" as any)).toBe("") + }) + + it("should redact Anthropic API keys", () => { + const input = "Using key sk-ant-api03-abcdefghijklmnopqrst1234567890" + const result = redactSensitiveInfo(input) + expect(result).toContain("[ANTHROPIC_API_KEY]") + expect(result).not.toContain("sk-ant-api03") + }) + + it("should redact OpenAI API keys", () => { + const input = "My key is sk-abcdefghijklmnopqrstuvwxyz1234567890" + const result = redactSensitiveInfo(input) + expect(result).toContain("[OPENAI_API_KEY]") + expect(result).not.toContain("sk-abcdefghijklmnopqrstuvwxyz") + }) + + it("should redact OpenRouter API keys", () => { + const input = "Using sk-or-v1-abcdefghijklmnopqrstuvwxyz1234567890" + const result = 
redactSensitiveInfo(input) + expect(result).toContain("[OPENROUTER_API_KEY]") + expect(result).not.toContain("sk-or-v1-abcdefghijklmnopqrstuvwxyz") + }) + + it("should redact GitHub tokens", () => { + const input = "Token: ghp_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789" + const result = redactSensitiveInfo(input) + expect(result).toContain("[GITHUB_TOKEN]") + expect(result).not.toContain("ghp_aBcDeFgHiJkLmNoPqRsTuVwXyZ") + }) + + it("should redact Bearer tokens", () => { + const input = "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0" + const result = redactSensitiveInfo(input) + expect(result).toContain("[BEARER_TOKEN]") + expect(result).not.toContain("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9") + }) + + it("should redact Google API keys", () => { + const input = "Using AIzaSyAbCdEfGhIjKlMnOpQrStUvWxYz0123456" + const result = redactSensitiveInfo(input) + expect(result).toContain("[GOOGLE_API_KEY]") + expect(result).not.toContain("AIzaSyAbCdEfGhIjKlMnOpQrStUvWxYz") + }) + + it("should redact AWS access keys", () => { + const input = "AWS key: AKIAIOSFODNN7EXAMPLE" + const result = redactSensitiveInfo(input) + expect(result).toContain("[AWS_ACCESS_KEY]") + expect(result).not.toContain("AKIAIOSFODNN7EXAMPLE") + }) + + it("should redact environment variable assignments with secret-like names", () => { + const input = "OPENAI_API_KEY=sk-abcdefghijklmnopqrstuvwxyz1234567890" + const result = redactSensitiveInfo(input) + expect(result).toContain("OPENAI_API_KEY=[REDACTED]") + expect(result).not.toContain("sk-abcdefghijklmnopqrstuvwxyz") + }) + + it("should redact quoted environment variable assignments", () => { + const input = 'export MY_SECRET_TOKEN="supersecretvalue123456"' + const result = redactSensitiveInfo(input) + expect(result).toContain("[REDACTED]") + expect(result).not.toContain("supersecretvalue123456") + }) + + it("should redact key-value pairs in JSON-like strings", () => { + const input = '"api_key": 
"abcdef1234567890abcdef1234567890ab"' + const result = redactSensitiveInfo(input) + expect(result).toContain("[REDACTED_SECRET]") + expect(result).not.toContain("abcdef1234567890abcdef1234567890ab") + }) + + it("should not redact normal text", () => { + const input = "This is a normal conversation about code review and testing." + const result = redactSensitiveInfo(input) + expect(result).toBe(input) + }) + + it("should handle multiple sensitive values in the same string", () => { + const input = "Keys: sk-ant-api03-abcdefghijklmnopqrst1234567890 and ghp_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789" + const result = redactSensitiveInfo(input) + expect(result).toContain("[ANTHROPIC_API_KEY]") + expect(result).toContain("[GITHUB_TOKEN]") + }) +}) + +describe("redactDiagnosticsData", () => { + it("should return null/undefined unchanged", () => { + expect(redactDiagnosticsData(null)).toBe(null) + expect(redactDiagnosticsData(undefined)).toBe(undefined) + }) + + it("should return numbers and booleans unchanged", () => { + expect(redactDiagnosticsData(42)).toBe(42) + expect(redactDiagnosticsData(true)).toBe(true) + }) + + it("should redact strings", () => { + const result = redactDiagnosticsData("key: sk-ant-api03-abcdefghijklmnopqrst1234567890") + expect(result).toContain("[ANTHROPIC_API_KEY]") + }) + + it("should redact values in objects with secret-like keys", () => { + const input = { + apiKey: "some-secret-value", + model: "gpt-4", + api_key: "another-secret", + } + const result = redactDiagnosticsData(input) as Record + expect(result.apiKey).toBe("[REDACTED]") + expect(result.api_key).toBe("[REDACTED]") + expect(result.model).toBe("gpt-4") + }) + + it("should recursively redact nested objects", () => { + const input = { + error: { + details: "Failed with sk-ant-api03-abcdefghijklmnopqrst1234567890", + }, + history: [ + { + role: "user", + content: "My token is ghp_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789", + }, + ], + } + const result = redactDiagnosticsData(input) as any + 
expect(result.error.details).toContain("[ANTHROPIC_API_KEY]") + expect(result.history[0].content).toContain("[GITHUB_TOKEN]") + }) + + it("should redact known secret field names regardless of value pattern", () => { + const input = { + password: "mysimplepassword", + token: "short-but-secret", + authorization: "Basic dXNlcjpwYXNz", + } + const result = redactDiagnosticsData(input) as Record + expect(result.password).toBe("[REDACTED]") + expect(result.token).toBe("[REDACTED]") + expect(result.authorization).toBe("[REDACTED]") + }) + + it("should not redact empty string values for secret keys", () => { + const input = { + apiKey: "", + password: "", + } + const result = redactDiagnosticsData(input) as Record + // Empty strings are kept as-is (no secret to redact) + expect(result.apiKey).toBe("") + expect(result.password).toBe("") + }) + + it("should handle deeply nested arrays", () => { + const input = [ + [ + { + content: [{ type: "text", text: "Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.abcdefghijklmnop" }], + }, + ], + ] + const result = redactDiagnosticsData(input) as any + expect(result[0][0].content[0].text).toContain("[BEARER_TOKEN]") + }) + + it("should handle a realistic diagnostics payload", () => { + const diagnostics = { + error: { + timestamp: "2025-01-01T00:00:00.000Z", + version: "1.2.3", + provider: "anthropic", + model: "claude-sonnet-4-20250514", + details: "API error occurred", + }, + history: [ + { + role: "user", + content: [ + { + type: "text", + text: "Please use my API key sk-ant-api03-abcdefghijklmnopqrst1234567890 to make the request", + }, + ], + }, + { + role: "assistant", + content: [ + { + type: "text", + text: "I'll help with that request.", + }, + ], + }, + ], + } + const result = redactDiagnosticsData(diagnostics) as any + // Error metadata should be preserved + expect(result.error.timestamp).toBe("2025-01-01T00:00:00.000Z") + expect(result.error.version).toBe("1.2.3") + expect(result.error.provider).toBe("anthropic") + // Sensitive 
data in history should be redacted + expect(result.history[0].content[0].text).toContain("[ANTHROPIC_API_KEY]") + expect(result.history[0].content[0].text).not.toContain("sk-ant-api03") + // Normal text should be preserved + expect(result.history[1].content[0].text).toBe("I'll help with that request.") + }) +}) diff --git a/src/utils/redact.ts b/src/utils/redact.ts new file mode 100644 index 00000000000..dbe97430c1d --- /dev/null +++ b/src/utils/redact.ts @@ -0,0 +1,154 @@ +/** + * Redacts sensitive information from diagnostic output to prevent accidental + * disclosure of API keys, tokens, passwords, and other secrets. + * + * This module is used by the diagnostics handler to sanitize conversation + * history before it is written to a file that users may share with support. + */ + +/** + * Patterns that match common API key and secret formats. + * Each entry has a regex and a label used in the redacted replacement. + */ +const SENSITIVE_PATTERNS: { pattern: RegExp; label: string }[] = [ + // Anthropic API keys: sk-ant-api03-... + { pattern: /\bsk-ant-[\w-]{20,}\b/g, label: "ANTHROPIC_API_KEY" }, + + // OpenRouter API keys: sk-or-v1-... (must come before generic OpenAI pattern) + { pattern: /\bsk-or-v1-[\w]{20,}\b/g, label: "OPENROUTER_API_KEY" }, + + // OpenAI API keys: sk-... (but not sk-ant or sk-or which are other providers) + { pattern: /\bsk-(?!ant|or-)[\w-]{20,}\b/g, label: "OPENAI_API_KEY" }, + + // Generic secret key patterns: key-..., api-key-... 
+ { pattern: /\bkey-[\w-]{20,}\b/g, label: "API_KEY" }, + + // AWS access keys + { pattern: /\b(?:AKIA|ASIA)[A-Z0-9]{16}\b/g, label: "AWS_ACCESS_KEY" }, + + // AWS secret keys (40 char base64-like) + { pattern: /\b[A-Za-z0-9/+=]{40}\b(?=.*(?:aws|secret|access))/gi, label: "AWS_SECRET_KEY" }, + + // Google API keys + { pattern: /\bAIza[A-Za-z0-9_-]{35}\b/g, label: "GOOGLE_API_KEY" }, + + // GitHub tokens: ghp_, gho_, ghu_, ghs_, ghr_ + { pattern: /\bgh[pousr]_[A-Za-z0-9_]{36,}\b/g, label: "GITHUB_TOKEN" }, + + // Generic Bearer tokens in authorization headers + { pattern: /Bearer\s+[A-Za-z0-9._~+/=-]{20,}/gi, label: "BEARER_TOKEN" }, + + // Generic base64-encoded long tokens (likely secrets) after common key names + { + pattern: + /(?<="(?:api[_-]?key|apikey|api[_-]?secret|secret[_-]?key|access[_-]?token|auth[_-]?token|token|password|passwd|secret|credential|private[_-]?key|authorization)":\s*")[^"]{20,}/gi, + label: "REDACTED_SECRET", + }, + + // Azure keys (32 hex chars) + { pattern: /\b[a-f0-9]{32}\b(?=.*(?:azure|endpoint|cognitive))/gi, label: "AZURE_KEY" }, + + // Generic hex tokens that appear after key-like field names (in JSON context) + { + pattern: /(?<="(?:api[_-]?key|apikey|secret|token|password|credential)":\s*")[a-f0-9-]{32,}(?=")/gi, + label: "REDACTED_SECRET", + }, +] + +/** + * Patterns for environment variable assignments containing secrets. + * Matches patterns like: SOME_API_KEY=value or export SOME_SECRET="value" + */ +const ENV_VAR_PATTERNS: RegExp[] = [ + // KEY=value patterns (unquoted) + /\b([A-Z_]*(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH)[A-Z_]*)=([^\s"']{8,})\b/gi, + // KEY="value" or KEY='value' patterns (quoted) + /\b([A-Z_]*(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH)[A-Z_]*)=["']([^"']{8,})["']/gi, + // export KEY=value + /\bexport\s+([A-Z_]*(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH)[A-Z_]*)=["']?([^\s"']{8,})["']?/gi, +] + +/** + * Redacts sensitive information from a string. 
/**
 * Redacts sensitive information from a string.
 *
 * Two passes are applied, in this order:
 *  1. Environment-variable style assignments whose name looks secret
 *     (`ENV_VAR_PATTERNS`) are rewritten to `NAME=[REDACTED]`.
 *  2. Known credential formats (`SENSITIVE_PATTERNS`) are replaced with a
 *     bracketed label such as `[ANTHROPIC_API_KEY]`.
 *
 * The assignment pass must run first. If the token pass ran first, an input
 * like `OPENAI_API_KEY=sk-...` would become `OPENAI_API_KEY=[OPENAI_API_KEY]`,
 * and the unquoted assignment pattern (which ends in `\b`) would then match
 * only up to `...KEY`, leaving a stray `]` behind: `OPENAI_API_KEY=[REDACTED]]`.
 *
 * @param input - The string to redact sensitive information from
 * @returns The input with sensitive values replaced by redaction markers.
 *          Falsy or non-string inputs are returned unchanged.
 */
export function redactSensitiveInfo(input: string): string {
	if (!input || typeof input !== "string") {
		return input
	}

	let result = input

	// Pass 1: NAME=value assignments with secret-like names. String#replace
	// resets lastIndex on global regexes, so the shared pattern objects need
	// no manual reset between calls.
	for (const pattern of ENV_VAR_PATTERNS) {
		result = result.replace(pattern, (_match: string, name: string) => `${name}=[REDACTED]`)
	}

	// Pass 2: free-standing credentials identified by their format.
	for (const { pattern, label } of SENSITIVE_PATTERNS) {
		result = result.replace(pattern, `[${label}]`)
	}

	return result
}

/**
 * Field names (lower-cased, with `_` and `-` removed) whose string value is
 * always treated as a secret, whatever the value looks like. Kept in line
 * with the key names recognised by the JSON-context regexes in
 * `SENSITIVE_PATTERNS`.
 */
const SECRET_FIELD_NAMES: ReadonlySet<string> = new Set([
	"password",
	"passwd",
	"secret",
	"secretkey",
	"apisecret",
	"token",
	"accesstoken",
	"authtoken",
	"authorization",
	"credential",
	"privatekey",
])

/**
 * Reports whether an object key names a field that holds a secret.
 * Matching ignores case and `_`/`-` separators, so `apiKey`, `api_key`,
 * `x-api-key` and `openAiApiKey` are all recognised.
 */
function isSecretFieldName(key: string): boolean {
	const normalized = key.toLowerCase().replace(/[_-]/g, "")
	return normalized.includes("apikey") || SECRET_FIELD_NAMES.has(normalized)
}

/**
 * Recursively redacts sensitive information from a data structure.
 *
 * Strings are passed through `redactSensitiveInfo`, arrays are mapped
 * element by element, and objects are rebuilt from their own enumerable
 * entries. A non-empty string stored under a secret-like key (see
 * `isSecretFieldName`) is replaced wholesale with `[REDACTED]`; empty strings
 * are kept because there is nothing to hide. The input is never mutated.
 *
 * @param data - The data structure to redact (object, array, or primitive)
 * @returns A new structure with sensitive string values redacted; `null`,
 *          `undefined`, numbers, booleans and other primitives are returned
 *          as-is
 */
export function redactDiagnosticsData(data: unknown): unknown {
	if (data === null || data === undefined) {
		return data
	}

	if (typeof data === "string") {
		return redactSensitiveInfo(data)
	}

	if (Array.isArray(data)) {
		return data.map((item) => redactDiagnosticsData(item))
	}

	if (typeof data !== "object") {
		// Numbers, booleans, etc. pass through unchanged
		return data
	}

	const result: Record<string, unknown> = {}
	for (const [key, value] of Object.entries(data)) {
		const holdsSecret = typeof value === "string" && value.length > 0 && isSecretFieldName(key)
		result[key] = holdsSecret ? "[REDACTED]" : redactDiagnosticsData(value)
	}
	return result
}