From c23ec0cdab000ac3d690d28d14d370750d01d86e Mon Sep 17 00:00:00 2001 From: Patrick Decat Date: Mon, 12 Jan 2026 18:41:16 +0100 Subject: [PATCH] fix: encode hyphens in MCP tool names before sanitization --- .../assistant-message/NativeToolCallParser.ts | 24 ++- .../native-tools/__tests__/mcp_server.spec.ts | 2 +- src/utils/__tests__/mcp-name.spec.ts | 162 ++++++++++++++++-- src/utils/mcp-name.ts | 81 ++++++++- 4 files changed, 239 insertions(+), 30 deletions(-) diff --git a/src/core/assistant-message/NativeToolCallParser.ts b/src/core/assistant-message/NativeToolCallParser.ts index f6eac36a9c1..56d71eb3dd0 100644 --- a/src/core/assistant-message/NativeToolCallParser.ts +++ b/src/core/assistant-message/NativeToolCallParser.ts @@ -16,7 +16,7 @@ import type { ApiStreamToolCallDeltaChunk, ApiStreamToolCallEndChunk, } from "../../api/transform/stream" -import { MCP_TOOL_PREFIX, MCP_TOOL_SEPARATOR, parseMcpToolName } from "../../utils/mcp-name" +import { MCP_TOOL_PREFIX, MCP_TOOL_SEPARATOR, parseMcpToolName, normalizeMcpToolName } from "../../utils/mcp-name" /** * Helper type to extract properly typed native arguments for a given tool. @@ -52,7 +52,7 @@ export type ToolCallStreamEvent = ApiStreamToolCallStartChunk | ApiStreamToolCal */ export class NativeToolCallParser { // Streaming state management for argument accumulation (keyed by tool call id) - // Note: name is string to accommodate dynamic MCP tools (mcp_serverName_toolName) + // Note: name is string to accommodate dynamic MCP tools (mcp--serverName--toolName) private static streamingToolCalls = new Map< string, { @@ -199,7 +199,7 @@ export class NativeToolCallParser { /** * Start streaming a new tool call. * Initializes tracking for incremental argument parsing. - * Accepts string to support both ToolName and dynamic MCP tools (mcp_serverName_toolName). + * Accepts string to support both ToolName and dynamic MCP tools (mcp--serverName--toolName). */ public static startStreamingToolCall(id: string, name: string): void { this.streamingToolCalls.set(id, { @@ -575,10 +575,16 @@ export class NativeToolCallParser { arguments: string }): ToolUse | McpToolUse | null { // Check if this is a dynamic MCP tool (mcp--serverName--toolName) + // Also handle models that output underscores instead of hyphens (mcp__serverName__toolName) const mcpPrefix = MCP_TOOL_PREFIX + MCP_TOOL_SEPARATOR - if (typeof toolCall.name === "string" && toolCall.name.startsWith(mcpPrefix)) { - return this.parseDynamicMcpTool(toolCall) + if (typeof toolCall.name === "string") { + // Normalize the tool name to handle models that output underscores instead of hyphens + const normalizedName = normalizeMcpToolName(toolCall.name) + if (normalizedName.startsWith(mcpPrefix)) { + // Pass the original tool call but with normalized name for parsing + return this.parseDynamicMcpTool({ ...toolCall, name: normalizedName }) + } } // Resolve tool alias to canonical name @@ -865,11 +871,15 @@ export class NativeToolCallParser { // Parse the arguments - these are the actual tool arguments passed directly const args = JSON.parse(toolCall.arguments || "{}") + // Normalize the tool name to handle models that output underscores instead of hyphens + // e.g., mcp__serverName__toolName -> mcp--serverName--toolName + const normalizedName = normalizeMcpToolName(toolCall.name) + // Extract server_name and tool_name from the tool name itself // Format: mcp--serverName--toolName (using -- separator) - const parsed = parseMcpToolName(toolCall.name) + const parsed = parseMcpToolName(normalizedName) if (!parsed) { - console.error(`Invalid dynamic MCP tool name format: ${toolCall.name}`) + console.error(`Invalid dynamic MCP tool name format: ${toolCall.name} (normalized: ${normalizedName})`) return null } diff --git a/src/core/prompts/tools/native-tools/__tests__/mcp_server.spec.ts b/src/core/prompts/tools/native-tools/__tests__/mcp_server.spec.ts index ddd7caaccf4..932468cd9b1 100644 --- a/src/core/prompts/tools/native-tools/__tests__/mcp_server.spec.ts +++ b/src/core/prompts/tools/native-tools/__tests__/mcp_server.spec.ts @@ -89,7 +89,7 @@ describe("getMcpServerTools", () => { // Should only have one tool (from project server) expect(result).toHaveLength(1) - expect(getFunction(result[0]).name).toBe("mcp--context7--resolve-library-id") + expect(getFunction(result[0]).name).toBe("mcp--context7--resolve___library___id") // Project server takes priority expect(getFunction(result[0]).description).toBe("Project description") }) diff --git a/src/utils/__tests__/mcp-name.spec.ts b/src/utils/__tests__/mcp-name.spec.ts index 5511893f79e..0f3e37d5750 100644 --- a/src/utils/__tests__/mcp-name.spec.ts +++ b/src/utils/__tests__/mcp-name.spec.ts @@ -2,9 +2,12 @@ import { sanitizeMcpName, buildMcpToolName, parseMcpToolName, + decodeMcpName, + normalizeMcpToolName, isMcpTool, MCP_TOOL_SEPARATOR, MCP_TOOL_PREFIX, + HYPHEN_ENCODING, } from "../mcp-name" describe("mcp-name utilities", () => { @@ -13,6 +16,10 @@ describe("mcp-name utilities", () => { expect(MCP_TOOL_SEPARATOR).toBe("--") expect(MCP_TOOL_PREFIX).toBe("mcp") }) + + it("should have correct hyphen encoding", () => { + expect(HYPHEN_ENCODING).toBe("___") + }) }) describe("isMcpTool", () => { @@ -53,9 +60,10 @@ describe("mcp-name utilities", () => { expect(sanitizeMcpName("test#$%^&*()")).toBe("test") }) - it("should keep valid characters (alphanumeric, underscore, dash)", () => { + it("should keep alphanumeric and underscores, but encode hyphens", () => { expect(sanitizeMcpName("server_name")).toBe("server_name") - expect(sanitizeMcpName("server-name")).toBe("server-name") + // Hyphens are now encoded as triple underscores + expect(sanitizeMcpName("server-name")).toBe("server___name") expect(sanitizeMcpName("Server123")).toBe("Server123") }) @@ -63,12 +71,16 @@ describe("mcp-name utilities", () => { // Dots and colons are NOT allowed due to AWS Bedrock restrictions expect(sanitizeMcpName("server.name")).toBe("servername") expect(sanitizeMcpName("server:name")).toBe("servername") - expect(sanitizeMcpName("awslabs.aws-documentation-mcp-server")).toBe("awslabsaws-documentation-mcp-server") + // Hyphens are encoded as triple underscores + expect(sanitizeMcpName("awslabs.aws-documentation-mcp-server")).toBe( + "awslabsaws___documentation___mcp___server", + ) }) it("should prepend underscore if name starts with non-letter/underscore", () => { expect(sanitizeMcpName("123server")).toBe("_123server") - expect(sanitizeMcpName("-server")).toBe("_-server") + // Hyphen at start is encoded to ___, which starts with underscore (valid) + expect(sanitizeMcpName("-server")).toBe("___server") // Dots are removed, so ".server" becomes "server" which starts with a letter expect(sanitizeMcpName(".server")).toBe("server") }) @@ -79,15 +91,17 @@ describe("mcp-name utilities", () => { expect(sanitizeMcpName("Server")).toBe("Server") }) - it("should replace double-hyphen sequences with single hyphen to avoid separator conflicts", () => { - expect(sanitizeMcpName("server--name")).toBe("server-name") - expect(sanitizeMcpName("test---server")).toBe("test-server") - expect(sanitizeMcpName("my----tool")).toBe("my-tool") + it("should replace double-hyphen sequences with single hyphen then encode", () => { + // Double hyphens become single hyphen, then encoded as ___ + expect(sanitizeMcpName("server--name")).toBe("server___name") + expect(sanitizeMcpName("test---server")).toBe("test___server") + expect(sanitizeMcpName("my----tool")).toBe("my___tool") }) it("should handle complex names with multiple issues", () => { expect(sanitizeMcpName("My Server @ Home!")).toBe("My_Server__Home") - expect(sanitizeMcpName("123-test server")).toBe("_123-test_server") + // Hyphen is encoded as ___ + expect(sanitizeMcpName("123-test server")).toBe("_123___test_server") }) it("should return placeholder for names that become empty after sanitization", () => { @@ -95,6 +109,28 @@ describe("mcp-name utilities", () => { // Spaces become underscores, which is a valid character, so it returns "_" expect(sanitizeMcpName(" ")).toBe("_") }) + + it("should encode hyphens as triple underscores for model compatibility", () => { + // This is the key feature: hyphens are encoded so they survive model tool calling + expect(sanitizeMcpName("atlassian-jira_search")).toBe("atlassian___jira_search") + expect(sanitizeMcpName("atlassian-confluence_search")).toBe("atlassian___confluence_search") + }) + }) + + describe("decodeMcpName", () => { + it("should decode triple underscores back to hyphens", () => { + expect(decodeMcpName("server___name")).toBe("server-name") + expect(decodeMcpName("atlassian___jira_search")).toBe("atlassian-jira_search") + }) + + it("should not modify names without triple underscores", () => { + expect(decodeMcpName("server_name")).toBe("server_name") + expect(decodeMcpName("tool")).toBe("tool") + }) + + it("should handle multiple encoded hyphens", () => { + expect(decodeMcpName("a___b___c")).toBe("a-b-c") + }) }) describe("buildMcpToolName", () => { @@ -125,6 +161,11 @@ describe("mcp-name utilities", () => { it("should preserve underscores in server and tool names", () => { expect(buildMcpToolName("my_server", "my_tool")).toBe("mcp--my_server--my_tool") }) + + it("should encode hyphens in tool names", () => { + // Hyphens are encoded as triple underscores + expect(buildMcpToolName("onellm", "atlassian-jira_search")).toBe("mcp--onellm--atlassian___jira_search") + }) }) describe("parseMcpToolName", () => { @@ -151,8 +192,7 @@ describe("mcp-name utilities", () => { }) }) - it("should correctly handle server names with underscores (fixed from old behavior)", () => { - // With the new -- separator, server names with underscores work correctly + it("should correctly handle server names with underscores", () => { expect(parseMcpToolName("mcp--my_server--tool")).toEqual({ serverName: "my_server", toolName: "tool", @@ -166,6 +206,14 @@ describe("mcp-name utilities", () => { }) }) + it("should decode triple underscores back to hyphens", () => { + // This is the key feature: encoded hyphens are decoded back + expect(parseMcpToolName("mcp--onellm--atlassian___jira_search")).toEqual({ + serverName: "onellm", + toolName: "atlassian-jira_search", + }) + }) + it("should return null for malformed names", () => { expect(parseMcpToolName("mcp--")).toBeNull() expect(parseMcpToolName("mcp--server")).toBeNull() @@ -183,7 +231,6 @@ describe("mcp-name utilities", () => { }) it("should preserve sanitized names through roundtrip with underscores", () => { - // Names with underscores now work correctly through roundtrip const toolName = buildMcpToolName("my_server", "my_tool") const parsed = parseMcpToolName(toolName) expect(parsed).toEqual({ @@ -193,7 +240,6 @@ describe("mcp-name utilities", () => { }) it("should handle spaces that get converted to underscores", () => { - // "my server" becomes "my_server" after sanitization const toolName = buildMcpToolName("my server", "get tool") const parsed = parseMcpToolName(toolName) expect(parsed).toEqual({ @@ -210,5 +256,95 @@ describe("mcp-name utilities", () => { toolName: "get_current_forecast", }) }) + + it("should preserve hyphens through roundtrip via encoding/decoding", () => { + // This is the key test: hyphens survive the roundtrip + const toolName = buildMcpToolName("onellm", "atlassian-jira_search") + expect(toolName).toBe("mcp--onellm--atlassian___jira_search") + + const parsed = parseMcpToolName(toolName) + expect(parsed).toEqual({ + serverName: "onellm", + toolName: "atlassian-jira_search", // Hyphen is preserved! + }) + }) + + it("should handle tool names with multiple hyphens", () => { + const toolName = buildMcpToolName("server", "get-user-profile") + const parsed = parseMcpToolName(toolName) + expect(parsed).toEqual({ + serverName: "server", + toolName: "get-user-profile", + }) + }) + }) + + describe("normalizeMcpToolName", () => { + it("should convert underscore separators to hyphen separators", () => { + expect(normalizeMcpToolName("mcp__server__tool")).toBe("mcp--server--tool") + }) + + it("should not modify names that already have hyphen separators", () => { + expect(normalizeMcpToolName("mcp--server--tool")).toBe("mcp--server--tool") + }) + + it("should not modify non-MCP tool names", () => { + expect(normalizeMcpToolName("read_file")).toBe("read_file") + expect(normalizeMcpToolName("some__tool")).toBe("some__tool") + }) + + it("should preserve triple underscores (encoded hyphens) while normalizing separators", () => { + // Model outputs: mcp__onellm__atlassian___jira_search + // Should become: mcp--onellm--atlassian___jira_search + expect(normalizeMcpToolName("mcp__onellm__atlassian___jira_search")).toBe( + "mcp--onellm--atlassian___jira_search", + ) + }) + + it("should handle multiple encoded hyphens", () => { + expect(normalizeMcpToolName("mcp__server__get___user___profile")).toBe("mcp--server--get___user___profile") + }) + }) + + describe("model compatibility - full flow", () => { + it("should handle the complete flow: build -> model mangles -> normalize -> parse", () => { + // Step 1: Build the tool name (hyphens encoded as ___) + const builtName = buildMcpToolName("onellm", "atlassian-jira_search") + expect(builtName).toBe("mcp--onellm--atlassian___jira_search") + + // Step 2: Model mangles the separators (-- becomes __) + const mangledName = "mcp__onellm__atlassian___jira_search" + + // Step 3: Normalize the separators back (__ becomes --) + const normalizedName = normalizeMcpToolName(mangledName) + expect(normalizedName).toBe("mcp--onellm--atlassian___jira_search") + + // Step 4: Parse the normalized name (decodes ___ back to -) + const parsed = parseMcpToolName(normalizedName) + expect(parsed).toEqual({ + serverName: "onellm", + toolName: "atlassian-jira_search", // Original hyphen is preserved! + }) + }) + + it("should handle tool names with multiple hyphens through the full flow", () => { + // Build + const builtName = buildMcpToolName("server", "get-user-profile") + expect(builtName).toBe("mcp--server--get___user___profile") + + // Model mangles + const mangledName = "mcp__server__get___user___profile" + + // Normalize + const normalizedName = normalizeMcpToolName(mangledName) + expect(normalizedName).toBe("mcp--server--get___user___profile") + + // Parse + const parsed = parseMcpToolName(normalizedName) + expect(parsed).toEqual({ + serverName: "server", + toolName: "get-user-profile", + }) + }) }) }) diff --git a/src/utils/mcp-name.ts b/src/utils/mcp-name.ts index c81d5e770f0..3e6d1ab8a47 100644 --- a/src/utils/mcp-name.ts +++ b/src/utils/mcp-name.ts @@ -17,6 +17,52 @@ export const MCP_TOOL_SEPARATOR = "--" */ export const MCP_TOOL_PREFIX = "mcp" +/** + * Encoding for hyphens in tool names. + * We use triple underscores because: + * 1. It's unlikely to appear naturally in tool names + * 2. It's safe for all API providers + * 3. It allows us to preserve hyphens through the encoding/decoding process + * + * This solves the problem where models (especially Claude) convert hyphens to underscores + * in tool names when using native tool calling. By encoding hyphens as triple underscores, + * we can decode them back to hyphens when parsing the tool name. + */ +export const HYPHEN_ENCODING = "___" + +/** + * Normalize an MCP tool name by converting underscore separators back to hyphens. + * This handles the case where models (especially Claude) convert hyphens to underscores + * in tool names when using native tool calling. + * + * For example: "mcp__server__tool" -> "mcp--server--tool" + * + * @param toolName - The tool name that may have underscore separators + * @returns The normalized tool name with hyphen separators + */ +export function normalizeMcpToolName(toolName: string): string { + // Only normalize if it looks like an MCP tool with underscore separators + if (toolName.startsWith("mcp__")) { + // Replace double underscores with double hyphens for the separators + // We need to be careful to only replace the separators, not the encoded hyphens (triple underscores) + // Pattern: mcp__server__tool -> mcp--server--tool + // But: mcp__server__tool___name should become mcp--server--tool___name (preserve triple underscores) + + // First, temporarily replace triple underscores with a placeholder + const placeholder = "\x00HYPHEN\x00" + let normalized = toolName.replace(/___/g, placeholder) + + // Now replace double underscores (separators) with double hyphens + normalized = normalized.replace(/__/g, "--") + + // Restore triple underscores from placeholder + normalized = normalized.replace(new RegExp(placeholder, "g"), "___") + + return normalized + } + return toolName +} + /** * Check if a tool name is an MCP tool (starts with the MCP prefix and separator). * @@ -29,10 +75,9 @@ export function isMcpTool(toolName: string): boolean { /** * Sanitize a name to be safe for use in API function names. - * This removes special characters and ensures the name starts correctly. - * - * Note: This does NOT remove dashes from names, but the separator "--" is - * distinct enough (double hyphen) that single hyphens in names won't conflict. + * This removes special characters, ensures the name starts correctly, + * and encodes hyphens as triple underscores to preserve them through + * the model's tool calling process. * * @param name - The original name (e.g., MCP server name or tool name) * @returns A sanitized name that conforms to API requirements @@ -51,6 +96,11 @@ export function sanitizeMcpName(name: string): string { // Replace any double-hyphen sequences with single hyphen to avoid separator conflicts sanitized = sanitized.replace(/--+/g, "-") + // Encode single hyphens as triple underscores to preserve them + // This allows us to decode them back to hyphens when parsing + // e.g., "atlassian-jira_search" -> "atlassian___jira_search" + sanitized = sanitized.replace(/-/g, HYPHEN_ENCODING) + // Ensure the name starts with a letter or underscore if (sanitized.length > 0 && !/^[a-zA-Z_]/.test(sanitized)) { sanitized = "_" + sanitized @@ -90,11 +140,20 @@ export function buildMcpToolName(serverName: string, toolName: string): string { } /** - * Parse an MCP tool function name back into server and tool names. - * This handles sanitized names by splitting on the "--" separator. + * Decode a sanitized name back to its original form by converting + * triple underscores back to hyphens. * - * Note: This returns the sanitized names, not the original names. - * The original names cannot be recovered from the sanitized version. + * @param sanitizedName - The sanitized name with encoded hyphens + * @returns The decoded name with hyphens restored + */ +export function decodeMcpName(sanitizedName: string): string { + return sanitizedName.replace(new RegExp(HYPHEN_ENCODING, "g"), "-") +} + +/** + * Parse an MCP tool function name back into server and tool names. + * This handles sanitized names by splitting on the "--" separator + * and decoding triple underscores back to hyphens. * * @param mcpToolName - The full MCP tool name (e.g., "mcp--weather--get_forecast") * @returns An object with serverName and toolName, or null if parsing fails @@ -121,5 +180,9 @@ export function parseMcpToolName(mcpToolName: string): { serverName: string; too return null } - return { serverName, toolName } + // Decode triple underscores back to hyphens + return { + serverName: decodeMcpName(serverName), + toolName: decodeMcpName(toolName), + } }