From d705365eff6f9c0e548da02dda188c81c304f730 Mon Sep 17 00:00:00 2001
From: daniel-lxs
Date: Wed, 14 Jan 2026 21:09:53 -0500
Subject: [PATCH 1/2] fix: inject thought signatures for Gemini 3 when
 switching models via LiteLLM

When users switch mid-task from Claude (or other models) to Gemini 3/2.5
via LiteLLM, the API returns 'Corrupted thought signature' errors because
conversation history contains tool calls without the required signatures.

This fix injects dummy thought signatures into tool calls when targeting
Gemini models, following LiteLLM's official documentation:

- Detect Gemini 3.x and 2.5.x models (including provider-prefixed variants)
- Inject base64('skip_thought_signature_validator') into first tool call
- Preserve existing provider_specific_fields on tool calls
- Skip injection if signature already exists

Added 11 new tests covering model detection, injection behavior, and
integration.

Fixes: COM-489
---
 src/api/providers/__tests__/lite-llm.spec.ts | 318 ++++++++++++++++++-
 src/api/providers/lite-llm.ts                |  99 +++++-
 2 files changed, 410 insertions(+), 7 deletions(-)

diff --git a/src/api/providers/__tests__/lite-llm.spec.ts b/src/api/providers/__tests__/lite-llm.spec.ts
index a95118469e2..e330fbbbece 100644
--- a/src/api/providers/__tests__/lite-llm.spec.ts
+++ b/src/api/providers/__tests__/lite-llm.spec.ts
@@ -3,7 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 
 import { LiteLLMHandler } from "../lite-llm"
 import { ApiHandlerOptions } from "../../../shared/api"
-import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
+import { litellmDefaultModelId, litellmDefaultModelInfo, TOOL_PROTOCOL } from "@roo-code/types"
 
 // Mock vscode first to avoid import errors
 vi.mock("vscode", () => ({}))
@@ -40,6 +40,12 @@ vi.mock("../fetchers/modelCache", () => ({
 			"claude-3-opus": { ...litellmDefaultModelInfo, maxTokens: 8192 },
 			"llama-3": { ...litellmDefaultModelInfo, maxTokens: 8192 },
 			"gpt-4-turbo": { ...litellmDefaultModelInfo, maxTokens: 8192 },
+			// Gemini models for thought signature injection tests
+			"gemini-3-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"gemini-3-flash": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"gemini-2.5-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"google/gemini-3-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"vertex_ai/gemini-3-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
 		})
 	}),
 	getModelsFromCache: vi.fn().mockReturnValue(undefined),
@@ -388,4 +394,314 @@ describe("LiteLLMHandler", () => {
 			expect(createCall.max_completion_tokens).toBeUndefined()
 		})
 	})
+
+	describe("Gemini thought signature injection", () => {
+		describe("isGeminiModel detection", () => {
+			it("should detect Gemini 3 models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("gemini-3-pro")).toBe(true)
+				expect(isGeminiModel("gemini-3-flash")).toBe(true)
+				expect(isGeminiModel("gemini-3-pro-preview")).toBe(true)
+			})
+
+			it("should detect Gemini 2.5 models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("gemini-2.5-pro")).toBe(true)
+				expect(isGeminiModel("gemini-2.5-flash")).toBe(true)
+			})
+
+			it("should detect provider-prefixed Gemini models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("google/gemini-3-pro")).toBe(true)
+				expect(isGeminiModel("vertex_ai/gemini-3-pro")).toBe(true)
+				expect(isGeminiModel("vertex/gemini-2.5-pro")).toBe(true)
+			})
+
+			it("should not detect non-Gemini models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("gpt-4")).toBe(false)
+				expect(isGeminiModel("claude-3-opus")).toBe(false)
+				expect(isGeminiModel("gemini-1.5-pro")).toBe(false)
+				expect(isGeminiModel("gemini-2.0-flash")).toBe(false)
+			})
+		})
+
+		describe("injectThoughtSignatureForGemini", () => {
+			// Base64 encoded "skip_thought_signature_validator"
+			const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")
+
+			it("should inject provider_specific_fields.thought_signature for assistant messages with tool_calls", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{ id: "call_123", type: "function", function: { name: "test_tool", arguments: "{}" } },
+						],
+					},
+					{ role: "tool", tool_call_id: "call_123", content: "result" },
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// The first tool call should have provider_specific_fields.thought_signature injected
+				expect(result[1].tool_calls[0].provider_specific_fields).toBeDefined()
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+
+			it("should not inject if assistant message has no tool_calls", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{ role: "assistant", content: "Hi there!" },
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// No changes should be made
+				expect(result[1].tool_calls).toBeUndefined()
+			})
+
+			it("should not inject if thought_signature already exists", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const existingSignature = "existing_signature_base64"
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{
+								id: "call_123",
+								type: "function",
+								function: { name: "test_tool", arguments: "{}" },
+								provider_specific_fields: { thought_signature: existingSignature },
+							},
+						],
+					},
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// Should keep existing thought_signature unchanged
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(existingSignature)
+			})
+
+			it("should only inject signature into first tool call for parallel calls", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{ id: "call_first", type: "function", function: { name: "tool1", arguments: "{}" } },
+							{ id: "call_second", type: "function", function: { name: "tool2", arguments: "{}" } },
+						],
+					},
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// Only first tool call should have the signature
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+				// Second tool call should not have provider_specific_fields added
+				expect(result[1].tool_calls[1].provider_specific_fields).toBeUndefined()
+			})
+
+			it("should preserve existing provider_specific_fields when adding thought_signature", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{
+								id: "call_123",
+								type: "function",
+								function: { name: "test_tool", arguments: "{}" },
+								provider_specific_fields: { other_field: "value" },
+							},
+						],
+					},
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// Should have both existing field and new thought_signature
+				expect(result[1].tool_calls[0].provider_specific_fields.other_field).toBe("value")
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+		})
+
+		describe("createMessage integration with Gemini models", () => {
+			// Base64 encoded "skip_thought_signature_validator"
+			const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")
+
+			it("should inject thought signatures for Gemini 3 models with native tools", async () => {
+				const optionsWithGemini: ApiHandlerOptions = {
+					...mockOptions,
+					litellmModelId: "gemini-3-pro",
+				}
+				handler = new LiteLLMHandler(optionsWithGemini)
+
+				// Mock fetchModel to return a Gemini model with native tool support
+				vi.spyOn(handler as any, "fetchModel").mockResolvedValue({
+					id: "gemini-3-pro",
+					info: { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+				})
+
+				const systemPrompt = "You are a helpful assistant"
+				// Simulate conversation history with a tool call from a previous model (Claude)
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: [
+							{ type: "text", text: "I'll help you with that." },
+							{ type: "tool_use", id: "toolu_123", name: "read_file", input: { path: "test.txt" } },
+						],
+					},
+					{
+						role: "user",
+						content: [{ type: "tool_result", tool_use_id: "toolu_123", content: "file contents" }],
+					},
+					{ role: "user", content: "Thanks!" },
+				]
+
+				// Mock the stream response
+				const mockStream = {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							choices: [{ delta: { content: "You're welcome!" } }],
+							usage: {
+								prompt_tokens: 100,
+								completion_tokens: 20,
+							},
+						}
+					},
+				}
+
+				mockCreate.mockReturnValue({
+					withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+				})
+
+				// Provide tools and native protocol to trigger the injection
+				const metadata = {
+					tools: [
+						{
+							type: "function",
+							function: { name: "read_file", description: "Read a file", parameters: {} },
+						},
+					],
+					toolProtocol: TOOL_PROTOCOL.NATIVE,
+				}
+
+				const generator = handler.createMessage(systemPrompt, messages, metadata as any)
+				for await (const _chunk of generator) {
+					// Consume the generator
+				}
+
+				// Verify that the assistant message with tool_calls has thought_signature injected
+				const createCall = mockCreate.mock.calls[0][0]
+				const assistantMessage = createCall.messages.find(
+					(msg: any) => msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0,
+				)
+
+				expect(assistantMessage).toBeDefined()
+				// First tool call should have the thought signature
+				expect(assistantMessage.tool_calls[0].provider_specific_fields).toBeDefined()
+				expect(assistantMessage.tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+
+			it("should not inject thought signatures for non-Gemini models", async () => {
+				const optionsWithGPT4: ApiHandlerOptions = {
+					...mockOptions,
+					litellmModelId: "gpt-4",
+				}
+				handler = new LiteLLMHandler(optionsWithGPT4)
+
+				vi.spyOn(handler as any, "fetchModel").mockResolvedValue({
+					id: "gpt-4",
+					info: { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+				})
+
+				const systemPrompt = "You are a helpful assistant"
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: [
+							{ type: "text", text: "I'll help you with that." },
+							{ type: "tool_use", id: "toolu_123", name: "read_file", input: { path: "test.txt" } },
+						],
+					},
+					{
+						role: "user",
+						content: [{ type: "tool_result", tool_use_id: "toolu_123", content: "file contents" }],
+					},
+				]
+
+				const mockStream = {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							choices: [{ delta: { content: "Response" } }],
+							usage: { prompt_tokens: 100, completion_tokens: 20 },
+						}
+					},
+				}
+
+				mockCreate.mockReturnValue({
+					withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+				})
+
+				const metadata = {
+					tools: [
+						{
+							type: "function",
+							function: { name: "read_file", description: "Read a file", parameters: {} },
+						},
+					],
+					toolProtocol: TOOL_PROTOCOL.NATIVE,
+				}
+
+				const generator = handler.createMessage(systemPrompt, messages, metadata as any)
+				for await (const _chunk of generator) {
+					// Consume
+				}
+
+				// Verify that thought_signature was NOT injected for non-Gemini model
+				const createCall = mockCreate.mock.calls[0][0]
+				const assistantMessage = createCall.messages.find(
+					(msg: any) => msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0,
+				)
+
+				expect(assistantMessage).toBeDefined()
+				// Tool calls should not have provider_specific_fields added
+				expect(assistantMessage.tool_calls[0].provider_specific_fields).toBeUndefined()
+			})
+		})
+	})
 })
diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts
index 34341a88300..279293c256e 100644
--- a/src/api/providers/lite-llm.ts
+++ b/src/api/providers/lite-llm.ts
@@ -9,6 +9,7 @@ import { ApiHandlerOptions } from "../../shared/api"
 
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { resolveToolProtocol } from "../../utils/resolveToolProtocol"
 
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { RouterProvider } from "./router-provider"
@@ -38,6 +39,81 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 		return /\bgpt-?5(?!\d)/i.test(modelId)
 	}
 
+	/**
+	 * Detect if the model is a Gemini model that requires thought signature handling.
+	 * Gemini 3 models validate thought signatures for tool/function calling steps.
+	 */
+	private isGeminiModel(modelId: string): boolean {
+		// Match various Gemini model patterns:
+		// - gemini-3-pro, gemini-3-flash, gemini-3-*
+		// - gemini/gemini-3-*, google/gemini-3-*
+		// - vertex_ai/gemini-3-*, vertex/gemini-3-*
+		// Also match Gemini 2.5+ models which use similar validation
+		const lowerModelId = modelId.toLowerCase()
+		return (
+			lowerModelId.includes("gemini-3") ||
+			lowerModelId.includes("gemini-2.5") ||
+			// Also match provider-prefixed versions
+			/\b(gemini|google|vertex_ai|vertex)\/gemini-(3|2\.5)/i.test(modelId)
+		)
+	}
+
+	/**
+	 * Inject thought signatures for Gemini models via provider_specific_fields.
+	 * This is required when switching from other models to Gemini to satisfy API validation
+	 * for function calls that weren't generated by Gemini (and thus lack thought signatures).
+	 *
+	 * Per LiteLLM documentation:
+	 * - Thought signatures are stored in provider_specific_fields.thought_signature of tool calls
+	 * - The dummy signature base64("skip_thought_signature_validator") bypasses validation
+	 * - Only the first tool call in parallel calls needs the signature
+	 *
+	 * Note: LiteLLM claims to automatically handle missing signatures, but this explicit
+	 * injection ensures compatibility when LiteLLM doesn't detect the model switch.
+	 */
+	private injectThoughtSignatureForGemini(
+		openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[],
+	): OpenAI.Chat.ChatCompletionMessageParam[] {
+		// Base64 encoded "skip_thought_signature_validator" as per LiteLLM docs
+		const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")
+
+		return openAiMessages.map((msg) => {
+			if (msg.role === "assistant") {
+				const toolCalls = (msg as any).tool_calls as any[] | undefined
+
+				// Only process if there are tool calls
+				if (toolCalls && toolCalls.length > 0) {
+					// Check if first tool call already has a thought_signature
+					const firstToolCall = toolCalls[0]
+					const hasExistingSignature = firstToolCall?.provider_specific_fields?.thought_signature
+
+					if (!hasExistingSignature) {
+						// Inject dummy signature into the first tool call's provider_specific_fields
+						const updatedToolCalls = toolCalls.map((tc, index) => {
+							if (index === 0) {
+								// Only first tool call needs the signature for parallel calls
+								return {
+									...tc,
+									provider_specific_fields: {
+										...(tc.provider_specific_fields || {}),
+										thought_signature: dummySignature,
+									},
+								}
+							}
+							return tc
+						})
+
+						return {
+							...msg,
+							tool_calls: updatedToolCalls,
+						}
+					}
+				}
+			}
+			return msg
+		})
+	}
+
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
@@ -116,17 +192,28 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 		// Check if this is a GPT-5 model that requires max_completion_tokens instead of max_tokens
 		const isGPT5Model = this.isGpt5(modelId)
 
+		// Resolve tool protocol - use metadata's locked protocol if provided, otherwise resolve from options
+		const toolProtocol = resolveToolProtocol(this.options, info, metadata?.toolProtocol)
+		const isNativeProtocol = toolProtocol === TOOL_PROTOCOL.NATIVE
+
 		// Check if model supports native tools and tools are provided with native protocol
 		const supportsNativeTools = info.supportsNativeTools ?? false
-		const useNativeTools =
-			supportsNativeTools &&
-			metadata?.tools &&
-			metadata.tools.length > 0 &&
-			metadata?.toolProtocol === TOOL_PROTOCOL.NATIVE
+		const useNativeTools = supportsNativeTools && metadata?.tools && metadata.tools.length > 0 && isNativeProtocol
+
+		// For Gemini models with native protocol: inject dummy thought signatures into historical tool calls.
+		// This is required when switching from other models to Gemini to satisfy API validation.
+		// Gemini 3 models validate thought signatures for function calls, and when conversation
+		// history contains tool calls from other models (like Claude), they lack the required
+		// signatures. The "skip_thought_signature_validator" value bypasses this validation.
+		const isGemini = this.isGeminiModel(modelId)
+		let processedMessages = enhancedMessages
+		if (isNativeProtocol && isGemini) {
+			processedMessages = this.injectThoughtSignatureForGemini(enhancedMessages)
+		}
 
 		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model: modelId,
-			messages: [systemMessage, ...enhancedMessages],
+			messages: [systemMessage, ...processedMessages],
 			stream: true,
 			stream_options: {
 				include_usage: true,

From 9eebec073d132888f8afefd31f302c14fc1e3790 Mon Sep 17 00:00:00 2001
From: daniel-lxs
Date: Wed, 14 Jan 2026 22:07:47 -0500
Subject: [PATCH 2/2] fix(litellm): inject dummy thought signatures on ALL
 tool calls for Gemini

When switching from other models (like Claude) to Gemini mid-task,
Gemini validates thought signatures on tool calls.
Tool calls from other models don't have these signatures, causing
'Corrupted thought signature' errors.

Changes:
- Inject dummy thought_signature on ALL tool calls (not just first)
- Always overwrite existing signatures to ensure compatibility
- Update tests to reflect new behavior

The dummy signature base64('skip_thought_signature_validator') bypasses
Gemini's validation for tool calls that weren't generated by Gemini.
---
 src/api/providers/__tests__/lite-llm.spec.ts | 15 +++----
 src/api/providers/lite-llm.ts                | 43 ++++++++------------
 2 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/src/api/providers/__tests__/lite-llm.spec.ts b/src/api/providers/__tests__/lite-llm.spec.ts
index e330fbbbece..64cbd6e8652 100644
--- a/src/api/providers/__tests__/lite-llm.spec.ts
+++ b/src/api/providers/__tests__/lite-llm.spec.ts
@@ -476,7 +476,7 @@ describe("LiteLLMHandler", () => {
 				expect(result[1].tool_calls).toBeUndefined()
 			})
 
-			it("should not inject if thought_signature already exists", () => {
+			it("should always overwrite existing thought_signature", () => {
 				const handler = new LiteLLMHandler(mockOptions)
 				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
 
@@ -500,11 +500,11 @@ describe("LiteLLMHandler", () => {
 
 				const result = injectThoughtSignature(messages)
 
-				// Should keep existing thought_signature unchanged
-				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(existingSignature)
+				// Should overwrite with dummy signature (always inject to ensure compatibility)
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
 			})
 
-			it("should only inject signature into first tool call for parallel calls", () => {
+			it("should inject signature into ALL tool calls for parallel calls", () => {
 				const handler = new LiteLLMHandler(mockOptions)
 				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
 
@@ -516,16 +516,17 @@ describe("LiteLLMHandler", () => {
 						tool_calls: [
 							{ id: "call_first", type: "function", function: { name: "tool1", arguments: "{}" } },
 							{ id: "call_second", type: "function", function: { name: "tool2", arguments: "{}" } },
+							{ id: "call_third", type: "function", function: { name: "tool3", arguments: "{}" } },
 						],
 					},
 				]
 
 				const result = injectThoughtSignature(messages)
 
-				// Only first tool call should have the signature
+				// ALL tool calls should have the signature
 				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
-				// Second tool call should not have provider_specific_fields added
-				expect(result[1].tool_calls[1].provider_specific_fields).toBeUndefined()
+				expect(result[1].tool_calls[1].provider_specific_fields.thought_signature).toBe(dummySignature)
+				expect(result[1].tool_calls[2].provider_specific_fields.thought_signature).toBe(dummySignature)
 			})
 
 			it("should preserve existing provider_specific_fields when adding thought_signature", () => {
diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts
index 279293c256e..fbafc9410f6 100644
--- a/src/api/providers/lite-llm.ts
+++ b/src/api/providers/lite-llm.ts
@@ -66,10 +66,10 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 	 * Per LiteLLM documentation:
 	 * - Thought signatures are stored in provider_specific_fields.thought_signature of tool calls
 	 * - The dummy signature base64("skip_thought_signature_validator") bypasses validation
-	 * - Only the first tool call in parallel calls needs the signature
 	 *
-	 * Note: LiteLLM claims to automatically handle missing signatures, but this explicit
-	 * injection ensures compatibility when LiteLLM doesn't detect the model switch.
+	 * We inject the dummy signature on EVERY tool call unconditionally to ensure Gemini
+	 * doesn't complain about missing/corrupted signatures when conversation history
+	 * contains tool calls from other models (like Claude).
 	 */
 	private injectThoughtSignatureForGemini(
 		openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[],
 	): OpenAI.Chat.ChatCompletionMessageParam[] {
@@ -83,30 +83,19 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 
 				// Only process if there are tool calls
 				if (toolCalls && toolCalls.length > 0) {
-					// Check if first tool call already has a thought_signature
-					const firstToolCall = toolCalls[0]
-					const hasExistingSignature = firstToolCall?.provider_specific_fields?.thought_signature
-
-					if (!hasExistingSignature) {
-						// Inject dummy signature into the first tool call's provider_specific_fields
-						const updatedToolCalls = toolCalls.map((tc, index) => {
-							if (index === 0) {
-								// Only first tool call needs the signature for parallel calls
-								return {
-									...tc,
-									provider_specific_fields: {
-										...(tc.provider_specific_fields || {}),
-										thought_signature: dummySignature,
-									},
-								}
-							}
-							return tc
-						})
-
-						return {
-							...msg,
-							tool_calls: updatedToolCalls,
-						}
+					// Inject dummy signature into ALL tool calls' provider_specific_fields
+					// This ensures Gemini doesn't reject tool calls from other models
+					const updatedToolCalls = toolCalls.map((tc) => ({
+						...tc,
+						provider_specific_fields: {
+							...(tc.provider_specific_fields || {}),
+							thought_signature: dummySignature,
+						},
+					}))
+
+					return {
+						...msg,
+						tool_calls: updatedToolCalls,
 					}
 				}
 			}
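
Appendix (not part of the patch series): a minimal standalone sketch of the PATCH 2/2 injection, for reviewers who want to exercise the transform outside the repo. The Message and ToolCall shapes below are simplified stand-ins for the OpenAI SDK types, and injectThoughtSignature mirrors the private helper above; only Node's Buffer is assumed.

// Simplified stand-ins for the OpenAI chat message types (assumption, not the SDK).
type ToolCall = {
	id: string
	type: "function"
	function: { name: string; arguments: string }
	provider_specific_fields?: Record<string, unknown>
}
type Message =
	| { role: "user" | "tool"; content: string }
	| { role: "assistant"; content: string; tool_calls?: ToolCall[] }

// base64("skip_thought_signature_validator") === "c2tpcF90aG91Z2h0X3NpZ25hdHVyZV92YWxpZGF0b3I="
const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")

// PATCH 2/2 behavior: stamp EVERY tool call, overwriting any existing signature.
function injectThoughtSignature(messages: Message[]): Message[] {
	return messages.map((msg) => {
		if (msg.role !== "assistant" || !msg.tool_calls?.length) return msg
		return {
			...msg,
			tool_calls: msg.tool_calls.map((tc) => ({
				...tc,
				provider_specific_fields: {
					...(tc.provider_specific_fields ?? {}),
					thought_signature: dummySignature,
				},
			})),
		}
	})
}

// A tool call recorded while Claude was the active model -- no signature yet.
const history: Message[] = [
	{ role: "user", content: "Read test.txt" },
	{
		role: "assistant",
		content: "",
		tool_calls: [{ id: "call_1", type: "function", function: { name: "read_file", arguments: "{}" } }],
	},
]
console.log(JSON.stringify(injectThoughtSignature(history), null, 2))

Note that overwriting (rather than skipping) an existing signature is deliberately lossy: a genuine Gemini-produced signature is replaced by the validator-bypass value, trading signature fidelity for never hitting the 'Corrupted thought signature' error after a model switch.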