diff --git a/src/api/providers/__tests__/lite-llm.spec.ts b/src/api/providers/__tests__/lite-llm.spec.ts
index a95118469e2..e330fbbbece 100644
--- a/src/api/providers/__tests__/lite-llm.spec.ts
+++ b/src/api/providers/__tests__/lite-llm.spec.ts
@@ -3,7 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 
 import { LiteLLMHandler } from "../lite-llm"
 import { ApiHandlerOptions } from "../../../shared/api"
-import { litellmDefaultModelId, litellmDefaultModelInfo } from "@roo-code/types"
+import { litellmDefaultModelId, litellmDefaultModelInfo, TOOL_PROTOCOL } from "@roo-code/types"
 
 // Mock vscode first to avoid import errors
 vi.mock("vscode", () => ({}))
@@ -40,6 +40,12 @@ vi.mock("../fetchers/modelCache", () => ({
 			"claude-3-opus": { ...litellmDefaultModelInfo, maxTokens: 8192 },
 			"llama-3": { ...litellmDefaultModelInfo, maxTokens: 8192 },
 			"gpt-4-turbo": { ...litellmDefaultModelInfo, maxTokens: 8192 },
+			// Gemini models for thought signature injection tests
+			"gemini-3-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"gemini-3-flash": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"gemini-2.5-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"google/gemini-3-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+			"vertex_ai/gemini-3-pro": { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
 		})
 	}),
 	getModelsFromCache: vi.fn().mockReturnValue(undefined),
@@ -388,4 +394,314 @@ describe("LiteLLMHandler", () => {
 			expect(createCall.max_completion_tokens).toBeUndefined()
 		})
 	})
+
+	describe("Gemini thought signature injection", () => {
+		describe("isGeminiModel detection", () => {
+			it("should detect Gemini 3 models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("gemini-3-pro")).toBe(true)
+				expect(isGeminiModel("gemini-3-flash")).toBe(true)
+				expect(isGeminiModel("gemini-3-pro-preview")).toBe(true)
+			})
+
+			it("should detect Gemini 2.5 models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("gemini-2.5-pro")).toBe(true)
+				expect(isGeminiModel("gemini-2.5-flash")).toBe(true)
+			})
+
+			it("should detect provider-prefixed Gemini models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("google/gemini-3-pro")).toBe(true)
+				expect(isGeminiModel("vertex_ai/gemini-3-pro")).toBe(true)
+				expect(isGeminiModel("vertex/gemini-2.5-pro")).toBe(true)
+			})
+
+			it("should not detect non-Gemini models", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const isGeminiModel = (handler as any).isGeminiModel.bind(handler)
+
+				expect(isGeminiModel("gpt-4")).toBe(false)
+				expect(isGeminiModel("claude-3-opus")).toBe(false)
+				expect(isGeminiModel("gemini-1.5-pro")).toBe(false)
+				expect(isGeminiModel("gemini-2.0-flash")).toBe(false)
+			})
+		})
+
+		describe("injectThoughtSignatureForGemini", () => {
+			// Base64 encoded "skip_thought_signature_validator"
+			const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")
+
+			it("should inject provider_specific_fields.thought_signature for assistant messages with tool_calls", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{ id: "call_123", type: "function", function: { name: "test_tool", arguments: "{}" } },
+						],
+					},
+					{ role: "tool", tool_call_id: "call_123", content: "result" },
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// The first tool call should have provider_specific_fields.thought_signature injected
+				expect(result[1].tool_calls[0].provider_specific_fields).toBeDefined()
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+
+			it("should not inject if assistant message has no tool_calls", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{ role: "assistant", content: "Hi there!" },
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// No changes should be made
+				expect(result[1].tool_calls).toBeUndefined()
+			})
+
+			it("should not inject if thought_signature already exists", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const existingSignature = "existing_signature_base64"
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{
+								id: "call_123",
+								type: "function",
+								function: { name: "test_tool", arguments: "{}" },
+								provider_specific_fields: { thought_signature: existingSignature },
+							},
+						],
+					},
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// Should keep existing thought_signature unchanged
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(existingSignature)
+			})
+
+			it("should only inject signature into first tool call for parallel calls", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{ id: "call_first", type: "function", function: { name: "tool1", arguments: "{}" } },
+							{ id: "call_second", type: "function", function: { name: "tool2", arguments: "{}" } },
+						],
+					},
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// Only first tool call should have the signature
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+				// Second tool call should not have provider_specific_fields added
+				expect(result[1].tool_calls[1].provider_specific_fields).toBeUndefined()
+			})
+
+			it("should preserve existing provider_specific_fields when adding thought_signature", () => {
+				const handler = new LiteLLMHandler(mockOptions)
+				const injectThoughtSignature = (handler as any).injectThoughtSignatureForGemini.bind(handler)
+
+				const messages = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: "",
+						tool_calls: [
+							{
+								id: "call_123",
+								type: "function",
+								function: { name: "test_tool", arguments: "{}" },
+								provider_specific_fields: { other_field: "value" },
+							},
+						],
+					},
+				]
+
+				const result = injectThoughtSignature(messages)
+
+				// Should have both existing field and new thought_signature
+				expect(result[1].tool_calls[0].provider_specific_fields.other_field).toBe("value")
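+				// The injected dummy is base64("skip_thought_signature_validator"), merged in non-destructively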
+				expect(result[1].tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+		})
+
+		describe("createMessage integration with Gemini models", () => {
+			// Base64 encoded "skip_thought_signature_validator"
+			const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")
+
+			it("should inject thought signatures for Gemini 3 models with native tools", async () => {
+				const optionsWithGemini: ApiHandlerOptions = {
+					...mockOptions,
+					litellmModelId: "gemini-3-pro",
+				}
+				handler = new LiteLLMHandler(optionsWithGemini)
+
+				// Mock fetchModel to return a Gemini model with native tool support
+				vi.spyOn(handler as any, "fetchModel").mockResolvedValue({
+					id: "gemini-3-pro",
+					info: { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+				})
+
+				const systemPrompt = "You are a helpful assistant"
+				// Simulate conversation history with a tool call from a previous model (Claude)
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: [
+							{ type: "text", text: "I'll help you with that." },
+							{ type: "tool_use", id: "toolu_123", name: "read_file", input: { path: "test.txt" } },
+						],
+					},
+					{
+						role: "user",
+						content: [{ type: "tool_result", tool_use_id: "toolu_123", content: "file contents" }],
+					},
+					{ role: "user", content: "Thanks!" },
+				]
+
+				// Mock the stream response
+				const mockStream = {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							choices: [{ delta: { content: "You're welcome!" } }],
+							usage: {
+								prompt_tokens: 100,
+								completion_tokens: 20,
+							},
+						}
+					},
+				}
+
+				mockCreate.mockReturnValue({
+					withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+				})
+
+				// Provide tools and native protocol to trigger the injection
+				const metadata = {
+					tools: [
+						{
+							type: "function",
+							function: { name: "read_file", description: "Read a file", parameters: {} },
+						},
+					],
+					toolProtocol: TOOL_PROTOCOL.NATIVE,
+				}
+
+				const generator = handler.createMessage(systemPrompt, messages, metadata as any)
+				for await (const _chunk of generator) {
+					// Consume the generator
+				}
+
+				// Verify that the assistant message with tool_calls has thought_signature injected
+				const createCall = mockCreate.mock.calls[0][0]
+				const assistantMessage = createCall.messages.find(
+					(msg: any) => msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0,
+				)
+
+				expect(assistantMessage).toBeDefined()
+				// First tool call should have the thought signature
+				expect(assistantMessage.tool_calls[0].provider_specific_fields).toBeDefined()
+				expect(assistantMessage.tool_calls[0].provider_specific_fields.thought_signature).toBe(dummySignature)
+			})
+
+			it("should not inject thought signatures for non-Gemini models", async () => {
+				const optionsWithGPT4: ApiHandlerOptions = {
+					...mockOptions,
+					litellmModelId: "gpt-4",
+				}
+				handler = new LiteLLMHandler(optionsWithGPT4)
+
+				vi.spyOn(handler as any, "fetchModel").mockResolvedValue({
+					id: "gpt-4",
+					info: { ...litellmDefaultModelInfo, maxTokens: 8192, supportsNativeTools: true },
+				})
+
+				const systemPrompt = "You are a helpful assistant"
+				const messages: Anthropic.Messages.MessageParam[] = [
+					{ role: "user", content: "Hello" },
+					{
+						role: "assistant",
+						content: [
+							{ type: "text", text: "I'll help you with that." },
+							{ type: "tool_use", id: "toolu_123", name: "read_file", input: { path: "test.txt" } },
+						],
+					},
+					{
+						role: "user",
+						content: [{ type: "tool_result", tool_use_id: "toolu_123", content: "file contents" }],
+					},
+				]
+
+				const mockStream = {
+					async *[Symbol.asyncIterator]() {
+						yield {
+							choices: [{ delta: { content: "Response" } }],
+							usage: { prompt_tokens: 100, completion_tokens: 20 },
+						}
+					},
+				}
+
+				mockCreate.mockReturnValue({
+					withResponse: vi.fn().mockResolvedValue({ data: mockStream }),
+				})
+
+				const metadata = {
+					tools: [
+						{
+							type: "function",
+							function: { name: "read_file", description: "Read a file", parameters: {} },
+						},
+					],
+					toolProtocol: TOOL_PROTOCOL.NATIVE,
+				}
+
+				const generator = handler.createMessage(systemPrompt, messages, metadata as any)
+				for await (const _chunk of generator) {
+					// Consume
+				}
+
+				// Verify that thought_signature was NOT injected for non-Gemini model
+				const createCall = mockCreate.mock.calls[0][0]
+				const assistantMessage = createCall.messages.find(
+					(msg: any) => msg.role === "assistant" && msg.tool_calls && msg.tool_calls.length > 0,
+				)
+
+				expect(assistantMessage).toBeDefined()
+				// Tool calls should not have provider_specific_fields added
+				expect(assistantMessage.tool_calls[0].provider_specific_fields).toBeUndefined()
+			})
+		})
+	})
 })
diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts
index 34341a88300..279293c256e 100644
--- a/src/api/providers/lite-llm.ts
+++ b/src/api/providers/lite-llm.ts
@@ -9,6 +9,7 @@ import { ApiHandlerOptions } from "../../shared/api"
 
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { resolveToolProtocol } from "../../utils/resolveToolProtocol"
 
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { RouterProvider } from "./router-provider"
@@ -38,6 +39,81 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 		return /\bgpt-?5(?!\d)/i.test(modelId)
 	}
 
+	/**
+	 * Detect if the model is a Gemini model that requires thought signature handling.
+	 * Gemini 3 models validate thought signatures for tool/function calling steps.
+	 */
+	private isGeminiModel(modelId: string): boolean {
+		// Match various Gemini model patterns:
+		// - gemini-3-pro, gemini-3-flash, gemini-3-*
+		// - gemini/gemini-3-*, google/gemini-3-*
+		// - vertex_ai/gemini-3-*, vertex/gemini-3-*
+		// Also match Gemini 2.5+ models which use similar validation
+		const lowerModelId = modelId.toLowerCase()
+		return (
+			lowerModelId.includes("gemini-3") ||
+			lowerModelId.includes("gemini-2.5") ||
+			// Also match provider-prefixed versions
+			/\b(gemini|google|vertex_ai|vertex)\/gemini-(3|2\.5)/i.test(modelId)
+		)
+	}
+
+	/**
+	 * Inject thought signatures for Gemini models via provider_specific_fields.
+	 * This is required when switching from other models to Gemini to satisfy API validation
+	 * for function calls that weren't generated by Gemini (and thus lack thought signatures).
+	 *
+	 * Per LiteLLM documentation:
+	 * - Thought signatures are stored in provider_specific_fields.thought_signature of tool calls
+	 * - The dummy signature base64("skip_thought_signature_validator") bypasses validation
+	 * - Only the first tool call in parallel calls needs the signature
+	 *
+	 * Note: LiteLLM claims to automatically handle missing signatures, but this explicit
+	 * injection ensures compatibility when LiteLLM doesn't detect the model switch.
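+	 *
+	 * For illustration, the first tool call of a processed assistant message ends up
+	 * shaped like this (ids and names are examples only):
+	 *   { id: "call_123", type: "function", function: { name, arguments },
+	 *     provider_specific_fields: { thought_signature: base64("skip_thought_signature_validator") } }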
+	 */
+	private injectThoughtSignatureForGemini(
+		openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[],
+	): OpenAI.Chat.ChatCompletionMessageParam[] {
+		// Base64 encoded "skip_thought_signature_validator" as per LiteLLM docs
+		const dummySignature = Buffer.from("skip_thought_signature_validator").toString("base64")
+
+		return openAiMessages.map((msg) => {
+			if (msg.role === "assistant") {
+				const toolCalls = (msg as any).tool_calls as any[] | undefined
+
+				// Only process if there are tool calls
+				if (toolCalls && toolCalls.length > 0) {
+					// Check if first tool call already has a thought_signature
+					const firstToolCall = toolCalls[0]
+					const hasExistingSignature = firstToolCall?.provider_specific_fields?.thought_signature
+
+					if (!hasExistingSignature) {
+						// Inject dummy signature into the first tool call's provider_specific_fields
+						const updatedToolCalls = toolCalls.map((tc, index) => {
+							if (index === 0) {
+								// Only first tool call needs the signature for parallel calls
+								return {
+									...tc,
+									provider_specific_fields: {
+										...(tc.provider_specific_fields || {}),
+										thought_signature: dummySignature,
+									},
+								}
+							}
+							return tc
+						})
+
+						return {
+							...msg,
+							tool_calls: updatedToolCalls,
+						}
+					}
+				}
+			}
+			return msg
+		})
+	}
+
 	override async *createMessage(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
@@ -116,17 +192,28 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa
 		// Check if this is a GPT-5 model that requires max_completion_tokens instead of max_tokens
 		const isGPT5Model = this.isGpt5(modelId)
 
+		// Resolve tool protocol - use metadata's locked protocol if provided, otherwise resolve from options
+		const toolProtocol = resolveToolProtocol(this.options, info, metadata?.toolProtocol)
+		const isNativeProtocol = toolProtocol === TOOL_PROTOCOL.NATIVE
+
 		// Check if model supports native tools and tools are provided with native protocol
 		const supportsNativeTools = info.supportsNativeTools ?? false
-		const useNativeTools =
-			supportsNativeTools &&
-			metadata?.tools &&
-			metadata.tools.length > 0 &&
-			metadata?.toolProtocol === TOOL_PROTOCOL.NATIVE
+		const useNativeTools = supportsNativeTools && metadata?.tools && metadata.tools.length > 0 && isNativeProtocol
+
+		// For Gemini models with native protocol: inject a dummy thought signature into tool calls.
+		// This is required when switching from other models to Gemini to satisfy API validation.
+		// Gemini 3 models validate thought signatures for function calls, and when conversation
+		// history contains tool calls from other models (like Claude), they lack the required
+		// signatures. The "skip_thought_signature_validator" value bypasses this validation.
+		const isGemini = this.isGeminiModel(modelId)
+		let processedMessages = enhancedMessages
+		if (isNativeProtocol && isGemini) {
+			processedMessages = this.injectThoughtSignatureForGemini(enhancedMessages)
+		}
 
 		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model: modelId,
-			messages: [systemMessage, ...enhancedMessages],
+			messages: [systemMessage, ...processedMessages],
 			stream: true,
 			stream_options: {
 				include_usage: true,