diff --git a/src/services/code-index/__tests__/service-factory.spec.ts b/src/services/code-index/__tests__/service-factory.spec.ts index 1d8f7ba4786..3e943ebd82e 100644 --- a/src/services/code-index/__tests__/service-factory.spec.ts +++ b/src/services/code-index/__tests__/service-factory.spec.ts @@ -286,7 +286,7 @@ describe("CodeIndexServiceFactory", () => { // Arrange const testConfig = { embedderProvider: "gemini", - modelId: "text-embedding-004", + modelId: "gemini-embedding-001", geminiOptions: { apiKey: "test-gemini-api-key", }, @@ -297,6 +297,25 @@ describe("CodeIndexServiceFactory", () => { factory.createEmbedder() // Assert + expect(MockedGeminiEmbedder).toHaveBeenCalledWith("test-gemini-api-key", "gemini-embedding-001") + }) + + it("should pass deprecated text-embedding-004 modelId to GeminiEmbedder (migration happens inside GeminiEmbedder)", () => { + // Arrange - service-factory passes the config modelId directly; + // GeminiEmbedder handles the migration internally + const testConfig = { + embedderProvider: "gemini", + modelId: "text-embedding-004", + geminiOptions: { + apiKey: "test-gemini-api-key", + }, + } + mockConfigManager.getConfig.mockReturnValue(testConfig as any) + + // Act + factory.createEmbedder() + + // Assert - factory passes the original modelId; GeminiEmbedder migrates it internally expect(MockedGeminiEmbedder).toHaveBeenCalledWith("test-gemini-api-key", "text-embedding-004") }) diff --git a/src/services/code-index/embedders/__tests__/gemini.spec.ts b/src/services/code-index/embedders/__tests__/gemini.spec.ts index d41a4dc1e93..d84dcd8abc1 100644 --- a/src/services/code-index/embedders/__tests__/gemini.spec.ts +++ b/src/services/code-index/embedders/__tests__/gemini.spec.ts @@ -44,7 +44,7 @@ describe("GeminiEmbedder", () => { it("should create an instance with specified model", () => { // Arrange const apiKey = "test-gemini-api-key" - const modelId = "text-embedding-004" + const modelId = "gemini-embedding-001" // Act embedder = new 
GeminiEmbedder(apiKey, modelId) @@ -53,7 +53,24 @@ describe("GeminiEmbedder", () => { expect(MockedOpenAICompatibleEmbedder).toHaveBeenCalledWith( "https://generativelanguage.googleapis.com/v1beta/openai/", apiKey, - "text-embedding-004", + "gemini-embedding-001", + 2048, + ) + }) + + it("should migrate deprecated text-embedding-004 to gemini-embedding-001", () => { + // Arrange + const apiKey = "test-gemini-api-key" + const deprecatedModelId = "text-embedding-004" + + // Act + embedder = new GeminiEmbedder(apiKey, deprecatedModelId) + + // Assert - should be migrated to gemini-embedding-001 + expect(MockedOpenAICompatibleEmbedder).toHaveBeenCalledWith( + "https://generativelanguage.googleapis.com/v1beta/openai/", + apiKey, + "gemini-embedding-001", 2048, ) }) @@ -109,8 +126,8 @@ describe("GeminiEmbedder", () => { }) it("should use provided model parameter when specified", async () => { - // Arrange - embedder = new GeminiEmbedder("test-api-key", "text-embedding-004") + // Arrange - construct the embedder with the current (non-deprecated) model + embedder = new GeminiEmbedder("test-api-key", "gemini-embedding-001") const texts = ["test text 1", "test text 2"] const mockResponse = { embeddings: [ @@ -120,7 +137,7 @@ describe("GeminiEmbedder", () => { } mockCreateEmbeddings.mockResolvedValue(mockResponse) - // Act + // Act - pass the model explicitly at runtime (same ID as the constructor's) const result = await embedder.createEmbeddings(texts, "gemini-embedding-001") // Assert diff --git a/src/services/code-index/embedders/gemini.ts b/src/services/code-index/embedders/gemini.ts index 7e795875c9d..03bfc35aaec 100644 --- a/src/services/code-index/embedders/gemini.ts +++ b/src/services/code-index/embedders/gemini.ts @@ -10,15 +10,33 @@ import { TelemetryService } from "@roo-code/telemetry" * with configuration for Google's Gemini embedding API.
* * Supported models: - * - text-embedding-004 (dimension: 768) - * - gemini-embedding-001 (dimension: 2048) + * - gemini-embedding-001 (dimension: 3072) + * + * Note: text-embedding-004 has been deprecated and is automatically + * migrated to gemini-embedding-001 for backward compatibility. */ export class GeminiEmbedder implements IEmbedder { private readonly openAICompatibleEmbedder: OpenAICompatibleEmbedder private static readonly GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/" private static readonly DEFAULT_MODEL = "gemini-embedding-001" + /** + * Deprecated models that are automatically migrated to their replacements. + * Users with these models configured will be silently migrated without interruption. + */ + private static readonly DEPRECATED_MODEL_MIGRATIONS: Record<string, string> = { + "text-embedding-004": "gemini-embedding-001", + } private readonly modelId: string + /** + * Migrates deprecated model IDs to their replacements. + * @param modelId The model ID to potentially migrate + * @returns The migrated model ID, or the original if no migration is needed + */ + private static migrateModelId(modelId: string): string { + return GeminiEmbedder.DEPRECATED_MODEL_MIGRATIONS[modelId] ?? modelId + } + /** * Creates a new Gemini embedder * @param apiKey The Gemini API key for authentication @@ -29,8 +47,11 @@ export class GeminiEmbedder implements IEmbedder { throw new Error(t("embeddings:validation.apiKeyRequired")) } - // Use provided model or default - this.modelId = modelId || GeminiEmbedder.DEFAULT_MODEL + // Migrate deprecated models to their replacements silently + const migratedModelId = modelId ?
GeminiEmbedder.migrateModelId(modelId) : undefined + + // Use provided model (after migration) or default + this.modelId = migratedModelId || GeminiEmbedder.DEFAULT_MODEL // Create an OpenAI Compatible embedder with Gemini's configuration this.openAICompatibleEmbedder = new OpenAICompatibleEmbedder( diff --git a/src/shared/__tests__/embeddingModels.spec.ts b/src/shared/__tests__/embeddingModels.spec.ts new file mode 100644 index 00000000000..16aa019c7f1 --- /dev/null +++ b/src/shared/__tests__/embeddingModels.spec.ts @@ -0,0 +1,95 @@ +import { describe, it, expect } from "vitest" +import { + getModelDimension, + getModelScoreThreshold, + getDefaultModelId, + EMBEDDING_MODEL_PROFILES, +} from "../embeddingModels" + +describe("embeddingModels", () => { + describe("EMBEDDING_MODEL_PROFILES", () => { + it("should have gemini provider with gemini-embedding-001 model", () => { + const geminiProfiles = EMBEDDING_MODEL_PROFILES.gemini + expect(geminiProfiles).toBeDefined() + expect(geminiProfiles!["gemini-embedding-001"]).toBeDefined() + expect(geminiProfiles!["gemini-embedding-001"].dimension).toBe(3072) + }) + + it("should have deprecated text-embedding-004 in gemini profiles for backward compatibility", () => { + // This is critical for backward compatibility: + // Users with text-embedding-004 configured need dimension lookup to work + // even though the model is migrated to gemini-embedding-001 in GeminiEmbedder + const geminiProfiles = EMBEDDING_MODEL_PROFILES.gemini + expect(geminiProfiles).toBeDefined() + expect(geminiProfiles!["text-embedding-004"]).toBeDefined() + expect(geminiProfiles!["text-embedding-004"].dimension).toBe(3072) + }) + }) + + describe("getModelDimension", () => { + it("should return dimension for gemini-embedding-001", () => { + const dimension = getModelDimension("gemini", "gemini-embedding-001") + expect(dimension).toBe(3072) + }) + + it("should return dimension for deprecated text-embedding-004", () => { + // This ensures createVectorStore() 
works for users with text-embedding-004 configured + // The dimension should be 3072 (matching gemini-embedding-001) because: + // 1. GeminiEmbedder migrates text-embedding-004 to gemini-embedding-001 + // 2. gemini-embedding-001 produces 3072-dimensional embeddings + // 3. Vector store dimension must match the actual embedding dimension + const dimension = getModelDimension("gemini", "text-embedding-004") + expect(dimension).toBe(3072) + }) + + it("should return undefined for unknown model", () => { + const dimension = getModelDimension("gemini", "unknown-model") + expect(dimension).toBeUndefined() + }) + + it("should return undefined for unknown provider", () => { + const dimension = getModelDimension("unknown-provider" as any, "some-model") + expect(dimension).toBeUndefined() + }) + + it("should return correct dimensions for openai models", () => { + expect(getModelDimension("openai", "text-embedding-3-small")).toBe(1536) + expect(getModelDimension("openai", "text-embedding-3-large")).toBe(3072) + expect(getModelDimension("openai", "text-embedding-ada-002")).toBe(1536) + }) + }) + + describe("getModelScoreThreshold", () => { + it("should return score threshold for gemini-embedding-001", () => { + const threshold = getModelScoreThreshold("gemini", "gemini-embedding-001") + expect(threshold).toBe(0.4) + }) + + it("should return score threshold for deprecated text-embedding-004", () => { + const threshold = getModelScoreThreshold("gemini", "text-embedding-004") + expect(threshold).toBe(0.4) + }) + + it("should return undefined for unknown model", () => { + const threshold = getModelScoreThreshold("gemini", "unknown-model") + expect(threshold).toBeUndefined() + }) + }) + + describe("getDefaultModelId", () => { + it("should return gemini-embedding-001 for gemini provider", () => { + const defaultModel = getDefaultModelId("gemini") + expect(defaultModel).toBe("gemini-embedding-001") + }) + + it("should return text-embedding-3-small for openai provider", () => { + 
const defaultModel = getDefaultModelId("openai") + expect(defaultModel).toBe("text-embedding-3-small") + }) + + it("should return codestral-embed-2505 for mistral provider", () => { + const defaultModel = getDefaultModelId("mistral") + expect(defaultModel).toBe("codestral-embed-2505") + }) + }) +}) diff --git a/src/shared/embeddingModels.ts b/src/shared/embeddingModels.ts index a4c5217a9d2..0b59c5b4b28 100644 --- a/src/shared/embeddingModels.ts +++ b/src/shared/embeddingModels.ts @@ -34,8 +34,10 @@ export const EMBEDDING_MODEL_PROFILES: EmbeddingModelProfiles = { }, }, gemini: { - "text-embedding-004": { dimension: 768 }, "gemini-embedding-001": { dimension: 3072, scoreThreshold: 0.4 }, + // Deprecated: text-embedding-004 is migrated to gemini-embedding-001 in GeminiEmbedder + // Kept here for backward-compatible dimension lookup in createVectorStore() + "text-embedding-004": { dimension: 3072, scoreThreshold: 0.4 }, }, mistral: { "codestral-embed-2505": { dimension: 1536, scoreThreshold: 0.4 },