From 49baae14179bd67966fbbd107e49d070828e95a9 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Mon, 9 Mar 2026 11:31:21 +0000 Subject: [PATCH] feat: add /v1/models fallback when /v1/model/info is inaccessible for LiteLLM When /v1/model/info returns a non-200 status (e.g. 403 Forbidden), the function now automatically falls back to /v1/models to retrieve the model list. This helps enterprise/managed LiteLLM users where /v1/model/info may be blocked by API gateways or internal policies. Closes #11898 --- .../fetchers/__tests__/litellm.spec.ts | 383 +++++++++++++++--- src/api/providers/fetchers/litellm.ts | 188 ++++++--- 2 files changed, 454 insertions(+), 117 deletions(-) diff --git a/src/api/providers/fetchers/__tests__/litellm.spec.ts b/src/api/providers/fetchers/__tests__/litellm.spec.ts index c05cda88398..a094f42d255 100644 --- a/src/api/providers/fetchers/__tests__/litellm.spec.ts +++ b/src/api/providers/fetchers/__tests__/litellm.spec.ts @@ -3,7 +3,7 @@ vi.mock("axios") import type { Mock } from "vitest" import axios from "axios" -import { getLiteLLMModels } from "../litellm" +import { getLiteLLMModels, parseModelInfoResponse, parseModelsListResponse } from "../litellm" import { DEFAULT_HEADERS } from "../../constants" const mockedAxios = axios as typeof axios & { @@ -323,63 +323,6 @@ describe("getLiteLLMModels", () => { }) }) - it("throws error for unexpected response format", async () => { - const mockResponse = { - data: { - // Missing 'data' field - models: [], - }, - } - - mockedAxios.get.mockResolvedValue(mockResponse) - - await expect(getLiteLLMModels("test-api-key", "http://localhost:4000")).rejects.toThrow( - "Failed to fetch LiteLLM models: Unexpected response format.", - ) - }) - - it("throws detailed error for HTTP error responses", async () => { - const axiosError = { - response: { - status: 401, - statusText: "Unauthorized", - }, - isAxiosError: true, - } - - mockedAxios.isAxiosError.mockReturnValue(true) - mockedAxios.get.mockRejectedValue(axiosError) - - await expect(getLiteLLMModels(DUMMY_INVALID_KEY, "http://localhost:4000")).rejects.toThrow( - "Failed to fetch LiteLLM models: 401 Unauthorized. Check base URL and API key.", - ) - }) - - it("throws network error for request failures", async () => { - const axiosError = { - request: {}, - isAxiosError: true, - } - - mockedAxios.isAxiosError.mockReturnValue(true) - mockedAxios.get.mockRejectedValue(axiosError) - - await expect(getLiteLLMModels("test-api-key", "http://invalid-url")).rejects.toThrow( - "Failed to fetch LiteLLM models: No response from server. Check LiteLLM server status and base URL.", - ) - }) - - it("throws generic error for other failures", async () => { - const genericError = new Error("Network timeout") - - mockedAxios.isAxiosError.mockReturnValue(false) - mockedAxios.get.mockRejectedValue(genericError) - - await expect(getLiteLLMModels("test-api-key", "http://localhost:4000")).rejects.toThrow( - "Failed to fetch LiteLLM models: Network timeout", - ) - }) - it("handles timeout parameter correctly", async () => { const mockResponse = { data: { data: [] } } mockedAxios.get.mockResolvedValue(mockResponse) @@ -697,4 +640,328 @@ describe("getLiteLLMModels", () => { description: "model-with-only-max-output-tokens via LiteLLM proxy", }) }) + + describe("fallback to /v1/models", () => { + it("falls back to /v1/models when /v1/model/info returns 403", async () => { + const forbiddenError = { + response: { status: 403, statusText: "Forbidden" }, + isAxiosError: true, + message: "Request failed with status code 403", + } + + const fallbackResponse = { + data: { + object: "list", + data: [ + { id: "gpt-4", object: "model", created: 1687882411, owned_by: "openai" }, + { id: "claude-3-opus", object: "model", created: 1687882411, owned_by: "anthropic" }, + ], + }, + } + + mockedAxios.get + .mockRejectedValueOnce(forbiddenError) // /v1/model/info fails + .mockResolvedValueOnce(fallbackResponse) // /v1/models succeeds + + const result = await getLiteLLMModels("test-api-key", "http://localhost:4000") + + expect(mockedAxios.get).toHaveBeenCalledTimes(2) + expect(mockedAxios.get).toHaveBeenNthCalledWith( + 1, + "http://localhost:4000/v1/model/info", + expect.any(Object), + ) + expect(mockedAxios.get).toHaveBeenNthCalledWith(2, "http://localhost:4000/v1/models", expect.any(Object)) + + expect(result).toEqual({ + "gpt-4": { + maxTokens: 8192, + contextWindow: 200000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: undefined, + outputPrice: undefined, + cacheWritesPrice: undefined, + cacheReadsPrice: undefined, + description: "gpt-4 via LiteLLM proxy", + }, + "claude-3-opus": { + maxTokens: 8192, + contextWindow: 200000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: undefined, + outputPrice: undefined, + cacheWritesPrice: undefined, + cacheReadsPrice: undefined, + description: "claude-3-opus via LiteLLM proxy", + }, + }) + }) + + it("falls back to /v1/models when /v1/model/info returns 500", async () => { + const serverError = { + response: { status: 500, statusText: "Internal Server Error" }, + isAxiosError: true, + message: "Request failed with status code 500", + } + + const fallbackResponse = { + data: { + object: "list", + data: [{ id: "gpt-4", object: "model", created: 1687882411, owned_by: "openai" }], + }, + } + + mockedAxios.get + .mockRejectedValueOnce(serverError) // /v1/model/info fails + .mockResolvedValueOnce(fallbackResponse) // /v1/models succeeds + + const result = await getLiteLLMModels("test-api-key", "http://localhost:4000") + + expect(mockedAxios.get).toHaveBeenCalledTimes(2) + expect(Object.keys(result)).toEqual(["gpt-4"]) + }) + + it("falls back to /v1/models when /v1/model/info returns unexpected format", async () => { + // /v1/model/info returns successfully but with unexpected format + const unexpectedResponse = { + data: { + models: [], // Wrong field name - no 'data' array + }, + } + + const fallbackResponse = { + data: { + object: "list", + data: [{ id: "gpt-4", object: "model", created: 1687882411, owned_by: "openai" }], + }, + } + + mockedAxios.get + .mockResolvedValueOnce(unexpectedResponse) // /v1/model/info returns unexpected format + .mockResolvedValueOnce(fallbackResponse) // /v1/models succeeds + + const result = await getLiteLLMModels("test-api-key", "http://localhost:4000") + + expect(mockedAxios.get).toHaveBeenCalledTimes(2) + expect(result).toEqual({ + "gpt-4": { + maxTokens: 8192, + contextWindow: 200000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: undefined, + outputPrice: undefined, + cacheWritesPrice: undefined, + cacheReadsPrice: undefined, + description: "gpt-4 via LiteLLM proxy", + }, + }) + }) + + it("throws error when both /v1/model/info and /v1/models fail with HTTP errors", async () => { + const modelInfoError = { + response: { status: 403, statusText: "Forbidden" }, + isAxiosError: true, + message: "Request failed with status code 403", + } + + const modelsError = { + response: { status: 401, statusText: "Unauthorized" }, + isAxiosError: true, + message: "Request failed with status code 401", + } + + mockedAxios.isAxiosError.mockReturnValue(true) + mockedAxios.get + .mockRejectedValueOnce(modelInfoError) // /v1/model/info fails + .mockRejectedValueOnce(modelsError) // /v1/models also fails + + await expect(getLiteLLMModels(DUMMY_INVALID_KEY, "http://localhost:4000")).rejects.toThrow( + "Failed to fetch LiteLLM models: Both /v1/model/info and /v1/models failed. Last error: 401 Unauthorized. Check base URL and API key.", + ) + + expect(mockedAxios.get).toHaveBeenCalledTimes(2) + }) + + it("throws network error when both endpoints have no response", async () => { + const networkError = { + request: {}, + isAxiosError: true, + message: "Network Error", + } + + mockedAxios.isAxiosError.mockReturnValue(true) + mockedAxios.get + .mockRejectedValueOnce(networkError) // /v1/model/info fails + .mockRejectedValueOnce(networkError) // /v1/models also fails + + await expect(getLiteLLMModels("test-api-key", "http://invalid-url")).rejects.toThrow( + "Failed to fetch LiteLLM models: No response from server. Check LiteLLM server status and base URL.", + ) + }) + + it("throws generic error when both endpoints fail with unknown errors", async () => { + const genericError = new Error("Network timeout") + + mockedAxios.isAxiosError.mockReturnValue(false) + mockedAxios.get + .mockRejectedValueOnce(genericError) // /v1/model/info fails + .mockRejectedValueOnce(genericError) // /v1/models also fails + + await expect(getLiteLLMModels("test-api-key", "http://localhost:4000")).rejects.toThrow( + "Failed to fetch LiteLLM models: Network timeout", + ) + }) + + it("does not call /v1/models when /v1/model/info succeeds", async () => { + const mockResponse = { + data: { + data: [], + }, + } + + mockedAxios.get.mockResolvedValue(mockResponse) + + await getLiteLLMModels("test-api-key", "http://localhost:4000") + + // Should only call /v1/model/info, not /v1/models + expect(mockedAxios.get).toHaveBeenCalledTimes(1) + expect(mockedAxios.get).toHaveBeenCalledWith("http://localhost:4000/v1/model/info", expect.any(Object)) + }) + + it("skips models without valid id in /v1/models fallback response", async () => { + const modelInfoError = new Error("Failed") + const fallbackResponse = { + data: { + object: "list", + data: [ + { id: "valid-model", object: "model" }, + { id: "", object: "model" }, // empty id + { id: null, object: "model" }, // null id + { object: "model" }, // missing id + { id: 123, object: "model" }, // non-string id + ], + }, + } + + mockedAxios.get.mockRejectedValueOnce(modelInfoError).mockResolvedValueOnce(fallbackResponse) + + const result = await getLiteLLMModels("test-api-key", "http://localhost:4000") + + expect(Object.keys(result)).toEqual(["valid-model"]) + }) + + it("preserves URL structure in fallback request", async () => { + const modelInfoError = new Error("Failed") + const fallbackResponse = { + data: { + object: "list", + data: [{ id: "model-1", object: "model" }], + }, + } + + mockedAxios.get.mockRejectedValueOnce(modelInfoError).mockResolvedValueOnce(fallbackResponse) + + await getLiteLLMModels("test-api-key", "http://localhost:4000/litellm/") + + expect(mockedAxios.get).toHaveBeenNthCalledWith( + 2, + "http://localhost:4000/litellm/v1/models", + expect.any(Object), + ) + }) + }) +}) + +describe("parseModelInfoResponse", () => { + it("throws on unexpected format", () => { + expect(() => parseModelInfoResponse({ models: [] })).toThrow("Unexpected response format") + expect(() => parseModelInfoResponse(null)).toThrow("Unexpected response format") + expect(() => parseModelInfoResponse(undefined)).toThrow("Unexpected response format") + }) + + it("parses valid model info response", () => { + const data = { + data: [ + { + model_name: "test-model", + model_info: { + max_tokens: 4096, + max_input_tokens: 128000, + supports_vision: true, + supports_prompt_caching: true, + input_cost_per_token: 0.000003, + output_cost_per_token: 0.000015, + }, + litellm_params: { model: "provider/test-model" }, + }, + ], + } + + const result = parseModelInfoResponse(data) + + expect(result["test-model"]).toEqual({ + maxTokens: 4096, + contextWindow: 128000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 3, + outputPrice: 15, + cacheWritesPrice: undefined, + cacheReadsPrice: undefined, + description: "test-model via LiteLLM proxy", + }) + }) +}) + +describe("parseModelsListResponse", () => { + it("throws on unexpected format", () => { + expect(() => parseModelsListResponse({ models: [] })).toThrow("Unexpected response format from /v1/models") + expect(() => parseModelsListResponse(null)).toThrow("Unexpected response format from /v1/models") + expect(() => parseModelsListResponse(undefined)).toThrow("Unexpected response format from /v1/models") + }) + + it("parses valid /v1/models response with default values", () => { + const data = { + object: "list", + data: [ + { id: "gpt-4", object: "model", created: 1687882411, owned_by: "openai" }, + { id: "claude-3-opus", object: "model", created: 1687882411, owned_by: "anthropic" }, + ], + } + + const result = parseModelsListResponse(data) + + expect(result).toEqual({ + "gpt-4": { + maxTokens: 8192, + contextWindow: 200000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: undefined, + outputPrice: undefined, + cacheWritesPrice: undefined, + cacheReadsPrice: undefined, + description: "gpt-4 via LiteLLM proxy", + }, + "claude-3-opus": { + maxTokens: 8192, + contextWindow: 200000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: undefined, + outputPrice: undefined, + cacheWritesPrice: undefined, + cacheReadsPrice: undefined, + description: "claude-3-opus via LiteLLM proxy", + }, + }) + }) + + it("returns empty object for empty data array", () => { + const result = parseModelsListResponse({ data: [] }) + expect(result).toEqual({}) + }) }) diff --git a/src/api/providers/fetchers/litellm.ts b/src/api/providers/fetchers/litellm.ts index de895d01fbf..47deb0ff27d 100644 --- a/src/api/providers/fetchers/litellm.ts +++ b/src/api/providers/fetchers/litellm.ts @@ -3,80 +3,150 @@ import axios from "axios" import type { ModelRecord } from "@roo-code/types" import { DEFAULT_HEADERS } from "../constants" + +/** + * Builds the standard headers for LiteLLM requests. + */ +function buildHeaders(apiKey: string): Record { + const headers: Record = { + "Content-Type": "application/json", + ...DEFAULT_HEADERS, + } + + if (apiKey) { + headers["Authorization"] = `Bearer ${apiKey}` + } + + return headers +} + +/** + * Builds a URL by appending the given path to the base URL, + * normalizing slashes along the way. + */ +function buildUrl(baseUrl: string, path: string): string { + const urlObj = new URL(baseUrl) + urlObj.pathname = urlObj.pathname.replace(/\/+$/, "").replace(/\/+/g, "/") + path + return urlObj.href +} + /** - * Fetches available models from a LiteLLM server + * Parses the response from `/v1/model/info` into a ModelRecord. + * + * This endpoint returns richer metadata (token limits, pricing, capabilities). + */ +export function parseModelInfoResponse(data: any): ModelRecord { + const models: ModelRecord = {} + + if (!data || !data.data || !Array.isArray(data.data)) { + throw new Error("Failed to fetch LiteLLM models: Unexpected response format.") + } + + for (const model of data.data) { + const modelName = model.model_name + const modelInfo = model.model_info + const litellmModelName = model?.litellm_params?.model as string | undefined + + if (!modelName || !modelInfo || !litellmModelName) continue + + models[modelName] = { + maxTokens: modelInfo.max_output_tokens || modelInfo.max_tokens || 8192, + contextWindow: modelInfo.max_input_tokens || 200000, + supportsImages: Boolean(modelInfo.supports_vision), + supportsPromptCache: Boolean(modelInfo.supports_prompt_caching), + inputPrice: modelInfo.input_cost_per_token ? modelInfo.input_cost_per_token * 1000000 : undefined, + outputPrice: modelInfo.output_cost_per_token ? modelInfo.output_cost_per_token * 1000000 : undefined, + cacheWritesPrice: modelInfo.cache_creation_input_token_cost + ? modelInfo.cache_creation_input_token_cost * 1000000 + : undefined, + cacheReadsPrice: modelInfo.cache_read_input_token_cost + ? modelInfo.cache_read_input_token_cost * 1000000 + : undefined, + description: `${modelName} via LiteLLM proxy`, + } + } + + return models +} + +/** + * Parses the response from `/v1/models` (OpenAI-compatible) into a ModelRecord. + * + * This endpoint returns a simpler list of models with only IDs, so we use + * sensible defaults for fields not available from this endpoint. + */ +export function parseModelsListResponse(data: any): ModelRecord { + const models: ModelRecord = {} + + if (!data || !data.data || !Array.isArray(data.data)) { + throw new Error("Failed to fetch LiteLLM models: Unexpected response format from /v1/models.") + } + + for (const model of data.data) { + const modelId = model.id + if (!modelId || typeof modelId !== "string") continue + + models[modelId] = { + maxTokens: 8192, + contextWindow: 200000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: undefined, + outputPrice: undefined, + cacheWritesPrice: undefined, + cacheReadsPrice: undefined, + description: `${modelId} via LiteLLM proxy`, + } + } + + return models +} + +/** + * Fetches available models from a LiteLLM server. + * + * First attempts `/v1/model/info` which provides rich metadata. If that + * endpoint is inaccessible (e.g. 403 Forbidden), falls back to `/v1/models` + * which returns a simpler OpenAI-compatible model list. * * @param apiKey The API key for the LiteLLM server * @param baseUrl The base URL of the LiteLLM server * @returns A promise that resolves to a record of model IDs to model info - * @throws Will throw an error if the request fails or the response is not as expected. + * @throws Will throw an error if both endpoints fail. */ export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise { - try { - const headers: Record = { - "Content-Type": "application/json", - ...DEFAULT_HEADERS, - } + const headers = buildHeaders(apiKey) - if (apiKey) { - headers["Authorization"] = `Bearer ${apiKey}` - } - // Use URL constructor to properly join base URL and path - // This approach handles all edge cases including paths, query params, and fragments - const urlObj = new URL(baseUrl) - // Normalize the pathname by removing trailing slashes and multiple slashes - urlObj.pathname = urlObj.pathname.replace(/\/+$/, "").replace(/\/+/g, "/") + "/v1/model/info" - const url = urlObj.href - // Added timeout to prevent indefinite hanging - const response = await axios.get(url, { headers, timeout: 5000 }) - const models: ModelRecord = {} - - // Process the model info from the response - if (response.data && response.data.data && Array.isArray(response.data.data)) { - for (const model of response.data.data) { - const modelName = model.model_name - const modelInfo = model.model_info - const litellmModelName = model?.litellm_params?.model as string | undefined - - if (!modelName || !modelInfo || !litellmModelName) continue - - models[modelName] = { - maxTokens: modelInfo.max_output_tokens || modelInfo.max_tokens || 8192, - contextWindow: modelInfo.max_input_tokens || 200000, - supportsImages: Boolean(modelInfo.supports_vision), - supportsPromptCache: Boolean(modelInfo.supports_prompt_caching), - inputPrice: modelInfo.input_cost_per_token ? modelInfo.input_cost_per_token * 1000000 : undefined, - outputPrice: modelInfo.output_cost_per_token - ? modelInfo.output_cost_per_token * 1000000 - : undefined, - cacheWritesPrice: modelInfo.cache_creation_input_token_cost - ? modelInfo.cache_creation_input_token_cost * 1000000 - : undefined, - cacheReadsPrice: modelInfo.cache_read_input_token_cost - ? modelInfo.cache_read_input_token_cost * 1000000 - : undefined, - description: `${modelName} via LiteLLM proxy`, - } - } - } else { - // If response.data.data is not in the expected format, consider it an error. - console.error("Error fetching LiteLLM models: Unexpected response format", response.data) - throw new Error("Failed to fetch LiteLLM models: Unexpected response format.") - } + // First, try the richer /v1/model/info endpoint + try { + const modelInfoUrl = buildUrl(baseUrl, "/v1/model/info") + const response = await axios.get(modelInfoUrl, { headers, timeout: 5000 }) + return parseModelInfoResponse(response.data) + } catch (modelInfoError: any) { + // Log the failure and attempt fallback + console.error( + "LiteLLM /v1/model/info failed, attempting /v1/models fallback:", + modelInfoError.message || modelInfoError, + ) + } - return models - } catch (error: any) { - console.error("Error fetching LiteLLM models:", error.message ? error.message : error) - if (axios.isAxiosError(error) && error.response) { + // Fallback: try /v1/models (OpenAI-compatible endpoint) + try { + const modelsUrl = buildUrl(baseUrl, "/v1/models") + const response = await axios.get(modelsUrl, { headers, timeout: 5000 }) + return parseModelsListResponse(response.data) + } catch (fallbackError: any) { + console.error("Error fetching LiteLLM models from /v1/models fallback:", fallbackError.message || fallbackError) + if (axios.isAxiosError(fallbackError) && fallbackError.response) { throw new Error( - `Failed to fetch LiteLLM models: ${error.response.status} ${error.response.statusText}. Check base URL and API key.`, + `Failed to fetch LiteLLM models: Both /v1/model/info and /v1/models failed. Last error: ${fallbackError.response.status} ${fallbackError.response.statusText}. Check base URL and API key.`, ) - } else if (axios.isAxiosError(error) && error.request) { + } else if (axios.isAxiosError(fallbackError) && fallbackError.request) { throw new Error( "Failed to fetch LiteLLM models: No response from server. Check LiteLLM server status and base URL.", ) } else { - throw new Error(`Failed to fetch LiteLLM models: ${error.message || "An unknown error occurred."}`) + throw new Error(`Failed to fetch LiteLLM models: ${fallbackError.message || "An unknown error occurred."}`) } } }