From 49baae14179bd67966fbbd107e49d070828e95a9 Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Mon, 9 Mar 2026 11:31:21 +0000
Subject: [PATCH] feat: add /v1/models fallback when /v1/model/info is
 inaccessible for LiteLLM

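When the /v1/model/info request fails (a non-2xx status such as 403
Forbidden, no response from the server, or an unexpected response body),
getLiteLLMModels now automatically falls back to /v1/models to retrieve
the model list. Models obtained through the fallback carry default token
limits and no pricing, because only the model IDs are read from
/v1/models. This helps enterprise/managed LiteLLM users where
/v1/model/info may be blocked by API gateways or internal policies.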

Closes #11898
---
 .../fetchers/__tests__/litellm.spec.ts        | 383 +++++++++++++++---
 src/api/providers/fetchers/litellm.ts         | 188 ++++++---
 2 files changed, 454 insertions(+), 117 deletions(-)

diff --git a/src/api/providers/fetchers/__tests__/litellm.spec.ts b/src/api/providers/fetchers/__tests__/litellm.spec.ts
index c05cda88398..a094f42d255 100644
--- a/src/api/providers/fetchers/__tests__/litellm.spec.ts
+++ b/src/api/providers/fetchers/__tests__/litellm.spec.ts
@@ -3,7 +3,7 @@ vi.mock("axios")
 
 import type { Mock } from "vitest"
 import axios from "axios"
-import { getLiteLLMModels } from "../litellm"
+import { getLiteLLMModels, parseModelInfoResponse, parseModelsListResponse } from "../litellm"
 import { DEFAULT_HEADERS } from "../../constants"
 
 const mockedAxios = axios as typeof axios & {
@@ -323,63 +323,6 @@ describe("getLiteLLMModels", () => {
 		})
 	})
 
-	it("throws error for unexpected response format", async () => {
-		const mockResponse = {
-			data: {
-				// Missing 'data' field
-				models: [],
-			},
-		}
-
-		mockedAxios.get.mockResolvedValue(mockResponse)
-
-		await expect(getLiteLLMModels("test-api-key", "http://localhost:4000")).rejects.toThrow(
-			"Failed to fetch LiteLLM models: Unexpected response format.",
-		)
-	})
-
-	it("throws detailed error for HTTP error responses", async () => {
-		const axiosError = {
-			response: {
-				status: 401,
-				statusText: "Unauthorized",
-			},
-			isAxiosError: true,
-		}
-
-		mockedAxios.isAxiosError.mockReturnValue(true)
-		mockedAxios.get.mockRejectedValue(axiosError)
-
-		await expect(getLiteLLMModels(DUMMY_INVALID_KEY, "http://localhost:4000")).rejects.toThrow(
-			"Failed to fetch LiteLLM models: 401 Unauthorized. Check base URL and API key.",
-		)
-	})
-
-	it("throws network error for request failures", async () => {
-		const axiosError = {
-			request: {},
-			isAxiosError: true,
-		}
-
-		mockedAxios.isAxiosError.mockReturnValue(true)
-		mockedAxios.get.mockRejectedValue(axiosError)
-
-		await expect(getLiteLLMModels("test-api-key", "http://invalid-url")).rejects.toThrow(
-			"Failed to fetch LiteLLM models: No response from server. Check LiteLLM server status and base URL.",
-		)
-	})
-
-	it("throws generic error for other failures", async () => {
-		const genericError = new Error("Network timeout")
-
-		mockedAxios.isAxiosError.mockReturnValue(false)
-		mockedAxios.get.mockRejectedValue(genericError)
-
-		await expect(getLiteLLMModels("test-api-key", "http://localhost:4000")).rejects.toThrow(
-			"Failed to fetch LiteLLM models: Network timeout",
-		)
-	})
-
 	it("handles timeout parameter correctly", async () => {
 		const mockResponse = { data: { data: [] } }
 		mockedAxios.get.mockResolvedValue(mockResponse)
@@ -697,4 +640,328 @@ describe("getLiteLLMModels", () => {
 			description: "model-with-only-max-output-tokens via LiteLLM proxy",
 		})
 	})
+
+	describe("fallback to /v1/models", () => {
+		it("falls back to /v1/models when /v1/model/info returns 403", async () => {
+			const forbiddenError = {
+				response: { status: 403, statusText: "Forbidden" },
+				isAxiosError: true,
+				message: "Request failed with status code 403",
+			}
+
+			const fallbackResponse = {
+				data: {
+					object: "list",
+					data: [
+						{ id: "gpt-4", object: "model", created: 1687882411, owned_by: "openai" },
+						{ id: "claude-3-opus", object: "model", created: 1687882411, owned_by: "anthropic" },
+					],
+				},
+			}
+
+			mockedAxios.get
+				.mockRejectedValueOnce(forbiddenError) // /v1/model/info fails
+				.mockResolvedValueOnce(fallbackResponse) // /v1/models succeeds
+
+			const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
+
+			expect(mockedAxios.get).toHaveBeenCalledTimes(2)
+			expect(mockedAxios.get).toHaveBeenNthCalledWith(
+				1,
+				"http://localhost:4000/v1/model/info",
+				expect.any(Object),
+			)
+			expect(mockedAxios.get).toHaveBeenNthCalledWith(2, "http://localhost:4000/v1/models", expect.any(Object))
+
+			expect(result).toEqual({
+				"gpt-4": {
+					maxTokens: 8192,
+					contextWindow: 200000,
+					supportsImages: false,
+					supportsPromptCache: false,
+					inputPrice: undefined,
+					outputPrice: undefined,
+					cacheWritesPrice: undefined,
+					cacheReadsPrice: undefined,
+					description: "gpt-4 via LiteLLM proxy",
+				},
+				"claude-3-opus": {
+					maxTokens: 8192,
+					contextWindow: 200000,
+					supportsImages: false,
+					supportsPromptCache: false,
+					inputPrice: undefined,
+					outputPrice: undefined,
+					cacheWritesPrice: undefined,
+					cacheReadsPrice: undefined,
+					description: "claude-3-opus via LiteLLM proxy",
+				},
+			})
+		})
+
+		it("falls back to /v1/models when /v1/model/info returns 500", async () => {
+			const serverError = {
+				response: { status: 500, statusText: "Internal Server Error" },
+				isAxiosError: true,
+				message: "Request failed with status code 500",
+			}
+
+			const fallbackResponse = {
+				data: {
+					object: "list",
+					data: [{ id: "gpt-4", object: "model", created: 1687882411, owned_by: "openai" }],
+				},
+			}
+
+			mockedAxios.get
+				.mockRejectedValueOnce(serverError) // /v1/model/info fails
+				.mockResolvedValueOnce(fallbackResponse) // /v1/models succeeds
+
+			const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
+
+			expect(mockedAxios.get).toHaveBeenCalledTimes(2)
+			expect(Object.keys(result)).toEqual(["gpt-4"])
+		})
+
+		it("falls back to /v1/models when /v1/model/info returns unexpected format", async () => {
+			// /v1/model/info returns successfully but with unexpected format
+			const unexpectedResponse = {
+				data: {
+					models: [], // Wrong field name - no 'data' array
+				},
+			}
+
+			const fallbackResponse = {
+				data: {
+					object: "list",
+					data: [{ id: "gpt-4", object: "model", created: 1687882411, owned_by: "openai" }],
+				},
+			}
+
+			mockedAxios.get
+				.mockResolvedValueOnce(unexpectedResponse) // /v1/model/info returns unexpected format
+				.mockResolvedValueOnce(fallbackResponse) // /v1/models succeeds
+
+			const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
+
+			expect(mockedAxios.get).toHaveBeenCalledTimes(2)
+			expect(result).toEqual({
+				"gpt-4": {
+					maxTokens: 8192,
+					contextWindow: 200000,
+					supportsImages: false,
+					supportsPromptCache: false,
+					inputPrice: undefined,
+					outputPrice: undefined,
+					cacheWritesPrice: undefined,
+					cacheReadsPrice: undefined,
+					description: "gpt-4 via LiteLLM proxy",
+				},
+			})
+		})
+
+		it("throws error when both /v1/model/info and /v1/models fail with HTTP errors", async () => {
+			const modelInfoError = {
+				response: { status: 403, statusText: "Forbidden" },
+				isAxiosError: true,
+				message: "Request failed with status code 403",
+			}
+
+			const modelsError = {
+				response: { status: 401, statusText: "Unauthorized" },
+				isAxiosError: true,
+				message: "Request failed with status code 401",
+			}
+
+			mockedAxios.isAxiosError.mockReturnValue(true)
+			mockedAxios.get
+				.mockRejectedValueOnce(modelInfoError) // /v1/model/info fails
+				.mockRejectedValueOnce(modelsError) // /v1/models also fails
+
+			await expect(getLiteLLMModels(DUMMY_INVALID_KEY, "http://localhost:4000")).rejects.toThrow(
+				"Failed to fetch LiteLLM models: Both /v1/model/info and /v1/models failed. Last error: 401 Unauthorized. Check base URL and API key.",
+			)
+
+			expect(mockedAxios.get).toHaveBeenCalledTimes(2)
+		})
+
+		it("throws network error when both endpoints have no response", async () => {
+			const networkError = {
+				request: {},
+				isAxiosError: true,
+				message: "Network Error",
+			}
+
+			mockedAxios.isAxiosError.mockReturnValue(true)
+			mockedAxios.get
+				.mockRejectedValueOnce(networkError) // /v1/model/info fails
+				.mockRejectedValueOnce(networkError) // /v1/models also fails
+
+			await expect(getLiteLLMModels("test-api-key", "http://invalid-url")).rejects.toThrow(
+				"Failed to fetch LiteLLM models: No response from server. Check LiteLLM server status and base URL.",
+			)
+		})
+
+		it("throws generic error when both endpoints fail with unknown errors", async () => {
+			const genericError = new Error("Network timeout")
+
+			mockedAxios.isAxiosError.mockReturnValue(false)
+			mockedAxios.get
+				.mockRejectedValueOnce(genericError) // /v1/model/info fails
+				.mockRejectedValueOnce(genericError) // /v1/models also fails
+
+			await expect(getLiteLLMModels("test-api-key", "http://localhost:4000")).rejects.toThrow(
+				"Failed to fetch LiteLLM models: Network timeout",
+			)
+		})
+
+		it("does not call /v1/models when /v1/model/info succeeds", async () => {
+			const mockResponse = {
+				data: {
+					data: [],
+				},
+			}
+
+			mockedAxios.get.mockResolvedValue(mockResponse)
+
+			await getLiteLLMModels("test-api-key", "http://localhost:4000")
+
+			// Should only call /v1/model/info, not /v1/models
+			expect(mockedAxios.get).toHaveBeenCalledTimes(1)
+			expect(mockedAxios.get).toHaveBeenCalledWith("http://localhost:4000/v1/model/info", expect.any(Object))
+		})
+
+		it("skips models without valid id in /v1/models fallback response", async () => {
+			const modelInfoError = new Error("Failed")
+			const fallbackResponse = {
+				data: {
+					object: "list",
+					data: [
+						{ id: "valid-model", object: "model" },
+						{ id: "", object: "model" }, // empty id
+						{ id: null, object: "model" }, // null id
+						{ object: "model" }, // missing id
+						{ id: 123, object: "model" }, // non-string id
+					],
+				},
+			}
+
+			mockedAxios.get.mockRejectedValueOnce(modelInfoError).mockResolvedValueOnce(fallbackResponse)
+
+			const result = await getLiteLLMModels("test-api-key", "http://localhost:4000")
+
+			expect(Object.keys(result)).toEqual(["valid-model"])
+		})
+
+		it("preserves URL structure in fallback request", async () => {
+			const modelInfoError = new Error("Failed")
+			const fallbackResponse = {
+				data: {
+					object: "list",
+					data: [{ id: "model-1", object: "model" }],
+				},
+			}
+
+			mockedAxios.get.mockRejectedValueOnce(modelInfoError).mockResolvedValueOnce(fallbackResponse)
+
+			await getLiteLLMModels("test-api-key", "http://localhost:4000/litellm/")
+
+			expect(mockedAxios.get).toHaveBeenNthCalledWith(
+				2,
+				"http://localhost:4000/litellm/v1/models",
+				expect.any(Object),
+			)
+		})
+	})
+})
+
+describe("parseModelInfoResponse", () => {
+	it("throws on unexpected format", () => {
+		expect(() => parseModelInfoResponse({ models: [] })).toThrow("Unexpected response format")
+		expect(() => parseModelInfoResponse(null)).toThrow("Unexpected response format")
+		expect(() => parseModelInfoResponse(undefined)).toThrow("Unexpected response format")
+	})
+
+	it("parses valid model info response", () => {
+		const data = {
+			data: [
+				{
+					model_name: "test-model",
+					model_info: {
+						max_tokens: 4096,
+						max_input_tokens: 128000,
+						supports_vision: true,
+						supports_prompt_caching: true,
+						input_cost_per_token: 0.000003,
+						output_cost_per_token: 0.000015,
+					},
+					litellm_params: { model: "provider/test-model" },
+				},
+			],
+		}
+
+		const result = parseModelInfoResponse(data)
+
+		expect(result["test-model"]).toEqual({
+			maxTokens: 4096,
+			contextWindow: 128000,
+			supportsImages: true,
+			supportsPromptCache: true,
+			inputPrice: 3,
+			outputPrice: 15,
+			cacheWritesPrice: undefined,
+			cacheReadsPrice: undefined,
+			description: "test-model via LiteLLM proxy",
+		})
+	})
+})
+
+describe("parseModelsListResponse", () => {
+	it("throws on unexpected format", () => {
+		expect(() => parseModelsListResponse({ models: [] })).toThrow("Unexpected response format from /v1/models")
+		expect(() => parseModelsListResponse(null)).toThrow("Unexpected response format from /v1/models")
+		expect(() => parseModelsListResponse(undefined)).toThrow("Unexpected response format from /v1/models")
+	})
+
+	it("parses valid /v1/models response with default values", () => {
+		const data = {
+			object: "list",
+			data: [
+				{ id: "gpt-4", object: "model", created: 1687882411, owned_by: "openai" },
+				{ id: "claude-3-opus", object: "model", created: 1687882411, owned_by: "anthropic" },
+			],
+		}
+
+		const result = parseModelsListResponse(data)
+
+		expect(result).toEqual({
+			"gpt-4": {
+				maxTokens: 8192,
+				contextWindow: 200000,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: undefined,
+				outputPrice: undefined,
+				cacheWritesPrice: undefined,
+				cacheReadsPrice: undefined,
+				description: "gpt-4 via LiteLLM proxy",
+			},
+			"claude-3-opus": {
+				maxTokens: 8192,
+				contextWindow: 200000,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: undefined,
+				outputPrice: undefined,
+				cacheWritesPrice: undefined,
+				cacheReadsPrice: undefined,
+				description: "claude-3-opus via LiteLLM proxy",
+			},
+		})
+	})
+
+	it("returns empty object for empty data array", () => {
+		const result = parseModelsListResponse({ data: [] })
+		expect(result).toEqual({})
+	})
 })
diff --git a/src/api/providers/fetchers/litellm.ts b/src/api/providers/fetchers/litellm.ts
index de895d01fbf..47deb0ff27d 100644
--- a/src/api/providers/fetchers/litellm.ts
+++ b/src/api/providers/fetchers/litellm.ts
@@ -3,80 +3,150 @@ import axios from "axios"
 import type { ModelRecord } from "@roo-code/types"
 
 import { DEFAULT_HEADERS } from "../constants"
+
+/**
+ * Assembles the request headers shared by every LiteLLM call: a JSON
+ * content type plus the project-wide DEFAULT_HEADERS. A Bearer
+ * Authorization header is added only when `apiKey` is non-empty.
+ */
+function buildHeaders(apiKey: string): Record<string, string> {
+	// An empty key yields no Authorization entry at all.
+	const authorization: Record<string, string> = apiKey ? { Authorization: `Bearer ${apiKey}` } : {}
+
+	return {
+		"Content-Type": "application/json",
+		...DEFAULT_HEADERS,
+		...authorization,
+	}
+}
+
+/**
+ * Appends `path` to the pathname of `baseUrl` after dropping trailing slashes and collapsing repeated ones.
+ */
+function buildUrl(baseUrl: string, path: string): string {
+	const target = new URL(baseUrl)
+	const basePath = target.pathname.replace(/\/+$/, "").replace(/\/+/g, "/")
+	target.pathname = basePath + path
+	return target.toString()
+}
+
 /**
- * Fetches available models from a LiteLLM server
+ * Parses the response from `/v1/model/info` into a ModelRecord.
+ *
+ * This endpoint returns richer metadata (token limits, pricing, capabilities).
+ */
+export function parseModelInfoResponse(data: any): ModelRecord {
+	const models: ModelRecord = {}
+
+	if (!data || !data.data || !Array.isArray(data.data)) {
+		throw new Error("Failed to fetch LiteLLM models: Unexpected response format.")
+	}
+	// Entries may be null or partial; optional chaining lets the guard below skip them instead of throwing.
+	for (const model of data.data) {
+		const modelName = model?.model_name
+		const modelInfo = model?.model_info
+		const litellmModelName = model?.litellm_params?.model as string | undefined
+
+		if (!modelName || !modelInfo || !litellmModelName) continue
+		// Per-token costs are multiplied by 1,000,000; missing (or zero) costs stay undefined.
+		models[modelName] = {
+			maxTokens: modelInfo.max_output_tokens || modelInfo.max_tokens || 8192,
+			contextWindow: modelInfo.max_input_tokens || 200000,
+			supportsImages: Boolean(modelInfo.supports_vision),
+			supportsPromptCache: Boolean(modelInfo.supports_prompt_caching),
+			inputPrice: modelInfo.input_cost_per_token ? modelInfo.input_cost_per_token * 1000000 : undefined,
+			outputPrice: modelInfo.output_cost_per_token ? modelInfo.output_cost_per_token * 1000000 : undefined,
+			cacheWritesPrice: modelInfo.cache_creation_input_token_cost
+				? modelInfo.cache_creation_input_token_cost * 1000000
+				: undefined,
+			cacheReadsPrice: modelInfo.cache_read_input_token_cost
+				? modelInfo.cache_read_input_token_cost * 1000000
+				: undefined,
+			description: `${modelName} via LiteLLM proxy`,
+		}
+	}
+
+	return models
+}
+
+/**
+ * Parses the response from `/v1/models` (OpenAI-compatible) into a ModelRecord.
+ *
+ * This endpoint returns a simpler list of models with only IDs, so we use
+ * sensible defaults for fields not available from this endpoint.
+ */
+export function parseModelsListResponse(data: any): ModelRecord {
+	const models: ModelRecord = {}
+
+	if (!data || !data.data || !Array.isArray(data.data)) {
+		throw new Error("Failed to fetch LiteLLM models: Unexpected response format from /v1/models.")
+	}
+	// Entries may be null or lack a usable id; such entries are skipped rather than aborting the whole list.
+	for (const model of data.data) {
+		const modelId = model?.id
+		if (!modelId || typeof modelId !== "string") continue
+		// Only the id is read from each entry; every other field gets a fixed default.
+		models[modelId] = {
+			maxTokens: 8192,
+			contextWindow: 200000,
+			supportsImages: false,
+			supportsPromptCache: false,
+			inputPrice: undefined,
+			outputPrice: undefined,
+			cacheWritesPrice: undefined,
+			cacheReadsPrice: undefined,
+			description: `${modelId} via LiteLLM proxy`,
+		}
+	}
+
+	return models
+}
+
+/**
+ * Fetches available models from a LiteLLM server.
+ *
+ * First attempts `/v1/model/info`, which provides rich metadata. If that
+ * attempt fails for any reason (HTTP error such as 403 Forbidden, no response,
+ * or an unexpected payload), falls back to the OpenAI-compatible `/v1/models`.
  *
  * @param apiKey The API key for the LiteLLM server
  * @param baseUrl The base URL of the LiteLLM server
  * @returns A promise that resolves to a record of model IDs to model info
- * @throws Will throw an error if the request fails or the response is not as expected.
+ * @throws Error prefixed "Failed to fetch LiteLLM models:" when the `/v1/models` fallback also fails.
  */
 export async function getLiteLLMModels(apiKey: string, baseUrl: string): Promise<ModelRecord> {
-	try {
-		const headers: Record<string, string> = {
-			"Content-Type": "application/json",
-			...DEFAULT_HEADERS,
-		}
+	const headers = buildHeaders(apiKey)
 
-		if (apiKey) {
-			headers["Authorization"] = `Bearer ${apiKey}`
-		}
-		// Use URL constructor to properly join base URL and path
-		// This approach handles all edge cases including paths, query params, and fragments
-		const urlObj = new URL(baseUrl)
-		// Normalize the pathname by removing trailing slashes and multiple slashes
-		urlObj.pathname = urlObj.pathname.replace(/\/+$/, "").replace(/\/+/g, "/") + "/v1/model/info"
-		const url = urlObj.href
-		// Added timeout to prevent indefinite hanging
-		const response = await axios.get(url, { headers, timeout: 5000 })
-		const models: ModelRecord = {}
-
-		// Process the model info from the response
-		if (response.data && response.data.data && Array.isArray(response.data.data)) {
-			for (const model of response.data.data) {
-				const modelName = model.model_name
-				const modelInfo = model.model_info
-				const litellmModelName = model?.litellm_params?.model as string | undefined
-
-				if (!modelName || !modelInfo || !litellmModelName) continue
-
-				models[modelName] = {
-					maxTokens: modelInfo.max_output_tokens || modelInfo.max_tokens || 8192,
-					contextWindow: modelInfo.max_input_tokens || 200000,
-					supportsImages: Boolean(modelInfo.supports_vision),
-					supportsPromptCache: Boolean(modelInfo.supports_prompt_caching),
-					inputPrice: modelInfo.input_cost_per_token ? modelInfo.input_cost_per_token * 1000000 : undefined,
-					outputPrice: modelInfo.output_cost_per_token
-						? modelInfo.output_cost_per_token * 1000000
-						: undefined,
-					cacheWritesPrice: modelInfo.cache_creation_input_token_cost
-						? modelInfo.cache_creation_input_token_cost * 1000000
-						: undefined,
-					cacheReadsPrice: modelInfo.cache_read_input_token_cost
-						? modelInfo.cache_read_input_token_cost * 1000000
-						: undefined,
-					description: `${modelName} via LiteLLM proxy`,
-				}
-			}
-		} else {
-			// If response.data.data is not in the expected format, consider it an error.
-			console.error("Error fetching LiteLLM models: Unexpected response format", response.data)
-			throw new Error("Failed to fetch LiteLLM models: Unexpected response format.")
-		}
+	// Preferred source: /v1/model/info carries token limits, capabilities and pricing
+	try {
+		const modelInfoUrl = buildUrl(baseUrl, "/v1/model/info")
+		const response = await axios.get(modelInfoUrl, { headers, timeout: 5000 })
+		return parseModelInfoResponse(response.data)
+	} catch (modelInfoError: any) {
+		// Any failure here (request or parsing) is logged, not rethrown, so the fallback below runs
+		console.error(
+			"LiteLLM /v1/model/info failed, attempting /v1/models fallback:",
+			modelInfoError.message || modelInfoError,
+		)
+	}
 
-		return models
-	} catch (error: any) {
-		console.error("Error fetching LiteLLM models:", error.message ? error.message : error)
-		if (axios.isAxiosError(error) && error.response) {
+	// Fallback: /v1/models (OpenAI-compatible) is parsed for model IDs only; its errors are rethrown with context
+	try {
+		const modelsUrl = buildUrl(baseUrl, "/v1/models")
+		const response = await axios.get(modelsUrl, { headers, timeout: 5000 })
+		return parseModelsListResponse(response.data)
+	} catch (fallbackError: any) {
+		console.error("Error fetching LiteLLM models from /v1/models fallback:", fallbackError.message || fallbackError)
+		if (axios.isAxiosError(fallbackError) && fallbackError.response) {
 			throw new Error(
-				`Failed to fetch LiteLLM models: ${error.response.status} ${error.response.statusText}. Check base URL and API key.`,
+				`Failed to fetch LiteLLM models: Both /v1/model/info and /v1/models failed. Last error: ${fallbackError.response.status} ${fallbackError.response.statusText}. Check base URL and API key.`,
 			)
-		} else if (axios.isAxiosError(error) && error.request) {
+		} else if (axios.isAxiosError(fallbackError) && fallbackError.request) {
 			throw new Error(
 				"Failed to fetch LiteLLM models: No response from server. Check LiteLLM server status and base URL.",
 			)
 		} else {
-			throw new Error(`Failed to fetch LiteLLM models: ${error.message || "An unknown error occurred."}`)
+			throw new Error(`Failed to fetch LiteLLM models: ${fallbackError.message || "An unknown error occurred."}`)
 		}
 	}
 }