-
Notifications
You must be signed in to change notification settings - Fork 14.2k
feat: add Grok (xAI) API adapter #148
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| import { describe, expect, test, beforeEach, afterEach } from 'bun:test' | ||
| import { getGrokClient, clearGrokClientCache } from '../client.js' | ||
|
|
||
| describe('getGrokClient', () => { | ||
| const originalEnv = { ...process.env } | ||
|
|
||
| beforeEach(() => { | ||
| clearGrokClientCache() | ||
| process.env.GROK_API_KEY = 'test-key' | ||
| delete process.env.GROK_BASE_URL | ||
| delete process.env.XAI_API_KEY | ||
| }) | ||
|
|
||
| afterEach(() => { | ||
| clearGrokClientCache() | ||
| process.env = { ...originalEnv } | ||
| }) | ||
|
|
||
| test('creates client with default base URL', () => { | ||
| const client = getGrokClient() | ||
| expect(client).toBeDefined() | ||
| expect(client.baseURL).toBe('https://api.x.ai/v1') | ||
| }) | ||
|
|
||
| test('uses GROK_BASE_URL when set', () => { | ||
| process.env.GROK_BASE_URL = 'https://custom.grok.api/v1' | ||
| clearGrokClientCache() | ||
| const client = getGrokClient() | ||
| expect(client.baseURL).toBe('https://custom.grok.api/v1') | ||
| }) | ||
|
|
||
| test('falls back to XAI_API_KEY', () => { | ||
| delete process.env.GROK_API_KEY | ||
| process.env.XAI_API_KEY = 'test-key-placeholder' | ||
| clearGrokClientCache() | ||
| const client = getGrokClient() | ||
| expect(client).toBeDefined() | ||
| expect(client.apiKey).toBe('test-key-placeholder') | ||
| }) | ||
|
|
||
| test('returns cached client on second call', () => { | ||
| const client1 = getGrokClient() | ||
| const client2 = getGrokClient() | ||
| expect(client1).toBe(client2) | ||
| }) | ||
|
|
||
| test('clearGrokClientCache resets cache', () => { | ||
| const client1 = getGrokClient() | ||
| clearGrokClientCache() | ||
| process.env.GROK_BASE_URL = 'https://other.api/v1' | ||
| const client2 = getGrokClient() | ||
| expect(client1).not.toBe(client2) | ||
| }) | ||
| }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| import { describe, expect, test, beforeEach, afterEach } from 'bun:test' | ||
| import { resolveGrokModel } from '../modelMapping.js' | ||
|
|
||
| describe('resolveGrokModel', () => { | ||
| const originalEnv = { ...process.env } | ||
|
|
||
| beforeEach(() => { | ||
| delete process.env.GROK_MODEL | ||
| delete process.env.ANTHROPIC_DEFAULT_SONNET_MODEL | ||
| delete process.env.ANTHROPIC_DEFAULT_OPUS_MODEL | ||
| delete process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL | ||
| }) | ||
|
|
||
| afterEach(() => { | ||
| process.env = { ...originalEnv } | ||
| }) | ||
|
|
||
| test('GROK_MODEL env var takes highest priority', () => { | ||
| process.env.GROK_MODEL = 'grok-custom' | ||
| expect(resolveGrokModel('claude-sonnet-4-6')).toBe('grok-custom') | ||
| }) | ||
|
|
||
| test('maps sonnet models to grok-3', () => { | ||
| expect(resolveGrokModel('claude-sonnet-4-6')).toBe('grok-3') | ||
| }) | ||
|
|
||
| test('maps opus models to grok-3', () => { | ||
| expect(resolveGrokModel('claude-opus-4-6')).toBe('grok-3') | ||
| }) | ||
|
|
||
| test('maps haiku models to grok-3-mini', () => { | ||
| expect(resolveGrokModel('claude-haiku-4-5-20251001')).toBe('grok-3-mini') | ||
| }) | ||
|
|
||
| test('ANTHROPIC_DEFAULT_SONNET_MODEL overrides default map', () => { | ||
| process.env.ANTHROPIC_DEFAULT_SONNET_MODEL = 'grok-2' | ||
| expect(resolveGrokModel('claude-sonnet-4-6')).toBe('grok-2') | ||
| }) | ||
|
|
||
| test('passes through unknown model names', () => { | ||
| expect(resolveGrokModel('some-unknown-model')).toBe('some-unknown-model') | ||
| }) | ||
|
|
||
| test('strips [1m] suffix before lookup', () => { | ||
| expect(resolveGrokModel('claude-sonnet-4-6[1m]')).toBe('grok-3') | ||
| }) | ||
| }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| import OpenAI from 'openai' | ||
| import { getProxyFetchOptions } from 'src/utils/proxy.js' | ||
|
|
||
| /** | ||
| * Environment variables: | ||
| * | ||
| * GROK_API_KEY (or XAI_API_KEY): Required. API key for the xAI Grok endpoint. | ||
| * GROK_BASE_URL: Optional. Defaults to https://api.x.ai/v1. | ||
| */ | ||
|
|
||
| const DEFAULT_BASE_URL = 'https://api.x.ai/v1' | ||
|
|
||
| let cachedClient: OpenAI | null = null | ||
|
|
||
| export function getGrokClient(options?: { | ||
| maxRetries?: number | ||
| fetchOverride?: typeof fetch | ||
| source?: string | ||
| }): OpenAI { | ||
| if (cachedClient) return cachedClient | ||
|
|
||
| const apiKey = process.env.GROK_API_KEY || process.env.XAI_API_KEY || '' | ||
| const baseURL = process.env.GROK_BASE_URL || DEFAULT_BASE_URL | ||
|
|
||
| const client = new OpenAI({ | ||
| apiKey, | ||
| baseURL, | ||
| maxRetries: options?.maxRetries ?? 0, | ||
| timeout: parseInt(process.env.API_TIMEOUT_MS || String(600 * 1000), 10), | ||
| dangerouslyAllowBrowser: true, | ||
| fetchOptions: getProxyFetchOptions({ forAnthropicAPI: false }) as RequestInit, | ||
| ...(options?.fetchOverride && { fetch: options.fetchOverride }), | ||
| }) | ||
|
|
||
| if (!options?.fetchOverride) { | ||
| cachedClient = client | ||
| } | ||
|
|
||
| return client | ||
| } | ||
|
|
||
/**
 * Clear the cached client (useful when env vars change).
 *
 * getGrokClient() memoizes its client in module state, so GROK_API_KEY /
 * GROK_BASE_URL changes are otherwise invisible; tests and runtime
 * reconfiguration call this to force the next call to build a fresh client.
 */
export function clearGrokClientCache(): void {
  cachedClient = null
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,199 @@ | ||
| import type { BetaToolUnion } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs' | ||
| import type { SystemPrompt } from '../../../utils/systemPromptType.js' | ||
| import type { Message, StreamEvent, SystemAPIErrorMessage, AssistantMessage } from '../../../types/message.js' | ||
| import type { Tools } from '../../../Tool.js' | ||
| import { getGrokClient } from './client.js' | ||
| import { anthropicMessagesToOpenAI } from '../openai/convertMessages.js' | ||
| import { anthropicToolsToOpenAI, anthropicToolChoiceToOpenAI } from '../openai/convertTools.js' | ||
| import { adaptOpenAIStreamToAnthropic } from '../openai/streamAdapter.js' | ||
| import { resolveGrokModel } from './modelMapping.js' | ||
| import { normalizeMessagesForAPI } from '../../../utils/messages.js' | ||
| import { toolToAPISchema } from '../../../utils/api.js' | ||
| import { logForDebugging } from '../../../utils/debug.js' | ||
| import { addToTotalSessionCost } from '../../../cost-tracker.js' | ||
| import { calculateUSDCost } from '../../../utils/modelCost.js' | ||
| import type { Options } from '../claude.js' | ||
| import { randomUUID } from 'crypto' | ||
| import { | ||
| createAssistantAPIErrorMessage, | ||
| normalizeContentFromAPI, | ||
| } from '../../../utils/messages.js' | ||
|
|
||
| /** | ||
| * Grok (xAI) query path. Grok uses an OpenAI-compatible API, so we reuse | ||
| * the OpenAI message/tool converters and stream adapter. Only the client | ||
| * (different base URL + API key) and model mapping are Grok-specific. | ||
| */ | ||
| export async function* queryModelGrok( | ||
| messages: Message[], | ||
| systemPrompt: SystemPrompt, | ||
| tools: Tools, | ||
| signal: AbortSignal, | ||
| options: Options, | ||
| ): AsyncGenerator< | ||
| StreamEvent | AssistantMessage | SystemAPIErrorMessage, | ||
| void | ||
| > { | ||
| try { | ||
| const grokModel = resolveGrokModel(options.model) | ||
| const messagesForAPI = normalizeMessagesForAPI(messages, tools) | ||
|
|
||
| const toolSchemas = await Promise.all( | ||
| tools.map(tool => | ||
| toolToAPISchema(tool, { | ||
| getToolPermissionContext: options.getToolPermissionContext, | ||
| tools, | ||
| agents: options.agents, | ||
| allowedAgentTypes: options.allowedAgentTypes, | ||
| model: options.model, | ||
| }), | ||
| ), | ||
| ) | ||
| const standardTools = toolSchemas.filter( | ||
| (t): t is BetaToolUnion & { type: string } => { | ||
| const anyT = t as Record<string, unknown> | ||
| return anyT.type !== 'advisor_20260301' && anyT.type !== 'computer_20250124' | ||
| }, | ||
| ) | ||
|
|
||
| const openaiMessages = anthropicMessagesToOpenAI(messagesForAPI, systemPrompt) | ||
| const openaiTools = anthropicToolsToOpenAI(standardTools) | ||
| const openaiToolChoice = anthropicToolChoiceToOpenAI(options.toolChoice) | ||
|
|
||
| const client = getGrokClient({ | ||
| maxRetries: 0, | ||
| fetchOverride: options.fetchOverride, | ||
| source: options.querySource, | ||
| }) | ||
|
|
||
| logForDebugging(`[Grok] Calling model=${grokModel}, messages=${openaiMessages.length}, tools=${openaiTools.length}`) | ||
|
|
||
| const stream = await client.chat.completions.create( | ||
| { | ||
| model: grokModel, | ||
| messages: openaiMessages, | ||
| ...(openaiTools.length > 0 && { | ||
| tools: openaiTools, | ||
| ...(openaiToolChoice && { tool_choice: openaiToolChoice }), | ||
| }), | ||
| stream: true, | ||
| stream_options: { include_usage: true }, | ||
| ...(options.temperatureOverride !== undefined && { | ||
| temperature: options.temperatureOverride, | ||
| }), | ||
| }, | ||
|
Comment on lines
+71
to
+84
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: cat -n src/services/api/grok/index.ts | head -100Repository: claude-code-best/claude-code Length of output: 4447 🏁 Script executed: rg "maxOutputTokensOverride" --type ts -B 3 -A 3Repository: claude-code-best/claude-code Length of output: 8548 🏁 Script executed: rg "max_tokens|maxTokens" src/services/api --type ts -B 2 -A 2Repository: claude-code-best/claude-code Length of output: 9296 🏁 Script executed: rg "queryModel" src/services/api --type ts | head -20Repository: claude-code-best/claude-code Length of output: 1330 🏁 Script executed: grep -r "max_tokens" src/services/api/openai --type ts -A 1 -B 1 | head -30Repository: claude-code-best/claude-code Length of output: 189 🏁 Script executed: cat -n src/services/api/gemini/index.ts | grep -A 50 "const stream = await"Repository: claude-code-best/claude-code Length of output: 54 🏁 Script executed: rg "max_tokens" src/services/api/openai/index.ts -B 2 -A 2Repository: claude-code-best/claude-code Length of output: 54 🏁 Script executed: rg "max_tokens" src/services/api/gemini/index.ts -B 2 -A 2Repository: claude-code-best/claude-code Length of output: 54 🏁 Script executed: cat -n src/services/api/gemini/index.ts | head -120Repository: claude-code-best/claude-code Length of output: 4898 🏁 Script executed: rg "maxOutputTokensOverride" src/services/api --type ts -B 5 -A 5Repository: claude-code-best/claude-code Length of output: 1474 🏁 Script executed: cat -n src/services/api/openai/index.ts | head -150Repository: claude-code-best/claude-code Length of output: 6772 🏁 Script executed: rg "maxOutputTokens|max_output_tokens" src/services/api/gemini -B 3 -A 3Repository: claude-code-best/claude-code Length of output: 54 🏁 Script executed: cat -n src/services/api/claude.ts | sed -n '325,345p'Repository: claude-code-best/claude-code Length of 
output: 853 🏁 Script executed: cat -n src/services/api/claude.ts | sed -n '220,290p'Repository: claude-code-best/claude-code Length of output: 3092 🏁 Script executed: grep -n "const maxOutputTokens" src/services/api/claude.ts -A 5Repository: claude-code-best/claude-code Length of output: 664 🏁 Script executed: cat -n src/services/api/grok/index.ts | sed -n '60,95p'Repository: claude-code-best/claude-code Length of output: 1454 🏁 Script executed: cat -n src/services/api/openai/index.ts | sed -n '76,100p'Repository: claude-code-best/claude-code Length of output: 1014 🏁 Script executed: rg "getMaxOutputTokensForModel" src/services/api --type ts -B 2 -A 2 | head -40Repository: claude-code-best/claude-code Length of output: 628 🏁 Script executed: rg "max_tokens|maxOutputTokens" src/services/api/openai/index.ts -B 3 -A 3Repository: claude-code-best/claude-code Length of output: 54 🏁 Script executed: grep -n "getMaxOutputTokensForModel" src/services/api/grok/index.tsRepository: claude-code-best/claude-code Length of output: 54 🏁 Script executed: cat -n src/services/api/claude.ts | sed -n '1630,1670p'Repository: claude-code-best/claude-code Length of output: 2123 🏁 Script executed: rg "max_tokens" --type ts src/services/api -lRepository: claude-code-best/claude-code Length of output: 266 🏁 Script executed: cat -n src/services/api/openai/index.ts | sed -n '1,50p'Repository: claude-code-best/claude-code Length of output: 2551 🏁 Script executed: grep -n "maxOutputTokens\|max_tokens" src/services/api/openai/index.tsRepository: claude-code-best/claude-code Length of output: 54 🏁 Script executed: cat -n src/services/api/openai/index.ts | tail -100Repository: claude-code-best/claude-code Length of output: 4297 🏁 Script executed: cat -n src/services/api/claude.ts | sed -n '1670,1700p'Repository: claude-code-best/claude-code Length of output: 1383 🏁 Script executed: cat -n src/services/api/claude.ts | sed -n '1700,1750p'Repository: claude-code-best/claude-code Length of output: 
2156 🏁 Script executed: cat -n src/services/api/claude.ts | sed -n '1740,1770p'Repository: claude-code-best/claude-code Length of output: 1229 Thread the resolved output-token limit into the Grok request payload. The Grok adapter ignores 🤖 Prompt for AI Agents |
||
| { | ||
| signal, | ||
| }, | ||
| ) | ||
|
|
||
| const adaptedStream = adaptOpenAIStreamToAnthropic(stream, grokModel) | ||
|
|
||
| const contentBlocks: Record<number, any> = {} | ||
| let partialMessage: any = undefined | ||
| let usage = { | ||
| input_tokens: 0, | ||
| output_tokens: 0, | ||
| cache_creation_input_tokens: 0, | ||
| cache_read_input_tokens: 0, | ||
| } | ||
| let ttftMs = 0 | ||
| const start = Date.now() | ||
|
|
||
| for await (const event of adaptedStream) { | ||
| switch (event.type) { | ||
| case 'message_start': { | ||
| partialMessage = (event as any).message | ||
| ttftMs = Date.now() - start | ||
| if ((event as any).message?.usage) { | ||
| usage = { | ||
| ...usage, | ||
| ...((event as any).message.usage), | ||
| } | ||
| } | ||
| break | ||
| } | ||
| case 'content_block_start': { | ||
| const idx = (event as any).index | ||
| const cb = (event as any).content_block | ||
| if (cb.type === 'tool_use') { | ||
| contentBlocks[idx] = { ...cb, input: '' } | ||
| } else if (cb.type === 'text') { | ||
| contentBlocks[idx] = { ...cb, text: '' } | ||
| } else if (cb.type === 'thinking') { | ||
| contentBlocks[idx] = { ...cb, thinking: '', signature: '' } | ||
| } else { | ||
| contentBlocks[idx] = { ...cb } | ||
| } | ||
| break | ||
| } | ||
| case 'content_block_delta': { | ||
| const idx = (event as any).index | ||
| const delta = (event as any).delta | ||
| const block = contentBlocks[idx] | ||
| if (!block) break | ||
| if (delta.type === 'text_delta') { | ||
| block.text = (block.text || '') + delta.text | ||
| } else if (delta.type === 'input_json_delta') { | ||
| block.input = (block.input || '') + delta.partial_json | ||
| } else if (delta.type === 'thinking_delta') { | ||
| block.thinking = (block.thinking || '') + delta.thinking | ||
| } else if (delta.type === 'signature_delta') { | ||
| block.signature = delta.signature | ||
| } | ||
| break | ||
| } | ||
| case 'content_block_stop': { | ||
| const idx = (event as any).index | ||
| const block = contentBlocks[idx] | ||
| if (!block || !partialMessage) break | ||
|
|
||
| const m: AssistantMessage = { | ||
| message: { | ||
| ...partialMessage, | ||
| content: normalizeContentFromAPI( | ||
| [block], | ||
| tools, | ||
| options.agentId, | ||
| ), | ||
| }, | ||
| requestId: undefined, | ||
| type: 'assistant', | ||
| uuid: randomUUID(), | ||
| timestamp: new Date().toISOString(), | ||
| } | ||
| yield m | ||
|
Comment on lines
+151
to
+165
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Patch the yielded assistant message after Each 🐛 Suggested fix- const contentBlocks: Record<number, any> = {}
+ const contentBlocks: Record<number, any> = {}
+ const emittedMessages: AssistantMessage[] = []
@@
const m: AssistantMessage = {
message: {
...partialMessage,
content: normalizeContentFromAPI(
[block],
@@
type: 'assistant',
uuid: randomUUID(),
timestamp: new Date().toISOString(),
}
+ emittedMessages.push(m)
yield m
break
}
case 'message_delta': {
const deltaUsage = (event as any).usage
if (deltaUsage) {
usage = { ...usage, ...deltaUsage }
}
+ const lastMessage = emittedMessages.at(-1)
+ if (lastMessage) {
+ lastMessage.message.usage = usage as any
+ lastMessage.message.stop_reason =
+ (event as any).delta?.stop_reason ??
+ lastMessage.message.stop_reason
+ }
break
}Also applies to: 168-173 🤖 Prompt for AI Agents |
||
| break | ||
| } | ||
| case 'message_delta': { | ||
| const deltaUsage = (event as any).usage | ||
| if (deltaUsage) { | ||
| usage = { ...usage, ...deltaUsage } | ||
| } | ||
| break | ||
| } | ||
| case 'message_stop': | ||
| break | ||
| } | ||
|
|
||
| if (event.type === 'message_stop' && usage.input_tokens + usage.output_tokens > 0) { | ||
| const costUSD = calculateUSDCost(grokModel, usage as any) | ||
| addToTotalSessionCost(costUSD, usage as any, options.model) | ||
| } | ||
|
|
||
| yield { | ||
| type: 'stream_event', | ||
| event, | ||
| ...(event.type === 'message_start' ? { ttftMs } : undefined), | ||
| } as StreamEvent | ||
| } | ||
| } catch (error) { | ||
| const errorMessage = error instanceof Error ? error.message : String(error) | ||
| logForDebugging(`[Grok] Error: ${errorMessage}`, { level: 'error' }) | ||
| yield createAssistantAPIErrorMessage({ | ||
| content: `API Error: ${errorMessage}`, | ||
| apiError: 'api_error', | ||
| error: error instanceof Error ? error : new Error(String(error)), | ||
| }) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The cache short-circuit drops per-call client options.
Once
cachedClientexists, Line 20 returns it beforefetchOverrideor a non-defaultmaxRetriescan take effect. A later Grok call can therefore reuse the wrong transport configuration.🐛 Suggested fix
export function getGrokClient(options?: { maxRetries?: number fetchOverride?: typeof fetch source?: string }): OpenAI { - if (cachedClient) return cachedClient + const useCachedClient = + !options?.fetchOverride && + (options?.maxRetries === undefined || options.maxRetries === 0) + + if (useCachedClient && cachedClient) return cachedClient const apiKey = process.env.GROK_API_KEY || process.env.XAI_API_KEY || '' const baseURL = process.env.GROK_BASE_URL || DEFAULT_BASE_URL const client = new OpenAI({ @@ - if (!options?.fetchOverride) { + if (useCachedClient) { cachedClient = client }Also applies to: 35-37
🤖 Prompt for AI Agents