Skip to content

Commit 8d4ffa9

Browse files
kubaeror, caozhiyuan, jkorsvik, luisbrandao, and Copilot
committed
feat: add thinking support, enterprise URLs, and Prometheus metrics
Phase 4 implementation from upstream PRs:
- PR ericc-ch#167: Claude thinking/reasoning support with thinking block extraction
- PR ericc-ch#128: GitHub Enterprise Server/Cloud support with --enterprise-url option
- PR ericc-ch#132: Prometheus metrics endpoint at /metrics for Grafana monitoring

Changes:
- Add reasoning_text/reasoning_opaque to ResponseMessage type
- Extract thinking blocks from Claude responses
- New src/lib/url.ts for enterprise URL helpers
- Convert static GitHub URLs to dynamic getGitHubApiBaseUrl/getGitHubBaseUrl
- Update all GitHub service files for enterprise support
- Add --enterprise-url/-e CLI option
- New src/lib/metrics.ts MetricsCollector class
- New /metrics route for Prometheus format output
- Instrument chat-completions, messages, responses handlers with metrics

Co-authored-by: caozhiyuan <caozhiyuan@users.noreply.github.com>
Co-authored-by: jkorsvik <jkorsvik@users.noreply.github.com>
Co-authored-by: luisbrandao <luisbrandao@users.noreply.github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 3fad5d3 commit 8d4ffa9

19 files changed

Lines changed: 8179 additions & 19 deletions

package-lock.json

Lines changed: 7867 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/lib/api-config.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ export const copilotHeaders = (state: State, vision: boolean = false) => {
3636
return headers
3737
}
3838

39-
export const GITHUB_API_BASE_URL = "https://api.github.com"
39+
// PR #128: Use dynamic URL functions for Enterprise support (@jkorsvik)
40+
4041
export const githubHeaders = (state: State) => ({
4142
...standardHeaders(),
4243
authorization: `token ${state.githubToken}`,
@@ -47,6 +48,10 @@ export const githubHeaders = (state: State) => ({
4748
"x-vscode-user-agent-library-version": "electron-fetch",
4849
})
4950

50-
export const GITHUB_BASE_URL = "https://github.com"
5151
export const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98"
5252
export const GITHUB_APP_SCOPES = ["read:user"].join(" ")
53+
54+
export {
55+
githubApiBaseUrl as getGitHubApiBaseUrl,
56+
githubBaseUrl as getGitHubBaseUrl,
57+
} from "./url"

src/lib/metrics.ts

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
/**
2+
* Prometheus Metrics
3+
* PR #132: Add Prometheus support (@luisbrandao)
4+
*
5+
* Provides a simple metrics collection system for Grafana monitoring.
6+
* Tracks request counts, latencies, and token usage.
7+
*/
8+
9+
interface RequestMetric {
10+
endpoint: string
11+
method: string
12+
status: number
13+
model?: string
14+
latencyMs: number
15+
inputTokens?: number
16+
outputTokens?: number
17+
timestamp: number
18+
}
19+
20+
class MetricsCollector {
21+
private requests: Array<RequestMetric> = []
22+
private readonly maxHistorySize = 10000
23+
24+
recordRequest(metric: RequestMetric): void {
25+
this.requests.push(metric)
26+
// Trim old entries if we exceed max size
27+
if (this.requests.length > this.maxHistorySize) {
28+
this.requests = this.requests.slice(-this.maxHistorySize)
29+
}
30+
}
31+
32+
getPrometheusMetrics(): string {
33+
const lines: Array<string> = []
34+
35+
// Request count by endpoint and status
36+
const requestCounts = new Map<string, number>()
37+
const latencySums = new Map<string, number>()
38+
const latencyCounts = new Map<string, number>()
39+
const tokenInputSums = new Map<string, number>()
40+
const tokenOutputSums = new Map<string, number>()
41+
42+
for (const req of this.requests) {
43+
const key = `endpoint="${req.endpoint}",method="${req.method}",status="${req.status}"`
44+
requestCounts.set(key, (requestCounts.get(key) ?? 0) + 1)
45+
latencySums.set(key, (latencySums.get(key) ?? 0) + req.latencyMs)
46+
latencyCounts.set(key, (latencyCounts.get(key) ?? 0) + 1)
47+
48+
if (req.model) {
49+
const modelKey = `model="${req.model}"`
50+
if (req.inputTokens) {
51+
tokenInputSums.set(
52+
modelKey,
53+
(tokenInputSums.get(modelKey) ?? 0) + req.inputTokens,
54+
)
55+
}
56+
if (req.outputTokens) {
57+
tokenOutputSums.set(
58+
modelKey,
59+
(tokenOutputSums.get(modelKey) ?? 0) + req.outputTokens,
60+
)
61+
}
62+
}
63+
}
64+
65+
// Output metrics in Prometheus format
66+
lines.push(
67+
"# HELP copilot_api_requests_total Total number of API requests",
68+
"# TYPE copilot_api_requests_total counter",
69+
)
70+
for (const [key, count] of requestCounts) {
71+
lines.push(`copilot_api_requests_total{${key}} ${count}`)
72+
}
73+
74+
lines.push(
75+
"",
76+
"# HELP copilot_api_request_latency_ms_sum Sum of request latencies in milliseconds",
77+
"# TYPE copilot_api_request_latency_ms_sum counter",
78+
)
79+
for (const [key, sum] of latencySums) {
80+
lines.push(`copilot_api_request_latency_ms_sum{${key}} ${sum}`)
81+
}
82+
83+
lines.push(
84+
"",
85+
"# HELP copilot_api_request_latency_ms_count Count of requests for latency calculation",
86+
"# TYPE copilot_api_request_latency_ms_count counter",
87+
)
88+
for (const [key, count] of latencyCounts) {
89+
lines.push(`copilot_api_request_latency_ms_count{${key}} ${count}`)
90+
}
91+
92+
lines.push(
93+
"",
94+
"# HELP copilot_api_tokens_input_total Total input tokens by model",
95+
"# TYPE copilot_api_tokens_input_total counter",
96+
)
97+
for (const [key, sum] of tokenInputSums) {
98+
lines.push(`copilot_api_tokens_input_total{${key}} ${sum}`)
99+
}
100+
101+
lines.push(
102+
"",
103+
"# HELP copilot_api_tokens_output_total Total output tokens by model",
104+
"# TYPE copilot_api_tokens_output_total counter",
105+
)
106+
for (const [key, sum] of tokenOutputSums) {
107+
lines.push(`copilot_api_tokens_output_total{${key}} ${sum}`)
108+
}
109+
110+
return lines.join("\n")
111+
}
112+
113+
getStats(): {
114+
totalRequests: number
115+
successRate: number
116+
avgLatencyMs: number
117+
} {
118+
if (this.requests.length === 0) {
119+
return { totalRequests: 0, successRate: 0, avgLatencyMs: 0 }
120+
}
121+
122+
const successful = this.requests.filter((r) => r.status < 400).length
123+
const totalLatency = this.requests.reduce((sum, r) => sum + r.latencyMs, 0)
124+
125+
return {
126+
totalRequests: this.requests.length,
127+
successRate: successful / this.requests.length,
128+
avgLatencyMs: totalLatency / this.requests.length,
129+
}
130+
}
131+
}
132+
133+
export const metrics = new MetricsCollector()

src/lib/paths.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,19 @@ import path from "node:path"
55
const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api")
66

77
const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token")
8+
// PR #128: GitHub Enterprise support (@jkorsvik)
9+
const ENTERPRISE_URL_PATH = path.join(APP_DIR, "enterprise_url")
810

911
export const PATHS = {
1012
APP_DIR,
1113
GITHUB_TOKEN_PATH,
14+
ENTERPRISE_URL_PATH,
1215
}
1316

1417
export async function ensurePaths(): Promise<void> {
1518
await fs.mkdir(PATHS.APP_DIR, { recursive: true })
1619
await ensureFile(PATHS.GITHUB_TOKEN_PATH)
20+
await ensureFile(PATHS.ENTERPRISE_URL_PATH)
1721
}
1822

1923
async function ensureFile(filePath: string): Promise<void> {

src/lib/state.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ export interface State {
1818

1919
// PR #144: API key authentication (@ZiuChen)
2020
apiKeys?: Array<string>
21+
22+
// PR #128: GitHub Enterprise support (@jkorsvik)
23+
enterpriseUrl?: string
2124
}
2225

2326
export const state: State = {

src/lib/url.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/**
2+
* URL Helpers for GitHub Enterprise Support
3+
* PR #128: Add GitHub Enterprise Server/Cloud support (@jkorsvik)
4+
*
5+
* Provides functions to normalize URLs and determine API endpoints
6+
* for both github.com and enterprise instances.
7+
*/
8+
9+
import { state } from "./state"
10+
11+
/**
12+
* Normalizes a domain by stripping protocol and trailing slashes
13+
* @example normalizeDomain("https://github.example.com/") => "github.example.com"
14+
*/
15+
export function normalizeDomain(url: string): string {
16+
return url
17+
.replace(/^https?:\/\//, "")
18+
.replace(/\/+$/, "")
19+
.toLowerCase()
20+
}
21+
22+
/**
23+
* Returns the base GitHub URL for OAuth endpoints
24+
* @returns github.com or the enterprise domain
25+
*/
26+
export function githubBaseUrl(): string {
27+
if (state.enterpriseUrl) {
28+
return `https://${normalizeDomain(state.enterpriseUrl)}`
29+
}
30+
return "https://github.com"
31+
}
32+
33+
/**
34+
* Returns the GitHub API base URL
35+
* @returns api.github.com or api.{enterprise}
36+
*/
37+
export function githubApiBaseUrl(): string {
38+
if (state.enterpriseUrl) {
39+
const domain = normalizeDomain(state.enterpriseUrl)
40+
return `https://api.${domain}`
41+
}
42+
return "https://api.github.com"
43+
}

src/routes/chat-completions/handler.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import consola from "consola"
44
import { streamSSE, type SSEMessage } from "hono/streaming"
55

66
import { awaitApproval } from "~/lib/approval"
7+
import { metrics } from "~/lib/metrics"
78
import { checkRateLimit } from "~/lib/rate-limit"
89
import { state } from "~/lib/state"
910
import { getTokenCount } from "~/lib/tokenizer"
@@ -15,6 +16,7 @@ import {
1516
} from "~/services/copilot/create-chat-completions"
1617

1718
export async function handleCompletion(c: Context) {
19+
const startTime = Date.now()
1820
await checkRateLimit(state)
1921

2022
let payload = await c.req.json<ChatCompletionsPayload>()
@@ -51,6 +53,14 @@ export async function handleCompletion(c: Context) {
5153

5254
if (isNonStreaming(response)) {
5355
consola.debug("Non-streaming response:", JSON.stringify(response))
56+
const latency = Date.now() - startTime
57+
metrics.recordRequest(
58+
"/chat/completions",
59+
"POST",
60+
200,
61+
payload.model,
62+
latency,
63+
)
5464
// Add object type for pydantic_ai compatibility (PR #185 by @Vincenthays)
5565
return c.json({ ...response, object: "chat.completion" })
5666
}
@@ -61,6 +71,14 @@ export async function handleCompletion(c: Context) {
6171
consola.debug("Streaming chunk:", JSON.stringify(chunk))
6272
await stream.writeSSE(chunk as SSEMessage)
6373
}
74+
const latency = Date.now() - startTime
75+
metrics.recordRequest(
76+
"/chat/completions",
77+
"POST",
78+
200,
79+
payload.model,
80+
latency,
81+
)
6482
})
6583
}
6684

src/routes/messages/handler.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import consola from "consola"
44
import { streamSSE } from "hono/streaming"
55

66
import { awaitApproval } from "~/lib/approval"
7+
import { metrics } from "~/lib/metrics"
78
import { checkRateLimit } from "~/lib/rate-limit"
89
import { state } from "~/lib/state"
910
import {
@@ -23,6 +24,7 @@ import {
2324
import { translateChunkToAnthropicEvents } from "./stream-translation"
2425

2526
export async function handleCompletion(c: Context) {
27+
const startTime = Date.now()
2628
await checkRateLimit(state)
2729

2830
const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()
@@ -50,6 +52,14 @@ export async function handleCompletion(c: Context) {
5052
"Translated Anthropic response:",
5153
JSON.stringify(anthropicResponse),
5254
)
55+
const latency = Date.now() - startTime
56+
metrics.recordRequest(
57+
"/v1/messages",
58+
"POST",
59+
200,
60+
anthropicPayload.model,
61+
latency,
62+
)
5363
return c.json(anthropicResponse)
5464
}
5565

@@ -83,6 +93,14 @@ export async function handleCompletion(c: Context) {
8393
})
8494
}
8595
}
96+
const latency = Date.now() - startTime
97+
metrics.recordRequest(
98+
"/v1/messages",
99+
"POST",
100+
200,
101+
anthropicPayload.model,
102+
latency,
103+
)
86104
})
87105
}
88106

src/routes/messages/non-stream-translation.ts

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
type ChatCompletionsPayload,
44
type ContentPart,
55
type Message,
6+
type ResponseMessage,
67
type TextPart,
78
type Tool,
89
type ToolCall,
@@ -313,17 +314,21 @@ export function translateToAnthropic(
313314
response: ChatCompletionResponse,
314315
): AnthropicResponse {
315316
// Merge content from all choices
317+
const allThinkingBlocks: Array<AnthropicThinkingBlock> = []
316318
const allTextBlocks: Array<AnthropicTextBlock> = []
317319
const allToolUseBlocks: Array<AnthropicToolUseBlock> = []
318320
let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null =
319321
null // default
320322
stopReason = response.choices[0]?.finish_reason ?? stopReason
321323

322-
// Process all choices to extract text and tool use blocks
324+
// Process all choices to extract text, thinking, and tool use blocks
323325
for (const choice of response.choices) {
326+
// PR #167: Extract thinking blocks from reasoning fields (@caozhiyuan)
327+
const thinkingBlocks = getAnthropicThinkingBlocks(choice.message)
324328
const textBlocks = getAnthropicTextBlocks(choice.message.content)
325329
const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls)
326330

331+
allThinkingBlocks.push(...thinkingBlocks)
327332
allTextBlocks.push(...textBlocks)
328333
allToolUseBlocks.push(...toolUseBlocks)
329334

@@ -333,14 +338,13 @@ export function translateToAnthropic(
333338
}
334339
}
335340

336-
// Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses
337-
341+
// Order: thinking blocks first, then text blocks, then tool use blocks
338342
return {
339343
id: response.id,
340344
type: "message",
341345
role: "assistant",
342346
model: response.model,
343-
content: [...allTextBlocks, ...allToolUseBlocks],
347+
content: [...allThinkingBlocks, ...allTextBlocks, ...allToolUseBlocks],
344348
stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
345349
stop_sequence: null,
346350
usage: {
@@ -357,6 +361,23 @@ export function translateToAnthropic(
357361
}
358362
}
359363

364+
// PR #167: Extract thinking blocks from reasoning fields (@caozhiyuan)
365+
function getAnthropicThinkingBlocks(
366+
message: ResponseMessage,
367+
): Array<AnthropicThinkingBlock> {
368+
const blocks: Array<AnthropicThinkingBlock> = []
369+
370+
// reasoning_text contains the actual thinking content
371+
if (message.reasoning_text) {
372+
blocks.push({
373+
type: "thinking",
374+
thinking: message.reasoning_text,
375+
})
376+
}
377+
378+
return blocks
379+
}
380+
360381
function getAnthropicTextBlocks(
361382
messageContent: Message["content"],
362383
): Array<AnthropicTextBlock> {

0 commit comments

Comments
 (0)