|
| 1 | +/** |
| 2 | + * Prometheus Metrics |
| 3 | + * PR #132: Add Prometheus support (@luisbrandao) |
| 4 | + * |
| 5 | + * Provides a simple metrics collection system for Grafana monitoring. |
| 6 | + * Tracks request counts, latencies, and token usage. |
| 7 | + */ |
| 8 | + |
/** A single observed API request, as captured for metrics aggregation. */
interface RequestMetric {
  // Request path, e.g. a chat-completions route — used as a Prometheus label.
  endpoint: string
  // HTTP method (GET, POST, ...).
  method: string
  // HTTP response status code; values < 400 count as "successful" in getStats().
  status: number
  // Upstream model name, when the request targeted a model; keys token metrics.
  model?: string
  // Wall-clock request duration in milliseconds.
  latencyMs: number
  // Prompt tokens consumed, when reported by the upstream.
  inputTokens?: number
  // Completion tokens produced, when reported by the upstream.
  outputTokens?: number
  // Time the request was recorded — presumably epoch milliseconds; confirm at call sites.
  timestamp: number
}
| 19 | + |
| 20 | +class MetricsCollector { |
| 21 | + private requests: Array<RequestMetric> = [] |
| 22 | + private readonly maxHistorySize = 10000 |
| 23 | + |
| 24 | + recordRequest(metric: RequestMetric): void { |
| 25 | + this.requests.push(metric) |
| 26 | + // Trim old entries if we exceed max size |
| 27 | + if (this.requests.length > this.maxHistorySize) { |
| 28 | + this.requests = this.requests.slice(-this.maxHistorySize) |
| 29 | + } |
| 30 | + } |
| 31 | + |
| 32 | + getPrometheusMetrics(): string { |
| 33 | + const lines: Array<string> = [] |
| 34 | + |
| 35 | + // Request count by endpoint and status |
| 36 | + const requestCounts = new Map<string, number>() |
| 37 | + const latencySums = new Map<string, number>() |
| 38 | + const latencyCounts = new Map<string, number>() |
| 39 | + const tokenInputSums = new Map<string, number>() |
| 40 | + const tokenOutputSums = new Map<string, number>() |
| 41 | + |
| 42 | + for (const req of this.requests) { |
| 43 | + const key = `endpoint="${req.endpoint}",method="${req.method}",status="${req.status}"` |
| 44 | + requestCounts.set(key, (requestCounts.get(key) ?? 0) + 1) |
| 45 | + latencySums.set(key, (latencySums.get(key) ?? 0) + req.latencyMs) |
| 46 | + latencyCounts.set(key, (latencyCounts.get(key) ?? 0) + 1) |
| 47 | + |
| 48 | + if (req.model) { |
| 49 | + const modelKey = `model="${req.model}"` |
| 50 | + if (req.inputTokens) { |
| 51 | + tokenInputSums.set( |
| 52 | + modelKey, |
| 53 | + (tokenInputSums.get(modelKey) ?? 0) + req.inputTokens, |
| 54 | + ) |
| 55 | + } |
| 56 | + if (req.outputTokens) { |
| 57 | + tokenOutputSums.set( |
| 58 | + modelKey, |
| 59 | + (tokenOutputSums.get(modelKey) ?? 0) + req.outputTokens, |
| 60 | + ) |
| 61 | + } |
| 62 | + } |
| 63 | + } |
| 64 | + |
| 65 | + // Output metrics in Prometheus format |
| 66 | + lines.push( |
| 67 | + "# HELP copilot_api_requests_total Total number of API requests", |
| 68 | + "# TYPE copilot_api_requests_total counter", |
| 69 | + ) |
| 70 | + for (const [key, count] of requestCounts) { |
| 71 | + lines.push(`copilot_api_requests_total{${key}} ${count}`) |
| 72 | + } |
| 73 | + |
| 74 | + lines.push( |
| 75 | + "", |
| 76 | + "# HELP copilot_api_request_latency_ms_sum Sum of request latencies in milliseconds", |
| 77 | + "# TYPE copilot_api_request_latency_ms_sum counter", |
| 78 | + ) |
| 79 | + for (const [key, sum] of latencySums) { |
| 80 | + lines.push(`copilot_api_request_latency_ms_sum{${key}} ${sum}`) |
| 81 | + } |
| 82 | + |
| 83 | + lines.push( |
| 84 | + "", |
| 85 | + "# HELP copilot_api_request_latency_ms_count Count of requests for latency calculation", |
| 86 | + "# TYPE copilot_api_request_latency_ms_count counter", |
| 87 | + ) |
| 88 | + for (const [key, count] of latencyCounts) { |
| 89 | + lines.push(`copilot_api_request_latency_ms_count{${key}} ${count}`) |
| 90 | + } |
| 91 | + |
| 92 | + lines.push( |
| 93 | + "", |
| 94 | + "# HELP copilot_api_tokens_input_total Total input tokens by model", |
| 95 | + "# TYPE copilot_api_tokens_input_total counter", |
| 96 | + ) |
| 97 | + for (const [key, sum] of tokenInputSums) { |
| 98 | + lines.push(`copilot_api_tokens_input_total{${key}} ${sum}`) |
| 99 | + } |
| 100 | + |
| 101 | + lines.push( |
| 102 | + "", |
| 103 | + "# HELP copilot_api_tokens_output_total Total output tokens by model", |
| 104 | + "# TYPE copilot_api_tokens_output_total counter", |
| 105 | + ) |
| 106 | + for (const [key, sum] of tokenOutputSums) { |
| 107 | + lines.push(`copilot_api_tokens_output_total{${key}} ${sum}`) |
| 108 | + } |
| 109 | + |
| 110 | + return lines.join("\n") |
| 111 | + } |
| 112 | + |
| 113 | + getStats(): { |
| 114 | + totalRequests: number |
| 115 | + successRate: number |
| 116 | + avgLatencyMs: number |
| 117 | + } { |
| 118 | + if (this.requests.length === 0) { |
| 119 | + return { totalRequests: 0, successRate: 0, avgLatencyMs: 0 } |
| 120 | + } |
| 121 | + |
| 122 | + const successful = this.requests.filter((r) => r.status < 400).length |
| 123 | + const totalLatency = this.requests.reduce((sum, r) => sum + r.latencyMs, 0) |
| 124 | + |
| 125 | + return { |
| 126 | + totalRequests: this.requests.length, |
| 127 | + successRate: successful / this.requests.length, |
| 128 | + avgLatencyMs: totalLatency / this.requests.length, |
| 129 | + } |
| 130 | + } |
| 131 | +} |
| 132 | + |
// Module-level singleton: import `metrics` to record requests and export them.
export const metrics = new MetricsCollector()
0 commit comments