From 2428780092522e5af68b1bc477f4c668ed7dabf7 Mon Sep 17 00:00:00 2001 From: Andrew Kent Date: Thu, 8 Jan 2026 18:35:31 -0700 Subject: [PATCH] add time to first token metric for gemini instrumentation --- .../com/google/genai/BraintrustApiClient.java | 46 +++++++++++++++---- .../genai/BraintrustGenAITest.java | 8 ++++ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/google/genai/BraintrustApiClient.java b/src/main/java/com/google/genai/BraintrustApiClient.java index 5a0bc87..4f6dbbf 100644 --- a/src/main/java/com/google/genai/BraintrustApiClient.java +++ b/src/main/java/com/google/genai/BraintrustApiClient.java @@ -51,7 +51,8 @@ private void tagSpan( @Nullable String genAIEndpoint, @Nullable String requestMethod, @Nullable String requestBody, - @Nullable String responseBody) { + @Nullable String responseBody, + double timeToFirstToken) { try { Map metadata = new java.util.HashMap<>(); metadata.put("provider", "gemini"); @@ -126,9 +127,13 @@ private void tagSpan( "braintrust.output_json", JSON_MAPPER.writeValueAsString(responseJson)); // Parse usage metadata for metrics + Map metrics = new java.util.HashMap<>(); + + // Always add time_to_first_token + metrics.put("time_to_first_token", timeToFirstToken); + if (responseJson.get("usageMetadata") instanceof Map) { var usage = (Map) responseJson.get("usageMetadata"); - Map metrics = new java.util.HashMap<>(); if (usage.containsKey("promptTokenCount")) { metrics.put("prompt_tokens", (Number) usage.get("promptTokenCount")); @@ -145,10 +150,10 @@ private void tagSpan( "prompt_cached_tokens", (Number) usage.get("cachedContentTokenCount")); } - - span.setAttribute( - "braintrust.metrics", JSON_MAPPER.writeValueAsString(metrics)); } + + // Always set metrics (at minimum with time_to_first_token) + span.setAttribute("braintrust.metrics", JSON_MAPPER.writeValueAsString(metrics)); } // Set metadata @@ -195,10 +200,19 @@ public ApiResponse request( Span span = tracer.spanBuilder(getOperation(genAIUrl)).setSpanKind(SpanKind.CLIENT).startSpan(); try (Scope scope = span.makeCurrent()) { + long startTimeNanos = System.nanoTime(); ApiResponse response = delegate.request(requestMethod, genAIUrl, requestBody, options); + double timeToFirstToken = (System.nanoTime() - startTimeNanos) / 1_000_000_000.0; + BufferedApiResponse bufferedResponse = new BufferedApiResponse(response); span.setStatus(StatusCode.OK); - tagSpan(span, genAIUrl, requestMethod, requestBody, bufferedResponse.getBodyAsString()); + tagSpan( + span, + genAIUrl, + requestMethod, + requestBody, + bufferedResponse.getBodyAsString(), + timeToFirstToken); return bufferedResponse; } catch (Throwable t) { span.setStatus(StatusCode.ERROR, t.getMessage()); @@ -219,8 +233,11 @@ public ApiResponse request( Span span = tracer.spanBuilder(getOperation(genAIUrl)).setSpanKind(SpanKind.CLIENT).startSpan(); try (Scope scope = span.makeCurrent()) { + long startTimeNanos = System.nanoTime(); ApiResponse response = delegate.request(requestMethod, genAIUrl, requestBodyBytes, options); + double timeToFirstToken = (System.nanoTime() - startTimeNanos) / 1_000_000_000.0; + BufferedApiResponse bufferedResponse = new BufferedApiResponse(response); span.setStatus(StatusCode.OK); tagSpan( @@ -228,7 +245,8 @@ public ApiResponse request( genAIUrl, requestMethod, new String(requestBodyBytes), - bufferedResponse.getBodyAsString()); + bufferedResponse.getBodyAsString(), + timeToFirstToken); return bufferedResponse; } catch (Throwable t) { span.setStatus(StatusCode.ERROR, t.getMessage()); @@ -244,6 +262,7 @@ public CompletableFuture asyncRequest( String method, String url, String body, Optional options) { Span span = tracer.spanBuilder(getOperation(url)).setSpanKind(SpanKind.CLIENT).startSpan(); Context context = Context.current().with(span); + long startTimeNanos = System.nanoTime(); return delegate.asyncRequest(method, url, body, options) .handle( @@ -256,6 +275,9 @@ public CompletableFuture asyncRequest( } try { + double timeToFirstToken = + (System.nanoTime() - startTimeNanos) / 1_000_000_000.0; + // Buffer the response so we can read it for instrumentation BufferedApiResponse bufferedResponse = new BufferedApiResponse(response); @@ -265,7 +287,8 @@ public CompletableFuture asyncRequest( url, method, body, - bufferedResponse.getBodyAsString()); + bufferedResponse.getBodyAsString(), + timeToFirstToken); return (ApiResponse) bufferedResponse; } catch (Exception e) { span.setStatus(StatusCode.ERROR, e.getMessage()); @@ -283,6 +306,7 @@ public CompletableFuture asyncRequest( String method, String url, byte[] body, Optional options) { Span span = tracer.spanBuilder(getOperation(url)).setSpanKind(SpanKind.CLIENT).startSpan(); Context context = Context.current().with(span); + long startTimeNanos = System.nanoTime(); return delegate.asyncRequest(method, url, body, options) .handle( @@ -295,6 +319,9 @@ public CompletableFuture asyncRequest( } try { + double timeToFirstToken = + (System.nanoTime() - startTimeNanos) / 1_000_000_000.0; + // Buffer the response so we can read it for instrumentation BufferedApiResponse bufferedResponse = new BufferedApiResponse(response); @@ -304,7 +331,8 @@ public CompletableFuture asyncRequest( url, method, new String(body), - bufferedResponse.getBodyAsString()); + bufferedResponse.getBodyAsString(), + timeToFirstToken); return (ApiResponse) bufferedResponse; } catch (Exception e) { span.setStatus(StatusCode.ERROR, e.getMessage()); diff --git a/src/test/java/dev/braintrust/instrumentation/genai/BraintrustGenAITest.java b/src/test/java/dev/braintrust/instrumentation/genai/BraintrustGenAITest.java index e25a74c..03ba468 100644 --- a/src/test/java/dev/braintrust/instrumentation/genai/BraintrustGenAITest.java +++ b/src/test/java/dev/braintrust/instrumentation/genai/BraintrustGenAITest.java @@ -72,6 +72,10 @@ void testWrapGemini() { assertTrue(metrics.get("prompt_tokens").asInt() > 0, "prompt_tokens should be > 0"); assertTrue(metrics.get("completion_tokens").asInt() > 0, "completion_tokens should be > 0"); assertTrue(metrics.get("tokens").asInt() > 0, "tokens should be > 0"); + assertTrue(metrics.has("time_to_first_token"), "time_to_first_token should be present"); + assertTrue( + metrics.get("time_to_first_token").asDouble() >= 0.0, + "time_to_first_token should be >= 0"); // Verify braintrust.span_attributes marks this as an LLM span String spanAttributesJson = @@ -153,6 +157,10 @@ void testWrapGeminiAsync() { assertTrue(metrics.get("prompt_tokens").asInt() > 0, "prompt_tokens should be > 0"); assertTrue(metrics.get("completion_tokens").asInt() > 0, "completion_tokens should be > 0"); assertTrue(metrics.get("tokens").asInt() > 0, "tokens should be > 0"); + assertTrue(metrics.has("time_to_first_token"), "time_to_first_token should be present"); + assertTrue( + metrics.get("time_to_first_token").asDouble() >= 0.0, + "time_to_first_token should be >= 0"); // Verify braintrust.span_attributes marks this as an LLM span String spanAttributesJson =