Skip to content

Commit 77e9e20

Browse files
committed
feat(integrations): openai-agents: record TTFT for ai_spans
1 parent 7ebfce9 commit 77e9e20

File tree

3 files changed

+116
-1
lines changed

3 files changed

+116
-1
lines changed

sentry_sdk/integrations/openai_agents/patches/models.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import copy
2-
import sys
2+
import time
33
from functools import wraps
44

55
from sentry_sdk.integrations import DidNotEnable
@@ -149,8 +149,21 @@ async def wrapped_stream_response(*args: "Any", **kwargs: "Any") -> "Any":
149149
span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)
150150

151151
streaming_response = None
152+
ttft_recorded = False
152153

153154
async for event in original_stream_response(*args, **kwargs):
155+
# Detect the first content token: any event carrying a `delta` attribute (text, audio, reasoning, function-call arguments, ...)
156+
if not ttft_recorded and hasattr(event, "delta"):
157+
start_time = getattr(
158+
agent, "_sentry_chat_ttft_start_time", None
159+
)
160+
if start_time is not None:
161+
ttft = time.perf_counter() - start_time
162+
span.set_data(
163+
SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
164+
)
165+
ttft_recorded = True
166+
154167
# Capture the full response from ResponseCompletedEvent
155168
if hasattr(event, "response"):
156169
streaming_response = event.response

sentry_sdk/integrations/openai_agents/spans/ai_client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import time
2+
13
import sentry_sdk
24
from sentry_sdk.consts import OP, SPANDATA
35

@@ -36,6 +38,9 @@ def ai_client_span(
3638
# TODO-anton: remove hardcoded values and replace them with something that also works for embeddings and so on
3739
span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat")
3840

41+
# Store the TTFT start time on the agent object so the streaming wrapper can read it when the first token arrives
42+
agent._sentry_chat_ttft_start_time = time.perf_counter()
43+
3944
_set_agent_data(span, agent)
4045
_set_input_data(span, get_response_kwargs)
4146

tests/integrations/openai_agents/test_openai_agents.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2202,3 +2202,100 @@ async def test_streaming_span_update_captures_response_data(
22022202
assert span._data["gen_ai.usage.input_tokens"] == 10
22032203
assert span._data["gen_ai.usage.output_tokens"] == 20
22042204
assert span._data["gen_ai.response.model"] == "gpt-4-streaming"
2205+
2206+
2207+
@pytest.mark.asyncio
async def test_streaming_ttft_on_chat_span(sentry_init, test_agent):
    """
    Check that a streamed chat span ends up with a time-to-first-token (TTFT) value.

    The first streamed event exposing a `delta` attribute counts as the first
    token. Events of that shape include:
    - ResponseTextDeltaEvent (text output)
    - ResponseAudioDeltaEvent (audio output)
    - ResponseReasoningTextDeltaEvent (reasoning/thinking)
    - ResponseFunctionCallArgumentsDeltaEvent (function call args)
    - and other delta events...

    Events that lack `delta` (ResponseCreatedEvent, ResponseCompletedEvent, etc.)
    must NOT cause TTFT to be recorded.
    """
    import time

    sentry_init(
        integrations=[OpenAIAgentsIntegration()],
        traces_sample_rate=1.0,
    )

    def make_text_delta(text):
        # Shaped like a ResponseTextDeltaEvent: it exposes `delta`.
        event = MagicMock(spec=["delta", "type", "content_index"])
        event.delta = text
        event.type = "response.output_text.delta"
        return event

    # Stand-in model whose stream_response is an async generator of mock events
    class FakeStreamingModel:
        model = "gpt-4"

        async def stream_response(self, *args, **kwargs):
            # 1) ResponseCreatedEvent: no `delta`, so it must not count as a token
            created = MagicMock(spec=["type", "sequence_number"])
            created.type = "response.created"
            yield created

            # Pretend the server needs 50ms before producing the first token
            await asyncio.sleep(0.05)

            # 2) First real content token: has `delta`, so TTFT is taken here
            yield make_text_delta("Hello")
            await asyncio.sleep(0.05)  # another 50ms between tokens

            # 3) Further content: also has `delta`, but TTFT is already set
            yield make_text_delta(" world!")

            # 4) ResponseCompletedEvent: carries `response`, has no `delta`
            response = MagicMock()
            response.model = "gpt-4"
            response.usage = Usage(
                requests=1,
                input_tokens=10,
                output_tokens=5,
                total_tokens=15,
            )
            response.output = []
            completed = MagicMock(spec=["response", "type", "sequence_number"])
            completed.response = response
            yield completed

    fake_model = FakeStreamingModel()

    with sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction:
        # Reproduce what the wrapped stream_response does around the stream
        from sentry_sdk.integrations.openai_agents.spans import ai_client_span

        with ai_client_span(test_agent, {}) as span:
            span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)

            first_token_seen = False
            started_at = getattr(test_agent, "_sentry_chat_ttft_start_time", None)

            async for event in fake_model.stream_response():
                # Mirrors the integration: only the first `delta` event counts
                if first_token_seen:
                    continue
                if not hasattr(event, "delta"):
                    continue
                if started_at is None:
                    continue

                elapsed = time.perf_counter() - started_at
                span.set_data(SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, elapsed)
                first_token_seen = True

        # Still inside the transaction: look up the chat span and check its TTFT
        chat_spans = []
        for recorded in transaction._span_recorder.spans:
            if recorded.op == "gen_ai.chat":
                chat_spans.append(recorded)
        assert len(chat_spans) >= 1

        span_data = chat_spans[0]._data
        assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span_data
        ttft_value = span_data[SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
        # Lower bound: the 50ms simulated delay minus some variance; upper bound: a sanity cap
        assert 0.04 < ttft_value < 1.0, f"TTFT {ttft_value} should be around 50ms"

0 commit comments

Comments
 (0)