@@ -2202,3 +2202,100 @@ async def test_streaming_span_update_captures_response_data(
22022202 assert span ._data ["gen_ai.usage.input_tokens" ] == 10
22032203 assert span ._data ["gen_ai.usage.output_tokens" ] == 20
22042204 assert span ._data ["gen_ai.response.model" ] == "gpt-4-streaming"
2205+
2206+
@pytest.mark.asyncio
async def test_streaming_ttft_on_chat_span(sentry_init, test_agent):
    """
    Check that a streamed chat span carries a time-to-first-token (TTFT) value.

    The test opens a chat span via ``ai_client_span`` and replays a scripted
    event stream through a hand-written loop. TTFT is recorded on the first
    event that exposes a ``delta`` attribute. Per the original author, such
    events include:
    - ResponseTextDeltaEvent (text output)
    - ResponseAudioDeltaEvent (audio output)
    - ResponseReasoningTextDeltaEvent (reasoning/thinking)
    - ResponseFunctionCallArgumentsDeltaEvent (function call args)
    - and other delta events...

    Events WITHOUT ``delta`` (ResponseCreatedEvent, ResponseCompletedEvent,
    etc.) must NOT trigger TTFT. The lower bound of the final assertion
    enforces this: the delta-less "created" event is emitted ~50ms before
    the first delta event.

    NOTE(review): the start timestamp is read from
    ``test_agent._sentry_chat_ttft_start_time``; presumably ``ai_client_span``
    sets it -- confirm against the integration. If it is absent, nothing is
    recorded and the key-presence assertion below fails.
    """
    import time

    sentry_init(
        integrations=[OpenAIAgentsIntegration()],
        traces_sample_rate=1.0,
    )

    def build_event(attribute_names, **values):
        # A list-valued ``spec`` limits the mock to exactly these attributes,
        # so ``hasattr(event, "delta")`` is only true when "delta" is listed.
        event = MagicMock(spec=attribute_names)
        for attribute, value in values.items():
            setattr(event, attribute, value)
        return event

    # Stand-in model whose stream_response() yields a scripted event sequence.
    class MockModel:
        model = "gpt-4"

        async def stream_response(self, *args, **kwargs):
            # 1) "created" event: no delta, must not count as first token.
            yield build_event(
                ["type", "sequence_number"],
                type="response.created",
            )

            # Simulated server-side latency before the first token (50ms).
            await asyncio.sleep(0.05)

            # 2) First text delta: the first event that exposes ``delta``.
            yield build_event(
                ["delta", "type", "content_index"],
                delta="Hello",
                type="response.output_text.delta",
            )
            await asyncio.sleep(0.05)  # another 50ms before the next token

            # 3) Second text delta: has ``delta`` too, but TTFT is already set.
            yield build_event(
                ["delta", "type", "content_index"],
                delta=" world!",
                type="response.output_text.delta",
            )

            # 4) "completed" event: carries a response object, no delta.
            final_response = MagicMock()
            final_response.model = "gpt-4"
            final_response.usage = Usage(
                requests=1,
                input_tokens=10,
                output_tokens=5,
                total_tokens=15,
            )
            final_response.output = []
            yield build_event(
                ["response", "type", "sequence_number"],
                response=final_response,
            )

    stream_source = MockModel()

    with sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction:
        # Drive the chat span by hand instead of going through the patched model.
        from sentry_sdk.integrations.openai_agents.spans import ai_client_span

        with ai_client_span(test_agent, {}) as span:
            span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)

            first_token_seen = False
            started_at = getattr(test_agent, "_sentry_chat_ttft_start_time", None)

            async for event in stream_source.stream_response():
                # Per the original author's note, this mirrors the logic used
                # in the actual integration: only the first delta-bearing
                # event records TTFT, and only when a start time is known.
                if (
                    first_token_seen
                    or not hasattr(event, "delta")
                    or started_at is None
                ):
                    continue

                elapsed = time.perf_counter() - started_at
                span.set_data(SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, elapsed)
                first_token_seen = True

        # Inspect the recorded spans while still inside the transaction context.
        recorded_chat_spans = [
            candidate
            for candidate in transaction._span_recorder.spans
            if candidate.op == "gen_ai.chat"
        ]
        assert len(recorded_chat_spans) >= 1

        span_data = recorded_chat_spans[0]._data
        assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span_data

        ttft_value = span_data[SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
        # Expect at least ~40ms (the 50ms simulated delay minus some variance)
        # and well under a second.
        assert 0.04 < ttft_value < 1.0, f"TTFT {ttft_value} should be around 50ms"
0 commit comments