From 9d89b90da0ae1e6d2fd0b31b0c5a69a9d746633e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 26 Feb 2026 16:02:09 +0100 Subject: [PATCH 01/10] ref(openai): Only handle streamed results when applicable --- sentry_sdk/integrations/openai.py | 215 +++++++++++++++++++++-- tests/integrations/openai/test_openai.py | 34 +++- 2 files changed, 228 insertions(+), 21 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index d8139f217b..36bcbbccc0 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -746,14 +746,103 @@ def _set_streaming_completions_api_output_data( if messages is not None and isinstance(messages, str): messages = [messages] - _common_set_output_data( - span, - response, - messages, - integration, - start_time, - finish_span, - ) + ttft: "Optional[float]" = None + data_buf: "list[list[str]]" = [] # one for each choice + + old_iterator = response._iterator + + def new_iterator() -> "Iterator[ChatCompletionChunk]": + nonlocal ttft + count_tokens_manually = True + for x in old_iterator: + with capture_internal_exceptions(): + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + if start_time is not None and ttft is None: + ttft = time.perf_counter() - start_time + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + + yield x + + with capture_internal_exceptions(): + if ttft is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft + ) + if len(data_buf) > 0: + all_responses = ["".join(chunk) for chunk in data_buf] + if should_send_default_pii() and integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses + ) + if count_tokens_manually: + _calculate_token_usage( + messages, + response, + span, + all_responses, + integration.count_tokens, + ) + + if finish_span: + span.__exit__(None, None, None) + + async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]": + nonlocal ttft + count_tokens_manually = True + async for x in old_iterator: + with capture_internal_exceptions(): + # OpenAI chat completion API + if hasattr(x, "choices"): + choice_index = 0 + for choice in x.choices: + if hasattr(choice, "delta") and hasattr( + choice.delta, "content" + ): + if start_time is not None and ttft is None: + ttft = time.perf_counter() - start_time + content = choice.delta.content + if len(data_buf) <= choice_index: + data_buf.append([]) + data_buf[choice_index].append(content or "") + choice_index += 1 + + yield x + + with capture_internal_exceptions(): + if ttft is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft + ) + if len(data_buf) > 0: + all_responses = ["".join(chunk) for chunk in data_buf] + if should_send_default_pii() and integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses + ) + if count_tokens_manually: + _calculate_token_usage( + messages, + response, + span, + all_responses, + integration.count_tokens, + ) + if finish_span: + span.__exit__(None, None, None) + + if str(type(response._iterator)) == "": + response._iterator = new_iterator_async() + else: + response._iterator = new_iterator() def _set_responses_api_output_data( @@ -792,14 +881,108 @@ def _set_streaming_responses_api_output_data( if input is not None and isinstance(input, str): input = [input] - _common_set_output_data( - span, - response, - input, - integration, - start_time, - finish_span, - ) + ttft: "Optional[float]" = None + data_buf: "list[list[str]]" = [] # one for each choice + + old_iterator = response._iterator + + def new_iterator() -> "Iterator[ChatCompletionChunk]": + nonlocal ttft + count_tokens_manually = True + for x in old_iterator: + with capture_internal_exceptions(): + if hasattr(x, "delta"): + if start_time is not None and ttft is None: + ttft = time.perf_counter() - start_time + if len(data_buf) == 0: + data_buf.append([]) + data_buf[0].append(x.delta or "") + + if isinstance(x, ResponseCompletedEvent): + _calculate_token_usage( + input, + x.response, + span, + None, + integration.count_tokens, + ) + count_tokens_manually = False + + yield x + + with capture_internal_exceptions(): + if ttft is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft + ) + if len(data_buf) > 0: + all_responses = ["".join(chunk) for chunk in data_buf] + if should_send_default_pii() and integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses + ) + if count_tokens_manually: + _calculate_token_usage( + input, + response, + span, + all_responses, + integration.count_tokens, + ) + + if finish_span: + span.__exit__(None, None, None) + + async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]": + nonlocal ttft + count_tokens_manually = True + async for x in old_iterator: + with capture_internal_exceptions(): + if hasattr(x, "delta"): + if start_time is not None and ttft is None: + ttft = time.perf_counter() - start_time + if len(data_buf) == 0: + data_buf.append([]) + data_buf[0].append(x.delta or "") + + if isinstance(x, ResponseCompletedEvent): + _calculate_token_usage( + input, + x.response, + span, + None, + integration.count_tokens, + ) + count_tokens_manually = False + + yield x + + with capture_internal_exceptions(): + if ttft is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft + ) + if len(data_buf) > 0: + all_responses = ["".join(chunk) for chunk in data_buf] + if should_send_default_pii() and integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses + ) + if count_tokens_manually: + _calculate_token_usage( + input, + response, + span, + all_responses, + integration.count_tokens, + ) + if finish_span: + span.__exit__(None, None, None) + + if str(type(response._iterator)) == "": + response._iterator = new_iterator_async() + else: + response._iterator = new_iterator() def _set_embeddings_output_data( diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index d99fb4caf8..cb6f65565f 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1,6 +1,8 @@ import json import pytest +from typing import Union + from sentry_sdk.utils import package_version try: @@ -21,6 +23,8 @@ from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage +from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent + SKIP_RESPONSES_TESTS = False try: @@ -500,6 +504,7 @@ def test_streaming_chat_completion_no_prompts( {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "hello"}, ], + stream=True, ) response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) @@ -624,6 +629,7 @@ def test_streaming_chat_completion(sentry_init, capture_events, messages, reques response_stream = client.chat.completions.create( model="some-model", messages=messages, + stream=True, ) response_string = "".join( map(lambda x: x.choices[0].delta.content, response_stream) @@ -747,6 +753,7 @@ async def test_streaming_chat_completion_async_no_prompts( {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "hello"}, ], + stream=True, ) response_string = "" @@ -881,6 +888,7 @@ async def test_streaming_chat_completion_async( response_stream = await client.chat.completions.create( model="some-model", messages=messages, + stream=True, ) response_string = "" @@ -942,7 +950,9 @@ def test_bad_chat_completion(sentry_init, capture_events): ) with pytest.raises(OpenAIError): client.chat.completions.create( - model="some-model", messages=[{"role": "system", "content": "hello"}] + model="some-model", + messages=[{"role": "system", "content": "hello"}], + stream=True, ) (event,) = events @@ -2340,6 +2350,16 @@ async def test_ai_client_span_responses_async_api( assert spans[0]["data"] == expected_data +async def example_response_stream(): + yield EXAMPLE_RESPONSES_STREAM[0] + yield EXAMPLE_RESPONSES_STREAM[1] + yield EXAMPLE_RESPONSES_STREAM[2] + yield EXAMPLE_RESPONSES_STREAM[3] + yield EXAMPLE_RESPONSES_STREAM[4] + + return + + @pytest.mark.asyncio @pytest.mark.parametrize( "instructions", @@ -2417,15 +2437,19 @@ async def test_ai_client_span_streaming_responses_async_api( events = capture_events() client = AsyncOpenAI(api_key="z") - client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) + returned_stream = AsyncStream(cast_to=None, response=None, client=client) + returned_stream._iterator = example_response_stream() + client.responses._post = mock.AsyncMock(return_value=returned_stream) with start_transaction(name="openai tx"): - await client.responses.create( + result = await client.responses.create( model="gpt-4o", instructions=instructions, input=input, stream=True, ) + async for _ in result: + pass (transaction,) = events spans = transaction["spans"] @@ -2438,14 +2462,14 @@ async def test_ai_client_span_streaming_responses_async_api( "gen_ai.operation.name": "responses", "gen_ai.response.streaming": True, "gen_ai.system": "openai", - "gen_ai.response.model": "response-model-id", + "gen_ai.response.time_to_first_token": mock.ANY, "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, "gen_ai.request.model": "gpt-4o", - "gen_ai.response.text": "the model response", + "gen_ai.response.text": "hello world", "thread.id": mock.ANY, "thread.name": mock.ANY, } From 3fe34993b93302bf67e3b9c9c168c320ec83386e Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 26 Feb 2026 16:04:21 +0100 Subject: [PATCH 02/10] . --- sentry_sdk/integrations/openai.py | 139 ------------------------------ 1 file changed, 139 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 36bcbbccc0..1a408ef0b8 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -473,8 +473,6 @@ def _common_set_output_data( if hasattr(response, "model"): set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model) - ttft: "Optional[float]" = None - if hasattr(response, "choices"): if should_send_default_pii() and integration.include_prompts: response_text = [ @@ -525,143 +523,6 @@ def _common_set_output_data( if finish_span: span.__exit__(None, None, None) - - elif hasattr(response, "_iterator"): - data_buf: "list[list[str]]" = [] # one for each choice - - old_iterator = response._iterator - - def new_iterator() -> "Iterator[ChatCompletionChunk]": - nonlocal ttft - count_tokens_manually = True - for x in old_iterator: - with capture_internal_exceptions(): - # OpenAI chat completion API - if hasattr(x, "choices"): - choice_index = 0 - for choice in x.choices: - if hasattr(choice, "delta") and hasattr( - choice.delta, "content" - ): - if start_time is not None and ttft is None: - ttft = time.perf_counter() - start_time - content = choice.delta.content - if len(data_buf) <= choice_index: - data_buf.append([]) - data_buf[choice_index].append(content or "") - choice_index += 1 - - # OpenAI responses API - elif hasattr(x, "delta"): - if start_time is not None and ttft is None: - ttft = time.perf_counter() - start_time - if len(data_buf) == 0: - data_buf.append([]) - data_buf[0].append(x.delta or "") - - # OpenAI responses API end of streaming response - if RESPONSES_API_ENABLED and isinstance(x, ResponseCompletedEvent): - _calculate_token_usage( - input, - x.response, - span, - None, - integration.count_tokens, - ) - count_tokens_manually = False - - yield x - - with capture_internal_exceptions(): - if ttft is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft - ) - if len(data_buf) > 0: - all_responses = ["".join(chunk) for chunk in data_buf] - if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses - ) - if count_tokens_manually: - _calculate_token_usage( - input, - response, - span, - all_responses, - integration.count_tokens, - ) - - if finish_span: - span.__exit__(None, None, None) - - async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]": - nonlocal ttft - count_tokens_manually = True - async for x in old_iterator: - with capture_internal_exceptions(): - # OpenAI chat completion API - if hasattr(x, "choices"): - choice_index = 0 - for choice in x.choices: - if hasattr(choice, "delta") and hasattr( - choice.delta, "content" - ): - if start_time is not None and ttft is None: - ttft = time.perf_counter() - start_time - content = choice.delta.content - if len(data_buf) <= choice_index: - data_buf.append([]) - data_buf[choice_index].append(content or "") - choice_index += 1 - - # OpenAI responses API - elif hasattr(x, "delta"): - if start_time is not None and ttft is None: - ttft = time.perf_counter() - start_time - if len(data_buf) == 0: - data_buf.append([]) - data_buf[0].append(x.delta or "") - - # OpenAI responses API end of streaming response - if RESPONSES_API_ENABLED and isinstance(x, ResponseCompletedEvent): - _calculate_token_usage( - input, - x.response, - span, - None, - integration.count_tokens, - ) - count_tokens_manually = False - - yield x - - with capture_internal_exceptions(): - if ttft is not None: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft - ) - if len(data_buf) > 0: - all_responses = ["".join(chunk) for chunk in data_buf] - if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses - ) - if count_tokens_manually: - _calculate_token_usage( - input, - response, - span, - all_responses, - integration.count_tokens, - ) - if finish_span: - span.__exit__(None, None, None) - - if str(type(response._iterator)) == "": - response._iterator = new_iterator_async() - else: - response._iterator = new_iterator() else: _calculate_token_usage(input, response, span, None, integration.count_tokens) if finish_span: From 6c5f254ad345350ee5cfc037f1130ccd98dd5fc0 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 26 Feb 2026 16:18:01 +0100 Subject: [PATCH 03/10] . --- tests/integrations/openai/test_openai.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index cb6f65565f..328c368ca2 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -3018,7 +3018,9 @@ def test_streaming_chat_completion_ttft(sentry_init, capture_events): with start_transaction(name="openai tx"): response_stream = client.chat.completions.create( - model="some-model", messages=[{"role": "user", "content": "Say hello"}] + model="some-model", + messages=[{"role": "user", "content": "Say hello"}], + stream=True, ) # Consume the stream for _ in response_stream: @@ -3082,7 +3084,9 @@ async def test_streaming_chat_completion_ttft_async(sentry_init, capture_events) with start_transaction(name="openai tx"): response_stream = await client.chat.completions.create( - model="some-model", messages=[{"role": "user", "content": "Say hello"}] + model="some-model", + messages=[{"role": "user", "content": "Say hello"}], + stream=True, ) # Consume the stream async for _ in response_stream: From 5a36e3dc0b41b7566024bce376bebb4b5a0677e7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 26 Feb 2026 16:21:10 +0100 Subject: [PATCH 04/10] . --- tests/integrations/openai/test_openai.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 328c368ca2..0d583c8e01 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -2350,16 +2350,6 @@ async def test_ai_client_span_responses_async_api( assert spans[0]["data"] == expected_data -async def example_response_stream(): - yield EXAMPLE_RESPONSES_STREAM[0] - yield EXAMPLE_RESPONSES_STREAM[1] - yield EXAMPLE_RESPONSES_STREAM[2] - yield EXAMPLE_RESPONSES_STREAM[3] - yield EXAMPLE_RESPONSES_STREAM[4] - - return - - @pytest.mark.asyncio @pytest.mark.parametrize( "instructions", @@ -2438,7 +2428,7 @@ async def test_ai_client_span_streaming_responses_async_api( client = AsyncOpenAI(api_key="z") returned_stream = AsyncStream(cast_to=None, response=None, client=client) - returned_stream._iterator = example_response_stream() + returned_stream._iterator = async_iterator(EXAMPLE_RESPONSES_STREAM) client.responses._post = mock.AsyncMock(return_value=returned_stream) with start_transaction(name="openai tx"): From f1fdaf974e14d1d9bdeffa9012d6b5855bd0759f Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 26 Feb 2026 16:30:22 +0100 Subject: [PATCH 05/10] . --- tests/integrations/openai/test_openai.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 0d583c8e01..48453b573e 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1,8 +1,6 @@ import json import pytest -from typing import Union - from sentry_sdk.utils import package_version try: @@ -23,8 +21,6 @@ from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage -from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent - SKIP_RESPONSES_TESTS = False try: From f7c5356dc3df099be56b6655768b5f8dbead7f96 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 26 Feb 2026 16:34:19 +0100 Subject: [PATCH 06/10] . --- tests/integrations/openai/test_openai.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 48453b573e..060600ee65 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -948,7 +948,6 @@ def test_bad_chat_completion(sentry_init, capture_events): client.chat.completions.create( model="some-model", messages=[{"role": "system", "content": "hello"}], - stream=True, ) (event,) = events From edf0a4f9c719d31014b15aed1b09edc749913459 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 26 Feb 2026 17:11:32 +0100 Subject: [PATCH 07/10] remove unused parameter --- sentry_sdk/integrations/openai.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 1a408ef0b8..c5de0458b1 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -467,7 +467,6 @@ def _common_set_output_data( response: "Any", input: "Any", integration: "OpenAIIntegration", - start_time: "Optional[float]" = None, finish_span: bool = True, ) -> None: if hasattr(response, "model"): @@ -565,7 +564,7 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any ) else: _set_completions_api_output_data( - span, response, kwargs, integration, start_time, finish_span=True + span, response, kwargs, integration, finish_span=True ) return response @@ -576,7 +575,6 @@ def _set_completions_api_output_data( response: "Any", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", - start_time: "Optional[float]" = None, finish_span: bool = True, ) -> None: messages = kwargs.get("messages") @@ -589,7 +587,6 @@ def _set_completions_api_output_data( response, messages, integration, - start_time, finish_span, ) @@ -711,7 +708,6 @@ def _set_responses_api_output_data( response: "Any", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", - start_time: "Optional[float]" = None, finish_span: bool = True, ) -> None: input = kwargs.get("input") @@ -724,7 +720,6 @@ def _set_responses_api_output_data( response, input, integration, - start_time, finish_span, ) @@ -851,7 +846,6 @@ def _set_embeddings_output_data( response: "Any", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", - start_time: "Optional[float]" = None, finish_span: bool = True, ) -> None: input = kwargs.get("input") @@ -864,7 +858,6 @@ def _set_embeddings_output_data( response, input, integration, - start_time, finish_span, ) @@ -1052,7 +1045,7 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An ) else: _set_responses_api_output_data( - span, response, kwargs, integration, start_time, finish_span=True + span, response, kwargs, integration, finish_span=True ) return response From 85e17915459f6199b31055c07667e6f916fd67fd Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Thu, 26 Feb 2026 17:26:55 +0100 Subject: [PATCH 08/10] . --- sentry_sdk/integrations/openai.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index c5de0458b1..169e0d8227 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -658,7 +658,6 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]": count_tokens_manually = True async for x in old_iterator: with capture_internal_exceptions(): - # OpenAI chat completion API if hasattr(x, "choices"): choice_index = 0 for choice in x.choices: From 3fb20f04e8b24025c15f704ad84481c702ce0612 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 27 Feb 2026 10:09:17 +0100 Subject: [PATCH 09/10] remove manual token counting for completions --- sentry_sdk/integrations/openai.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 169e0d8227..9d4b5cfd14 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -611,7 +611,6 @@ def _set_streaming_completions_api_output_data( def new_iterator() -> "Iterator[ChatCompletionChunk]": nonlocal ttft - count_tokens_manually = True for x in old_iterator: with capture_internal_exceptions(): if hasattr(x, "choices"): @@ -641,21 +640,12 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]": set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses ) - if count_tokens_manually: - _calculate_token_usage( - messages, - response, - span, - all_responses, - integration.count_tokens, - ) if finish_span: span.__exit__(None, None, None) async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]": nonlocal ttft - count_tokens_manually = True async for x in old_iterator: with capture_internal_exceptions(): if hasattr(x, "choices"): @@ -685,14 +675,7 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]": set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses ) - if count_tokens_manually: - _calculate_token_usage( - messages, - response, - span, - all_responses, - integration.count_tokens, - ) + if finish_span: span.__exit__(None, None, None) From 82843e2fd25f8555087e7420c75d3316314c03ec Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 27 Feb 2026 10:15:18 +0100 Subject: [PATCH 10/10] add unconditional manual token counting for completions --- sentry_sdk/integrations/openai.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 9d4b5cfd14..d4fe6e2c26 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -640,6 +640,13 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]": set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses ) + _calculate_token_usage( + messages, + response, + span, + all_responses, + integration.count_tokens, + ) if finish_span: span.__exit__(None, None, None) @@ -675,6 +682,13 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]": set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses ) + _calculate_token_usage( + messages, + response, + span, + all_responses, + integration.count_tokens, + ) if finish_span: span.__exit__(None, None, None)