From 9d89b90da0ae1e6d2fd0b31b0c5a69a9d746633e Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Feb 2026 16:02:09 +0100
Subject: [PATCH 01/10] ref(openai): Only handle streamed results when
 applicable

---
 sentry_sdk/integrations/openai.py        | 215 +++++++++++++++++++++--
 tests/integrations/openai/test_openai.py |  34 +++-
 2 files changed, 228 insertions(+), 21 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index d8139f217b..36bcbbccc0 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -746,14 +746,103 @@ def _set_streaming_completions_api_output_data(
     if messages is not None and isinstance(messages, str):
         messages = [messages]
 
-    _common_set_output_data(
-        span,
-        response,
-        messages,
-        integration,
-        start_time,
-        finish_span,
-    )
+    ttft: "Optional[float]" = None
+    data_buf: "list[list[str]]" = []  # one for each choice
+
+    old_iterator = response._iterator
+
+    def new_iterator() -> "Iterator[ChatCompletionChunk]":
+        nonlocal ttft
+        count_tokens_manually = True
+        for x in old_iterator:
+            with capture_internal_exceptions():
+                if hasattr(x, "choices"):
+                    choice_index = 0
+                    for choice in x.choices:
+                        if hasattr(choice, "delta") and hasattr(
+                            choice.delta, "content"
+                        ):
+                            if start_time is not None and ttft is None:
+                                ttft = time.perf_counter() - start_time
+                            content = choice.delta.content
+                            if len(data_buf) <= choice_index:
+                                data_buf.append([])
+                            data_buf[choice_index].append(content or "")
+                        choice_index += 1
+
+            yield x
+
+        with capture_internal_exceptions():
+            if ttft is not None:
+                set_data_normalized(
+                    span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+                )
+            if len(data_buf) > 0:
+                all_responses = ["".join(chunk) for chunk in data_buf]
+                if should_send_default_pii() and integration.include_prompts:
+                    set_data_normalized(
+                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
+                    )
+                if count_tokens_manually:
+                    _calculate_token_usage(
+                        messages,
+                        response,
+                        span,
+                        all_responses,
+                        integration.count_tokens,
+                    )
+
+        if finish_span:
+            span.__exit__(None, None, None)
+
+    async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
+        nonlocal ttft
+        count_tokens_manually = True
+        async for x in old_iterator:
+            with capture_internal_exceptions():
+                # OpenAI chat completion API
+                if hasattr(x, "choices"):
+                    choice_index = 0
+                    for choice in x.choices:
+                        if hasattr(choice, "delta") and hasattr(
+                            choice.delta, "content"
+                        ):
+                            if start_time is not None and ttft is None:
+                                ttft = time.perf_counter() - start_time
+                            content = choice.delta.content
+                            if len(data_buf) <= choice_index:
+                                data_buf.append([])
+                            data_buf[choice_index].append(content or "")
+                        choice_index += 1
+
+            yield x
+
+        with capture_internal_exceptions():
+            if ttft is not None:
+                set_data_normalized(
+                    span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+                )
+            if len(data_buf) > 0:
+                all_responses = ["".join(chunk) for chunk in data_buf]
+                if should_send_default_pii() and integration.include_prompts:
+                    set_data_normalized(
+                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
+                    )
+                if count_tokens_manually:
+                    _calculate_token_usage(
+                        messages,
+                        response,
+                        span,
+                        all_responses,
+                        integration.count_tokens,
+                    )
+        if finish_span:
+            span.__exit__(None, None, None)
+
+    if str(type(response._iterator)) == "<class 'async_generator'>":
+        response._iterator = new_iterator_async()
+    else:
+        response._iterator = new_iterator()
 
 
 def _set_responses_api_output_data(
@@ -792,14 +881,108 @@ def _set_streaming_responses_api_output_data(
     if input is not None and isinstance(input, str):
         input = [input]
 
-    _common_set_output_data(
-        span,
-        response,
-        input,
-        integration,
-        start_time,
-        finish_span,
-    )
+    ttft: "Optional[float]" = None
+    data_buf: "list[list[str]]" = []  # one for each choice
+
+    old_iterator = response._iterator
+
+    def new_iterator() -> "Iterator[ChatCompletionChunk]":
+        nonlocal ttft
+        count_tokens_manually = True
+        for x in old_iterator:
+            with capture_internal_exceptions():
+                if hasattr(x, "delta"):
+                    if start_time is not None and ttft is None:
+                        ttft = time.perf_counter() - start_time
+                    if len(data_buf) == 0:
+                        data_buf.append([])
+                    data_buf[0].append(x.delta or "")
+
+                if isinstance(x, ResponseCompletedEvent):
+                    _calculate_token_usage(
+                        input,
+                        x.response,
+                        span,
+                        None,
+                        integration.count_tokens,
+                    )
+                    count_tokens_manually = False
+
+            yield x
+
+        with capture_internal_exceptions():
+            if ttft is not None:
+                set_data_normalized(
+                    span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+                )
+            if len(data_buf) > 0:
+                all_responses = ["".join(chunk) for chunk in data_buf]
+                if should_send_default_pii() and integration.include_prompts:
+                    set_data_normalized(
+                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
+                    )
+                if count_tokens_manually:
+                    _calculate_token_usage(
+                        input,
+                        response,
+                        span,
+                        all_responses,
+                        integration.count_tokens,
+                    )
+
+        if finish_span:
+            span.__exit__(None, None, None)
+
+    async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
+        nonlocal ttft
+        count_tokens_manually = True
+        async for x in old_iterator:
+            with capture_internal_exceptions():
+                if hasattr(x, "delta"):
+                    if start_time is not None and ttft is None:
+                        ttft = time.perf_counter() - start_time
+                    if len(data_buf) == 0:
+                        data_buf.append([])
+                    data_buf[0].append(x.delta or "")
+
+                if isinstance(x, ResponseCompletedEvent):
+                    _calculate_token_usage(
+                        input,
+                        x.response,
+                        span,
+                        None,
+                        integration.count_tokens,
+                    )
+                    count_tokens_manually = False
+
+            yield x
+
+        with capture_internal_exceptions():
+            if ttft is not None:
+                set_data_normalized(
+                    span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+                )
+            if len(data_buf) > 0:
+                all_responses = ["".join(chunk) for chunk in data_buf]
+                if should_send_default_pii() and integration.include_prompts:
+                    set_data_normalized(
+                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
+                    )
+                if count_tokens_manually:
+                    _calculate_token_usage(
+                        input,
+                        response,
+                        span,
+                        all_responses,
+                        integration.count_tokens,
+                    )
+        if finish_span:
+            span.__exit__(None, None, None)
+
+    if str(type(response._iterator)) == "<class 'async_generator'>":
+        response._iterator = new_iterator_async()
+    else:
+        response._iterator = new_iterator()
 
 
 def _set_embeddings_output_data(
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index d99fb4caf8..cb6f65565f 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1,6 +1,8 @@
 import json
 import pytest
 
+from typing import Union
+
 from sentry_sdk.utils import package_version
 
 try:
@@ -21,6 +23,8 @@
 from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice
 from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage
 
+from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent
+
 SKIP_RESPONSES_TESTS = False
 
 try:
@@ -500,6 +504,7 @@ def test_streaming_chat_completion_no_prompts(
                 {"role": "system", "content": "You are a helpful assistant."},
                 {"role": "user", "content": "hello"},
             ],
+            stream=True,
         )
         response_string = "".join(
             map(lambda x: x.choices[0].delta.content, response_stream)
@@ -624,6 +629,7 @@ def test_streaming_chat_completion(sentry_init, capture_events, messages, reques
         response_stream = client.chat.completions.create(
             model="some-model",
             messages=messages,
+            stream=True,
         )
         response_string = "".join(
             map(lambda x: x.choices[0].delta.content, response_stream)
@@ -747,6 +753,7 @@ async def test_streaming_chat_completion_async_no_prompts(
                 {"role": "system", "content": "You are a helpful assistant."},
                 {"role": "user", "content": "hello"},
             ],
+            stream=True,
         )
 
         response_string = ""
@@ -881,6 +888,7 @@ async def test_streaming_chat_completion_async(
         response_stream = await client.chat.completions.create(
             model="some-model",
             messages=messages,
+            stream=True,
         )
 
         response_string = ""
@@ -942,7 +950,9 @@ def test_bad_chat_completion(sentry_init, capture_events):
     )
     with pytest.raises(OpenAIError):
         client.chat.completions.create(
-            model="some-model", messages=[{"role": "system", "content": "hello"}]
+            model="some-model",
+            messages=[{"role": "system", "content": "hello"}],
+            stream=True,
         )
 
     (event,) = events
@@ -2340,6 +2350,16 @@ async def test_ai_client_span_responses_async_api(
     assert spans[0]["data"] == expected_data
 
 
+async def example_response_stream():
+    yield EXAMPLE_RESPONSES_STREAM[0]
+    yield EXAMPLE_RESPONSES_STREAM[1]
+    yield EXAMPLE_RESPONSES_STREAM[2]
+    yield EXAMPLE_RESPONSES_STREAM[3]
+    yield EXAMPLE_RESPONSES_STREAM[4]
+
+    return
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "instructions",
@@ -2417,15 +2437,19 @@ async def test_ai_client_span_streaming_responses_async_api(
     events = capture_events()
 
     client = AsyncOpenAI(api_key="z")
-    client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE)
+    returned_stream = AsyncStream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = example_response_stream()
+    client.responses._post = mock.AsyncMock(return_value=returned_stream)
 
     with start_transaction(name="openai tx"):
-        await client.responses.create(
+        result = await client.responses.create(
             model="gpt-4o",
             instructions=instructions,
             input=input,
             stream=True,
         )
+        async for _ in result:
+            pass
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -2438,14 +2462,14 @@ async def test_ai_client_span_streaming_responses_async_api(
         "gen_ai.operation.name": "responses",
         "gen_ai.response.streaming": True,
         "gen_ai.system": "openai",
-        "gen_ai.response.model": "response-model-id",
+        "gen_ai.response.time_to_first_token": mock.ANY,
         "gen_ai.usage.input_tokens": 20,
         "gen_ai.usage.input_tokens.cached": 5,
         "gen_ai.usage.output_tokens": 10,
         "gen_ai.usage.output_tokens.reasoning": 8,
         "gen_ai.usage.total_tokens": 30,
         "gen_ai.request.model": "gpt-4o",
-        "gen_ai.response.text": "the model response",
+        "gen_ai.response.text": "hello world",
         "thread.id": mock.ANY,
         "thread.name": mock.ANY,
     }

From 3fe34993b93302bf67e3b9c9c168c320ec83386e Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Feb 2026 16:04:21 +0100
Subject: [PATCH 02/10] ref(openai): Drop streaming branch from common helper

---
 sentry_sdk/integrations/openai.py | 139 ------------------------------
 1 file changed, 139 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 36bcbbccc0..1a408ef0b8 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -473,8 +473,6 @@ def _common_set_output_data(
     if hasattr(response, "model"):
         set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model)
 
-    ttft: "Optional[float]" = None
-
     if hasattr(response, "choices"):
         if should_send_default_pii() and integration.include_prompts:
             response_text = [
@@ -525,143 +523,6 @@ def _common_set_output_data(
 
         if finish_span:
             span.__exit__(None, None, None)
-
-    elif hasattr(response, "_iterator"):
-        data_buf: "list[list[str]]" = []  # one for each choice
-
-        old_iterator = response._iterator
-
-        def new_iterator() -> "Iterator[ChatCompletionChunk]":
-            nonlocal ttft
-            count_tokens_manually = True
-            for x in old_iterator:
-                with capture_internal_exceptions():
-                    # OpenAI chat completion API
-                    if hasattr(x, "choices"):
-                        choice_index = 0
-                        for choice in x.choices:
-                            if hasattr(choice, "delta") and hasattr(
-                                choice.delta, "content"
-                            ):
-                                if start_time is not None and ttft is None:
-                                    ttft = time.perf_counter() - start_time
-                                content = choice.delta.content
-                                if len(data_buf) <= choice_index:
-                                    data_buf.append([])
-                                data_buf[choice_index].append(content or "")
-                            choice_index += 1
-
-                    # OpenAI responses API
-                    elif hasattr(x, "delta"):
-                        if start_time is not None and ttft is None:
-                            ttft = time.perf_counter() - start_time
-                        if len(data_buf) == 0:
-                            data_buf.append([])
-                        data_buf[0].append(x.delta or "")
-
-                    # OpenAI responses API end of streaming response
-                    if RESPONSES_API_ENABLED and isinstance(x, ResponseCompletedEvent):
-                        _calculate_token_usage(
-                            input,
-                            x.response,
-                            span,
-                            None,
-                            integration.count_tokens,
-                        )
-                        count_tokens_manually = False
-
-                yield x
-
-            with capture_internal_exceptions():
-                if ttft is not None:
-                    set_data_normalized(
-                        span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
-                    )
-                if len(data_buf) > 0:
-                    all_responses = ["".join(chunk) for chunk in data_buf]
-                    if should_send_default_pii() and integration.include_prompts:
-                        set_data_normalized(
-                            span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
-                        )
-                    if count_tokens_manually:
-                        _calculate_token_usage(
-                            input,
-                            response,
-                            span,
-                            all_responses,
-                            integration.count_tokens,
-                        )
-
-            if finish_span:
-                span.__exit__(None, None, None)
-
-        async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
-            nonlocal ttft
-            count_tokens_manually = True
-            async for x in old_iterator:
-                with capture_internal_exceptions():
-                    # OpenAI chat completion API
-                    if hasattr(x, "choices"):
-                        choice_index = 0
-                        for choice in x.choices:
-                            if hasattr(choice, "delta") and hasattr(
-                                choice.delta, "content"
-                            ):
-                                if start_time is not None and ttft is None:
-                                    ttft = time.perf_counter() - start_time
-                                content = choice.delta.content
-                                if len(data_buf) <= choice_index:
-                                    data_buf.append([])
-                                data_buf[choice_index].append(content or "")
-                            choice_index += 1
-
-                    # OpenAI responses API
-                    elif hasattr(x, "delta"):
-                        if start_time is not None and ttft is None:
-                            ttft = time.perf_counter() - start_time
-                        if len(data_buf) == 0:
-                            data_buf.append([])
-                        data_buf[0].append(x.delta or "")
-
-                    # OpenAI responses API end of streaming response
-                    if RESPONSES_API_ENABLED and isinstance(x, ResponseCompletedEvent):
-                        _calculate_token_usage(
-                            input,
-                            x.response,
-                            span,
-                            None,
-                            integration.count_tokens,
-                        )
-                        count_tokens_manually = False
-
-                yield x
-
-            with capture_internal_exceptions():
-                if ttft is not None:
-                    set_data_normalized(
-                        span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
-                    )
-                if len(data_buf) > 0:
-                    all_responses = ["".join(chunk) for chunk in data_buf]
-                    if should_send_default_pii() and integration.include_prompts:
-                        set_data_normalized(
-                            span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
-                        )
-                    if count_tokens_manually:
-                        _calculate_token_usage(
-                            input,
-                            response,
-                            span,
-                            all_responses,
-                            integration.count_tokens,
-                        )
-            if finish_span:
-                span.__exit__(None, None, None)
-
-        if str(type(response._iterator)) == "<class 'async_generator'>":
-            response._iterator = new_iterator_async()
-        else:
-            response._iterator = new_iterator()
     else:
         _calculate_token_usage(input, response, span, None, integration.count_tokens)
         if finish_span:

From 6c5f254ad345350ee5cfc037f1130ccd98dd5fc0 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Feb 2026 16:18:01 +0100
Subject: [PATCH 03/10] test(openai): Pass stream=True in streaming TTFT tests

---
 tests/integrations/openai/test_openai.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index cb6f65565f..328c368ca2 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -3018,7 +3018,9 @@ def test_streaming_chat_completion_ttft(sentry_init, capture_events):
 
     with start_transaction(name="openai tx"):
         response_stream = client.chat.completions.create(
-            model="some-model", messages=[{"role": "user", "content": "Say hello"}]
+            model="some-model",
+            messages=[{"role": "user", "content": "Say hello"}],
+            stream=True,
         )
         # Consume the stream
         for _ in response_stream:
@@ -3082,7 +3084,9 @@ async def test_streaming_chat_completion_ttft_async(sentry_init, capture_events)
 
     with start_transaction(name="openai tx"):
         response_stream = await client.chat.completions.create(
-            model="some-model", messages=[{"role": "user", "content": "Say hello"}]
+            model="some-model",
+            messages=[{"role": "user", "content": "Say hello"}],
+            stream=True,
         )
         # Consume the stream
         async for _ in response_stream:

From 5a36e3dc0b41b7566024bce376bebb4b5a0677e7 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Feb 2026 16:21:10 +0100
Subject: [PATCH 04/10] test(openai): Reuse async_iterator for response stream

---
 tests/integrations/openai/test_openai.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 328c368ca2..0d583c8e01 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -2350,16 +2350,6 @@ async def test_ai_client_span_responses_async_api(
     assert spans[0]["data"] == expected_data
 
 
-async def example_response_stream():
-    yield EXAMPLE_RESPONSES_STREAM[0]
-    yield EXAMPLE_RESPONSES_STREAM[1]
-    yield EXAMPLE_RESPONSES_STREAM[2]
-    yield EXAMPLE_RESPONSES_STREAM[3]
-    yield EXAMPLE_RESPONSES_STREAM[4]
-
-    return
-
-
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "instructions",
@@ -2438,7 +2428,7 @@ async def test_ai_client_span_streaming_responses_async_api(
 
     client = AsyncOpenAI(api_key="z")
     returned_stream = AsyncStream(cast_to=None, response=None, client=client)
-    returned_stream._iterator = example_response_stream()
+    returned_stream._iterator = async_iterator(EXAMPLE_RESPONSES_STREAM)
     client.responses._post = mock.AsyncMock(return_value=returned_stream)
 
     with start_transaction(name="openai tx"):

From f1fdaf974e14d1d9bdeffa9012d6b5855bd0759f Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Feb 2026 16:30:22 +0100
Subject: [PATCH 05/10] test(openai): Remove unused imports

---
 tests/integrations/openai/test_openai.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 0d583c8e01..48453b573e 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1,8 +1,6 @@
 import json
 import pytest
 
-from typing import Union
-
 from sentry_sdk.utils import package_version
 
 try:
@@ -23,8 +21,6 @@
 from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as DeltaChoice
 from openai.types.create_embedding_response import Usage as EmbeddingTokenUsage
 
-from openai.types.responses.response_text_delta_event import ResponseTextDeltaEvent
-
 SKIP_RESPONSES_TESTS = False
 
 try:

From f7c5356dc3df099be56b6655768b5f8dbead7f96 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Feb 2026 16:34:19 +0100
Subject: [PATCH 06/10] test(openai): Drop stream=True in bad completion test

---
 tests/integrations/openai/test_openai.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 48453b573e..060600ee65 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -948,7 +948,6 @@ def test_bad_chat_completion(sentry_init, capture_events):
         client.chat.completions.create(
             model="some-model",
             messages=[{"role": "system", "content": "hello"}],
-            stream=True,
         )
 
     (event,) = events

From edf0a4f9c719d31014b15aed1b09edc749913459 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Feb 2026 17:11:32 +0100
Subject: [PATCH 07/10] remove unused parameter

---
 sentry_sdk/integrations/openai.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 1a408ef0b8..c5de0458b1 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -467,7 +467,6 @@ def _common_set_output_data(
     response: "Any",
     input: "Any",
     integration: "OpenAIIntegration",
-    start_time: "Optional[float]" = None,
     finish_span: bool = True,
 ) -> None:
     if hasattr(response, "model"):
@@ -565,7 +564,7 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any
         )
     else:
         _set_completions_api_output_data(
-            span, response, kwargs, integration, start_time, finish_span=True
+            span, response, kwargs, integration, finish_span=True
         )
 
     return response
@@ -576,7 +575,6 @@ def _set_completions_api_output_data(
     response: "Any",
     kwargs: "dict[str, Any]",
     integration: "OpenAIIntegration",
-    start_time: "Optional[float]" = None,
     finish_span: bool = True,
 ) -> None:
     messages = kwargs.get("messages")
@@ -589,7 +587,6 @@ def _set_completions_api_output_data(
         response,
         messages,
         integration,
-        start_time,
         finish_span,
     )
 
@@ -711,7 +708,6 @@ def _set_responses_api_output_data(
     response: "Any",
     kwargs: "dict[str, Any]",
     integration: "OpenAIIntegration",
-    start_time: "Optional[float]" = None,
     finish_span: bool = True,
 ) -> None:
     input = kwargs.get("input")
@@ -724,7 +720,6 @@ def _set_responses_api_output_data(
         response,
         input,
         integration,
-        start_time,
         finish_span,
     )
 
@@ -851,7 +846,6 @@ def _set_embeddings_output_data(
     response: "Any",
     kwargs: "dict[str, Any]",
     integration: "OpenAIIntegration",
-    start_time: "Optional[float]" = None,
     finish_span: bool = True,
 ) -> None:
     input = kwargs.get("input")
@@ -864,7 +858,6 @@ def _set_embeddings_output_data(
         response,
         input,
         integration,
-        start_time,
         finish_span,
     )
 
@@ -1052,7 +1045,7 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An
         )
     else:
         _set_responses_api_output_data(
-            span, response, kwargs, integration, start_time, finish_span=True
+            span, response, kwargs, integration, finish_span=True
         )
 
     return response

From 85e17915459f6199b31055c07667e6f916fd67fd Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Thu, 26 Feb 2026 17:26:55 +0100
Subject: [PATCH 08/10] ref(openai): Remove stale chat completion comment

---
 sentry_sdk/integrations/openai.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index c5de0458b1..169e0d8227 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -658,7 +658,6 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
         count_tokens_manually = True
         async for x in old_iterator:
             with capture_internal_exceptions():
-                # OpenAI chat completion API
                 if hasattr(x, "choices"):
                     choice_index = 0
                     for choice in x.choices:

From 3fb20f04e8b24025c15f704ad84481c702ce0612 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 27 Feb 2026 10:09:17 +0100
Subject: [PATCH 09/10] remove manual token counting for completions

---
 sentry_sdk/integrations/openai.py | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 169e0d8227..9d4b5cfd14 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -611,7 +611,6 @@ def _set_streaming_completions_api_output_data(
 
     def new_iterator() -> "Iterator[ChatCompletionChunk]":
         nonlocal ttft
-        count_tokens_manually = True
         for x in old_iterator:
             with capture_internal_exceptions():
                 if hasattr(x, "choices"):
@@ -641,21 +640,12 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
                     set_data_normalized(
                         span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                     )
-                if count_tokens_manually:
-                    _calculate_token_usage(
-                        messages,
-                        response,
-                        span,
-                        all_responses,
-                        integration.count_tokens,
-                    )
 
         if finish_span:
             span.__exit__(None, None, None)
 
     async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
         nonlocal ttft
-        count_tokens_manually = True
         async for x in old_iterator:
             with capture_internal_exceptions():
                 if hasattr(x, "choices"):
@@ -685,14 +675,7 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
                     set_data_normalized(
                         span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                     )
-                if count_tokens_manually:
-                    _calculate_token_usage(
-                        messages,
-                        response,
-                        span,
-                        all_responses,
-                        integration.count_tokens,
-                    )
+
         if finish_span:
             span.__exit__(None, None, None)
 

From 82843e2fd25f8555087e7420c75d3316314c03ec Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 27 Feb 2026 10:15:18 +0100
Subject: [PATCH 10/10] add unconditional manual token counting for completions

---
 sentry_sdk/integrations/openai.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 9d4b5cfd14..d4fe6e2c26 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -640,6 +640,13 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
                     set_data_normalized(
                         span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                     )
+                _calculate_token_usage(
+                    messages,
+                    response,
+                    span,
+                    all_responses,
+                    integration.count_tokens,
+                )
 
         if finish_span:
             span.__exit__(None, None, None)
@@ -675,6 +682,13 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
                     set_data_normalized(
                         span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                     )
+                _calculate_token_usage(
+                    messages,
+                    response,
+                    span,
+                    all_responses,
+                    integration.count_tokens,
+                )
 
         if finish_span:
             span.__exit__(None, None, None)