Merge branch 'webb/openai/add-response-model' into webb/openai/add-response-model-completions

alexander-alderman-webb · alexander-alderman-webb · commit b07d1df59ea9 · 2026-02-27T10:11:26.000+01:00
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
@@ -611,7 +611,6 @@ def _set_streaming_completions_api_output_data(
 
     def new_iterator() -> "Iterator[ChatCompletionChunk]":
         nonlocal ttft
-        count_tokens_manually = True
         for x in old_iterator:
             span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model)
 
@@ -643,21 +642,12 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
                     set_data_normalized(
                         span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                     )
-                if count_tokens_manually:
-                    _calculate_token_usage(
-                        messages,
-                        response,
-                        span,
-                        all_responses,
-                        integration.count_tokens,
-                    )
 
         if finish_span:
             span.__exit__(None, None, None)
 
     async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
         nonlocal ttft
-        count_tokens_manually = True
         async for x in old_iterator:
             span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model)
 
@@ -689,14 +679,7 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
                     set_data_normalized(
                         span, SPANDATA.GEN_AI_RESPONSE_TEXT, all_responses
                     )
-                if count_tokens_manually:
-                    _calculate_token_usage(
-                        messages,
-                        response,
-                        span,
-                        all_responses,
-                        integration.count_tokens,
-                    )
+
         if finish_span:
             span.__exit__(None, None, None)