From 6198542f3a510a5f38dda6b6a3938161d03be5ac Mon Sep 17 00:00:00 2001
From: Stanley Chiu
Date: Sun, 1 Feb 2026 19:24:41 +0800
Subject: [PATCH 1/2] fix: include full usage details in generation span usage

---
 src/agents/extensions/models/litellm_model.py | 12 ++++++++++++
 src/agents/models/openai_chatcompletions.py   | 12 ++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/src/agents/extensions/models/litellm_model.py b/src/agents/extensions/models/litellm_model.py
index 3fb0c20ea1..49611b97ef 100644
--- a/src/agents/extensions/models/litellm_model.py
+++ b/src/agents/extensions/models/litellm_model.py
@@ -234,8 +234,12 @@ async def get_response(
                     [message.model_dump()] if message is not None else []
                 )
             span_generation.span_data.usage = {
+                "requests": usage.requests,
                 "input_tokens": usage.input_tokens,
                 "output_tokens": usage.output_tokens,
+                "total_tokens": usage.total_tokens,
+                "input_tokens_details": usage.input_tokens_details.model_dump(),
+                "output_tokens_details": usage.output_tokens_details.model_dump(),
             }
 
             # Build provider_data for provider specific fields
@@ -304,8 +308,16 @@ async def stream_response(
 
             if final_response and final_response.usage:
                 span_generation.span_data.usage = {
+                    "requests": 1,
                     "input_tokens": final_response.usage.input_tokens,
                     "output_tokens": final_response.usage.output_tokens,
+                    "total_tokens": final_response.usage.total_tokens,
+                    "input_tokens_details": (
+                        final_response.usage.input_tokens_details.model_dump()
+                    ),
+                    "output_tokens_details": (
+                        final_response.usage.output_tokens_details.model_dump()
+                    ),
                 }
 
     @overload
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index 2ca4c11979..5e28f90fdd 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -119,8 +119,12 @@ async def get_response(
                     [message.model_dump()] if message is not None else []
                 )
             span_generation.span_data.usage = {
+                "requests": usage.requests,
                 "input_tokens": usage.input_tokens,
                 "output_tokens": usage.output_tokens,
+                "total_tokens": usage.total_tokens,
+                "input_tokens_details": usage.input_tokens_details.model_dump(),
+                "output_tokens_details": usage.output_tokens_details.model_dump(),
             }
 
             # Build provider_data for provider_specific_fields
@@ -209,8 +213,16 @@ async def stream_response(
 
             if final_response and final_response.usage:
                 span_generation.span_data.usage = {
+                    "requests": 1,
                     "input_tokens": final_response.usage.input_tokens,
                     "output_tokens": final_response.usage.output_tokens,
+                    "total_tokens": final_response.usage.total_tokens,
+                    "input_tokens_details": (
+                        final_response.usage.input_tokens_details.model_dump()
+                    ),
+                    "output_tokens_details": (
+                        final_response.usage.output_tokens_details.model_dump()
+                    ),
                 }
 
     @overload

From 9eb5ea54d9fd32577180e4eb0c6d93f0d07efefb Mon Sep 17 00:00:00 2001
From: Stanley Chiu
Date: Mon, 2 Feb 2026 12:43:02 +0800
Subject: [PATCH 2/2] fix: guard streamed usage detail dumps

---
 src/agents/extensions/models/litellm_model.py | 4 ++++
 src/agents/models/openai_chatcompletions.py   | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/src/agents/extensions/models/litellm_model.py b/src/agents/extensions/models/litellm_model.py
index 49611b97ef..fd80151afb 100644
--- a/src/agents/extensions/models/litellm_model.py
+++ b/src/agents/extensions/models/litellm_model.py
@@ -314,9 +314,13 @@ async def stream_response(
                     "total_tokens": final_response.usage.total_tokens,
                     "input_tokens_details": (
                         final_response.usage.input_tokens_details.model_dump()
+                        if final_response.usage.input_tokens_details
+                        else {"cached_tokens": 0}
                     ),
                     "output_tokens_details": (
                         final_response.usage.output_tokens_details.model_dump()
+                        if final_response.usage.output_tokens_details
+                        else {"reasoning_tokens": 0}
                     ),
                 }
 
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index 5e28f90fdd..560fb3fe6a 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -219,9 +219,13 @@ async def stream_response(
                     "total_tokens": final_response.usage.total_tokens,
                     "input_tokens_details": (
                         final_response.usage.input_tokens_details.model_dump()
+                        if final_response.usage.input_tokens_details
+                        else {"cached_tokens": 0}
                     ),
                     "output_tokens_details": (
                         final_response.usage.output_tokens_details.model_dump()
+                        if final_response.usage.output_tokens_details
+                        else {"reasoning_tokens": 0}
                     ),
                 }
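
For reference, a minimal sketch of the span usage payload these hunks build, assuming
hypothetical stand-in pydantic models (InputTokensDetails, OutputTokensDetails, Usage)
and a hypothetical helper span_usage_payload; the real types come from the openai and
agents packages and may differ in detail. The helper mirrors the dict written to
span_generation.span_data.usage, including the guards added in PATCH 2/2 for providers
that omit the token-detail blocks.

from typing import Optional

from pydantic import BaseModel  # pydantic v2 is assumed, for model_dump()


class InputTokensDetails(BaseModel):
    # Stand-in for the real input-token detail block.
    cached_tokens: int = 0


class OutputTokensDetails(BaseModel):
    # Stand-in for the real output-token detail block.
    reasoning_tokens: int = 0


class Usage(BaseModel):
    # Stand-in for the aggregated usage object used in the patched code.
    requests: int = 1
    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0
    input_tokens_details: Optional[InputTokensDetails] = None
    output_tokens_details: Optional[OutputTokensDetails] = None


def span_usage_payload(usage: Usage) -> dict:
    # Build the same dict shape the patch assigns to span_generation.span_data.usage,
    # falling back to zeroed detail blocks when a provider omits them.
    return {
        "requests": usage.requests,
        "input_tokens": usage.input_tokens,
        "output_tokens": usage.output_tokens,
        "total_tokens": usage.total_tokens,
        "input_tokens_details": (
            usage.input_tokens_details.model_dump()
            if usage.input_tokens_details
            else {"cached_tokens": 0}
        ),
        "output_tokens_details": (
            usage.output_tokens_details.model_dump()
            if usage.output_tokens_details
            else {"reasoning_tokens": 0}
        ),
    }


if __name__ == "__main__":
    # A response without detail blocks still yields a fully populated payload.
    print(span_usage_payload(Usage(input_tokens=12, output_tokens=5, total_tokens=17)))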