@@ -2265,6 +2265,156 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events):
     assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20


+def test_input_tokens_include_cached_nonstreaming(sentry_init, capture_events):
+    """
+    Test that gen_ai.usage.input_tokens includes cached tokens.
+
+    Anthropic's usage.input_tokens excludes cached/cache_write tokens,
+    but gen_ai.usage.input_tokens should be the TOTAL input tokens
+    (including cached + cache_write) so that downstream cost calculations
+    don't produce negative values.
+
+    See: negative gen_ai.cost.input_tokens bug when cache_read > input_tokens.
+    """
+    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
+    events = capture_events()
+    client = Anthropic(api_key="z")
+
+    # Simulate Anthropic response where input_tokens=100 EXCLUDES cached tokens;
+    # cache_read=80 and cache_write=20 are separate.
+    # Total input tokens processed = 100 + 80 + 20 = 200
+    client.messages._post = mock.Mock(
+        return_value=Message(
+            id="id",
+            model="claude-3-5-sonnet-20241022",
+            role="assistant",
+            content=[TextBlock(type="text", text="Response")],
+            type="message",
+            usage=Usage(
+                input_tokens=100,
+                output_tokens=50,
+                cache_read_input_tokens=80,
+                cache_creation_input_tokens=20,
+            ),
+        )
+    )
+
+    with start_transaction(name="anthropic"):
+        client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "Hello"}],
+            model="claude-3-5-sonnet-20241022",
+        )
+
+    (span,) = events[0]["spans"]
+
+    # input_tokens should be total: 100 (non-cached) + 80 (cache_read) + 20 (cache_write) = 200
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200
+
+    # total_tokens should include the full input count
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250  # 200 + 50
+
+    # Cache fields should still be reported correctly
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
+
+
+def test_input_tokens_include_cached_streaming(sentry_init, capture_events):
+    """
+    Test that gen_ai.usage.input_tokens includes cached tokens for streaming responses.
+
+    Same bug as non-streaming: Anthropic's input_tokens excludes cached tokens,
+    leading to negative cost calculations when cache_read > input_tokens.
+    """
+    client = Anthropic(api_key="z")
+    returned_stream = Stream(cast_to=None, response=None, client=client)
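+    # Anthropic streaming reports input usage (including the cache fields) on
+    # the message_start event and the final output token count on the
+    # message_delta event, so the fake stream carries both.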
+    returned_stream._iterator = [
+        MessageStartEvent(
+            type="message_start",
+            message=Message(
+                id="id",
+                model="claude-3-5-sonnet-20241022",
+                role="assistant",
+                content=[],
+                type="message",
+                usage=Usage(
+                    input_tokens=100,
+                    output_tokens=0,
+                    cache_read_input_tokens=80,
+                    cache_creation_input_tokens=20,
+                ),
+            ),
+        ),
+        MessageDeltaEvent(
+            type="message_delta",
+            delta=Delta(stop_reason="end_turn"),
+            usage=MessageDeltaUsage(output_tokens=50),
+        ),
+    ]
+
+    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
+    events = capture_events()
+    client.messages._post = mock.Mock(return_value=returned_stream)
+
+    with start_transaction(name="anthropic"):
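+        # Consume the stream so the integration can record the accumulated usage.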
+        for _ in client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "Hello"}],
+            model="claude-3-5-sonnet-20241022",
+            stream=True,
+        ):
+            pass
+
+    (span,) = events[0]["spans"]
+
+    # input_tokens should be total: 100 + 80 + 20 = 200
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200
+
+    # total_tokens should include the full input count
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250  # 200 + 50
+
+    # Cache fields should still be reported correctly
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20
+
+
+def test_input_tokens_unchanged_without_caching(sentry_init, capture_events):
+    """
+    Test that input_tokens is unchanged when there are no cached tokens.
+    Ensures the fix doesn't break the non-caching case.
+    """
+    sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0)
+    events = capture_events()
+    client = Anthropic(api_key="z")
+
+    client.messages._post = mock.Mock(
+        return_value=Message(
+            id="id",
+            model="claude-3-5-sonnet-20241022",
+            role="assistant",
+            content=[TextBlock(type="text", text="Response")],
+            type="message",
+            usage=Usage(
+                input_tokens=100,
+                output_tokens=50,
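+                # cache_read/cache_creation fields omitted: no caching in this scenario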
+            ),
+        )
+    )
+
+    with start_transaction(name="anthropic"):
+        client.messages.create(
+            max_tokens=1024,
+            messages=[{"role": "user", "content": "Hello"}],
+            model="claude-3-5-sonnet-20241022",
+        )
+
+    (span,) = events[0]["spans"]
+
+    # Without caching, input_tokens should remain as-is
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 100
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 150  # 100 + 50
+
+
 def test_cache_tokens_streaming(sentry_init, capture_events):
     """Test cache tokens are tracked for streaming responses."""
     client = Anthropic(api_key="z")