@@ -1638,6 +1638,95 @@ def test_anthropic_provider_subtracts_cache_tokens(mock_client):
     assert generation_args["properties"]["$ai_cache_read_input_tokens"] == 800
 
 
+def test_anthropic_provider_subtracts_cache_write_tokens(mock_client):
+    """Test that Anthropic provider correctly subtracts cache write tokens from input tokens."""
+    from langchain_core.outputs import LLMResult, ChatGeneration
+    from langchain_core.messages import AIMessage
+    from uuid import uuid4
+
+    cb = CallbackHandler(mock_client)
+    run_id = uuid4()
+
+    # Set up with Anthropic provider
+    cb._set_llm_metadata(
+        serialized={},
+        run_id=run_id,
+        messages=[{"role": "user", "content": "test"}],
+        metadata={"ls_provider": "anthropic", "ls_model_name": "claude-3-sonnet"},
+    )
+
+    # Response with cache creation: 1000 input (includes 800 being written to cache)
+    response = LLMResult(
+        generations=[
+            [
+                ChatGeneration(
+                    message=AIMessage(content="Response"),
+                    generation_info={
+                        "usage_metadata": {
+                            "input_tokens": 1000,
+                            "output_tokens": 50,
+                            "cache_creation_input_tokens": 800,
+                        }
+                    },
+                )
+            ]
+        ],
+        llm_output={},
+    )
+
+    cb._pop_run_and_capture_generation(run_id, None, response)
+
+    generation_args = mock_client.capture.call_args_list[0][1]
+    assert generation_args["properties"]["$ai_input_tokens"] == 200  # 1000 - 800
+    assert generation_args["properties"]["$ai_cache_creation_input_tokens"] == 800
+
+
+def test_anthropic_provider_subtracts_both_cache_read_and_write_tokens(mock_client):
+    """Test that Anthropic provider correctly subtracts both cache read and write tokens."""
+    from langchain_core.outputs import LLMResult, ChatGeneration
+    from langchain_core.messages import AIMessage
+    from uuid import uuid4
+
+    cb = CallbackHandler(mock_client)
+    run_id = uuid4()
+
+    # Set up with Anthropic provider
+    cb._set_llm_metadata(
+        serialized={},
+        run_id=run_id,
+        messages=[{"role": "user", "content": "test"}],
+        metadata={"ls_provider": "anthropic", "ls_model_name": "claude-3-sonnet"},
+    )
+
+    # Response with both cache read and creation
+    response = LLMResult(
+        generations=[
+            [
+                ChatGeneration(
+                    message=AIMessage(content="Response"),
+                    generation_info={
+                        "usage_metadata": {
+                            "input_tokens": 2000,
+                            "output_tokens": 50,
+                            "cache_read_input_tokens": 800,
+                            "cache_creation_input_tokens": 500,
+                        }
+                    },
+                )
+            ]
+        ],
+        llm_output={},
+    )
+
+    cb._pop_run_and_capture_generation(run_id, None, response)
+
+    generation_args = mock_client.capture.call_args_list[0][1]
+    # 2000 - 800 (read) - 500 (write) = 700
+    assert generation_args["properties"]["$ai_input_tokens"] == 700
+    assert generation_args["properties"]["$ai_cache_read_input_tokens"] == 800
+    assert generation_args["properties"]["$ai_cache_creation_input_tokens"] == 500
+
+
 def test_openai_cache_read_tokens(mock_client):
     """Test that OpenAI cache read tokens are captured correctly."""
     prompt = ChatPromptTemplate.from_messages(
@@ -2092,10 +2181,12 @@ def test_zero_input_tokens_with_cache_read(mock_client):
     assert generation_props["$ai_cache_read_input_tokens"] == 50
 
 
-def test_cache_write_tokens_not_subtracted_from_input(mock_client):
-    """Test that cache_creation_input_tokens (cache write) do NOT affect input_tokens.
+def test_non_anthropic_cache_write_tokens_not_subtracted_from_input(mock_client):
+    """Test that cache_creation_input_tokens do NOT affect input_tokens for non-Anthropic providers.
 
-    Only cache_read_tokens should be subtracted from input_tokens, not cache_write_tokens.
+    When no provider metadata is set (or for non-Anthropic providers), cache tokens should
+    NOT be subtracted from input_tokens. This is because different providers report tokens
+    differently - only Anthropic's LangChain integration requires subtraction.
     """
     prompt = ChatPromptTemplate.from_messages([("user", "Create cache")])
 
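For reference, a minimal sketch of the accounting these tests assert. The helper anthropic_net_input_tokens is hypothetical (not the handler's actual code); per the updated docstring, only Anthropic's LangChain integration requires subtraction, because its usage_metadata reports input_tokens inclusive of cache reads and writes.

def anthropic_net_input_tokens(input_tokens, cache_read=0, cache_creation=0):
    # Hypothetical helper, not the handler's real implementation: for Anthropic,
    # strip cache read and cache write tokens out of the reported input_tokens.
    return input_tokens - cache_read - cache_creation

# Mirrors the assertions in the new tests:
assert anthropic_net_input_tokens(1000, cache_creation=800) == 200
assert anthropic_net_input_tokens(2000, cache_read=800, cache_creation=500) == 700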