
Commit f1c6da2

fix: double counting anthropic langchain (#399)
1 parent 7ac63e1 commit f1c6da2

4 files changed: +111 −13 lines changed


CHANGELOG.md

Lines changed: 5 additions & 0 deletions
@@ -1,3 +1,7 @@
+# 7.4.3 - 2026-01-02
+
+Fixes cache creation cost for Langchain with Anthropic
+
 # 7.4.2 - 2025-12-22
 
 feat: add `in_app_modules` option to control code variables capturing
@@ -13,6 +17,7 @@ When using OpenAI stored prompts, the model is defined in the OpenAI dashboard r
 feat: Add automatic retries for feature flag requests
 
 Feature flag API requests now automatically retry on transient failures:
+
 - Network errors (connection refused, DNS failures, timeouts)
 - Server errors (500, 502, 503, 504)
 - Up to 2 retries with exponential backoff (0.5s, 1s delays)
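As a rough illustration of the retry behavior described in that changelog entry (not the SDK's actual implementation; the helper name fetch_flags_with_retries and the TRANSIENT_STATUS set are assumptions for this sketch):

import time

TRANSIENT_STATUS = {500, 502, 503, 504}  # server errors treated as retryable
MAX_RETRIES = 2
BACKOFF_DELAYS = [0.5, 1.0]  # exponential backoff between attempts

def fetch_flags_with_retries(fetch):
    """Call `fetch` (any callable returning a response object) with up to 2 retries."""
    last_response = None
    for attempt in range(MAX_RETRIES + 1):
        try:
            last_response = fetch()
            if last_response.status_code not in TRANSIENT_STATUS:
                return last_response  # success, or an error we should not retry
        except OSError:
            pass  # network error: connection refused, DNS failure, timeout
        if attempt < MAX_RETRIES:
            time.sleep(BACKOFF_DELAYS[attempt])
    return last_response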

posthog/ai/langchain/callbacks.py

Lines changed: 11 additions & 9 deletions
@@ -773,24 +773,26 @@ def _parse_usage_model(
             for mapped_key, dataclass_key in field_mapping.items()
         },
     )
-    # For Anthropic providers, LangChain reports input_tokens as the sum of input and cache read tokens.
+    # For Anthropic providers, LangChain reports input_tokens as the sum of all input tokens.
     # Our cost calculation expects them to be separate for Anthropic, so we subtract cache tokens.
-    # For other providers (OpenAI, etc.), input_tokens already includes cache tokens as expected.
+    # Both cache_read and cache_write tokens should be subtracted since Anthropic's raw API
+    # reports input_tokens as tokens NOT read from or used to create a cache.
+    # For other providers (OpenAI, etc.), input_tokens already excludes cache tokens as expected.
     # Match logic consistent with plugin-server: exact match on provider OR substring match on model
     is_anthropic = False
     if provider and provider.lower() == "anthropic":
         is_anthropic = True
     elif model and "anthropic" in model.lower():
         is_anthropic = True
 
-    if (
-        is_anthropic
-        and normalized_usage.input_tokens
-        and normalized_usage.cache_read_tokens
-    ):
-        normalized_usage.input_tokens = max(
-            normalized_usage.input_tokens - normalized_usage.cache_read_tokens, 0
+    if is_anthropic and normalized_usage.input_tokens:
+        cache_tokens = (normalized_usage.cache_read_tokens or 0) + (
+            normalized_usage.cache_write_tokens or 0
         )
+        if cache_tokens > 0:
+            normalized_usage.input_tokens = max(
+                normalized_usage.input_tokens - cache_tokens, 0
+            )
     return normalized_usage
 
 
posthog/test/ai/langchain/test_callbacks.py

Lines changed: 94 additions & 3 deletions
@@ -1638,6 +1638,95 @@ def test_anthropic_provider_subtracts_cache_tokens(mock_client):
     assert generation_args["properties"]["$ai_cache_read_input_tokens"] == 800
 
 
+def test_anthropic_provider_subtracts_cache_write_tokens(mock_client):
+    """Test that Anthropic provider correctly subtracts cache write tokens from input tokens."""
+    from langchain_core.outputs import LLMResult, ChatGeneration
+    from langchain_core.messages import AIMessage
+    from uuid import uuid4
+
+    cb = CallbackHandler(mock_client)
+    run_id = uuid4()
+
+    # Set up with Anthropic provider
+    cb._set_llm_metadata(
+        serialized={},
+        run_id=run_id,
+        messages=[{"role": "user", "content": "test"}],
+        metadata={"ls_provider": "anthropic", "ls_model_name": "claude-3-sonnet"},
+    )
+
+    # Response with cache creation: 1000 input (includes 800 being written to cache)
+    response = LLMResult(
+        generations=[
+            [
+                ChatGeneration(
+                    message=AIMessage(content="Response"),
+                    generation_info={
+                        "usage_metadata": {
+                            "input_tokens": 1000,
+                            "output_tokens": 50,
+                            "cache_creation_input_tokens": 800,
+                        }
+                    },
+                )
+            ]
+        ],
+        llm_output={},
+    )
+
+    cb._pop_run_and_capture_generation(run_id, None, response)
+
+    generation_args = mock_client.capture.call_args_list[0][1]
+    assert generation_args["properties"]["$ai_input_tokens"] == 200  # 1000 - 800
+    assert generation_args["properties"]["$ai_cache_creation_input_tokens"] == 800
+
+
+def test_anthropic_provider_subtracts_both_cache_read_and_write_tokens(mock_client):
+    """Test that Anthropic provider correctly subtracts both cache read and write tokens."""
+    from langchain_core.outputs import LLMResult, ChatGeneration
+    from langchain_core.messages import AIMessage
+    from uuid import uuid4
+
+    cb = CallbackHandler(mock_client)
+    run_id = uuid4()
+
+    # Set up with Anthropic provider
+    cb._set_llm_metadata(
+        serialized={},
+        run_id=run_id,
+        messages=[{"role": "user", "content": "test"}],
+        metadata={"ls_provider": "anthropic", "ls_model_name": "claude-3-sonnet"},
+    )
+
+    # Response with both cache read and creation
+    response = LLMResult(
+        generations=[
+            [
+                ChatGeneration(
+                    message=AIMessage(content="Response"),
+                    generation_info={
+                        "usage_metadata": {
+                            "input_tokens": 2000,
+                            "output_tokens": 50,
+                            "cache_read_input_tokens": 800,
+                            "cache_creation_input_tokens": 500,
+                        }
+                    },
+                )
+            ]
+        ],
+        llm_output={},
+    )
+
+    cb._pop_run_and_capture_generation(run_id, None, response)
+
+    generation_args = mock_client.capture.call_args_list[0][1]
+    # 2000 - 800 (read) - 500 (write) = 700
+    assert generation_args["properties"]["$ai_input_tokens"] == 700
+    assert generation_args["properties"]["$ai_cache_read_input_tokens"] == 800
+    assert generation_args["properties"]["$ai_cache_creation_input_tokens"] == 500
+
+
 def test_openai_cache_read_tokens(mock_client):
     """Test that OpenAI cache read tokens are captured correctly."""
     prompt = ChatPromptTemplate.from_messages(
@@ -2092,10 +2181,12 @@ def test_zero_input_tokens_with_cache_read(mock_client):
     assert generation_props["$ai_cache_read_input_tokens"] == 50
 
 
-def test_cache_write_tokens_not_subtracted_from_input(mock_client):
-    """Test that cache_creation_input_tokens (cache write) do NOT affect input_tokens.
+def test_non_anthropic_cache_write_tokens_not_subtracted_from_input(mock_client):
+    """Test that cache_creation_input_tokens do NOT affect input_tokens for non-Anthropic providers.
 
-    Only cache_read_tokens should be subtracted from input_tokens, not cache_write_tokens.
+    When no provider metadata is set (or for non-Anthropic providers), cache tokens should
+    NOT be subtracted from input_tokens. This is because different providers report tokens
+    differently - only Anthropic's LangChain integration requires subtraction.
     """
     prompt = ChatPromptTemplate.from_messages([("user", "Create cache")])
 
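For context, a rough end-to-end usage sketch of the path these tests exercise (assumes the langchain-anthropic package, a real PostHog project key, and an Anthropic API key; the model name is only an example):

from posthog import Posthog
from posthog.ai.langchain import CallbackHandler
from langchain_anthropic import ChatAnthropic

posthog = Posthog("<project_api_key>", host="https://us.i.posthog.com")
callback = CallbackHandler(posthog)

model = ChatAnthropic(model="claude-3-5-sonnet-latest")
# With Anthropic prompt caching in play, the captured $ai_input_tokens now excludes
# both cache reads and cache writes, so cached tokens are no longer double counted.
result = model.invoke("Summarize our docs", config={"callbacks": [callback]})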

posthog/version.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-VERSION = "7.4.2"
+VERSION = "7.4.3"
 
 if __name__ == "__main__":
     print(VERSION, end="")  # noqa: T201

0 commit comments
