Skip to content

Commit 150e24b

Browse files
authored
feat(llma): add $ai_tokens_source property to detect token value overrides (#444)
* feat: add $ai_tokens_source property to detect token value overrides

  When users pass token properties (e.g. $ai_input_tokens) via posthog_properties, these override the SDK-computed values. This new $ai_tokens_source property ("sdk" or "passthrough") lets us distinguish whether token values came from the SDK or were externally injected, which is critical for diagnosing cost calculation discrepancies.
* chore: bump version to 7.9.4
* chore: add changelog entry for 7.9.4
* chore: fix ruff formatting
* chore: remove unused pytest import
1 parent a8b5529 commit 150e24b

6 files changed

Lines changed: 138 additions & 12 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# posthog
22

3+
## 7.9.4 — 2026-02-25
4+
5+
feat(llma): add `$ai_tokens_source` property ("sdk" or "passthrough") to all `$ai_generation` events to detect when token values are externally overridden via `posthog_properties`
6+
37
## 7.9.3 — 2026-02-18
48

59
### Patch changes

posthog/ai/utils.py

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,28 @@
1313
from posthog.client import Client as PostHogClient
1414

1515

16+
# Event property names that carry token counts. If a caller supplies any of
# these through ``posthog_properties``, the value on the emitted event is the
# caller's, not the one computed by the SDK.
_TOKEN_PROPERTY_KEYS = frozenset(
    {
        "$ai_input_tokens",
        "$ai_output_tokens",
        "$ai_cache_read_input_tokens",
        "$ai_cache_creation_input_tokens",
        "$ai_total_tokens",
        "$ai_reasoning_tokens",
    }
)


def _get_tokens_source(
    sdk_tags: Dict[str, Any], posthog_properties: Optional[Dict[str, Any]]
) -> str:
    """
    Classify where the token values on an event come from.

    Args:
        sdk_tags: Properties computed by the SDK. Not consulted by the
            current implementation; kept so existing call sites keep working.
        posthog_properties: Caller-supplied properties, or ``None``.

    Returns:
        ``"passthrough"`` when ``posthog_properties`` contains at least one
        key from ``_TOKEN_PROPERTY_KEYS``; ``"sdk"`` otherwise (including
        when ``posthog_properties`` is ``None`` or empty).
    """
    # Only the presence of a token key matters; the supplied value is not
    # compared against anything in ``sdk_tags``.
    if posthog_properties and any(
        key in posthog_properties for key in _TOKEN_PROPERTY_KEYS
    ):
        return "passthrough"
    return "sdk"
36+
37+
1638
def serialize_raw_usage(raw_usage: Any) -> Optional[Dict[str, Any]]:
1739
"""
1840
Convert raw provider usage objects to JSON-serializable dicts.
@@ -413,14 +435,19 @@ def call_llm_and_track_usage(
413435

414436
# send the event to posthog
415437
if hasattr(ph_client, "capture") and callable(ph_client.capture):
438+
sdk_tags = get_tags()
439+
merged_properties = {
440+
**sdk_tags,
441+
**(posthog_properties or {}),
442+
**(error_params or {}),
443+
}
444+
merged_properties["$ai_tokens_source"] = _get_tokens_source(
445+
sdk_tags, posthog_properties
446+
)
416447
ph_client.capture(
417448
distinct_id=posthog_distinct_id or posthog_trace_id,
418449
event="$ai_generation",
419-
properties={
420-
**get_tags(),
421-
**(posthog_properties or {}),
422-
**(error_params or {}),
423-
},
450+
properties=merged_properties,
424451
groups=posthog_groups,
425452
)
426453

@@ -543,14 +570,19 @@ async def call_llm_and_track_usage_async(
543570

544571
# send the event to posthog
545572
if hasattr(ph_client, "capture") and callable(ph_client.capture):
573+
sdk_tags = get_tags()
574+
merged_properties = {
575+
**sdk_tags,
576+
**(posthog_properties or {}),
577+
**(error_params or {}),
578+
}
579+
merged_properties["$ai_tokens_source"] = _get_tokens_source(
580+
sdk_tags, posthog_properties
581+
)
546582
ph_client.capture(
547583
distinct_id=posthog_distinct_id or posthog_trace_id,
548584
event="$ai_generation",
549-
properties={
550-
**get_tags(),
551-
**(posthog_properties or {}),
552-
**(error_params or {}),
553-
},
585+
properties=merged_properties,
554586
groups=posthog_groups,
555587
)
556588

@@ -627,6 +659,15 @@ def capture_streaming_event(
627659
**(event_data.get("properties") or {}),
628660
}
629661

662+
# Determine token source: SDK-computed vs externally overridden
663+
sdk_token_tags = {
664+
"$ai_input_tokens": event_data["usage_stats"].get("input_tokens", 0),
665+
"$ai_output_tokens": event_data["usage_stats"].get("output_tokens", 0),
666+
}
667+
event_properties["$ai_tokens_source"] = _get_tokens_source(
668+
sdk_token_tags, event_data.get("properties")
669+
)
670+
630671
# Extract and add tools based on provider
631672
available_tools = extract_available_tool_calls(
632673
event_data["provider"],

posthog/test/ai/anthropic/test_anthropic.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ def test_basic_completion(mock_client, mock_anthropic_response):
306306
assert props["$ai_output_tokens"] == 10
307307
assert props["$ai_http_status"] == 200
308308
assert props["foo"] == "bar"
309+
assert props["$ai_tokens_source"] == "sdk"
309310
assert isinstance(props["$ai_latency"], float)
310311
# Verify raw usage metadata is passed for backend processing
311312
assert "$ai_usage" in props
@@ -318,6 +319,23 @@ def test_basic_completion(mock_client, mock_anthropic_response):
318319
assert "output_tokens" in props["$ai_usage"]
319320

320321

322+
def test_tokens_source_passthrough(mock_client, mock_anthropic_response):
    """A token key passed via ``posthog_properties`` is reported as "passthrough".

    Also checks that the caller-supplied ``$ai_input_tokens`` value is the one
    that ends up on the captured event.
    """
    with patch(
        "anthropic.resources.Messages.create", return_value=mock_anthropic_response
    ):
        client = Anthropic(api_key="test-key", posthog_client=mock_client)
        client.messages.create(
            model="claude-3-opus-20240229",
            messages=[{"role": "user", "content": "Hello"}],
            posthog_distinct_id="test-id",
            posthog_properties={"$ai_input_tokens": 99999},
        )

    # Inspect the keyword arguments of the most recent capture() call.
    props = mock_client.capture.call_args[1]["properties"]
    assert props["$ai_tokens_source"] == "passthrough"
    assert props["$ai_input_tokens"] == 99999
337+
338+
321339
def test_groups(mock_client, mock_anthropic_response):
322340
with patch(
323341
"anthropic.resources.Messages.create", return_value=mock_anthropic_response
@@ -927,6 +945,7 @@ def test_streaming_with_tool_calls(mock_client, mock_anthropic_stream_with_tools
927945
assert props["$ai_output_tokens"] == 25
928946
assert props["$ai_cache_read_input_tokens"] == 5
929947
assert props["$ai_cache_creation_input_tokens"] == 0
948+
assert props["$ai_tokens_source"] == "sdk"
930949

931950
# Verify raw usage is captured in streaming mode (merged from events)
932951
assert "$ai_usage" in props
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
from parameterized import parameterized
2+
3+
from posthog.ai.utils import _get_tokens_source
4+
5+
6+
@parameterized.expand(
    [
        ("no_posthog_properties", {"$ai_input_tokens": 100}, None, "sdk"),
        ("empty_posthog_properties", {"$ai_input_tokens": 100}, {}, "sdk"),
        (
            "unrelated_posthog_properties",
            {"$ai_input_tokens": 100},
            {"foo": "bar"},
            "sdk",
        ),
        (
            "override_input_tokens",
            {"$ai_input_tokens": 100},
            {"$ai_input_tokens": 999},
            "passthrough",
        ),
        (
            "override_output_tokens",
            {"$ai_output_tokens": 50},
            {"$ai_output_tokens": 999},
            "passthrough",
        ),
        (
            "override_total_tokens",
            {"$ai_input_tokens": 100},
            {"$ai_total_tokens": 999},
            "passthrough",
        ),
        (
            "override_cache_read",
            {"$ai_input_tokens": 100},
            {"$ai_cache_read_input_tokens": 500},
            "passthrough",
        ),
        (
            "override_cache_creation",
            {"$ai_input_tokens": 100},
            {"$ai_cache_creation_input_tokens": 200},
            "passthrough",
        ),
        (
            "override_reasoning_tokens",
            {"$ai_input_tokens": 100},
            {"$ai_reasoning_tokens": 300},
            "passthrough",
        ),
        (
            "mixed_override_and_custom",
            {"$ai_input_tokens": 100},
            {"$ai_input_tokens": 999, "custom_key": "value"},
            "passthrough",
        ),
    ]
)
def test_get_tokens_source(name, sdk_tags, posthog_properties, expected):
    """Check ``_get_tokens_source`` against each (sdk_tags, posthog_properties) case.

    ``name`` is the case label from the table above; it is not used in the
    body.
    """
    result = _get_tokens_source(sdk_tags, posthog_properties)
    assert result == expected

posthog/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
VERSION = "7.9.3"
1+
VERSION = "7.9.4"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "posthog"
7-
version = "7.9.3"
7+
version = "7.9.4"
88
description = "Integrate PostHog into any python application."
99
authors = [{ name = "PostHog", email = "hey@posthog.com" }]
1010
maintainers = [{ name = "PostHog", email = "hey@posthog.com" }]

0 commit comments

Comments
 (0)