Skip to content

Commit ad8b75b

Browse files
authored
Merge branch 'master' into feat/span-first-2
2 parents 04667ce + 78c6011 commit ad8b75b

7 files changed

Lines changed: 301 additions & 94 deletions

File tree

sentry_sdk/ai/_openai_completions_api.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44

55
if TYPE_CHECKING:
66
from sentry_sdk._types import TextPart
7+
from typing import Union
78

89
from openai.types.chat import (
910
ChatCompletionMessageParam,
1011
ChatCompletionSystemMessageParam,
12+
ChatCompletionContentPartParam,
1113
)
1214

1315

@@ -24,6 +26,25 @@ def _get_system_instructions(
2426
return [message for message in messages if _is_system_instruction(message)]
2527

2628

29+
def _get_text_items(
30+
content: "Union[str, Iterable[ChatCompletionContentPartParam]]",
31+
) -> "list[str]":
32+
if isinstance(content, str):
33+
return [content]
34+
35+
if not isinstance(content, Iterable):
36+
return []
37+
38+
text_items = []
39+
for part in content:
40+
if isinstance(part, dict) and part.get("type") == "text":
41+
text = part.get("text", None)
42+
if text is not None:
43+
text_items.append(text)
44+
45+
return text_items
46+
47+
2748
def _transform_system_instructions(
2849
system_instructions: "list[ChatCompletionSystemMessageParam]",
2950
) -> "list[TextPart]":
@@ -34,15 +55,12 @@ def _transform_system_instructions(
3455
continue
3556

3657
content = instruction.get("content")
58+
if content is None:
59+
continue
3760

38-
if isinstance(content, str):
39-
instruction_text_parts.append({"type": "text", "content": content})
40-
41-
elif isinstance(content, list):
42-
for part in content:
43-
if isinstance(part, dict) and part.get("type") == "text":
44-
text = part.get("text", None)
45-
if text is not None:
46-
instruction_text_parts.append({"type": "text", "content": text})
61+
text_parts: "list[TextPart]" = [
62+
{"type": "text", "content": text} for text in _get_text_items(content)
63+
]
64+
instruction_text_parts += text_parts
4765

4866
return instruction_text_parts

sentry_sdk/integrations/anthropic.py

Lines changed: 75 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,13 @@
5050
from sentry_sdk._types import TextPart
5151

5252

53+
class _RecordedUsage:
    """Mutable accumulator for token-usage counts collected while consuming a
    streamed Anthropic message response (populated by ``_collect_ai_data``)."""

    # Cumulative tokens produced by the model so far.
    output_tokens: int = 0
    # Tokens consumed from the prompt/input.
    input_tokens: int = 0
    # Prompt-cache write/read token counts; set to None when the streamed
    # usage payload does not report the corresponding field.
    cache_write_input_tokens: "Optional[int]" = 0
    cache_read_input_tokens: "Optional[int]" = 0
58+
59+
5360
class AnthropicIntegration(Integration):
5461
identifier = "anthropic"
5562
origin = f"auto.ai.{identifier}"
@@ -112,31 +119,15 @@ def _get_token_usage(result: "Messages") -> "tuple[int, int, int, int]":
112119
def _collect_ai_data(
113120
event: "MessageStreamEvent",
114121
model: "str | None",
115-
input_tokens: int,
116-
output_tokens: int,
117-
cache_read_input_tokens: int,
118-
cache_write_input_tokens: int,
122+
usage: "_RecordedUsage",
119123
content_blocks: "list[str]",
120-
) -> "tuple[str | None, int, int, int, int, list[str]]":
124+
) -> "tuple[str | None, _RecordedUsage, list[str]]":
121125
"""
122126
Collect model information, token usage, and collect content blocks from the AI streaming response.
123127
"""
124128
with capture_internal_exceptions():
125129
if hasattr(event, "type"):
126-
if event.type == "message_start":
127-
usage = event.message.usage
128-
input_tokens += usage.input_tokens
129-
output_tokens += usage.output_tokens
130-
if hasattr(usage, "cache_read_input_tokens") and isinstance(
131-
usage.cache_read_input_tokens, int
132-
):
133-
cache_read_input_tokens += usage.cache_read_input_tokens
134-
if hasattr(usage, "cache_creation_input_tokens") and isinstance(
135-
usage.cache_creation_input_tokens, int
136-
):
137-
cache_write_input_tokens += usage.cache_creation_input_tokens
138-
model = event.message.model or model
139-
elif event.type == "content_block_start":
130+
if event.type == "content_block_start":
140131
pass
141132
elif event.type == "content_block_delta":
142133
if hasattr(event.delta, "text"):
@@ -145,15 +136,60 @@ def _collect_ai_data(
145136
content_blocks.append(event.delta.partial_json)
146137
elif event.type == "content_block_stop":
147138
pass
148-
elif event.type == "message_delta":
149-
output_tokens += event.usage.output_tokens
139+
140+
# Token counting logic mirrors anthropic SDK, which also extracts already accumulated tokens.
141+
# https://github.com/anthropics/anthropic-sdk-python/blob/9c485f6966e10ae0ea9eabb3a921d2ea8145a25b/src/anthropic/lib/streaming/_messages.py#L433-L518
142+
if event.type == "message_start":
143+
model = event.message.model or model
144+
145+
incoming_usage = event.message.usage
146+
usage.output_tokens = incoming_usage.output_tokens
147+
usage.input_tokens = incoming_usage.input_tokens
148+
149+
usage.cache_write_input_tokens = getattr(
150+
incoming_usage, "cache_creation_input_tokens", None
151+
)
152+
usage.cache_read_input_tokens = getattr(
153+
incoming_usage, "cache_read_input_tokens", None
154+
)
155+
156+
return (
157+
model,
158+
usage,
159+
content_blocks,
160+
)
161+
162+
# Counterintuitive, but message_delta contains cumulative token counts :)
163+
if event.type == "message_delta":
164+
usage.output_tokens = event.usage.output_tokens
165+
166+
# Update other usage fields if they exist in the event
167+
input_tokens = getattr(event.usage, "input_tokens", None)
168+
if input_tokens is not None:
169+
usage.input_tokens = input_tokens
170+
171+
cache_creation_input_tokens = getattr(
172+
event.usage, "cache_creation_input_tokens", None
173+
)
174+
if cache_creation_input_tokens is not None:
175+
usage.cache_write_input_tokens = cache_creation_input_tokens
176+
177+
cache_read_input_tokens = getattr(
178+
event.usage, "cache_read_input_tokens", None
179+
)
180+
if cache_read_input_tokens is not None:
181+
usage.cache_read_input_tokens = cache_read_input_tokens
182+
# TODO: Record event.usage.server_tool_use
183+
184+
return (
185+
model,
186+
usage,
187+
content_blocks,
188+
)
150189

151190
return (
152191
model,
153-
input_tokens,
154-
output_tokens,
155-
cache_read_input_tokens,
156-
cache_write_input_tokens,
192+
usage,
157193
content_blocks,
158194
)
159195

@@ -414,27 +450,18 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "A
414450

415451
def new_iterator() -> "Iterator[MessageStreamEvent]":
416452
model = None
417-
input_tokens = 0
418-
output_tokens = 0
419-
cache_read_input_tokens = 0
420-
cache_write_input_tokens = 0
453+
usage = _RecordedUsage()
421454
content_blocks: "list[str]" = []
422455

423456
for event in old_iterator:
424457
(
425458
model,
426-
input_tokens,
427-
output_tokens,
428-
cache_read_input_tokens,
429-
cache_write_input_tokens,
459+
usage,
430460
content_blocks,
431461
) = _collect_ai_data(
432462
event,
433463
model,
434-
input_tokens,
435-
output_tokens,
436-
cache_read_input_tokens,
437-
cache_write_input_tokens,
464+
usage,
438465
content_blocks,
439466
)
440467
yield event
@@ -443,37 +470,28 @@ def new_iterator() -> "Iterator[MessageStreamEvent]":
443470
span=span,
444471
integration=integration,
445472
model=model,
446-
input_tokens=input_tokens,
447-
output_tokens=output_tokens,
448-
cache_read_input_tokens=cache_read_input_tokens,
449-
cache_write_input_tokens=cache_write_input_tokens,
473+
input_tokens=usage.input_tokens,
474+
output_tokens=usage.output_tokens,
475+
cache_read_input_tokens=usage.cache_read_input_tokens,
476+
cache_write_input_tokens=usage.cache_write_input_tokens,
450477
content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
451478
finish_span=True,
452479
)
453480

454481
async def new_iterator_async() -> "AsyncIterator[MessageStreamEvent]":
455482
model = None
456-
input_tokens = 0
457-
output_tokens = 0
458-
cache_read_input_tokens = 0
459-
cache_write_input_tokens = 0
483+
usage = _RecordedUsage()
460484
content_blocks: "list[str]" = []
461485

462486
async for event in old_iterator:
463487
(
464488
model,
465-
input_tokens,
466-
output_tokens,
467-
cache_read_input_tokens,
468-
cache_write_input_tokens,
489+
usage,
469490
content_blocks,
470491
) = _collect_ai_data(
471492
event,
472493
model,
473-
input_tokens,
474-
output_tokens,
475-
cache_read_input_tokens,
476-
cache_write_input_tokens,
494+
usage,
477495
content_blocks,
478496
)
479497
yield event
@@ -482,10 +500,10 @@ async def new_iterator_async() -> "AsyncIterator[MessageStreamEvent]":
482500
span=span,
483501
integration=integration,
484502
model=model,
485-
input_tokens=input_tokens,
486-
output_tokens=output_tokens,
487-
cache_read_input_tokens=cache_read_input_tokens,
488-
cache_write_input_tokens=cache_write_input_tokens,
503+
input_tokens=usage.input_tokens,
504+
output_tokens=usage.output_tokens,
505+
cache_read_input_tokens=usage.cache_read_input_tokens,
506+
cache_write_input_tokens=usage.cache_write_input_tokens,
489507
content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
490508
finish_span=True,
491509
)

sentry_sdk/integrations/openai.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
_is_system_instruction as _is_system_instruction_completions,
1818
_get_system_instructions as _get_system_instructions_completions,
1919
_transform_system_instructions,
20+
_get_text_items,
2021
)
2122
from sentry_sdk.ai._openai_responses_api import (
2223
_is_system_instruction as _is_system_instruction_responses,
@@ -181,10 +182,17 @@ def _calculate_token_usage(
181182
# Manually count tokens
182183
if input_tokens == 0:
183184
for message in messages or []:
184-
if isinstance(message, dict) and "content" in message:
185-
input_tokens += count_tokens(message["content"])
186-
elif isinstance(message, str):
185+
if isinstance(message, str):
187186
input_tokens += count_tokens(message)
187+
continue
188+
elif isinstance(message, dict):
189+
message_content = message.get("content")
190+
if message_content is None:
191+
continue
192+
# Deliberate use of Completions function for both Completions and Responses input format.
193+
text_items = _get_text_items(message_content)
194+
input_tokens += sum(count_tokens(text) for text in text_items)
195+
continue
188196

189197
if output_tokens == 0:
190198
if streaming_message_responses is not None:

sentry_sdk/integrations/openai_agents/patches/models.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,21 @@ async def wrapped_stream_response(*args: "Any", **kwargs: "Any") -> "Any":
145145
if len(args) > 1:
146146
span_kwargs["input"] = args[1]
147147

148+
hosted_tools = []
149+
if len(args) > 3:
150+
mcp_tools = args[3]
151+
152+
if mcp_tools is not None:
153+
hosted_tools = [
154+
tool
155+
for tool in mcp_tools
156+
if isinstance(tool, HostedMCPTool)
157+
]
158+
148159
with ai_client_span(agent, span_kwargs) as span:
160+
for hosted_tool in hosted_tools:
161+
_inject_trace_propagation_headers(hosted_tool, span=span)
162+
149163
span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)
150164

151165
streaming_response = None

0 commit comments

Comments
 (0)