diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 4ac58ca70d..6e0d13e895 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -31,7 +31,7 @@ event_from_exception, safe_serialize, ) -from google.genai.types import GenerateContentConfig, Part, Content +from google.genai.types import GenerateContentConfig, Part, Content, PartDict from itertools import chain if TYPE_CHECKING: @@ -47,6 +47,18 @@ ContentUnion, ) +_is_PIL_available = False +try: + from PIL import Image as PILImage # type: ignore[import-not-found] + + _is_PIL_available = True +except ImportError: + pass + +# Keys to use when checking to see if a dict provided by the user +# is Part-like (as opposed to a Content or multi-turn conversation entry). +_PART_DICT_KEYS = PartDict.__optional_keys__ + class UsageData(TypedDict): """Structure for token usage data.""" @@ -169,12 +181,23 @@ def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, A if isinstance(contents, str): return [{"role": "user", "content": contents}] - # Handle list case - process each item (non-recursive, flatten at top level) + # Handle list case if isinstance(contents, list): - for item in contents: - item_messages = extract_contents_messages(item) - messages.extend(item_messages) - return messages + if contents and all(_is_part_like(item) for item in contents): + # All items are parts — merge into a single multi-part user message + content_parts = [] + for item in contents: + part = _extract_part_from_item(item) + if part is not None: + content_parts.append(part) + + return [{"role": "user", "content": content_parts}] + else: + # Multi-turn conversation or mixed content types + for item in contents: + item_messages = extract_contents_messages(item) + messages.extend(item_messages) + return messages # Handle dictionary case (ContentDict) if isinstance(contents, dict): @@ -206,13 +229,23 @@ def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, A # Add tool messages messages.extend(tool_messages) elif "text" in contents: - # Simple text in dict messages.append( { - "role": role or "user", + "role": role, "content": [{"text": contents["text"], "type": "text"}], } ) + elif "inline_data" in contents: + # The "data" will always be bytes (or bytes within a string), + # so if this is present, it's safe to automatically substitute with the placeholder + messages.append( + { + "inline_data": { + "mime_type": contents["inline_data"].get("mime_type", ""), + "data": BLOB_DATA_SUBSTITUTE, + } + } + ) return messages @@ -248,15 +281,10 @@ def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, A return [{"role": "user", "content": [part_result]}] # Handle PIL.Image.Image - try: - from PIL import Image as PILImage # type: ignore[import-not-found] - - if isinstance(contents, PILImage.Image): - blob_part = _extract_pil_image(contents) - if blob_part: - return [{"role": "user", "content": [blob_part]}] - except ImportError: - pass + if _is_PIL_available and isinstance(contents, PILImage.Image): + blob_part = _extract_pil_image(contents) + if blob_part: + return [{"role": "user", "content": [blob_part]}] # Handle File object if hasattr(contents, "uri") and hasattr(contents, "mime_type"): @@ -310,11 +338,9 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": if result is not None: # For inline_data with bytes data, substitute the content if "inline_data" in part: - inline_data = part["inline_data"] - if isinstance(inline_data, dict) and isinstance( - inline_data.get("data"), bytes - ): - result["content"] = BLOB_DATA_SUBSTITUTE + # inline_data.data will always be bytes, or a string containing base64-encoded bytes, + # so can automatically substitute without further checks + result["content"] = BLOB_DATA_SUBSTITUTE return result return None @@ -357,18 +383,11 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": if mime_type is None: mime_type = "" - # Handle both bytes (binary data) and str (base64-encoded data) - if isinstance(data, bytes): - content = BLOB_DATA_SUBSTITUTE - else: - # For non-bytes data (e.g., base64 strings), use as-is - content = data - return { "type": "blob", "modality": get_modality_from_mime_type(mime_type), "mime_type": mime_type, - "content": content, + "content": BLOB_DATA_SUBSTITUTE, } return None @@ -429,25 +448,78 @@ def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]": def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]": """Extract blob part from PIL.Image.Image.""" - try: - from PIL import Image as PILImage + if not _is_PIL_available or not isinstance(image, PILImage.Image): + return None - if not isinstance(image, PILImage.Image): - return None + # Get format, default to JPEG + format_str = image.format or "JPEG" + suffix = format_str.lower() + mime_type = f"image/{suffix}" + + return { + "type": "blob", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "content": BLOB_DATA_SUBSTITUTE, + } - # Get format, default to JPEG - format_str = image.format or "JPEG" - suffix = format_str.lower() - mime_type = f"image/{suffix}" +def _is_part_like(item: "Any") -> bool: + """Check if item is a part-like value (PartUnionDict) rather than a Content/multi-turn entry.""" + if isinstance(item, (str, Part)): + return True + if isinstance(item, (list, Content)): + return False + if isinstance(item, dict): + if "role" in item or "parts" in item: + return False + # Part objects that came in as plain dicts + return bool(_PART_DICT_KEYS & item.keys()) + # File objects + if hasattr(item, "uri"): + return True + # PIL.Image + if _is_PIL_available and isinstance(item, PILImage.Image): + return True + return False + + +def _extract_part_from_item(item: "Any") -> "Optional[dict[str, Any]]": + """Convert a single part-like item to a content part dict.""" + if isinstance(item, str): + return {"text": item, "type": "text"} + + # Handle bare inline_data dicts directly to preserve the raw format + if isinstance(item, dict) and "inline_data" in item: return { - "type": "blob", - "modality": get_modality_from_mime_type(mime_type), - "mime_type": mime_type, - "content": BLOB_DATA_SUBSTITUTE, + "inline_data": { + "mime_type": item["inline_data"].get("mime_type", ""), + "data": BLOB_DATA_SUBSTITUTE, + } } - except Exception: - return None + + # For other dicts and Part objects, use existing _extract_part_content + result = _extract_part_content(item) + if result is not None: + return result + + # PIL.Image + if _is_PIL_available and isinstance(item, PILImage.Image): + return _extract_pil_image(item) + + # File objects + if hasattr(item, "uri") and hasattr(item, "mime_type"): + file_uri = getattr(item, "uri", None) + mime_type = getattr(item, "mime_type", None) or "" + if file_uri is not None: + return { + "type": "uri", + "modality": get_modality_from_mime_type(mime_type), + "mime_type": mime_type, + "uri": file_uri, + } + + return None def extract_contents_text(contents: "ContentListUnion") -> "Optional[str]": diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 68ae9d234f..6b5f197e89 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -942,11 +942,9 @@ def test_google_genai_message_truncation( assert isinstance(parsed_messages, list) assert len(parsed_messages) == 1 assert parsed_messages[0]["role"] == "user" - assert small_content in parsed_messages[0]["content"] - assert ( - event["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 2 - ) + # What "small content" becomes because the large message used the entire character limit + assert "..." in parsed_messages[0]["content"][1]["text"] # Sample embed content API response JSON @@ -1595,6 +1593,12 @@ def test_generate_content_with_function_response( mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) + # Conversation with the function call from the model + function_call = genai_types.FunctionCall( + name="get_weather", + args={"location": "Paris"}, + ) + # Conversation with function response (tool result) function_response = genai_types.FunctionResponse( id="call_123", name="get_weather", response={"output": "Sunny, 72F"} @@ -1603,6 +1607,9 @@ def test_generate_content_with_function_response( genai_types.Content( role="user", parts=[genai_types.Part(text="What's the weather in Paris?")] ), + genai_types.Content( + role="model", parts=[genai_types.Part(function_call=function_call)] + ), genai_types.Content( role="user", parts=[genai_types.Part(function_response=function_response)] ), @@ -1708,7 +1715,13 @@ def test_generate_content_with_part_object_directly( def test_generate_content_with_list_of_dicts( sentry_init, capture_events, mock_genai_client ): - """Test generate_content with list of dict format inputs.""" + """ + Test generate_content with list of dict format inputs. + + We only keep (and assert) the last dict in `content` because we've made popping the last message a form of + message truncation to keep the span size within limits. If we were following OTEL conventions, all 3 dicts + would be present. + """ sentry_init( integrations=[GoogleGenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1788,6 +1801,98 @@ def test_generate_content_with_dict_inline_data( assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE +def test_generate_content_without_parts_property_inline_data( + sentry_init, capture_events, mock_genai_client +): + sentry_init( + integrations=[GoogleGenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) + + contents = [ + {"text": "What's in this image?"}, + {"inline_data": {"data": b"fake_binary_data", "mime_type": "image/gif"}}, + ] + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + assert len(messages) == 1 + + assert len(messages[0]["content"]) == 2 + assert messages[0]["role"] == "user" + assert messages[0]["content"][0] == { + "text": "What's in this image?", + "type": "text", + } + assert messages[0]["content"][1]["inline_data"] + + assert messages[0]["content"][1]["inline_data"]["data"] == BLOB_DATA_SUBSTITUTE + assert messages[0]["content"][1]["inline_data"]["mime_type"] == "image/gif" + + +def test_generate_content_without_parts_property_inline_data_and_binary_data_within_string( + sentry_init, capture_events, mock_genai_client +): + sentry_init( + integrations=[GoogleGenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) + + contents = [ + {"text": "What's in this image?"}, + { + "inline_data": { + "data": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8z8BQz0AEYBxVSF+FABJADveWkH6oAAAAAElFTkSuQmCC", + "mime_type": "image/png", + } + }, + ] + + with mock.patch.object( + mock_genai_client._api_client, "request", return_value=mock_http_response + ): + with start_transaction(name="google_genai"): + mock_genai_client.models.generate_content( + model="gemini-1.5-flash", contents=contents, config=create_test_config() + ) + + (event,) = events + invoke_span = event["spans"][0] + + messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + assert len(messages) == 1 + assert messages[0]["role"] == "user" + + assert len(messages[0]["content"]) == 2 + assert messages[0]["content"][0] == { + "text": "What's in this image?", + "type": "text", + } + assert messages[0]["content"][1]["inline_data"] + + assert messages[0]["content"][1]["inline_data"]["data"] == BLOB_DATA_SUBSTITUTE + assert messages[0]["content"][1]["inline_data"]["mime_type"] == "image/png" + + # Tests for extract_contents_messages function def test_extract_contents_messages_none(): """Test extract_contents_messages with None input"""