From 7f1429cff67031781fa11b6d7e5d7a53008209a7 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 21 Apr 2026 07:13:00 +0200 Subject: [PATCH] ref(google-genai): Revert input transformation --- sentry_sdk/integrations/google_genai/utils.py | 417 +-------- .../google_genai/test_google_genai.py | 833 ------------------ 2 files changed, 30 insertions(+), 1220 deletions(-) diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py index 25763ebe07..30b5d0e7a1 100644 --- a/sentry_sdk/integrations/google_genai/utils.py +++ b/sentry_sdk/integrations/google_genai/utils.py @@ -3,7 +3,6 @@ import inspect from functools import wraps from .consts import ORIGIN, TOOL_ATTRIBUTES_MAP, GEN_AI_SYSTEM -from sentry_sdk._types import BLOB_DATA_SUBSTITUTE from typing import ( TYPE_CHECKING, Iterable, @@ -13,7 +12,6 @@ Optional, Union, TypedDict, - Dict, ) import sentry_sdk @@ -21,8 +19,6 @@ set_data_normalized, truncate_and_annotate_messages, normalize_message_roles, - transform_google_content_part, - get_modality_from_mime_type, ) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.scope import should_send_default_pii @@ -31,7 +27,7 @@ event_from_exception, safe_serialize, ) -from google.genai.types import GenerateContentConfig, Part, Content, PartDict +from google.genai.types import GenerateContentConfig, Part, Content from itertools import chain if TYPE_CHECKING: @@ -47,18 +43,6 @@ ContentUnion, ) -_is_PIL_available = False -try: - from PIL import Image as PILImage # type: ignore[import-not-found] - - _is_PIL_available = True -except ImportError: - pass - -# Keys to use when checking to see if a dict provided by the user -# is Part-like (as opposed to a Content or multi-turn conversation entry). -_PART_DICT_KEYS = PartDict.__optional_keys__ - class UsageData(TypedDict): """Structure for token usage data.""" @@ -165,386 +149,44 @@ def get_model_name(model: "Union[str, Model]") -> str: return str(model) -def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, Any]]": - """Extract messages from contents parameter which can have various formats. - - Returns a list of message dictionaries in the format: - - System: {"role": "system", "content": "string"} - - User/Assistant: {"role": "user"|"assistant", "content": [{"text": "...", "type": "text"}, ...]} - """ +def extract_contents_text(contents: "ContentListUnion") -> "Optional[str]": + """Extract text from contents parameter which can have various formats.""" if contents is None: - return [] - - messages = [] + return None - # Handle string case + # Simple string case if isinstance(contents, str): - return [{"role": "user", "content": contents}] + return contents - # Handle list case + # List of contents or parts if isinstance(contents, list): - if contents and all(_is_part_like(item) for item in contents): - # All items are parts — merge into a single multi-part user message - content_parts = [] - for item in contents: - part = _extract_part_from_item(item) - if part is not None: - content_parts.append(part) - - return [{"role": "user", "content": content_parts}] - else: - # Multi-turn conversation or mixed content types - for item in contents: - item_messages = extract_contents_messages(item) - messages.extend(item_messages) - return messages - - # Handle dictionary case (ContentDict) + texts = [] + for item in contents: + # Recursively extract text from each item + extracted = extract_contents_text(item) + if extracted: + texts.append(extracted) + return " ".join(texts) if texts else None + + # Dictionary case if isinstance(contents, dict): - role = contents.get("role", "user") - parts = contents.get("parts") - - if parts: - content_parts = [] - tool_messages = [] - - for part in parts: - part_result = _extract_part_content(part) - if part_result is None: - continue - - if isinstance(part_result, dict) and part_result.get("role") == "tool": - # Tool message - add separately - tool_messages.append(part_result) - else: - # Regular content part - content_parts.append(part_result) - - # Add main message if we have content parts - if content_parts: - # Normalize role: "model" -> "assistant" - normalized_role = "assistant" if role == "model" else role or "user" - messages.append({"role": normalized_role, "content": content_parts}) - - # Add tool messages - messages.extend(tool_messages) - elif "text" in contents: - messages.append( - { - "role": role, - "content": [{"text": contents["text"], "type": "text"}], - } - ) - elif "inline_data" in contents: - # The "data" will always be bytes (or bytes within a string), - # so if this is present, it's safe to automatically substitute with the placeholder - messages.append( - { - "inline_data": { - "mime_type": contents["inline_data"].get("mime_type", ""), - "data": BLOB_DATA_SUBSTITUTE, - } - } - ) - - return messages - - # Handle Content object - if hasattr(contents, "parts") and contents.parts: - role = getattr(contents, "role", None) or "user" - content_parts = [] - tool_messages = [] - - for part in contents.parts: - part_result = _extract_part_content(part) - if part_result is None: - continue - - if isinstance(part_result, dict) and part_result.get("role") == "tool": - tool_messages.append(part_result) - else: - content_parts.append(part_result) - - if content_parts: - normalized_role = "assistant" if role == "model" else role - messages.append({"role": normalized_role, "content": content_parts}) - - messages.extend(tool_messages) - return messages - - # Handle Part object directly - part_result = _extract_part_content(contents) - if part_result: - if isinstance(part_result, dict) and part_result.get("role") == "tool": - return [part_result] - else: - return [{"role": "user", "content": [part_result]}] - - # Handle PIL.Image.Image - if _is_PIL_available and isinstance(contents, PILImage.Image): - blob_part = _extract_pil_image(contents) - if blob_part: - return [{"role": "user", "content": [blob_part]}] - - # Handle File object - if hasattr(contents, "uri") and hasattr(contents, "mime_type"): - # File object - file_uri = getattr(contents, "uri", None) - mime_type = getattr(contents, "mime_type", None) - # Process if we have file_uri, even if mime_type is missing - if file_uri is not None: - # Default to empty string if mime_type is None - if mime_type is None: - mime_type = "" - - blob_part = { - "type": "uri", - "modality": get_modality_from_mime_type(mime_type), - "mime_type": mime_type, - "uri": file_uri, - } - return [{"role": "user", "content": [blob_part]}] - - # Handle direct text attribute - if hasattr(contents, "text") and contents.text: - return [ - {"role": "user", "content": [{"text": str(contents.text), "type": "text"}]} - ] - - return [] - - -def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]": - """Extract content from a Part object or dict. - - Returns: - - dict for content part (text/blob) or tool message - - None if part should be skipped - """ - if part is None: - return None + if "text" in contents: + return contents["text"] + # Try to extract from parts if present in dict + if "parts" in contents: + return extract_contents_text(contents["parts"]) - # Handle dict Part - if isinstance(part, dict): - # Check for function_response first (tool message) - if "function_response" in part: - return _extract_tool_message_from_part(part) - - if part.get("text"): - return {"text": part["text"], "type": "text"} - - # Try using Google-specific transform for dict formats (inline_data, file_data) - result = transform_google_content_part(part) - if result is not None: - # For inline_data with bytes data, substitute the content - if "inline_data" in part: - # inline_data.data will always be bytes, or a string containing base64-encoded bytes, - # so can automatically substitute without further checks - result["content"] = BLOB_DATA_SUBSTITUTE - return result + # Content object with parts - recurse into parts + if getattr(contents, "parts", None): + return extract_contents_text(contents.parts) - return None - - # Handle Part object - # Check for function_response (tool message) - if hasattr(part, "function_response") and part.function_response: - return _extract_tool_message_from_part(part) - - # Handle text - if hasattr(part, "text") and part.text: - return {"text": part.text, "type": "text"} - - # Handle file_data - if hasattr(part, "file_data") and part.file_data: - file_data = part.file_data - file_uri = getattr(file_data, "file_uri", None) - mime_type = getattr(file_data, "mime_type", None) - # Process if we have file_uri, even if mime_type is missing (consistent with dict handling) - if file_uri is not None: - # Default to empty string if mime_type is None (consistent with transform_google_content_part) - if mime_type is None: - mime_type = "" - - return { - "type": "uri", - "modality": get_modality_from_mime_type(mime_type), - "mime_type": mime_type, - "uri": file_uri, - } - - # Handle inline_data - if hasattr(part, "inline_data") and part.inline_data: - inline_data = part.inline_data - data = getattr(inline_data, "data", None) - mime_type = getattr(inline_data, "mime_type", None) - # Process if we have data, even if mime_type is missing/empty (consistent with dict handling) - if data is not None: - # Default to empty string if mime_type is None (consistent with transform_google_content_part) - if mime_type is None: - mime_type = "" - - return { - "type": "blob", - "modality": get_modality_from_mime_type(mime_type), - "mime_type": mime_type, - "content": BLOB_DATA_SUBSTITUTE, - } + # Direct text attribute + if hasattr(contents, "text"): + return contents.text return None -def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]": - """Extract tool message from a Part with function_response. - - Returns: - {"role": "tool", "content": {"toolCallId": "...", "toolName": "...", "output": "..."}} - or None if not a valid tool message - """ - function_response = None - - if isinstance(part, dict): - function_response = part.get("function_response") - elif hasattr(part, "function_response"): - function_response = part.function_response - - if not function_response: - return None - - # Extract fields from function_response - tool_call_id = None - tool_name = None - output = None - - if isinstance(function_response, dict): - tool_call_id = function_response.get("id") - tool_name = function_response.get("name") - response_dict = function_response.get("response", {}) - # Prefer "output" key if present, otherwise use entire response - output = response_dict.get("output", response_dict) - else: - # FunctionResponse object - tool_call_id = getattr(function_response, "id", None) - tool_name = getattr(function_response, "name", None) - response_obj = getattr(function_response, "response", None) - if response_obj is None: - response_obj = {} - if isinstance(response_obj, dict): - output = response_obj.get("output", response_obj) - else: - output = response_obj - - if not tool_name: - return None - - return { - "role": "tool", - "content": { - "toolCallId": str(tool_call_id) if tool_call_id else None, - "toolName": str(tool_name), - "output": safe_serialize(output) if output is not None else None, - }, - } - - -def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]": - """Extract blob part from PIL.Image.Image.""" - if not _is_PIL_available or not isinstance(image, PILImage.Image): - return None - - # Get format, default to JPEG - format_str = image.format or "JPEG" - suffix = format_str.lower() - mime_type = f"image/{suffix}" - - return { - "type": "blob", - "modality": get_modality_from_mime_type(mime_type), - "mime_type": mime_type, - "content": BLOB_DATA_SUBSTITUTE, - } - - -def _is_part_like(item: "Any") -> bool: - """Check if item is a part-like value (PartUnionDict) rather than a Content/multi-turn entry.""" - if isinstance(item, (str, Part)): - return True - if isinstance(item, (list, Content)): - return False - if isinstance(item, dict): - if "role" in item or "parts" in item: - return False - # Part objects that came in as plain dicts - return bool(_PART_DICT_KEYS & item.keys()) - # File objects - if hasattr(item, "uri"): - return True - # PIL.Image - if _is_PIL_available and isinstance(item, PILImage.Image): - return True - return False - - -def _extract_part_from_item(item: "Any") -> "Optional[dict[str, Any]]": - """Convert a single part-like item to a content part dict.""" - if isinstance(item, str): - return {"text": item, "type": "text"} - - # Handle bare inline_data dicts directly to preserve the raw format - if isinstance(item, dict) and "inline_data" in item: - return { - "inline_data": { - "mime_type": item["inline_data"].get("mime_type", ""), - "data": BLOB_DATA_SUBSTITUTE, - } - } - - # For other dicts and Part objects, use existing _extract_part_content - result = _extract_part_content(item) - if result is not None: - return result - - # PIL.Image - if _is_PIL_available and isinstance(item, PILImage.Image): - return _extract_pil_image(item) - - # File objects - if hasattr(item, "uri") and hasattr(item, "mime_type"): - file_uri = getattr(item, "uri", None) - mime_type = getattr(item, "mime_type", None) or "" - if file_uri is not None: - return { - "type": "uri", - "modality": get_modality_from_mime_type(mime_type), - "mime_type": mime_type, - "uri": file_uri, - } - - return None - - -def extract_contents_text(contents: "ContentListUnion") -> "Optional[str]": - """Extract text from contents parameter which can have various formats. - - This is a compatibility function that extracts text from messages. - For new code, use extract_contents_messages instead. - """ - messages = extract_contents_messages(contents) - if not messages: - return None - - texts = [] - for message in messages: - content = message.get("content") - if isinstance(content, str): - texts.append(content) - elif isinstance(content, list): - for part in content: - if isinstance(part, dict) and part.get("type") == "text": - texts.append(part.get("text", "")) - - return " ".join(texts) if texts else None - - def _format_tools_for_span( tools: "Iterable[Tool | Callable[..., Any]]", ) -> "Optional[List[dict[str, Any]]]": @@ -887,8 +529,9 @@ def set_span_data_for_request( ) # Extract messages from contents - contents_messages = extract_contents_messages(contents) - messages.extend(contents_messages) + contents_text = extract_contents_text(contents) + if contents_text: + messages.append({"role": "user", "content": contents_text}) if messages: normalized_messages = normalize_message_roles(messages) diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py index 3974041314..bf591a64fd 100644 --- a/tests/integrations/google_genai/test_google_genai.py +++ b/tests/integrations/google_genai/test_google_genai.py @@ -7,10 +7,8 @@ from google.genai.types import Content, Part from sentry_sdk import start_transaction -from sentry_sdk._types import BLOB_DATA_SUBSTITUTE from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations.google_genai import GoogleGenAIIntegration -from sentry_sdk.integrations.google_genai.utils import extract_contents_messages @pytest.fixture @@ -897,47 +895,6 @@ def test_tool_calls_extraction(sentry_init, capture_items, mock_genai_client): assert json.loads(tool_calls[1]["arguments"]) == {"timezone": "PST"} -def test_google_genai_message_truncation(sentry_init, capture_items, mock_genai_client): - """Test that large messages are truncated properly in Google GenAI integration.""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - large_content = ( - "This is a very long message that will exceed our size limits. " * 1000 - ) - small_content = "This is a small user message" - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", - contents=[large_content, small_content], - config=create_test_config(), - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in invoke_span["attributes"] - - messages_data = invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert parsed_messages[0]["role"] == "user" - - # What "small content" becomes because the large message used the entire character limit - assert "..." in parsed_messages[0]["content"][1]["text"] - - # Sample embed content API response JSON EXAMPLE_EMBED_RESPONSE_JSON = { "embeddings": [ @@ -1402,793 +1359,3 @@ async def test_async_embed_content_span_origin( spans = [item.payload for item in items if item.type == "span"] for span in spans: assert span["attributes"]["sentry.origin"] == "auto.ai.google_genai" - - -# Integration tests for generate_content with different input message formats -def test_generate_content_with_content_object( - sentry_init, capture_items, mock_genai_client -): - """Test generate_content with Content object input.""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # Create Content object - content = genai_types.Content( - role="user", parts=[genai_types.Part(text="Hello from Content object")] - ) - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=content, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 1 - assert messages[0]["role"] == "user" - assert messages[0]["content"] == [ - {"text": "Hello from Content object", "type": "text"} - ] - - -def test_generate_content_with_dict_format( - sentry_init, capture_items, mock_genai_client -): - """Test generate_content with dict format input (ContentDict).""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # Dict format content - contents = {"role": "user", "parts": [{"text": "Hello from dict format"}]} - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 1 - assert messages[0]["role"] == "user" - assert messages[0]["content"] == [ - {"text": "Hello from dict format", "type": "text"} - ] - - -def test_generate_content_with_file_data(sentry_init, capture_items, mock_genai_client): - """Test generate_content with file_data (external file reference).""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # Content with file_data - file_data = genai_types.FileData( - file_uri="gs://bucket/image.jpg", mime_type="image/jpeg" - ) - content = genai_types.Content( - role="user", - parts=[ - genai_types.Part(text="What's in this image?"), - genai_types.Part(file_data=file_data), - ], - ) - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=content, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 1 - assert messages[0]["role"] == "user" - assert len(messages[0]["content"]) == 2 - assert messages[0]["content"][0] == { - "text": "What's in this image?", - "type": "text", - } - assert messages[0]["content"][1]["type"] == "uri" - assert messages[0]["content"][1]["modality"] == "image" - assert messages[0]["content"][1]["mime_type"] == "image/jpeg" - assert messages[0]["content"][1]["uri"] == "gs://bucket/image.jpg" - - -def test_generate_content_with_inline_data( - sentry_init, capture_items, mock_genai_client -): - """Test generate_content with inline_data (binary data).""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # Content with inline binary data - image_bytes = b"fake_image_binary_data" - blob = genai_types.Blob(data=image_bytes, mime_type="image/png") - content = genai_types.Content( - role="user", - parts=[ - genai_types.Part(text="Describe this image"), - genai_types.Part(inline_data=blob), - ], - ) - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=content, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 1 - assert messages[0]["role"] == "user" - assert len(messages[0]["content"]) == 2 - assert messages[0]["content"][0] == {"text": "Describe this image", "type": "text"} - assert messages[0]["content"][1]["type"] == "blob" - assert messages[0]["content"][1]["mime_type"] == "image/png" - # Binary data should be substituted for privacy - assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE - - -def test_generate_content_with_function_response( - sentry_init, capture_items, mock_genai_client -): - """Test generate_content with function_response (tool result).""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # Conversation with the function call from the model - function_call = genai_types.FunctionCall( - name="get_weather", - args={"location": "Paris"}, - ) - - # Conversation with function response (tool result) - function_response = genai_types.FunctionResponse( - id="call_123", name="get_weather", response={"output": "Sunny, 72F"} - ) - contents = [ - genai_types.Content( - role="user", parts=[genai_types.Part(text="What's the weather in Paris?")] - ), - genai_types.Content( - role="model", parts=[genai_types.Part(function_call=function_call)] - ), - genai_types.Content( - role="user", parts=[genai_types.Part(function_response=function_response)] - ), - ] - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 1 - # First message is user message - assert messages[0]["role"] == "tool" - assert messages[0]["content"]["toolCallId"] == "call_123" - assert messages[0]["content"]["toolName"] == "get_weather" - assert messages[0]["content"]["output"] == "Sunny, 72F" - - -def test_generate_content_with_mixed_string_and_content( - sentry_init, capture_items, mock_genai_client -): - """Test generate_content with mixed string and Content objects in list.""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # Mix of strings and Content objects - contents = [ - "Hello, this is a string message", - genai_types.Content( - role="model", - parts=[genai_types.Part(text="Hi! How can I help you?")], - ), - genai_types.Content( - role="user", - parts=[genai_types.Part(text="Tell me a joke")], - ), - ] - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 1 - # User message - assert messages[0]["role"] == "user" - assert messages[0]["content"] == [{"text": "Tell me a joke", "type": "text"}] - - -def test_generate_content_with_part_object_directly( - sentry_init, capture_items, mock_genai_client -): - """Test generate_content with Part object directly (not wrapped in Content).""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # Part object directly - part = genai_types.Part(text="Direct Part object") - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=part, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 1 - assert messages[0]["role"] == "user" - assert messages[0]["content"] == [{"text": "Direct Part object", "type": "text"}] - - -def test_generate_content_with_list_of_dicts( - sentry_init, capture_items, mock_genai_client -): - """ - Test generate_content with list of dict format inputs. - - We only keep (and assert) the last dict in `content` because we've made popping the last message a form of - message truncation to keep the span size within limits. If we were following OTEL conventions, all 3 dicts - would be present. - """ - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # List of dicts (conversation in dict format) - contents = [ - {"role": "user", "parts": [{"text": "First user message"}]}, - {"role": "model", "parts": [{"text": "First model response"}]}, - {"role": "user", "parts": [{"text": "Second user message"}]}, - ] - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 1 - assert messages[0]["role"] == "user" - assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}] - - -def test_generate_content_with_dict_inline_data( - sentry_init, capture_items, mock_genai_client -): - """Test generate_content with dict format containing inline_data.""" - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - # Dict with inline_data - contents = { - "role": "user", - "parts": [ - {"text": "What's in this image?"}, - {"inline_data": {"data": b"fake_binary_data", "mime_type": "image/gif"}}, - ], - } - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 1 - assert messages[0]["role"] == "user" - assert len(messages[0]["content"]) == 2 - assert messages[0]["content"][0] == { - "text": "What's in this image?", - "type": "text", - } - assert messages[0]["content"][1]["type"] == "blob" - assert messages[0]["content"][1]["mime_type"] == "image/gif" - assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE - - -def test_generate_content_without_parts_property_inline_data( - sentry_init, capture_items, mock_genai_client -): - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - contents = [ - {"text": "What's in this image?"}, - {"inline_data": {"data": b"fake_binary_data", "mime_type": "image/gif"}}, - ] - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - - assert len(messages) == 1 - - assert len(messages[0]["content"]) == 2 - assert messages[0]["role"] == "user" - assert messages[0]["content"][0] == { - "text": "What's in this image?", - "type": "text", - } - assert messages[0]["content"][1]["inline_data"] - - assert messages[0]["content"][1]["inline_data"]["data"] == BLOB_DATA_SUBSTITUTE - assert messages[0]["content"][1]["inline_data"]["mime_type"] == "image/gif" - - -def test_generate_content_without_parts_property_inline_data_and_binary_data_within_string( - sentry_init, capture_items, mock_genai_client -): - sentry_init( - integrations=[GoogleGenAIIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("span") - - mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON) - - contents = [ - {"text": "What's in this image?"}, - { - "inline_data": { - "data": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8z8BQz0AEYBxVSF+FABJADveWkH6oAAAAAElFTkSuQmCC", - "mime_type": "image/png", - } - }, - ] - - with mock.patch.object( - mock_genai_client._api_client, "request", return_value=mock_http_response - ): - with start_transaction(name="google_genai"): - mock_genai_client.models.generate_content( - model="gemini-1.5-flash", contents=contents, config=create_test_config() - ) - - invoke_span = next(item.payload for item in items if item.type == "span") - - messages = json.loads(invoke_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(messages) == 1 - assert messages[0]["role"] == "user" - - assert len(messages[0]["content"]) == 2 - assert messages[0]["content"][0] == { - "text": "What's in this image?", - "type": "text", - } - assert messages[0]["content"][1]["inline_data"] - - assert messages[0]["content"][1]["inline_data"]["data"] == BLOB_DATA_SUBSTITUTE - assert messages[0]["content"][1]["inline_data"]["mime_type"] == "image/png" - - -# Tests for extract_contents_messages function -def test_extract_contents_messages_none(): - """Test extract_contents_messages with None input""" - result = extract_contents_messages(None) - assert result == [] - - -def test_extract_contents_messages_string(): - """Test extract_contents_messages with string input""" - result = extract_contents_messages("Hello world") - assert result == [{"role": "user", "content": "Hello world"}] - - -def test_extract_contents_messages_content_object(): - """Test extract_contents_messages with Content object""" - content = genai_types.Content( - role="user", parts=[genai_types.Part(text="Test message")] - ) - result = extract_contents_messages(content) - assert len(result) == 1 - assert result[0]["role"] == "user" - assert result[0]["content"] == [{"text": "Test message", "type": "text"}] - - -def test_extract_contents_messages_content_object_model_role(): - """Test extract_contents_messages with Content object having model role""" - content = genai_types.Content( - role="model", parts=[genai_types.Part(text="Assistant response")] - ) - result = extract_contents_messages(content) - assert len(result) == 1 - assert result[0]["role"] == "assistant" - assert result[0]["content"] == [{"text": "Assistant response", "type": "text"}] - - -def test_extract_contents_messages_content_object_no_role(): - """Test extract_contents_messages with Content object without role""" - content = genai_types.Content(parts=[genai_types.Part(text="No role message")]) - result = extract_contents_messages(content) - assert len(result) == 1 - assert result[0]["role"] == "user" - assert result[0]["content"] == [{"text": "No role message", "type": "text"}] - - -def test_extract_contents_messages_part_object(): - """Test extract_contents_messages with Part object""" - part = genai_types.Part(text="Direct part") - result = extract_contents_messages(part) - assert len(result) == 1 - assert result[0]["role"] == "user" - assert result[0]["content"] == [{"text": "Direct part", "type": "text"}] - - -def test_extract_contents_messages_file_data(): - """Test extract_contents_messages with file_data""" - file_data = genai_types.FileData( - file_uri="gs://bucket/file.jpg", mime_type="image/jpeg" - ) - part = genai_types.Part(file_data=file_data) - content = genai_types.Content(parts=[part]) - result = extract_contents_messages(content) - - assert len(result) == 1 - assert result[0]["role"] == "user" - assert len(result[0]["content"]) == 1 - blob_part = result[0]["content"][0] - assert blob_part["type"] == "uri" - assert blob_part["modality"] == "image" - assert blob_part["mime_type"] == "image/jpeg" - assert blob_part["uri"] == "gs://bucket/file.jpg" - - -def test_extract_contents_messages_inline_data(): - """Test extract_contents_messages with inline_data (binary)""" - # Create inline data with bytes - image_bytes = b"fake_image_data" - blob = genai_types.Blob(data=image_bytes, mime_type="image/png") - part = genai_types.Part(inline_data=blob) - content = genai_types.Content(parts=[part]) - result = extract_contents_messages(content) - - assert len(result) == 1 - assert result[0]["role"] == "user" - assert len(result[0]["content"]) == 1 - blob_part = result[0]["content"][0] - assert blob_part["type"] == "blob" - assert blob_part["mime_type"] == "image/png" - assert blob_part["content"] == BLOB_DATA_SUBSTITUTE - - -def test_extract_contents_messages_function_response(): - """Test extract_contents_messages with function_response (tool message)""" - function_response = genai_types.FunctionResponse( - id="call_123", name="get_weather", response={"output": "sunny"} - ) - part = genai_types.Part(function_response=function_response) - content = genai_types.Content(parts=[part]) - result = extract_contents_messages(content) - - assert len(result) == 1 - assert result[0]["role"] == "tool" - assert result[0]["content"]["toolCallId"] == "call_123" - assert result[0]["content"]["toolName"] == "get_weather" - assert result[0]["content"]["output"] == "sunny" - - -def test_extract_contents_messages_function_response_with_output_key(): - """Test extract_contents_messages with function_response that has output key""" - function_response = genai_types.FunctionResponse( - id="call_456", name="get_time", response={"output": "3:00 PM", "error": None} - ) - part = genai_types.Part(function_response=function_response) - content = genai_types.Content(parts=[part]) - result = extract_contents_messages(content) - - assert len(result) == 1 - assert result[0]["role"] == "tool" - assert result[0]["content"]["toolCallId"] == "call_456" - assert result[0]["content"]["toolName"] == "get_time" - # Should prefer "output" key - assert result[0]["content"]["output"] == "3:00 PM" - - -def test_extract_contents_messages_mixed_parts(): - """Test extract_contents_messages with mixed content parts""" - content = genai_types.Content( - role="user", - parts=[ - genai_types.Part(text="Text part"), - genai_types.Part( - file_data=genai_types.FileData( - file_uri="gs://bucket/image.jpg", mime_type="image/jpeg" - ) - ), - ], - ) - result = extract_contents_messages(content) - - assert len(result) == 1 - assert result[0]["role"] == "user" - assert len(result[0]["content"]) == 2 - assert result[0]["content"][0] == {"text": "Text part", "type": "text"} - assert result[0]["content"][1]["type"] == "uri" - assert result[0]["content"][1]["modality"] == "image" - assert result[0]["content"][1]["uri"] == "gs://bucket/image.jpg" - - -def test_extract_contents_messages_list(): - """Test extract_contents_messages with list input""" - contents = [ - "First message", - genai_types.Content( - role="user", parts=[genai_types.Part(text="Second message")] - ), - ] - result = extract_contents_messages(contents) - - assert len(result) == 2 - assert result[0] == {"role": "user", "content": "First message"} - assert result[1]["role"] == "user" - assert result[1]["content"] == [{"text": "Second message", "type": "text"}] - - -def test_extract_contents_messages_dict_content(): - """Test extract_contents_messages with dict (ContentDict)""" - content_dict = {"role": "user", "parts": [{"text": "Dict message"}]} - result = extract_contents_messages(content_dict) - - assert len(result) == 1 - assert result[0]["role"] == "user" - assert result[0]["content"] == [{"text": "Dict message", "type": "text"}] - - -def test_extract_contents_messages_dict_with_text(): - """Test extract_contents_messages with dict containing text key""" - content_dict = {"role": "user", "text": "Simple text"} - result = extract_contents_messages(content_dict) - - assert len(result) == 1 - assert result[0]["role"] == "user" - assert result[0]["content"] == [{"text": "Simple text", "type": "text"}] - - -def test_extract_contents_messages_file_object(): - """Test extract_contents_messages with File object""" - file_obj = genai_types.File( - name="files/123", uri="gs://bucket/file.pdf", mime_type="application/pdf" - ) - result = extract_contents_messages(file_obj) - - assert len(result) == 1 - assert result[0]["role"] == "user" - assert len(result[0]["content"]) == 1 - blob_part = result[0]["content"][0] - assert blob_part["type"] == "uri" - assert blob_part["modality"] == "document" - assert blob_part["mime_type"] == "application/pdf" - assert blob_part["uri"] == "gs://bucket/file.pdf" - - -@pytest.mark.skipif( - not hasattr(genai_types, "PIL_Image") or genai_types.PIL_Image is None, - reason="PIL not available", -) -def test_extract_contents_messages_pil_image(): - """Test extract_contents_messages with PIL.Image.Image""" - try: - from PIL import Image as PILImage - - # Create a simple test image - img = PILImage.new("RGB", (10, 10), color="red") - result = extract_contents_messages(img) - - assert len(result) == 1 - assert result[0]["role"] == "user" - assert len(result[0]["content"]) == 1 - blob_part = result[0]["content"][0] - assert blob_part["type"] == "blob" - assert blob_part["mime_type"].startswith("image/") - assert "content" in blob_part - # Binary content is substituted with placeholder for privacy - assert blob_part["content"] == "[Blob substitute]" - except ImportError: - pytest.skip("PIL not available") - - -def test_extract_contents_messages_tool_and_text(): - """Test extract_contents_messages with both tool message and text""" - content = genai_types.Content( - role="user", - parts=[ - genai_types.Part(text="User question"), - genai_types.Part( - function_response=genai_types.FunctionResponse( - id="call_789", name="search", response={"output": "results"} - ) - ), - ], - ) - result = extract_contents_messages(content) - - # Should have two messages: one user message and one tool message - assert len(result) == 2 - # First should be user message with text - assert result[0]["role"] == "user" - assert result[0]["content"] == [{"text": "User question", "type": "text"}] - # Second should be tool message - assert result[1]["role"] == "tool" - assert result[1]["content"]["toolCallId"] == "call_789" - assert result[1]["content"]["toolName"] == "search" - - -def test_extract_contents_messages_empty_parts(): - """Test extract_contents_messages with Content object with empty parts""" - content = genai_types.Content(role="user", parts=[]) - result = extract_contents_messages(content) - - assert result == [] - - -def test_extract_contents_messages_empty_list(): - """Test extract_contents_messages with empty list""" - result = extract_contents_messages([]) - assert result == [] - - -def test_extract_contents_messages_dict_inline_data(): - """Test extract_contents_messages with dict containing inline_data""" - content_dict = { - "role": "user", - "parts": [{"inline_data": {"data": b"binary_data", "mime_type": "image/gif"}}], - } - result = extract_contents_messages(content_dict) - - assert len(result) == 1 - assert result[0]["role"] == "user" - assert len(result[0]["content"]) == 1 - blob_part = result[0]["content"][0] - assert blob_part["type"] == "blob" - assert blob_part["mime_type"] == "image/gif" - assert blob_part["content"] == BLOB_DATA_SUBSTITUTE - - -def test_extract_contents_messages_dict_function_response(): - """Test extract_contents_messages with dict containing function_response""" - content_dict = { - "role": "user", - "parts": [ - { - "function_response": { - "id": "dict_call_1", - "name": "dict_tool", - "response": {"result": "success"}, - } - } - ], - } - result = extract_contents_messages(content_dict) - - assert len(result) == 1 - assert result[0]["role"] == "tool" - assert result[0]["content"]["toolCallId"] == "dict_call_1" - assert result[0]["content"]["toolName"] == "dict_tool" - assert result[0]["content"]["output"] == '{"result": "success"}' - - -def test_extract_contents_messages_object_with_text_attribute(): - """Test extract_contents_messages with object that has text attribute""" - - class TextObject: - def __init__(self): - self.text = "Object text" - - obj = TextObject() - result = extract_contents_messages(obj) - - assert len(result) == 1 - assert result[0]["role"] == "user" - assert result[0]["content"] == [{"text": "Object text", "type": "text"}]