diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index efc2f70ffd..b57f6bbab4 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -12,7 +12,6 @@ normalize_message_roles, truncate_and_annotate_messages, get_start_span_function, - transform_anthropic_content_part, ) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import _check_minimum_version, DidNotEnable, Integration @@ -323,27 +322,6 @@ def _collect_ai_data( ) -def _transform_anthropic_content_block( - content_block: "dict[str, Any]", -) -> "dict[str, Any]": - """ - Transform an Anthropic content block using the Anthropic-specific transformer, - with special handling for Anthropic's text-type documents. - """ - # Handle Anthropic's text-type documents specially (not covered by shared function) - if content_block.get("type") == "document": - source = content_block.get("source") - if isinstance(source, dict) and source.get("type") == "text": - return { - "type": "text", - "text": source.get("data", ""), - } - - # Use Anthropic-specific transformation - result = transform_anthropic_content_part(content_block) - return result if result is not None else content_block - - def _transform_system_instructions( system_instructions: "Union[str, Iterable[TextBlockParam]]", ) -> "list[TextPart]": @@ -401,41 +379,19 @@ def _set_common_input_data( and "content" in message and isinstance(message["content"], (list, tuple)) ): - transformed_content = [] for item in message["content"]: - # Skip tool_result items - they can contain images/documents - # with nested structures that are difficult to redact properly - if isinstance(item, dict) and item.get("type") == "tool_result": - continue - - # Transform content blocks (images, documents, etc.) - transformed_content.append( - _transform_anthropic_content_block(item) - if isinstance(item, dict) - else item - ) - - # If there are non-tool-result items, add them as a message - if transformed_content: - normalized_messages.append( - { - "role": message.get("role"), - "content": transformed_content, - } - ) + if item.get("type") == "tool_result": + normalized_messages.append( + { + "role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL, + "content": { + "tool_use_id": item.get("tool_use_id"), + "output": item.get("content"), + }, + } + ) else: - # Transform content for non-list messages or assistant messages - transformed_message = message.copy() - if "content" in transformed_message: - content = transformed_message["content"] - if isinstance(content, (list, tuple)): - transformed_message["content"] = [ - _transform_anthropic_content_block(item) - if isinstance(item, dict) - else item - for item in content - ] - normalized_messages.append(transformed_message) + normalized_messages.append(message) role_normalized_messages = normalize_message_roles(normalized_messages) scope = sentry_sdk.get_current_scope() diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 865013f0b4..3565b5b2de 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -55,16 +55,13 @@ async def __call__(self, *args, **kwargs): from anthropic.types.content_block import ContentBlock as TextBlock from sentry_sdk import start_transaction, start_span -from sentry_sdk._types import BLOB_DATA_SUBSTITUTE from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations.anthropic import ( AnthropicIntegration, _set_output_data, _collect_ai_data, - _transform_anthropic_content_block, _RecordedUsage, ) -from sentry_sdk.ai.utils import transform_content_part, transform_message_content from sentry_sdk.utils import package_version @@ -3333,701 +3330,6 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_items): assert stored_messages[0]["content"] == "Hello" -# Tests for transform_content_part (shared) and _transform_anthropic_content_block helper functions - - -def test_transform_content_part_anthropic_base64_image(): - """Test that base64 encoded images are transformed to blob format.""" - content_block = { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "base64encodeddata...", - }, - } - - result = transform_content_part(content_block) - - assert result == { - "type": "blob", - "modality": "image", - "mime_type": "image/jpeg", - "content": "base64encodeddata...", - } - - -def test_transform_content_part_anthropic_url_image(): - """Test that URL-referenced images are transformed to uri format.""" - content_block = { - "type": "image", - "source": { - "type": "url", - "url": "https://example.com/image.jpg", - }, - } - - result = transform_content_part(content_block) - - assert result == { - "type": "uri", - "modality": "image", - "mime_type": "", - "uri": "https://example.com/image.jpg", - } - - -def test_transform_content_part_anthropic_file_image(): - """Test that file_id-referenced images are transformed to file format.""" - content_block = { - "type": "image", - "source": { - "type": "file", - "file_id": "file_abc123", - }, - } - - result = transform_content_part(content_block) - - assert result == { - "type": "file", - "modality": "image", - "mime_type": "", - "file_id": "file_abc123", - } - - -def test_transform_content_part_anthropic_base64_document(): - """Test that base64 encoded PDFs are transformed to blob format.""" - content_block = { - "type": "document", - "source": { - "type": "base64", - "media_type": "application/pdf", - "data": "base64encodedpdfdata...", - }, - } - - result = transform_content_part(content_block) - - assert result == { - "type": "blob", - "modality": "document", - "mime_type": "application/pdf", - "content": "base64encodedpdfdata...", - } - - -def test_transform_content_part_anthropic_url_document(): - """Test that URL-referenced documents are transformed to uri format.""" - content_block = { - "type": "document", - "source": { - "type": "url", - "url": "https://example.com/document.pdf", - }, - } - - result = transform_content_part(content_block) - - assert result == { - "type": "uri", - "modality": "document", - "mime_type": "", - "uri": "https://example.com/document.pdf", - } - - -def test_transform_content_part_anthropic_file_document(): - """Test that file_id-referenced documents are transformed to file format.""" - content_block = { - "type": "document", - "source": { - "type": "file", - "file_id": "file_doc456", - "media_type": "application/pdf", - }, - } - - result = transform_content_part(content_block) - - assert result == { - "type": "file", - "modality": "document", - "mime_type": "application/pdf", - "file_id": "file_doc456", - } - - -def test_transform_anthropic_content_block_text_document(): - """Test that plain text documents are transformed correctly (Anthropic-specific).""" - content_block = { - "type": "document", - "source": { - "type": "text", - "media_type": "text/plain", - "data": "This is plain text content.", - }, - } - - # Use Anthropic-specific helper for text-type documents - result = _transform_anthropic_content_block(content_block) - - assert result == { - "type": "text", - "text": "This is plain text content.", - } - - -def test_transform_content_part_text_block(): - """Test that regular text blocks return None (not transformed).""" - content_block = { - "type": "text", - "text": "Hello, world!", - } - - # Shared transform_content_part returns None for text blocks - result = transform_content_part(content_block) - - assert result is None - - -def test_transform_message_content_string(): - """Test that string content is returned as-is.""" - result = transform_message_content("Hello, world!") - assert result == "Hello, world!" - - -def test_transform_message_content_list_anthropic(): - """Test that list content with Anthropic format is transformed correctly.""" - content = [ - {"type": "text", "text": "Hello!"}, - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/png", - "data": "base64data...", - }, - }, - ] - - result = transform_message_content(content) - - assert len(result) == 2 - # Text block stays as-is (transform returns None, keeps original) - assert result[0] == {"type": "text", "text": "Hello!"} - assert result[1] == { - "type": "blob", - "modality": "image", - "mime_type": "image/png", - "content": "base64data...", - } - - -# Integration tests for binary data in messages - - -def test_message_with_base64_image(sentry_init, capture_items): - """Test that messages with base64 images are properly captured.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "What's in this image?"}, - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "base64encodeddatahere...", - }, - }, - ], - } - ] - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - - assert len(stored_messages) == 1 - assert stored_messages[0]["role"] == "user" - content = stored_messages[0]["content"] - assert len(content) == 2 - assert content[0] == {"type": "text", "text": "What's in this image?"} - assert content[1] == { - "type": "blob", - "modality": "image", - "mime_type": "image/jpeg", - "content": BLOB_DATA_SUBSTITUTE, - } - - -def test_message_with_url_image(sentry_init, capture_items): - """Test that messages with URL-referenced images are properly captured.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Describe this image."}, - { - "type": "image", - "source": { - "type": "url", - "url": "https://example.com/photo.png", - }, - }, - ], - } - ] - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - content = stored_messages[0]["content"] - assert content[1] == { - "type": "uri", - "modality": "image", - "mime_type": "", - "uri": "https://example.com/photo.png", - } - - -def test_message_with_file_image(sentry_init, capture_items): - """Test that messages with file_id-referenced images are properly captured.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "What do you see?"}, - { - "type": "image", - "source": { - "type": "file", - "file_id": "file_img_12345", - "media_type": "image/webp", - }, - }, - ], - } - ] - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - content = stored_messages[0]["content"] - assert content[1] == { - "type": "file", - "modality": "image", - "mime_type": "image/webp", - "file_id": "file_img_12345", - } - - -def test_message_with_base64_pdf(sentry_init, capture_items): - """Test that messages with base64-encoded PDF documents are properly captured.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Summarize this document."}, - { - "type": "document", - "source": { - "type": "base64", - "media_type": "application/pdf", - "data": "JVBERi0xLjQKJeLj...base64pdfdata", - }, - }, - ], - } - ] - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - content = stored_messages[0]["content"] - assert content[1] == { - "type": "blob", - "modality": "document", - "mime_type": "application/pdf", - "content": BLOB_DATA_SUBSTITUTE, - } - - -def test_message_with_url_pdf(sentry_init, capture_items): - """Test that messages with URL-referenced PDF documents are properly captured.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "What is in this PDF?"}, - { - "type": "document", - "source": { - "type": "url", - "url": "https://example.com/report.pdf", - }, - }, - ], - } - ] - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - content = stored_messages[0]["content"] - assert content[1] == { - "type": "uri", - "modality": "document", - "mime_type": "", - "uri": "https://example.com/report.pdf", - } - - -def test_message_with_file_document(sentry_init, capture_items): - """Test that messages with file_id-referenced documents are properly captured.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Analyze this document."}, - { - "type": "document", - "source": { - "type": "file", - "file_id": "file_doc_67890", - "media_type": "application/pdf", - }, - }, - ], - } - ] - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - content = stored_messages[0]["content"] - assert content[1] == { - "type": "file", - "modality": "document", - "mime_type": "application/pdf", - "file_id": "file_doc_67890", - } - - -def test_message_with_mixed_content(sentry_init, capture_items): - """Test that messages with mixed content (text, images, documents) are properly captured.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "Compare this image with the document."}, - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/png", - "data": "iVBORw0KGgo...base64imagedata", - }, - }, - { - "type": "image", - "source": { - "type": "url", - "url": "https://example.com/comparison.jpg", - }, - }, - { - "type": "document", - "source": { - "type": "base64", - "media_type": "application/pdf", - "data": "JVBERi0xLjQK...base64pdfdata", - }, - }, - {"type": "text", "text": "Please provide a detailed analysis."}, - ], - } - ] - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - content = stored_messages[0]["content"] - - assert len(content) == 5 - assert content[0] == { - "type": "text", - "text": "Compare this image with the document.", - } - assert content[1] == { - "type": "blob", - "modality": "image", - "mime_type": "image/png", - "content": BLOB_DATA_SUBSTITUTE, - } - assert content[2] == { - "type": "uri", - "modality": "image", - "mime_type": "", - "uri": "https://example.com/comparison.jpg", - } - assert content[3] == { - "type": "blob", - "modality": "document", - "mime_type": "application/pdf", - "content": BLOB_DATA_SUBSTITUTE, - } - assert content[4] == { - "type": "text", - "text": "Please provide a detailed analysis.", - } - - -def test_message_with_multiple_images_different_formats(sentry_init, capture_items): - """Test that messages with multiple images of different source types are handled.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - messages = [ - { - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "base64data1...", - }, - }, - { - "type": "image", - "source": { - "type": "url", - "url": "https://example.com/img2.gif", - }, - }, - { - "type": "image", - "source": { - "type": "file", - "file_id": "file_img_789", - "media_type": "image/webp", - }, - }, - {"type": "text", "text": "Compare these three images."}, - ], - } - ] - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - stored_messages = json.loads(span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - content = stored_messages[0]["content"] - - assert len(content) == 4 - assert content[0] == { - "type": "blob", - "modality": "image", - "mime_type": "image/jpeg", - "content": BLOB_DATA_SUBSTITUTE, - } - assert content[1] == { - "type": "uri", - "modality": "image", - "mime_type": "", - "uri": "https://example.com/img2.gif", - } - assert content[2] == { - "type": "file", - "modality": "image", - "mime_type": "image/webp", - "file_id": "file_img_789", - } - assert content[3] == {"type": "text", "text": "Compare these three images."} - - -def test_binary_content_not_stored_when_pii_disabled(sentry_init, capture_items): - """Test that binary content is not stored when send_default_pii is False.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=False, - ) - items = capture_items("transaction", "span") - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "What's in this image?"}, - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "base64encodeddatahere...", - }, - }, - ], - } - ] - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - - -def test_binary_content_not_stored_when_prompts_disabled(sentry_init, capture_items): - """Test that binary content is not stored when include_prompts is False.""" - sentry_init( - integrations=[AnthropicIntegration(include_prompts=False)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - client = Anthropic(api_key="z") - client.messages._post = mock.Mock(return_value=EXAMPLE_MESSAGE) - - messages = [ - { - "role": "user", - "content": [ - {"type": "text", "text": "What's in this image?"}, - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "base64encodeddatahere...", - }, - }, - ], - } - ] - - with start_transaction(name="anthropic"): - client.messages.create(max_tokens=1024, messages=messages, model="model") - - spans = [item.payload for item in items if item.type == "span"] - (span,) = spans - - # Messages should not be stored - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["attributes"] - - def test_cache_tokens_nonstreaming(sentry_init, capture_items): """Test cache read/write tokens are tracked for non-streaming responses.""" sentry_init(