From 47acddb7501eabafe509af501f49d97e5a71bdd2 Mon Sep 17 00:00:00 2001 From: Erica Pisani Date: Tue, 21 Apr 2026 13:59:07 -0400 Subject: [PATCH] feat(huggingface_hub): Migrate to span first Migrates the huggingface_hub integration to the span-first (streaming span) architecture. When `_experiments={"trace_lifecycle": "stream"}` is enabled, the integration now uses `StreamedSpan` via `sentry_sdk.traces.start_span` instead of the legacy `Span`-based (transactions) path. The legacy path remains unchanged for backwards compatibility. To support both span types from shared AI monitoring utilities, `record_token_usage` and `set_data_normalized` now accept `Union[Span, StreamedSpan]` and route attribute writes through a new private `_set_span_data_attribute` helper in `sentry_sdk.ai.utils` (calling `set_attribute` on `StreamedSpan` and `set_data` on `Span`). Part of the broader span-first integration migration. Fixes PY-2332 Co-Authored-By: Claude Opus 4.7 --- sentry_sdk/ai/monitoring.py | 23 +- sentry_sdk/ai/utils.py | 20 +- sentry_sdk/integrations/huggingface_hub.py | 42 +- .../huggingface_hub/test_huggingface_hub.py | 445 ++++++++++++++++++ 4 files changed, 505 insertions(+), 25 deletions(-) diff --git a/sentry_sdk/ai/monitoring.py b/sentry_sdk/ai/monitoring.py index 581e967bd4..7488086c6e 100644 --- a/sentry_sdk/ai/monitoring.py +++ b/sentry_sdk/ai/monitoring.py @@ -2,10 +2,12 @@ import sys from functools import wraps +from sentry_sdk.ai.utils import _set_span_data_attribute from sentry_sdk.consts import SPANDATA import sentry_sdk.utils from sentry_sdk import start_span from sentry_sdk.tracing import Span +from sentry_sdk.traces import StreamedSpan from sentry_sdk.utils import ContextVar, reraise, capture_internal_exceptions from typing import TYPE_CHECKING @@ -97,7 +99,7 @@ async def async_wrapped(*args: "Any", **kwargs: "Any") -> "Any": def record_token_usage( - span: "Span", + span: "Union[Span, StreamedSpan]", input_tokens: "Optional[int]" = None, input_tokens_cached: "Optional[int]" = None, input_tokens_cache_write: "Optional[int]" = None, @@ -108,28 +110,33 @@ def record_token_usage( # TODO: move pipeline name elsewhere ai_pipeline_name = get_ai_pipeline_name() if ai_pipeline_name: - span.set_data(SPANDATA.GEN_AI_PIPELINE_NAME, ai_pipeline_name) + _set_span_data_attribute(span, SPANDATA.GEN_AI_PIPELINE_NAME, ai_pipeline_name) if input_tokens is not None: - span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, input_tokens) + _set_span_data_attribute(span, SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, input_tokens) if input_tokens_cached is not None: - span.set_data( + _set_span_data_attribute( + span, SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED, input_tokens_cached, ) if input_tokens_cache_write is not None: - span.set_data( + _set_span_data_attribute( + span, SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE, input_tokens_cache_write, ) if output_tokens is not None: - span.set_data(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens) + _set_span_data_attribute( + span, SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens + ) if output_tokens_reasoning is not None: - span.set_data( + _set_span_data_attribute( + span, SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING, output_tokens_reasoning, ) @@ -138,4 +145,4 @@ def record_token_usage( total_tokens = input_tokens + output_tokens if total_tokens is not None: - span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, total_tokens) + _set_span_data_attribute(span, SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, total_tokens) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 4103736969..8efa077ce5 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -7,7 +7,7 @@ from sentry_sdk.ai.consts import DATA_URL_BASE64_REGEX if TYPE_CHECKING: - from typing import Any, Callable, Dict, List, Optional, Tuple + from typing import Any, Callable, Dict, List, Optional, Tuple, Union from sentry_sdk.tracing import Span @@ -490,13 +490,25 @@ def _normalize_data(data: "Any", unpack: bool = True) -> "Any": def set_data_normalized( - span: "Span", key: str, value: "Any", unpack: bool = True + span: "Union[Span, StreamedSpan]", + key: str, + value: "Any", + unpack: bool = True, ) -> None: normalized = _normalize_data(value, unpack=unpack) if isinstance(normalized, (int, float, bool, str)): - span.set_data(key, normalized) + _set_span_data_attribute(span, key, normalized) else: - span.set_data(key, json.dumps(normalized)) + _set_span_data_attribute(span, key, json.dumps(normalized)) + + +def _set_span_data_attribute( + span: "Union[Span, StreamedSpan]", key: str, value: "Any" +) -> None: + if isinstance(span, StreamedSpan): + span.set_attribute(key, value) + else: + span.set_data(key, value) def normalize_message_role(role: str) -> str: diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index d628ccf546..a388baa136 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -5,10 +5,12 @@ import sentry_sdk from sentry_sdk.ai.monitoring import record_token_usage -from sentry_sdk.ai.utils import set_data_normalized +from sentry_sdk.ai.utils import _set_span_data_attribute, set_data_normalized from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.traces import StreamedSpan +from sentry_sdk.tracing_utils import has_span_streaming_enabled from sentry_sdk.utils import ( capture_internal_exceptions, event_from_exception, @@ -16,7 +18,9 @@ ) if TYPE_CHECKING: - from typing import Any, Callable, Iterable + from typing import Any, Callable, Iterable, Union + + from sentry_sdk.tracing import Span try: import huggingface_hub.inference._client @@ -83,17 +87,27 @@ def new_huggingface_task(*args: "Any", **kwargs: "Any") -> "Any": model = client.model or kwargs.get("model") or "" operation_name = op.split(".")[-1] - span = sentry_sdk.start_span( - op=op, - name=f"{operation_name} {model}", - origin=HuggingfaceHubIntegration.origin, - ) + span: "Union[Span, StreamedSpan]" + if has_span_streaming_enabled(sentry_sdk.get_client().options): + span = sentry_sdk.traces.start_span( + name=f"{operation_name} {model}", + attributes={ + "sentry.op": op, + "sentry.origin": HuggingfaceHubIntegration.origin, + }, + ) + else: + span = sentry_sdk.start_span( + op=op, + name=f"{operation_name} {model}", + origin=HuggingfaceHubIntegration.origin, + ) span.__enter__() - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, operation_name) + _set_span_data_attribute(span, SPANDATA.GEN_AI_OPERATION_NAME, operation_name) if model: - span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) + _set_span_data_attribute(span, SPANDATA.GEN_AI_REQUEST_MODEL, model) # Input attributes if should_send_default_pii() and integration.include_prompts: @@ -116,7 +130,7 @@ def new_huggingface_task(*args: "Any", **kwargs: "Any") -> "Any": value = kwargs.get(attribute, None) if value is not None: if isinstance(value, (int, float, bool, str)): - span.set_data(span_attribute, value) + _set_span_data_attribute(span, span_attribute, value) else: set_data_normalized(span, span_attribute, value, unpack=False) @@ -177,7 +191,9 @@ def new_huggingface_task(*args: "Any", **kwargs: "Any") -> "Any": response_text_buffer.append(choice.message.content) if response_model is not None: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) + _set_span_data_attribute( + span, SPANDATA.GEN_AI_RESPONSE_MODEL, response_model + ) if finish_reason is not None: set_data_normalized( @@ -328,8 +344,8 @@ def new_iterator() -> "Iterable[str]": yield chunk if response_model is not None: - span.set_data( - SPANDATA.GEN_AI_RESPONSE_MODEL, response_model + _set_span_data_attribute( + span, SPANDATA.GEN_AI_RESPONSE_MODEL, response_model ) if finish_reason is not None: diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 9dd15ca4b5..9acb6087e4 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -1015,3 +1015,448 @@ def test_chat_completion_streaming_with_tools( assert "gen_ai.response.tool_calls" not in expected_data assert span["data"] == expected_data + + +def _get_gen_ai_span(items): + return next( + item.payload + for item in items + if item.payload.get("attributes", {}).get("sentry.op", "").startswith("gen_ai") + ) + + +@pytest.mark.httpx_mock(assert_all_requests_were_expected=False) +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_text_generation_span_streaming( + sentry_init: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_text_generation_api: "Any", +) -> None: + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"trace_lifecycle": "stream"}, + ) + items = capture_items("span") + + client = InferenceClient(model="test-model") + + client.text_generation( + "Hello", + stream=False, + details=True, + ) + + sentry_sdk.flush() + + span = _get_gen_ai_span(items) + + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_attributes = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": False, + "gen_ai.usage.total_tokens": 10, + } + + if send_default_pii and include_prompts: + expected_attributes["gen_ai.request.messages"] = "Hello" + expected_attributes["gen_ai.response.text"] = ( + "[mocked] Hello! How can i help you?" + ) + + for key, value in expected_attributes.items(): + assert span["attributes"][key] == value, f"Mismatch on {key}" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["attributes"] + + +@pytest.mark.httpx_mock(assert_all_requests_were_expected=False) +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_text_generation_streaming_span_streaming( + sentry_init: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_text_generation_api_streaming: "Any", +) -> None: + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"trace_lifecycle": "stream"}, + ) + items = capture_items("span") + + client = InferenceClient(model="test-model") + + for _ in client.text_generation( + prompt="Hello", + stream=True, + details=True, + ): + pass + + sentry_sdk.flush() + + span = _get_gen_ai_span(items) + + assert span["name"] == "text_completion test-model" + assert span["attributes"]["sentry.op"] == "gen_ai.text_completion" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_attributes = { + "gen_ai.operation.name": "text_completion", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + } + + if send_default_pii and include_prompts: + expected_attributes["gen_ai.request.messages"] = "Hello" + expected_attributes["gen_ai.response.text"] = "the mocked model response" + + for key, value in expected_attributes.items(): + assert span["attributes"][key] == value, f"Mismatch on {key}" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + + assert "gen_ai.response.model" not in span["attributes"] + + +@pytest.mark.httpx_mock(assert_all_requests_were_expected=False) +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_chat_completion_span_streaming( + sentry_init: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api: "Any", +) -> None: + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"trace_lifecycle": "stream"}, + ) + items = capture_items("span") + + client = get_hf_provider_inference_client() + + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + stream=False, + ) + + sentry_sdk.flush() + + span = _get_gen_ai_span(items) + + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_attributes = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + } + + if send_default_pii and include_prompts: + expected_attributes["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_attributes["gen_ai.response.text"] = ( + "[mocked] Hello! How can I help you today?" + ) + + for key, value in expected_attributes.items(): + assert span["attributes"][key] == value, f"Mismatch on {key}" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + + +@pytest.mark.httpx_mock(assert_all_requests_were_expected=False) +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_chat_completion_streaming_span_streaming( + sentry_init: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api_streaming: "Any", +) -> None: + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"trace_lifecycle": "stream"}, + ) + items = capture_items("span") + + client = get_hf_provider_inference_client() + + _ = list( + client.chat_completion( + [{"role": "user", "content": "Hello!"}], + stream=True, + ) + ) + + sentry_sdk.flush() + + span = _get_gen_ai_span(items) + + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_attributes = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": True, + } + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_attributes["gen_ai.usage.input_tokens"] = 183 + expected_attributes["gen_ai.usage.output_tokens"] = 14 + expected_attributes["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_attributes["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_attributes["gen_ai.response.text"] = "the mocked model response" + + for key, value in expected_attributes.items(): + assert span["attributes"][key] == value, f"Mismatch on {key}" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + + +@pytest.mark.httpx_mock(assert_all_requests_were_expected=False) +def test_chat_completion_api_error_span_streaming( + sentry_init: "Any", capture_items: "Any", mock_hf_api_with_errors: "Any" +) -> None: + sentry_init( + traces_sample_rate=1.0, + _experiments={"trace_lifecycle": "stream"}, + ) + items = capture_items("event", "span") + + client = get_hf_provider_inference_client() + + with pytest.raises(HfHubHTTPError): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + ) + + sentry_sdk.flush() + + error_event = next(item.payload for item in items if item.type == "event") + span = _get_gen_ai_span(items) + + assert error_event["exception"]["values"][0]["mechanism"]["type"] == ( + "huggingface_hub" + ) + assert not error_event["exception"]["values"][0]["mechanism"]["handled"] + assert error_event["level"] == "error" + + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + assert span["status"] == "error" + + assert error_event["contexts"]["trace"]["trace_id"] == span["trace_id"] + + assert span["attributes"]["gen_ai.operation.name"] == "chat" + assert span["attributes"]["gen_ai.request.model"] == "test-model" + + +@pytest.mark.httpx_mock(assert_all_requests_were_expected=False) +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_chat_completion_with_tools_span_streaming( + sentry_init: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api_tools: "Any", +) -> None: + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"trace_lifecycle": "stream"}, + ) + items = capture_items("span") + + client = get_hf_provider_inference_client() + + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, + } + ] + + client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + tools=tools, + tool_choice="auto", + ) + + sentry_sdk.flush() + + span = _get_gen_ai_span(items) + + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_attributes = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + } + + if send_default_pii and include_prompts: + expected_attributes["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_attributes["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' + ) + + for key, value in expected_attributes.items(): + assert span["attributes"][key] == value, f"Mismatch on {key}" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + assert "gen_ai.response.tool_calls" not in span["attributes"] + + +@pytest.mark.httpx_mock(assert_all_requests_were_expected=False) +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_chat_completion_streaming_with_tools_span_streaming( + sentry_init: "Any", + capture_items: "Any", + send_default_pii: "Any", + include_prompts: "Any", + mock_hf_chat_completion_api_streaming_tools: "Any", +) -> None: + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + _experiments={"trace_lifecycle": "stream"}, + ) + items = capture_items("span") + + client = get_hf_provider_inference_client() + + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, + } + ] + + _ = list( + client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + stream=True, + tools=tools, + tool_choice="auto", + ) + ) + + sentry_sdk.flush() + + span = _get_gen_ai_span(items) + + assert span["name"] == "chat test-model" + assert span["attributes"]["sentry.op"] == "gen_ai.chat" + assert span["attributes"]["sentry.origin"] == "auto.ai.huggingface_hub" + + expected_attributes = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": True, + } + + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_attributes["gen_ai.usage.input_tokens"] = 183 + expected_attributes["gen_ai.usage.output_tokens"] = 14 + expected_attributes["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_attributes["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_attributes["gen_ai.response.text"] = "response with tool calls follows" + expected_attributes["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather"}, "id": "call_123", "type": "function", "index": "None"}]' + ) + + for key, value in expected_attributes.items(): + assert span["attributes"][key] == value, f"Mismatch on {key}" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in span["attributes"] + assert "gen_ai.response.text" not in span["attributes"] + assert "gen_ai.response.tool_calls" not in span["attributes"]