diff --git a/scripts/populate_tox/config.py b/scripts/populate_tox/config.py index 679ffddf2c..bc20d531b3 100644 --- a/scripts/populate_tox/config.py +++ b/scripts/populate_tox/config.py @@ -155,6 +155,9 @@ }, "huggingface_hub": { "package": "huggingface_hub", + "deps": { + "*": ["responses"], + }, }, "langchain-base": { "package": "langchain", diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 4f015643d4..cc3c9b1612 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -795,6 +795,7 @@ class OP: GEN_AI_CREATE_AGENT = "gen_ai.create_agent" GEN_AI_EMBEDDINGS = "gen_ai.embeddings" GEN_AI_EXECUTE_TOOL = "gen_ai.execute_tool" + GEN_AI_GENERATE_TEXT = "gen_ai.generate_text" GEN_AI_HANDOFF = "gen_ai.handoff" GEN_AI_PIPELINE = "gen_ai.pipeline" GEN_AI_INVOKE_AGENT = "gen_ai.invoke_agent" diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index 7f202221a7..2f5a1f397e 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -141,7 +141,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "gql": (3, 4, 1), "graphene": (3, 3), "grpc": (1, 32, 0), # grpcio - "huggingface_hub": (0, 22), + "huggingface_hub": (0, 24, 7), "langchain": (0, 1, 0), "langgraph": (0, 6, 6), "launchdarkly": (9, 8, 0), diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 2dfcb5925a..cb76ccf507 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -1,24 +1,24 @@ +import inspect from functools import wraps -from sentry_sdk import consts +import sentry_sdk from sentry_sdk.ai.monitoring import record_token_usage from sentry_sdk.ai.utils import set_data_normalized -from sentry_sdk.consts import SPANDATA - -from typing import Any, Iterable, Callable - -import sentry_sdk -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration +from sentry_sdk.scope import should_send_default_pii from sentry_sdk.utils import ( capture_internal_exceptions, event_from_exception, ) +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Callable, Iterable + try: import huggingface_hub.inference._client - - from huggingface_hub import ChatCompletionStreamOutput, TextGenerationOutput except ImportError: raise DidNotEnable("Huggingface not installed") @@ -34,9 +34,18 @@ def __init__(self, include_prompts=True): @staticmethod def setup_once(): # type: () -> None + + # Other tasks that can be called: https://huggingface.co/docs/huggingface_hub/guides/inference#supported-providers-and-tasks huggingface_hub.inference._client.InferenceClient.text_generation = ( - _wrap_text_generation( - huggingface_hub.inference._client.InferenceClient.text_generation + _wrap_huggingface_task( + huggingface_hub.inference._client.InferenceClient.text_generation, + OP.GEN_AI_GENERATE_TEXT, + ) + ) + huggingface_hub.inference._client.InferenceClient.chat_completion = ( + _wrap_huggingface_task( + huggingface_hub.inference._client.InferenceClient.chat_completion, + OP.GEN_AI_CHAT, ) ) @@ -51,131 +60,318 @@ def _capture_exception(exc): sentry_sdk.capture_event(event, hint=hint) -def _wrap_text_generation(f): - # type: (Callable[..., Any]) -> Callable[..., Any] +def _wrap_huggingface_task(f, op): + # type: (Callable[..., Any], str) -> Callable[..., Any] @wraps(f) - def new_text_generation(*args, **kwargs): + def new_huggingface_task(*args, **kwargs): # type: (*Any, **Any) -> Any integration = sentry_sdk.get_client().get_integration(HuggingfaceHubIntegration) if integration is None: return f(*args, **kwargs) + prompt = None if "prompt" in kwargs: prompt = kwargs["prompt"] + elif "messages" in kwargs: + prompt = kwargs["messages"] elif len(args) >= 2: - kwargs["prompt"] = args[1] - prompt = kwargs["prompt"] - args = (args[0],) + args[2:] - else: - # invalid call, let it return error + if isinstance(args[1], str) or isinstance(args[1], list): + prompt = args[1] + + if prompt is None: + # invalid call, dont instrument, let it return error return f(*args, **kwargs) - model = kwargs.get("model") - streaming = kwargs.get("stream") + client = args[0] + model = client.model or kwargs.get("model") or "" + operation_name = op.split(".")[-1] span = sentry_sdk.start_span( - op=consts.OP.HUGGINGFACE_HUB_CHAT_COMPLETIONS_CREATE, - name="Text Generation", + op=op, + name=f"{operation_name} {model}", origin=HuggingfaceHubIntegration.origin, ) span.__enter__() + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, operation_name) + + if model: + span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) + + # Input attributes + if should_send_default_pii() and integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False + ) + + attribute_mapping = { + "tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, + "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, + "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, + "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, + "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE, + "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P, + "top_k": SPANDATA.GEN_AI_REQUEST_TOP_K, + "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING, + } + + for attribute, span_attribute in attribute_mapping.items(): + value = kwargs.get(attribute, None) + if value is not None: + if isinstance(value, (int, float, bool, str)): + span.set_data(span_attribute, value) + else: + set_data_normalized(span, span_attribute, value, unpack=False) + + # LLM Execution try: res = f(*args, **kwargs) except Exception as e: + # Error Handling + span.set_status("error") _capture_exception(e) span.__exit__(None, None, None) raise e from None + # Output attributes + finish_reason = None + response_model = None + response_text_buffer: list[str] = [] + tokens_used = 0 + tool_calls = None + usage = None + with capture_internal_exceptions(): - if should_send_default_pii() and integration.include_prompts: - set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompt) + if isinstance(res, str) and res is not None: + response_text_buffer.append(res) - set_data_normalized(span, SPANDATA.AI_MODEL_ID, model) - set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) + if hasattr(res, "generated_text") and res.generated_text is not None: + response_text_buffer.append(res.generated_text) - if isinstance(res, str): - if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, - SPANDATA.AI_RESPONSES, - [res], - ) - span.__exit__(None, None, None) - return res + if hasattr(res, "model") and res.model is not None: + response_model = res.model + + if hasattr(res, "details") and hasattr(res.details, "finish_reason"): + finish_reason = res.details.finish_reason + + if ( + hasattr(res, "details") + and hasattr(res.details, "generated_tokens") + and res.details.generated_tokens is not None + ): + tokens_used = res.details.generated_tokens + + if hasattr(res, "usage") and res.usage is not None: + usage = res.usage + + if hasattr(res, "choices") and res.choices is not None: + for choice in res.choices: + if hasattr(choice, "finish_reason"): + finish_reason = choice.finish_reason + if hasattr(choice, "message") and hasattr( + choice.message, "tool_calls" + ): + tool_calls = choice.message.tool_calls + if ( + hasattr(choice, "message") + and hasattr(choice.message, "content") + and choice.message.content is not None + ): + response_text_buffer.append(choice.message.content) - if isinstance(res, TextGenerationOutput): - if should_send_default_pii() and integration.include_prompts: + if response_model is not None: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) + + if finish_reason is not None: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + finish_reason, + ) + + if should_send_default_pii() and integration.include_prompts: + if tool_calls is not None and len(tool_calls) > 0: set_data_normalized( span, - SPANDATA.AI_RESPONSES, - [res.generated_text], - ) - if res.details is not None and res.details.generated_tokens > 0: - record_token_usage( - span, - total_tokens=res.details.generated_tokens, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + tool_calls, + unpack=False, ) - span.__exit__(None, None, None) - return res - if not isinstance(res, Iterable): - # we only know how to deal with strings and iterables, ignore - set_data_normalized(span, "unknown_response", True) + if len(response_text_buffer) > 0: + text_response = "".join(response_text_buffer) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) + + if usage is not None: + record_token_usage( + span, + input_tokens=usage.prompt_tokens, + output_tokens=usage.completion_tokens, + total_tokens=usage.total_tokens, + ) + elif tokens_used > 0: + record_token_usage( + span, + total_tokens=tokens_used, + ) + + # If the response is not a generator (meaning a streaming response) + # we are done and can return the response + if not inspect.isgenerator(res): span.__exit__(None, None, None) return res if kwargs.get("details", False): - # res is Iterable[TextGenerationStreamOutput] + # text-generation stream output def new_details_iterator(): - # type: () -> Iterable[ChatCompletionStreamOutput] + # type: () -> Iterable[Any] + finish_reason = None + response_text_buffer: list[str] = [] + tokens_used = 0 + with capture_internal_exceptions(): - tokens_used = 0 - data_buf: list[str] = [] - for x in res: - if hasattr(x, "token") and hasattr(x.token, "text"): - data_buf.append(x.token.text) - if hasattr(x, "details") and hasattr( - x.details, "generated_tokens" + for chunk in res: + if ( + hasattr(chunk, "token") + and hasattr(chunk.token, "text") + and chunk.token.text is not None + ): + response_text_buffer.append(chunk.token.text) + + if hasattr(chunk, "details") and hasattr( + chunk.details, "finish_reason" + ): + finish_reason = chunk.details.finish_reason + + if ( + hasattr(chunk, "details") + and hasattr(chunk.details, "generated_tokens") + and chunk.details.generated_tokens is not None ): - tokens_used = x.details.generated_tokens - yield x - if ( - len(data_buf) > 0 - and should_send_default_pii() - and integration.include_prompts - ): + tokens_used = chunk.details.generated_tokens + + yield chunk + + if finish_reason is not None: set_data_normalized( - span, SPANDATA.AI_RESPONSES, "".join(data_buf) + span, + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + finish_reason, ) + + if should_send_default_pii() and integration.include_prompts: + if len(response_text_buffer) > 0: + text_response = "".join(response_text_buffer) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) + if tokens_used > 0: record_token_usage( span, total_tokens=tokens_used, ) + span.__exit__(None, None, None) return new_details_iterator() - else: - # res is Iterable[str] + else: + # chat-completion stream output def new_iterator(): # type: () -> Iterable[str] - data_buf: list[str] = [] + finish_reason = None + response_model = None + response_text_buffer: list[str] = [] + tool_calls = None + usage = None + with capture_internal_exceptions(): - for s in res: - if isinstance(s, str): - data_buf.append(s) - yield s - if ( - len(data_buf) > 0 - and should_send_default_pii() - and integration.include_prompts - ): + for chunk in res: + if hasattr(chunk, "model") and chunk.model is not None: + response_model = chunk.model + + if hasattr(chunk, "usage") and chunk.usage is not None: + usage = chunk.usage + + if isinstance(chunk, str): + if chunk is not None: + response_text_buffer.append(chunk) + + if hasattr(chunk, "choices") and chunk.choices is not None: + for choice in chunk.choices: + if ( + hasattr(choice, "delta") + and hasattr(choice.delta, "content") + and choice.delta.content is not None + ): + response_text_buffer.append( + choice.delta.content + ) + + if ( + hasattr(choice, "finish_reason") + and choice.finish_reason is not None + ): + finish_reason = choice.finish_reason + + if ( + hasattr(choice, "delta") + and hasattr(choice.delta, "tool_calls") + and choice.delta.tool_calls is not None + ): + tool_calls = choice.delta.tool_calls + + yield chunk + + if response_model is not None: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_MODEL, response_model + ) + + if finish_reason is not None: set_data_normalized( - span, SPANDATA.AI_RESPONSES, "".join(data_buf) + span, + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + finish_reason, ) + + if should_send_default_pii() and integration.include_prompts: + if tool_calls is not None and len(tool_calls) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + tool_calls, + unpack=False, + ) + + if len(response_text_buffer) > 0: + text_response = "".join(response_text_buffer) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) + + if usage is not None: + record_token_usage( + span, + input_tokens=usage.prompt_tokens, + output_tokens=usage.completion_tokens, + total_tokens=usage.total_tokens, + ) + span.__exit__(None, None, None) return new_iterator() - return new_text_generation + return new_huggingface_task diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index df0c6c6d76..86f9c10109 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -1,186 +1,815 @@ -import itertools from unittest import mock - import pytest -from huggingface_hub import ( - InferenceClient, -) -from huggingface_hub.errors import OverloadedError +import responses + +from huggingface_hub import InferenceClient -from sentry_sdk import start_transaction -from sentry_sdk.consts import SPANDATA +import sentry_sdk +from sentry_sdk.utils import package_version from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration +from typing import TYPE_CHECKING -def mock_client_post(client, post_mock): - # huggingface-hub==0.28.0 deprecates the `post` method - # so patch `_inner_post` instead - if hasattr(client, "post"): - client.post = post_mock - if hasattr(client, "_inner_post"): - client._inner_post = post_mock +try: + from huggingface_hub.utils._errors import HfHubHTTPError +except ImportError: + from huggingface_hub.errors import HfHubHTTPError -@pytest.mark.parametrize( - "send_default_pii, include_prompts, details_arg", - itertools.product([True, False], repeat=3), -) -def test_nonstreaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, details_arg -): - sentry_init( - integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - traces_sample_rate=1.0, - send_default_pii=send_default_pii, +if TYPE_CHECKING: + from typing import Any + + +HF_VERSION = package_version("huggingface-hub") + +if HF_VERSION and HF_VERSION < (0, 30, 0): + MODEL_ENDPOINT = "https://api-inference.huggingface.co/models/{model_name}" + INFERENCE_ENDPOINT = "https://api-inference.huggingface.co/models/{model_name}" +else: + MODEL_ENDPOINT = "https://huggingface.co/api/models/{model_name}" + INFERENCE_ENDPOINT = ( + "https://router.huggingface.co/hf-inference/models/{model_name}" ) - events = capture_events() - client = InferenceClient(model="https://") - if details_arg: - post_mock = mock.Mock( - return_value=b"""[{ - "generated_text": "the model response", +@pytest.fixture +def mock_hf_text_generation_api(): + # type: () -> Any + """Mock HuggingFace text generation API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "text-generation", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "text-generation", + } + }, + }, + status=200, + ) + + # Mock text generation endpoint + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name), + json={ + "generated_text": "[mocked] Hello! How can i help you?", "details": { "finish_reason": "length", "generated_tokens": 10, "prefill": [], - "tokens": [] - } - }]""" + "tokens": [], + }, + }, + status=200, + ) + + yield rsps + + +@pytest.fixture +def mock_hf_api_with_errors(): + # type: () -> Any + """Mock HuggingFace API that always raises errors for any request""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint with error + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={"error": "Model not found"}, + status=404, + ) + + # Mock text generation endpoint with error + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name), + json={"error": "Internal server error", "message": "Something went wrong"}, + status=500, + ) + + # Mock chat completion endpoint with error + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + json={"error": "Internal server error", "message": "Something went wrong"}, + status=500, + ) + + # Catch-all pattern for any other model requests + rsps.add( + responses.GET, + "https://huggingface.co/api/models/test-model-error", + json={"error": "Generic model error"}, + status=500, + ) + + yield rsps + + +@pytest.fixture +def mock_hf_text_generation_api_streaming(): + # type: () -> Any + """Mock streaming HuggingFace text generation API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "text-generation", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "text-generation", + } + }, + }, + status=200, + ) + + # Mock text generation endpoint for streaming + streaming_response = b'data:{"token":{"id":1, "special": false, "text": "the mocked "}}\n\ndata:{"token":{"id":2, "special": false, "text": "model response"}, "details":{"finish_reason": "length", "generated_tokens": 10, "seed": 0}}\n\n' + + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name), + body=streaming_response, + status=200, + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + + yield rsps + + +@pytest.fixture +def mock_hf_chat_completion_api(): + # type: () -> Any + """Mock HuggingFace chat completion API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, + ) + + # Mock chat completion endpoint + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + json={ + "id": "xyz-123", + "created": 1234567890, + "model": f"{model_name}-123", + "system_fingerprint": "fp_123", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "[mocked] Hello! How can I help you today?", + }, + } + ], + "usage": { + "completion_tokens": 8, + "prompt_tokens": 10, + "total_tokens": 18, + }, + }, + status=200, + ) + + yield rsps + + +@pytest.fixture +def mock_hf_chat_completion_api_tools(): + # type: () -> Any + """Mock HuggingFace chat completion API with tool calls.""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, + ) + + # Mock chat completion endpoint + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + json={ + "id": "xyz-123", + "created": 1234567890, + "model": f"{model_name}-123", + "system_fingerprint": "fp_123", + "choices": [ + { + "index": 0, + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "tool_calls": [ + { + "id": "call_123", + "type": "function", + "function": { + "name": "get_weather", + "arguments": {"location": "Paris"}, + }, + } + ], + }, + } + ], + "usage": { + "completion_tokens": 8, + "prompt_tokens": 10, + "total_tokens": 18, + }, + }, + status=200, + ) + + yield rsps + + +@pytest.fixture +def mock_hf_chat_completion_api_streaming(): + # type: () -> Any + """Mock streaming HuggingFace chat completion API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, + ) + + # Mock chat completion streaming endpoint + streaming_chat_response = ( + b'data:{"id":"xyz-123","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"the mocked "},"index":0,"finish_reason":null}],"usage":null}\n\n' + b'data:{"id":"xyz-124","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"model response"},"index":0,"finish_reason":"stop"}],"usage":{"prompt_tokens":183,"completion_tokens":14,"total_tokens":197}}\n\n' + ) + + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + body=streaming_chat_response, + status=200, + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + + yield rsps + + +@pytest.fixture +def mock_hf_chat_completion_api_streaming_tools(): + # type: () -> Any + """Mock streaming HuggingFace chat completion API with tool calls.""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, + ) + + # Mock chat completion streaming endpoint + streaming_chat_response = ( + b'data:{"id":"xyz-123","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"response with tool calls follows"},"index":0,"finish_reason":null}],"usage":null}\n\n' + b'data:{"id":"xyz-124","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","tool_calls": [{"id": "call_123","type": "function","function": {"name": "get_weather", "arguments": {"location": "Paris"}}}]},"index":0,"finish_reason":"tool_calls"}],"usage":{"prompt_tokens":183,"completion_tokens":14,"total_tokens":197}}\n\n' ) - else: - post_mock = mock.Mock( - return_value=b'[{"generated_text": "the model response"}]' + + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + body=streaming_chat_response, + status=200, + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, ) - mock_client_post(client, post_mock) - with start_transaction(name="huggingface_hub tx"): - response = client.text_generation( - prompt="hello", - details=details_arg, + yield rsps + + +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_text_generation( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_text_generation_api, +): + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) + events = capture_events() + + client = InferenceClient(model="test-model") + + with sentry_sdk.start_transaction(name="test"): + client.text_generation( + "Hello", stream=False, + details=True, ) - if details_arg: - assert response.generated_text == "the model response" - else: - assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "ai.chat_completions.create.huggingface_hub" + + (transaction,) = events + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.generate_text" + assert span["description"] == "generate_text test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "generate_text", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": False, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES] - assert "the model response" in span["data"][SPANDATA.AI_RESPONSES] - else: - assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] - assert SPANDATA.AI_RESPONSES not in span["data"] - - if details_arg: - assert span["data"]["gen_ai.usage.total_tokens"] == 10 - - -@pytest.mark.parametrize( - "send_default_pii, include_prompts, details_arg", - itertools.product([True, False], repeat=3), -) -def test_streaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, details_arg + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = "[mocked] Hello! How can i help you?" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["data"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["data"] + + +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_text_generation_streaming( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_text_generation_api_streaming, ): + # type: (Any, Any, Any, Any, Any) -> None sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) + events = capture_events() + + client = InferenceClient(model="test-model") + + with sentry_sdk.start_transaction(name="test"): + for _ in client.text_generation( + prompt="Hello", + stream=True, + details=True, + ): + pass + + (transaction,) = events + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.generate_text" + assert span["description"] == "generate_text test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "generate_text", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = "the mocked model response" + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["data"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["data"] + + +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_chat_completion( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api, +): + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( traces_sample_rate=1.0, send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) events = capture_events() - client = InferenceClient(model="https://") - - post_mock = mock.Mock( - return_value=[ - b"""data:{ - "token":{"id":1, "special": false, "text": "the model "} - }""", - b"""data:{ - "token":{"id":2, "special": false, "text": "response"}, - "details":{"finish_reason": "length", "generated_tokens": 10, "seed": 0} - }""", - ] + client = InferenceClient(model="test-model") + + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + stream=False, + ) + + (transaction,) = events + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_data["gen_ai.response.text"] = ( + "[mocked] Hello! How can I help you today?" + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["data"] == expected_data + + +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_chat_completion_streaming( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_streaming, +): + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - mock_client_post(client, post_mock) + events = capture_events() + + client = InferenceClient(model="test-model") - with start_transaction(name="huggingface_hub tx"): - response = list( - client.text_generation( - prompt="hello", - details=details_arg, + with sentry_sdk.start_transaction(name="test"): + _ = list( + client.chat_completion( + [{"role": "user", "content": "Hello!"}], stream=True, ) ) - assert len(response) == 2 - if details_arg: - assert response[0].token.text + response[1].token.text == "the model response" - else: - assert response[0] + response[1] == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "ai.chat_completions.create.huggingface_hub" + (transaction,) = events + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "stop", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": True, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + # usage is not available in older versions of the library + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES] - assert "the model response" in span["data"][SPANDATA.AI_RESPONSES] - else: - assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] - assert SPANDATA.AI_RESPONSES not in span["data"] + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "Hello!"}]' + ) + expected_data["gen_ai.response.text"] = "the mocked model response" - if details_arg: - assert span["data"]["gen_ai.usage.total_tokens"] == 10 + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert span["data"] == expected_data -def test_bad_chat_completion(sentry_init, capture_events): - sentry_init(integrations=[HuggingfaceHubIntegration()], traces_sample_rate=1.0) + +def test_chat_completion_api_error( + sentry_init, capture_events, mock_hf_api_with_errors +): + # type: (Any, Any, Any) -> None + sentry_init(traces_sample_rate=1.0) events = capture_events() - client = InferenceClient(model="https://") - post_mock = mock.Mock(side_effect=OverloadedError("The server is overloaded")) - mock_client_post(client, post_mock) + client = InferenceClient(model="test-model") + + with sentry_sdk.start_transaction(name="test"): + with pytest.raises(HfHubHTTPError): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + ) + + ( + error, + transaction, + ) = events - with pytest.raises(OverloadedError): - client.text_generation(prompt="hello") + assert error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" + assert not error["exception"]["values"][0]["mechanism"]["handled"] - (event,) = events - assert event["level"] == "error" + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + assert span.get("tags", {}).get("status") == "error" -def test_span_origin(sentry_init, capture_events): + assert ( + error["contexts"]["trace"]["trace_id"] + == transaction["contexts"]["trace"]["trace_id"] + ) + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert span["data"] == expected_data + + +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_chat_completion_with_tools( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_tools, +): + # type: (Any, Any, Any, Any, Any) -> None sentry_init( - integrations=[HuggingfaceHubIntegration()], traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) events = capture_events() - client = InferenceClient(model="https://") - post_mock = mock.Mock( - return_value=[ - b"""data:{ - "token":{"id":1, "special": false, "text": "the model "} - }""", - ] + client = InferenceClient(model="test-model") + + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, + } + ] + + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + tools=tools, + tool_choice="auto", + ) + + (transaction,) = events + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data + + assert span["data"] == expected_data + + +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_chat_completion_streaming_with_tools( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_streaming_tools, +): + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], ) - mock_client_post(client, post_mock) + events = capture_events() - with start_transaction(name="huggingface_hub tx"): - list( - client.text_generation( - prompt="hello", + client = InferenceClient(model="test-model") + + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, + } + ] + + with sentry_sdk.start_transaction(name="test"): + _ = list( + client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], stream=True, + tools=tools, + tool_choice="auto", ) ) - (event,) = events + (transaction,) = events + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": True, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 + + if send_default_pii and include_prompts: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + expected_data["gen_ai.response.text"] = "response with tool calls follows" + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather"}, "id": "call_123", "type": "function", "index": "None"}]' + ) + + if not send_default_pii or not include_prompts: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data - assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.huggingface_hub" + assert span["data"] == expected_data diff --git a/tox.ini b/tox.ini index ff2403f515..1bc9757b9a 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ # The file (and all resulting CI YAMLs) then need to be regenerated via # "scripts/generate-test-files.sh". # -# Last generated: 2025-09-08T11:35:09.849536+00:00 +# Last generated: 2025-09-09T08:24:12.875177+00:00 [tox] requires = @@ -116,12 +116,12 @@ envlist = {py3.8,py3.11,py3.12}-openai-base-v1.0.1 {py3.8,py3.11,py3.12}-openai-base-v1.36.1 {py3.8,py3.11,py3.12}-openai-base-v1.71.0 - {py3.8,py3.12,py3.13}-openai-base-v1.106.1 + {py3.8,py3.12,py3.13}-openai-base-v1.107.0 {py3.8,py3.11,py3.12}-openai-notiktoken-v1.0.1 {py3.8,py3.11,py3.12}-openai-notiktoken-v1.36.1 {py3.8,py3.11,py3.12}-openai-notiktoken-v1.71.0 - {py3.8,py3.12,py3.13}-openai-notiktoken-v1.106.1 + {py3.8,py3.12,py3.13}-openai-notiktoken-v1.107.0 {py3.9,py3.12,py3.13}-langgraph-v0.6.7 {py3.10,py3.12,py3.13}-langgraph-v1.0.0a3 @@ -130,8 +130,8 @@ envlist = {py3.10,py3.12,py3.13}-openai_agents-v0.1.0 {py3.10,py3.12,py3.13}-openai_agents-v0.2.11 - {py3.8,py3.10,py3.11}-huggingface_hub-v0.22.2 - {py3.8,py3.11,py3.12}-huggingface_hub-v0.26.5 + {py3.8,py3.10,py3.11}-huggingface_hub-v0.24.7 + {py3.8,py3.12,py3.13}-huggingface_hub-v0.27.1 {py3.8,py3.12,py3.13}-huggingface_hub-v0.30.2 {py3.8,py3.12,py3.13}-huggingface_hub-v0.34.4 {py3.8,py3.12,py3.13}-huggingface_hub-v0.35.0rc0 @@ -141,7 +141,7 @@ envlist = {py3.6,py3.7}-boto3-v1.12.49 {py3.6,py3.9,py3.10}-boto3-v1.20.54 {py3.7,py3.11,py3.12}-boto3-v1.28.85 - {py3.9,py3.12,py3.13}-boto3-v1.40.25 + {py3.9,py3.12,py3.13}-boto3-v1.40.26 {py3.6,py3.7,py3.8}-chalice-v1.16.0 {py3.6,py3.7,py3.8}-chalice-v1.21.9 @@ -487,7 +487,7 @@ deps = openai-base-v1.0.1: openai==1.0.1 openai-base-v1.36.1: openai==1.36.1 openai-base-v1.71.0: openai==1.71.0 - openai-base-v1.106.1: openai==1.106.1 + openai-base-v1.107.0: openai==1.107.0 openai-base: pytest-asyncio openai-base: tiktoken openai-base-v1.0.1: httpx<0.28 @@ -496,7 +496,7 @@ deps = openai-notiktoken-v1.0.1: openai==1.0.1 openai-notiktoken-v1.36.1: openai==1.36.1 openai-notiktoken-v1.71.0: openai==1.71.0 - openai-notiktoken-v1.106.1: openai==1.106.1 + openai-notiktoken-v1.107.0: openai==1.107.0 openai-notiktoken: pytest-asyncio openai-notiktoken-v1.0.1: httpx<0.28 openai-notiktoken-v1.36.1: httpx<0.28 @@ -509,18 +509,19 @@ deps = openai_agents-v0.2.11: openai-agents==0.2.11 openai_agents: pytest-asyncio - huggingface_hub-v0.22.2: huggingface_hub==0.22.2 - huggingface_hub-v0.26.5: huggingface_hub==0.26.5 + huggingface_hub-v0.24.7: huggingface_hub==0.24.7 + huggingface_hub-v0.27.1: huggingface_hub==0.27.1 huggingface_hub-v0.30.2: huggingface_hub==0.30.2 huggingface_hub-v0.34.4: huggingface_hub==0.34.4 huggingface_hub-v0.35.0rc0: huggingface_hub==0.35.0rc0 + huggingface_hub: responses # ~~~ Cloud ~~~ boto3-v1.12.49: boto3==1.12.49 boto3-v1.20.54: boto3==1.20.54 boto3-v1.28.85: boto3==1.28.85 - boto3-v1.40.25: boto3==1.40.25 + boto3-v1.40.26: boto3==1.40.26 {py3.7,py3.8}-boto3: urllib3<2.0.0 chalice-v1.16.0: chalice==1.16.0