diff --git a/tests/conftest.py b/tests/conftest.py index 815ba02d2b..5dd62931f1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -48,6 +48,12 @@ from sentry_sdk.transport import Transport from sentry_sdk.utils import reraise +try: + import openai +except ImportError: + openai = None + + from tests import _warning_recorder, _warning_recorder_mgr from typing import TYPE_CHECKING @@ -1033,10 +1039,11 @@ def inner(events, include_event_type=True): @pytest.fixture def get_model_response(): - def inner(response_content, serialize_pydantic=False): + def inner(response_content, serialize_pydantic=False, request_headers={}): model_request = HttpxRequest( "POST", "/responses", + headers=request_headers, ) if serialize_pydantic: @@ -1053,6 +1060,45 @@ def inner(response_content, serialize_pydantic=False): return inner +@pytest.fixture +def nonstreaming_responses_model_response(): + return openai.types.responses.Response( + id="resp_123", + output=[ + openai.types.responses.ResponseOutputMessage( + id="msg_123", + type="message", + status="completed", + content=[ + openai.types.responses.ResponseOutputText( + text="Hello, how can I help you?", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + parallel_tool_calls=False, + tool_choice="none", + tools=[], + created_at=10000000, + model="gpt-4", + object="response", + usage=openai.types.responses.ResponseUsage( + input_tokens=10, + input_tokens_details=openai.types.responses.response_usage.InputTokensDetails( + cached_tokens=0, + ), + output_tokens=20, + output_tokens_details=openai.types.responses.response_usage.OutputTokensDetails( + reasoning_tokens=5, + ), + total_tokens=30, + ), + ) + + class MockServerRequestHandler(BaseHTTPRequestHandler): def do_GET(self): # noqa: N802 # Process an HTTP GET request and return a response. 
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 87720b7725..fe34a342fc 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -22,6 +22,7 @@ import sentry_sdk from sentry_sdk import start_transaction +from sentry_sdk.utils import package_version from sentry_sdk.integrations.langchain import ( LangchainIntegration, SentryLangchainCallback, @@ -32,13 +33,14 @@ try: # langchain v1+ from langchain.tools import tool + from langchain.agents import create_agent from langchain_classic.agents import AgentExecutor, create_openai_tools_agent # type: ignore[import-not-found] except ImportError: # langchain int: @@ -81,6 +85,132 @@ def _llm_type(self) -> str: return llm_type +@pytest.mark.skipif( + LANGCHAIN_VERSION < (1,), + reason="LangChain 1.0+ required (ONE AGENT refactor)", +) +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +@pytest.mark.parametrize( + "system_instructions_content", + [ + "You are very powerful assistant, but don't know current events", + [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + ], + ids=["string", "blocks"], +) +def test_langchain_create_agent( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + system_instructions_content, + request, + get_model_response, + nonstreaming_responses_model_response, +): + sentry_init( + integrations=[ + LangchainIntegration( + include_prompts=include_prompts, + ) + ], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + model_response = get_model_response( + nonstreaming_responses_model_response, + serialize_pydantic=True, + request_headers={ + "X-Stainless-Raw-Response": "True", + }, + ) + + llm = ChatOpenAI( + model_name="gpt-3.5-turbo", + temperature=0, 
+ openai_api_key="badkey", + use_responses_api=True, + ) + agent = create_agent( + model=llm, + tools=[get_word_length], + system_prompt=SystemMessage(content=system_instructions_content), + name="word_length_agent", + ) + + with patch.object( + llm.client._client._client, + "send", + return_value=model_response, + ) as _: + with start_transaction(): + agent.invoke( + { + "messages": [ + HumanMessage(content="How many letters in the word eudca"), + ], + }, + ) + + tx = events[0] + assert tx["type"] == "transaction" + assert tx["contexts"]["trace"]["origin"] == "manual" + + chat_spans = list(x for x in tx["spans"] if x["op"] == "gen_ai.chat") + assert len(chat_spans) == 1 + assert chat_spans[0]["origin"] == "auto.ai.langchain" + + # Token usage is only available in newer versions of langchain (v0.2+) + # where usage_metadata is supported on AIMessageChunk + if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]: + assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 10 + assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 20 + assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 30 + + if send_default_pii and include_prompts: + assert ( + chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + == "Hello, how can I help you?" 
+ ) + + param_id = request.node.callspec.id + if "string" in param_id: + assert [ + { + "type": "text", + "content": "You are very powerful assistant, but don't know current events", + } + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert [ + { + "type": "text", + "content": "You are a helpful assistant.", + }, + { + "type": "text", + "content": "Be concise and clear.", + }, + ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {}) + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {}) + + @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -102,7 +232,7 @@ def _llm_type(self) -> str: ], ids=["string", "list", "blocks"], ) -def test_langchain_agent( +def test_langchain_openai_tools_agent( sentry_init, capture_events, send_default_pii, diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 9edaa8501a..1442a2001b 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -102,45 +102,6 @@ def mock_usage(): ) -@pytest.fixture -def mock_model_response(): - return Response( - id="resp_123", - output=[ - ResponseOutputMessage( - id="msg_123", - type="message", - status="completed", - content=[ - ResponseOutputText( - text="Hello, how can I help you?", - type="output_text", - annotations=[], - ) - ], - role="assistant", - ) - ], - parallel_tool_calls=False, - tool_choice="none", - tools=[], - created_at=10000000, - model="gpt-4", - object="response", - usage=ResponseUsage( - input_tokens=10, - input_tokens_details=InputTokensDetails( - cached_tokens=0, - ), - output_tokens=20, - output_tokens_details=OutputTokensDetails( - reasoning_tokens=5, - ), - 
total_tokens=30, - ), - ) - - @pytest.fixture def test_agent(): """Create a real Agent instance for testing.""" @@ -198,13 +159,19 @@ def test_agent_custom_model(): @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( - sentry_init, capture_events, test_agent, mock_model_response, get_model_response + sentry_init, + capture_events, + test_agent, + nonstreaming_responses_model_response, + get_model_response, ): client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -340,7 +307,7 @@ async def test_agent_invocation_span( sentry_init, capture_events, test_agent_with_instructions, - mock_model_response, + nonstreaming_responses_model_response, instructions, input, request, @@ -353,7 +320,9 @@ async def test_agent_invocation_span( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent_with_instructions(instructions).clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -503,7 +472,7 @@ async def test_client_span_custom_model( sentry_init, capture_events, test_agent_custom_model, - mock_model_response, + nonstreaming_responses_model_response, get_model_response, ): """ @@ -514,7 +483,9 @@ async def test_client_span_custom_model( model = OpenAIResponsesModel(model="my-custom-model", openai_client=client) agent = test_agent_custom_model.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, 
serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -547,7 +518,7 @@ def test_agent_invocation_span_sync_no_pii( sentry_init, capture_events, test_agent, - mock_model_response, + nonstreaming_responses_model_response, get_model_response, ): """ @@ -557,7 +528,9 @@ def test_agent_invocation_span_sync_no_pii( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -687,7 +660,7 @@ def test_agent_invocation_span_sync( sentry_init, capture_events, test_agent_with_instructions, - mock_model_response, + nonstreaming_responses_model_response, instructions, input, request, @@ -700,7 +673,9 @@ def test_agent_invocation_span_sync( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent_with_instructions(instructions).clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -1370,7 +1345,11 @@ def simple_test_tool(message: str) -> str: @pytest.mark.asyncio async def test_hosted_mcp_tool_propagation_header_streamed( - sentry_init, test_agent, async_iterator, server_side_event_chunks + sentry_init, + test_agent, + get_model_response, + async_iterator, + server_side_event_chunks, ): """ Test responses API is given trace propagation headers with HostedMCPTool. 
@@ -1402,11 +1381,7 @@ async def test_hosted_mcp_tool_propagation_header_streamed( release="d08ebdb9309e1b004c6f52202de58a09c2268e42", ) - request = httpx.Request( - "POST", - "/responses", - ) - + request_headers = {} # openai-agents calls with_streaming_response() if available starting with # https://github.com/openai/openai-agents-python/commit/159beb56130f7d85192acfd593c9168757984dc0. # When using with_streaming_response() the header set below changes the response type: @@ -1414,12 +1389,10 @@ async def test_hosted_mcp_tool_propagation_header_streamed( if parse_version(OPENAI_AGENTS_VERSION) >= (0, 10, 3) and hasattr( agent_with_tool.model._client.responses, "with_streaming_response" ): - request.headers["X-Stainless-Raw-Response"] = "stream" + request_headers["X-Stainless-Raw-Response"] = "stream" - response = httpx.Response( - 200, - request=request, - content=async_iterator( + response = get_model_response( + async_iterator( server_side_event_chunks( [ ResponseCreatedEvent( @@ -1478,6 +1451,7 @@ async def test_hosted_mcp_tool_propagation_header_streamed( ] ) ), + request_headers=request_headers, ) # Patching https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1604 @@ -2230,7 +2204,11 @@ async def test_mcp_tool_execution_without_pii( @pytest.mark.asyncio async def test_multiple_agents_asyncio( - sentry_init, capture_events, test_agent, mock_model_response, get_model_response + sentry_init, + capture_events, + test_agent, + nonstreaming_responses_model_response, + get_model_response, ): """ Test that multiple agents can be run at the same time in asyncio tasks @@ -2240,7 +2218,9 @@ async def test_multiple_agents_asyncio( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) 
with patch.object( agent.model._client._client, @@ -3154,7 +3134,11 @@ async def test_streaming_span_update_captures_response_data( @pytest.mark.asyncio async def test_streaming_ttft_on_chat_span( - sentry_init, test_agent, async_iterator, server_side_event_chunks + sentry_init, + test_agent, + get_model_response, + async_iterator, + server_side_event_chunks, ): """ Test that time-to-first-token (TTFT) is recorded on chat spans during streaming. @@ -3182,11 +3166,7 @@ async def test_streaming_ttft_on_chat_span( traces_sample_rate=1.0, ) - request = httpx.Request( - "POST", - "/responses", - ) - + request_headers = {} # openai-agents calls with_streaming_response() if available starting with # https://github.com/openai/openai-agents-python/commit/159beb56130f7d85192acfd593c9168757984dc0. # When using with_streaming_response() the header set below changes the response type: @@ -3194,12 +3174,10 @@ async def test_streaming_ttft_on_chat_span( if parse_version(OPENAI_AGENTS_VERSION) >= (0, 10, 3) and hasattr( agent_with_tool.model._client.responses, "with_streaming_response" ): - request.headers["X-Stainless-Raw-Response"] = "stream" + request_headers["X-Stainless-Raw-Response"] = "stream" - response = httpx.Response( - 200, - request=request, - content=async_iterator( + response = get_model_response( + async_iterator( server_side_event_chunks( [ ResponseCreatedEvent( @@ -3276,6 +3254,7 @@ async def test_streaming_ttft_on_chat_span( ] ) ), + request_headers=request_headers, ) # Patching https://github.com/openai/openai-python/blob/656e3cab4a18262a49b961d41293367e45ee71b9/src/openai/_base_client.py#L1604 @@ -3313,7 +3292,11 @@ async def test_streaming_ttft_on_chat_span( ) @pytest.mark.asyncio async def test_conversation_id_on_all_spans( - sentry_init, capture_events, test_agent, mock_model_response, get_model_response + sentry_init, + capture_events, + test_agent, + nonstreaming_responses_model_response, + get_model_response, ): """ Test that gen_ai.conversation.id is 
set on all AI-related spans when passed to Runner.run(). @@ -3323,7 +3306,9 @@ async def test_conversation_id_on_all_spans( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client, @@ -3508,7 +3493,7 @@ async def test_no_conversation_id_when_not_provided( sentry_init, capture_events, test_agent, - mock_model_response, + nonstreaming_responses_model_response, get_model_response, ): """ @@ -3519,7 +3504,9 @@ async def test_no_conversation_id_when_not_provided( model = OpenAIResponsesModel(model="gpt-4", openai_client=client) agent = test_agent.clone(model=model) - response = get_model_response(mock_model_response, serialize_pydantic=True) + response = get_model_response( + nonstreaming_responses_model_response, serialize_pydantic=True + ) with patch.object( agent.model._client._client,