From bbec9ac1dc3d1bdb573e628643326fbf0b01e734 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Fri, 13 Feb 2026 11:28:55 +0100 Subject: [PATCH 1/3] fix: prevent repeating instructions in continued Responses API conversations - Instructions are now only prepended to messages on the first turn - When conversation_id/response_id exists (continuation), instructions are skipped - Covers OpenAI and Azure Responses API paths - Adds regression tests for all continuation scenarios Fixes #3498 --- .../openai/_responses_client.py | 6 +- ...est_responses_instructions_continuation.py | 207 ++++++++++++++++++ 2 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 python/packages/core/tests/openai/test_responses_instructions_continuation.py diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index 5ab414dc85..55fdaeeda3 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -782,8 +782,12 @@ async def _prepare_options( # messages # Handle instructions by prepending to messages as system message - if instructions := options.get("instructions"): + # Only prepend instructions for the first turn (when no conversation/response ID exists) + conversation_id = self._get_current_conversation_id(options, **kwargs) + if (instructions := options.get("instructions")) and not conversation_id: + # First turn: prepend instructions as system message messages = prepend_instructions_to_messages(list(messages), instructions, role="system") + # Continuation turn: instructions already exist in conversation context, skip prepending request_input = self._prepare_messages_for_openai(messages) if not request_input: raise ServiceInvalidRequestError("Messages are required for chat completions") diff --git a/python/packages/core/tests/openai/test_responses_instructions_continuation.py b/python/packages/core/tests/openai/test_responses_instructions_continuation.py new file mode 100644 index 0000000000..f5a16efee1 --- /dev/null +++ b/python/packages/core/tests/openai/test_responses_instructions_continuation.py @@ -0,0 +1,207 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Tests for ensuring instructions are not repeated in continued conversations.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from agent_framework import Message +from agent_framework.openai import OpenAIResponsesClient + + +@pytest.mark.asyncio +async def test_instructions_not_repeated_with_conversation_id() -> None: + """Test that instructions are not sent again when conversation_id is present.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + # Mock the OpenAI client + mock_response = MagicMock() + mock_response.id = "resp_123" + mock_response.model = "test-model" + mock_response.created_at = 1000000000 + mock_response.output_parsed = None + mock_response.metadata = {} + mock_response.usage = None + mock_response.finish_reason = None + + mock_message_content = MagicMock() + mock_message_content.type = "output_text" + mock_message_content.text = "Hello! How can I help?" + mock_message_content.annotations = [] + + mock_message_item = MagicMock() + mock_message_item.type = "message" + mock_message_item.content = [mock_message_content] + + mock_response.output = [mock_message_item] + + with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: + # First call - no conversation_id, should include instructions + await client.get_response( + messages=[Message(role="user", text="Hello")], + options={"instructions": "Reply in uppercase."}, + ) + + # Check first call included instructions + first_call_args = mock_create.call_args + first_input_messages = first_call_args.kwargs["input"] + + # Should have 2 messages: system (instructions) + user + assert len(first_input_messages) == 2 + assert first_input_messages[0]["role"] == "system" + assert any("Reply in uppercase" in str(c) for c in first_input_messages[0]["content"]) + assert first_input_messages[1]["role"] == "user" + + # Second call - with conversation_id (server-side continuation) + # Instructions should NOT be sent again + await client.get_response( + messages=[Message(role="user", text="Tell me a joke")], + options={ + "instructions": "Reply in uppercase.", + "conversation_id": "resp_123", + }, + ) + + # Check second call + second_call_args = mock_create.call_args + second_input_messages = second_call_args.kwargs["input"] + + # Should have only 1 message: user message (no system instructions) + assert len(second_input_messages) == 1, ( + f"Expected 1 message (user only) when conversation_id is present, " + f"but got {len(second_input_messages)} messages" + ) + assert second_input_messages[0]["role"] == "user" + # Ensure no system message with instructions + assert not any(msg["role"] == "system" for msg in second_input_messages) + + +@pytest.mark.asyncio +async def test_instructions_not_repeated_with_response_id() -> None: + """Test that instructions are not sent again when response_id (resp_) format is used.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_response = MagicMock() + mock_response.id = "resp_456" + mock_response.model = "test-model" + mock_response.created_at = 1000000000 + mock_response.output_parsed = None + mock_response.metadata = {} + mock_response.usage = None + mock_response.finish_reason = None + + mock_message_content = MagicMock() + mock_message_content.type = "output_text" + mock_message_content.text = "Response" + mock_message_content.annotations = [] + + mock_message_item = MagicMock() + mock_message_item.type = "message" + mock_message_item.content = [mock_message_content] + + mock_response.output = [mock_message_item] + + with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: + # Call with response_id format (resp_) + await client.get_response( + messages=[Message(role="user", text="Continue conversation")], + options={ + "instructions": "Be helpful.", + "conversation_id": "resp_456", + }, + ) + + call_args = mock_create.call_args + input_messages = call_args.kwargs["input"] + + # Should only have user message, no system instructions + assert len(input_messages) == 1 + assert input_messages[0]["role"] == "user" + assert not any(msg["role"] == "system" for msg in input_messages) + + +@pytest.mark.asyncio +async def test_instructions_not_repeated_with_conv_id() -> None: + """Test that instructions are not sent again when conv_ format is used.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_response = MagicMock() + mock_response.id = "resp_789" + mock_response.model = "test-model" + mock_response.created_at = 1000000000 + mock_response.output_parsed = None + mock_response.metadata = {} + mock_response.usage = None + mock_response.finish_reason = None + + mock_message_content = MagicMock() + mock_message_content.type = "output_text" + mock_message_content.text = "Response" + mock_message_content.annotations = [] + + mock_message_item = MagicMock() + mock_message_item.type = "message" + mock_message_item.content = [mock_message_content] + + mock_response.output = [mock_message_item] + + with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: + # Call with conversation_id format (conv_) + await client.get_response( + messages=[Message(role="user", text="Continue conversation")], + options={ + "instructions": "Be helpful.", + "conversation_id": "conv_abc123", + }, + ) + + call_args = mock_create.call_args + input_messages = call_args.kwargs["input"] + + # Should only have user message, no system instructions + assert len(input_messages) == 1 + assert input_messages[0]["role"] == "user" + assert not any(msg["role"] == "system" for msg in input_messages) + + +@pytest.mark.asyncio +async def test_instructions_included_without_conversation_id() -> None: + """Test that instructions ARE included in initial requests (no conversation_id).""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_response = MagicMock() + mock_response.id = "resp_new" + mock_response.model = "test-model" + mock_response.created_at = 1000000000 + mock_response.output_parsed = None + mock_response.metadata = {} + mock_response.usage = None + mock_response.finish_reason = None + + mock_message_content = MagicMock() + mock_message_content.type = "output_text" + mock_message_content.text = "Response" + mock_message_content.annotations = [] + + mock_message_item = MagicMock() + mock_message_item.type = "message" + mock_message_item.content = [mock_message_content] + + mock_response.output = [mock_message_item] + + with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: + # Call without conversation_id - this is a NEW conversation + await client.get_response( + messages=[Message(role="user", text="Hello")], + options={"instructions": "You are a helpful assistant."}, + ) + + call_args = mock_create.call_args + input_messages = call_args.kwargs["input"] + + # Should have 2 messages: system (instructions) + user + assert len(input_messages) == 2 + assert input_messages[0]["role"] == "system" + assert any("helpful assistant" in str(c) for c in input_messages[0]["content"]) + assert input_messages[1]["role"] == "user" From 79b0faf5891007da1c54147b562b282033cb8e06 Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Fri, 13 Feb 2026 11:33:16 +0100 Subject: [PATCH 2/3] Apply lint fixes to continuation tests --- ...est_responses_instructions_continuation.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/python/packages/core/tests/openai/test_responses_instructions_continuation.py b/python/packages/core/tests/openai/test_responses_instructions_continuation.py index f5a16efee1..ef1adbd357 100644 --- a/python/packages/core/tests/openai/test_responses_instructions_continuation.py +++ b/python/packages/core/tests/openai/test_responses_instructions_continuation.py @@ -2,7 +2,7 @@ """Tests for ensuring instructions are not repeated in continued conversations.""" -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import MagicMock, patch import pytest @@ -24,7 +24,7 @@ async def test_instructions_not_repeated_with_conversation_id() -> None: mock_response.metadata = {} mock_response.usage = None mock_response.finish_reason = None - + mock_message_content = MagicMock() mock_message_content.type = "output_text" mock_message_content.text = "Hello! How can I help?" @@ -46,7 +46,7 @@ async def test_instructions_not_repeated_with_conversation_id() -> None: # Check first call included instructions first_call_args = mock_create.call_args first_input_messages = first_call_args.kwargs["input"] - + # Should have 2 messages: system (instructions) + user assert len(first_input_messages) == 2 assert first_input_messages[0]["role"] == "system" @@ -66,7 +66,7 @@ async def test_instructions_not_repeated_with_conversation_id() -> None: # Check second call second_call_args = mock_create.call_args second_input_messages = second_call_args.kwargs["input"] - + # Should have only 1 message: user message (no system instructions) assert len(second_input_messages) == 1, ( f"Expected 1 message (user only) when conversation_id is present, " @@ -90,7 +90,7 @@ async def test_instructions_not_repeated_with_response_id() -> None: mock_response.metadata = {} mock_response.usage = None mock_response.finish_reason = None - + mock_message_content = MagicMock() mock_message_content.type = "output_text" mock_message_content.text = "Response" @@ -103,7 +103,7 @@ async def test_instructions_not_repeated_with_response_id() -> None: mock_response.output = [mock_message_item] with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: - # Call with response_id format (resp_) + # Call with response_id format (resp_) await client.get_response( messages=[Message(role="user", text="Continue conversation")], options={ @@ -114,7 +114,7 @@ async def test_instructions_not_repeated_with_response_id() -> None: call_args = mock_create.call_args input_messages = call_args.kwargs["input"] - + # Should only have user message, no system instructions assert len(input_messages) == 1 assert input_messages[0]["role"] == "user" @@ -134,7 +134,7 @@ async def test_instructions_not_repeated_with_conv_id() -> None: mock_response.metadata = {} mock_response.usage = None mock_response.finish_reason = None - + mock_message_content = MagicMock() mock_message_content.type = "output_text" mock_message_content.text = "Response" @@ -158,7 +158,7 @@ async def test_instructions_not_repeated_with_conv_id() -> None: call_args = mock_create.call_args input_messages = call_args.kwargs["input"] - + # Should only have user message, no system instructions assert len(input_messages) == 1 assert input_messages[0]["role"] == "user" @@ -178,7 +178,7 @@ async def test_instructions_included_without_conversation_id() -> None: mock_response.metadata = {} mock_response.usage = None mock_response.finish_reason = None - + mock_message_content = MagicMock() mock_message_content.type = "output_text" mock_message_content.text = "Response" @@ -199,7 +199,7 @@ async def test_instructions_included_without_conversation_id() -> None: call_args = mock_create.call_args input_messages = call_args.kwargs["input"] - + # Should have 2 messages: system (instructions) + user assert len(input_messages) == 2 assert input_messages[0]["role"] == "system" From a1acfb516da63f4f9be4c7c0a284cc44174622ae Mon Sep 17 00:00:00 2001 From: eavanvalkenburg Date: Fri, 13 Feb 2026 11:52:11 +0100 Subject: [PATCH 3/3] Consolidate responses continuation tests --- .../openai/test_openai_responses_client.py | 84 +++++++ ...est_responses_instructions_continuation.py | 207 ------------------ 2 files changed, 84 insertions(+), 207 deletions(-) delete mode 100644 python/packages/core/tests/openai/test_responses_instructions_continuation.py diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index a83c4a398b..749939783e 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -2168,6 +2168,90 @@ async def test_conversation_id_precedence_kwargs_over_options() -> None: assert "conversation" not in run_opts +def _create_mock_responses_text_response(*, response_id: str) -> MagicMock: + mock_response = MagicMock() + mock_response.id = response_id + mock_response.model = "test-model" + mock_response.created_at = 1000000000 + mock_response.output_parsed = None + mock_response.metadata = {} + mock_response.usage = None + mock_response.finish_reason = None + + mock_message_content = MagicMock() + mock_message_content.type = "output_text" + mock_message_content.text = "Hello! How can I help?" + mock_message_content.annotations = [] + + mock_message_item = MagicMock() + mock_message_item.type = "message" + mock_message_item.content = [mock_message_content] + + mock_response.output = [mock_message_item] + return mock_response + + +async def test_instructions_sent_first_turn_then_skipped_for_continuation() -> None: + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + mock_response = _create_mock_responses_text_response(response_id="resp_123") + + with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: + await client.get_response( + messages=[Message(role="user", text="Hello")], + options={"instructions": "Reply in uppercase."}, + ) + + first_input_messages = mock_create.call_args.kwargs["input"] + assert len(first_input_messages) == 2 + assert first_input_messages[0]["role"] == "system" + assert any("Reply in uppercase" in str(c) for c in first_input_messages[0]["content"]) + assert first_input_messages[1]["role"] == "user" + + await client.get_response( + messages=[Message(role="user", text="Tell me a joke")], + options={"instructions": "Reply in uppercase.", "conversation_id": "resp_123"}, + ) + + second_input_messages = mock_create.call_args.kwargs["input"] + assert len(second_input_messages) == 1 + assert second_input_messages[0]["role"] == "user" + assert not any(message["role"] == "system" for message in second_input_messages) + + +@pytest.mark.parametrize("conversation_id", ["resp_456", "conv_abc123"]) +async def test_instructions_not_repeated_for_continuation_ids(conversation_id: str) -> None: + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + mock_response = _create_mock_responses_text_response(response_id="resp_456") + + with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: + await client.get_response( + messages=[Message(role="user", text="Continue conversation")], + options={"instructions": "Be helpful.", "conversation_id": conversation_id}, + ) + + input_messages = mock_create.call_args.kwargs["input"] + assert len(input_messages) == 1 + assert input_messages[0]["role"] == "user" + assert not any(message["role"] == "system" for message in input_messages) + + +async def test_instructions_included_without_conversation_id() -> None: + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + mock_response = _create_mock_responses_text_response(response_id="resp_new") + + with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: + await client.get_response( + messages=[Message(role="user", text="Hello")], + options={"instructions": "You are a helpful assistant."}, + ) + + input_messages = mock_create.call_args.kwargs["input"] + assert len(input_messages) == 2 + assert input_messages[0]["role"] == "system" + assert any("helpful assistant" in str(c) for c in input_messages[0]["content"]) + assert input_messages[1]["role"] == "user" + + def test_with_callable_api_key() -> None: """Test OpenAIResponsesClient initialization with callable API key.""" diff --git a/python/packages/core/tests/openai/test_responses_instructions_continuation.py b/python/packages/core/tests/openai/test_responses_instructions_continuation.py deleted file mode 100644 index ef1adbd357..0000000000 --- a/python/packages/core/tests/openai/test_responses_instructions_continuation.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -"""Tests for ensuring instructions are not repeated in continued conversations.""" - -from unittest.mock import MagicMock, patch - -import pytest - -from agent_framework import Message -from agent_framework.openai import OpenAIResponsesClient - - -@pytest.mark.asyncio -async def test_instructions_not_repeated_with_conversation_id() -> None: - """Test that instructions are not sent again when conversation_id is present.""" - client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") - - # Mock the OpenAI client - mock_response = MagicMock() - mock_response.id = "resp_123" - mock_response.model = "test-model" - mock_response.created_at = 1000000000 - mock_response.output_parsed = None - mock_response.metadata = {} - mock_response.usage = None - mock_response.finish_reason = None - - mock_message_content = MagicMock() - mock_message_content.type = "output_text" - mock_message_content.text = "Hello! How can I help?" - mock_message_content.annotations = [] - - mock_message_item = MagicMock() - mock_message_item.type = "message" - mock_message_item.content = [mock_message_content] - - mock_response.output = [mock_message_item] - - with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: - # First call - no conversation_id, should include instructions - await client.get_response( - messages=[Message(role="user", text="Hello")], - options={"instructions": "Reply in uppercase."}, - ) - - # Check first call included instructions - first_call_args = mock_create.call_args - first_input_messages = first_call_args.kwargs["input"] - - # Should have 2 messages: system (instructions) + user - assert len(first_input_messages) == 2 - assert first_input_messages[0]["role"] == "system" - assert any("Reply in uppercase" in str(c) for c in first_input_messages[0]["content"]) - assert first_input_messages[1]["role"] == "user" - - # Second call - with conversation_id (server-side continuation) - # Instructions should NOT be sent again - await client.get_response( - messages=[Message(role="user", text="Tell me a joke")], - options={ - "instructions": "Reply in uppercase.", - "conversation_id": "resp_123", - }, - ) - - # Check second call - second_call_args = mock_create.call_args - second_input_messages = second_call_args.kwargs["input"] - - # Should have only 1 message: user message (no system instructions) - assert len(second_input_messages) == 1, ( - f"Expected 1 message (user only) when conversation_id is present, " - f"but got {len(second_input_messages)} messages" - ) - assert second_input_messages[0]["role"] == "user" - # Ensure no system message with instructions - assert not any(msg["role"] == "system" for msg in second_input_messages) - - -@pytest.mark.asyncio -async def test_instructions_not_repeated_with_response_id() -> None: - """Test that instructions are not sent again when response_id (resp_) format is used.""" - client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") - - mock_response = MagicMock() - mock_response.id = "resp_456" - mock_response.model = "test-model" - mock_response.created_at = 1000000000 - mock_response.output_parsed = None - mock_response.metadata = {} - mock_response.usage = None - mock_response.finish_reason = None - - mock_message_content = MagicMock() - mock_message_content.type = "output_text" - mock_message_content.text = "Response" - mock_message_content.annotations = [] - - mock_message_item = MagicMock() - mock_message_item.type = "message" - mock_message_item.content = [mock_message_content] - - mock_response.output = [mock_message_item] - - with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: - # Call with response_id format (resp_) - await client.get_response( - messages=[Message(role="user", text="Continue conversation")], - options={ - "instructions": "Be helpful.", - "conversation_id": "resp_456", - }, - ) - - call_args = mock_create.call_args - input_messages = call_args.kwargs["input"] - - # Should only have user message, no system instructions - assert len(input_messages) == 1 - assert input_messages[0]["role"] == "user" - assert not any(msg["role"] == "system" for msg in input_messages) - - -@pytest.mark.asyncio -async def test_instructions_not_repeated_with_conv_id() -> None: - """Test that instructions are not sent again when conv_ format is used.""" - client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") - - mock_response = MagicMock() - mock_response.id = "resp_789" - mock_response.model = "test-model" - mock_response.created_at = 1000000000 - mock_response.output_parsed = None - mock_response.metadata = {} - mock_response.usage = None - mock_response.finish_reason = None - - mock_message_content = MagicMock() - mock_message_content.type = "output_text" - mock_message_content.text = "Response" - mock_message_content.annotations = [] - - mock_message_item = MagicMock() - mock_message_item.type = "message" - mock_message_item.content = [mock_message_content] - - mock_response.output = [mock_message_item] - - with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: - # Call with conversation_id format (conv_) - await client.get_response( - messages=[Message(role="user", text="Continue conversation")], - options={ - "instructions": "Be helpful.", - "conversation_id": "conv_abc123", - }, - ) - - call_args = mock_create.call_args - input_messages = call_args.kwargs["input"] - - # Should only have user message, no system instructions - assert len(input_messages) == 1 - assert input_messages[0]["role"] == "user" - assert not any(msg["role"] == "system" for msg in input_messages) - - -@pytest.mark.asyncio -async def test_instructions_included_without_conversation_id() -> None: - """Test that instructions ARE included in initial requests (no conversation_id).""" - client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") - - mock_response = MagicMock() - mock_response.id = "resp_new" - mock_response.model = "test-model" - mock_response.created_at = 1000000000 - mock_response.output_parsed = None - mock_response.metadata = {} - mock_response.usage = None - mock_response.finish_reason = None - - mock_message_content = MagicMock() - mock_message_content.type = "output_text" - mock_message_content.text = "Response" - mock_message_content.annotations = [] - - mock_message_item = MagicMock() - mock_message_item.type = "message" - mock_message_item.content = [mock_message_content] - - mock_response.output = [mock_message_item] - - with patch.object(client.client.responses, "create", return_value=mock_response) as mock_create: - # Call without conversation_id - this is a NEW conversation - await client.get_response( - messages=[Message(role="user", text="Hello")], - options={"instructions": "You are a helpful assistant."}, - ) - - call_args = mock_create.call_args - input_messages = call_args.kwargs["input"] - - # Should have 2 messages: system (instructions) + user - assert len(input_messages) == 2 - assert input_messages[0]["role"] == "system" - assert any("helpful assistant" in str(c) for c in input_messages[0]["content"]) - assert input_messages[1]["role"] == "user"