Skip to content

Commit aab80d9

Browse files
Python: Fix Eval samples (#4033)
* fix red team sample * Updated self-reflection * fix for workflow eval sample * fix test
1 parent 6a39d5a commit aab80d9

38 files changed

+541
-2634
lines changed
File renamed without changes.

python/packages/core/agent_framework/openai/_responses_client.py

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,9 @@ async def _prepare_options(
788788
request_input = self._prepare_messages_for_openai(messages)
789789
if not request_input:
790790
raise ServiceInvalidRequestError("Messages are required for chat completions")
791+
792+
conversation_id = self._get_current_conversation_id(options, **kwargs)
793+
791794
run_options["input"] = request_input
792795

793796
# model id
@@ -911,8 +914,11 @@ def _prepare_message_for_openai(
911914
for content in message.contents:
912915
match content.type:
913916
case "text_reasoning":
914-
# Don't send reasoning content back to model
915-
continue
917+
# Reasoning items must be sent back as top-level input items
918+
# for reasoning models that require them alongside function_calls
919+
reasoning = self._prepare_content_for_openai(message.role, content, call_id_to_id) # type: ignore[arg-type]
920+
if reasoning:
921+
all_messages.append(reasoning)
916922
case "function_result":
917923
new_args: dict[str, Any] = {}
918924
new_args.update(self._prepare_content_for_openai(message.role, content, call_id_to_id)) # type: ignore[arg-type]
@@ -967,6 +973,8 @@ def _prepare_content_for_openai(
967973
}
968974
props: dict[str, Any] | None = getattr(content, "additional_properties", None)
969975
if props:
976+
if reasoning_id := props.get("reasoning_id"):
977+
ret["id"] = reasoning_id
970978
if status := props.get("status"):
971979
ret["status"] = status
972980
if reasoning_text := props.get("reasoning_text"):
@@ -1184,22 +1192,29 @@ def _parse_response_from_openai(
11841192
)
11851193
)
11861194
case "reasoning": # ResponseOutputReasoning
1195+
reasoning_id = getattr(item, "id", None)
11871196
if hasattr(item, "content") and item.content:
11881197
for index, reasoning_content in enumerate(item.content):
1189-
additional_properties = None
1198+
additional_properties: dict[str, Any] = {}
1199+
if reasoning_id:
1200+
additional_properties["reasoning_id"] = reasoning_id
11901201
if hasattr(item, "summary") and item.summary and index < len(item.summary):
1191-
additional_properties = {"summary": item.summary[index]}
1202+
additional_properties["summary"] = item.summary[index]
11921203
contents.append(
11931204
Content.from_text_reasoning(
11941205
text=reasoning_content.text,
11951206
raw_representation=reasoning_content,
1196-
additional_properties=additional_properties,
1207+
additional_properties=additional_properties or None,
11971208
)
11981209
)
11991210
if hasattr(item, "summary") and item.summary:
12001211
for summary in item.summary:
12011212
contents.append(
1202-
Content.from_text_reasoning(text=summary.text, raw_representation=summary) # type: ignore[arg-type]
1213+
Content.from_text_reasoning(
1214+
text=summary.text,
1215+
raw_representation=summary, # type: ignore[arg-type]
1216+
additional_properties={"reasoning_id": reasoning_id} if reasoning_id else None,
1217+
)
12031218
)
12041219
case "code_interpreter_call": # ResponseOutputCodeInterpreterCall
12051220
call_id = getattr(item, "call_id", None) or getattr(item, "id", None)
@@ -1413,16 +1428,40 @@ def _parse_chunk_from_openai(
14131428
contents.append(Content.from_text(text=event.delta, raw_representation=event))
14141429
metadata.update(self._get_metadata_from_response(event))
14151430
case "response.reasoning_text.delta":
1416-
contents.append(Content.from_text_reasoning(text=event.delta, raw_representation=event))
1431+
contents.append(
1432+
Content.from_text_reasoning(
1433+
text=event.delta,
1434+
raw_representation=event,
1435+
additional_properties={"reasoning_id": event.item_id},
1436+
)
1437+
)
14171438
metadata.update(self._get_metadata_from_response(event))
14181439
case "response.reasoning_text.done":
1419-
contents.append(Content.from_text_reasoning(text=event.text, raw_representation=event))
1440+
contents.append(
1441+
Content.from_text_reasoning(
1442+
text=event.text,
1443+
raw_representation=event,
1444+
additional_properties={"reasoning_id": event.item_id},
1445+
)
1446+
)
14201447
metadata.update(self._get_metadata_from_response(event))
14211448
case "response.reasoning_summary_text.delta":
1422-
contents.append(Content.from_text_reasoning(text=event.delta, raw_representation=event))
1449+
contents.append(
1450+
Content.from_text_reasoning(
1451+
text=event.delta,
1452+
raw_representation=event,
1453+
additional_properties={"reasoning_id": event.item_id},
1454+
)
1455+
)
14231456
metadata.update(self._get_metadata_from_response(event))
14241457
case "response.reasoning_summary_text.done":
1425-
contents.append(Content.from_text_reasoning(text=event.text, raw_representation=event))
1458+
contents.append(
1459+
Content.from_text_reasoning(
1460+
text=event.text,
1461+
raw_representation=event,
1462+
additional_properties={"reasoning_id": event.item_id},
1463+
)
1464+
)
14261465
metadata.update(self._get_metadata_from_response(event))
14271466
case "response.code_interpreter_call_code.delta":
14281467
call_id = getattr(event, "call_id", None) or getattr(event, "id", None) or event.item_id
@@ -1593,20 +1632,23 @@ def _parse_chunk_from_openai(
15931632
)
15941633
)
15951634
case "reasoning": # ResponseOutputReasoning
1635+
reasoning_id = getattr(event_item, "id", None)
15961636
if hasattr(event_item, "content") and event_item.content:
15971637
for index, reasoning_content in enumerate(event_item.content):
1598-
additional_properties = None
1638+
additional_properties: dict[str, Any] = {}
1639+
if reasoning_id:
1640+
additional_properties["reasoning_id"] = reasoning_id
15991641
if (
16001642
hasattr(event_item, "summary")
16011643
and event_item.summary
16021644
and index < len(event_item.summary)
16031645
):
1604-
additional_properties = {"summary": event_item.summary[index]}
1646+
additional_properties["summary"] = event_item.summary[index]
16051647
contents.append(
16061648
Content.from_text_reasoning(
16071649
text=reasoning_content.text,
16081650
raw_representation=reasoning_content,
1609-
additional_properties=additional_properties,
1651+
additional_properties=additional_properties or None,
16101652
)
16111653
)
16121654
case _:

python/packages/core/tests/azure/test_azure_assistants_client.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,7 @@ def test_azure_assistants_client_init_validation_fail() -> None:
129129
def test_azure_assistants_client_init_missing_deployment_name(azure_openai_unit_test_env: dict[str, str]) -> None:
    """Test AzureOpenAIAssistantsClient initialization with missing deployment name."""
    # Only the API key is supplied; the absent deployment name must make
    # construction raise ServiceInitializationError.
    with pytest.raises(ServiceInitializationError):
        AzureOpenAIAssistantsClient(api_key=azure_openai_unit_test_env.get("AZURE_OPENAI_API_KEY", "test-key"))
135133

136134

137135
def test_azure_assistants_client_init_with_default_headers(azure_openai_unit_test_env: dict[str, str]) -> None:

python/packages/core/tests/azure/test_azure_chat_client.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,13 @@ def test_init_endpoint(azure_openai_unit_test_env: dict[str, str]) -> None:
9494
@pytest.mark.parametrize("exclude_list", [["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]], indirect=True)
def test_init_with_empty_deployment_name(azure_openai_unit_test_env: dict[str, str]) -> None:
    """Chat client construction must fail when the deployment-name env var is excluded."""
    # The parametrized fixture removes AZURE_OPENAI_CHAT_DEPLOYMENT_NAME from the
    # environment, so the no-argument constructor has no deployment to resolve.
    with pytest.raises(ServiceInitializationError):
        AzureOpenAIChatClient()
9998

10099

101100
@pytest.mark.parametrize("exclude_list", [["AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_BASE_URL"]], indirect=True)
def test_init_with_empty_endpoint_and_base_url(azure_openai_unit_test_env: dict[str, str]) -> None:
    """Chat client construction must fail when both endpoint and base URL are excluded."""
    # With neither AZURE_OPENAI_ENDPOINT nor AZURE_OPENAI_BASE_URL available,
    # the client cannot determine where to send requests.
    with pytest.raises(ServiceInitializationError):
        AzureOpenAIChatClient()
106104

107105

108106
@pytest.mark.parametrize("override_env_param_dict", [{"AZURE_OPENAI_ENDPOINT": "http://test.com"}], indirect=True)

python/packages/core/tests/azure/test_azure_responses_client.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Copyright (c) Microsoft. All rights reserved.
22

33
import json
4+
import logging
45
import os
56
from typing import Annotated, Any
67
from unittest.mock import MagicMock
@@ -30,6 +31,8 @@
3031
else "Integration tests are disabled.",
3132
)
3233

34+
logger = logging.getLogger(__name__)
35+
3336

3437
class OutputStruct(BaseModel):
3538
"""A structured output for testing purposes."""
@@ -111,8 +114,7 @@ def test_init_with_default_header(azure_openai_unit_test_env: dict[str, str]) ->
111114
@pytest.mark.parametrize("exclude_list", [["AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME"]], indirect=True)
def test_init_with_empty_model_id(azure_openai_unit_test_env: dict[str, str]) -> None:
    """Responses client construction must fail when the deployment-name env var is excluded."""
    # Fixture excludes AZURE_OPENAI_RESPONSES_DEPLOYMENT_NAME, so init has no model/deployment.
    with pytest.raises(ServiceInitializationError):
        AzureOpenAIResponsesClient()
116118

117119

118120
def test_init_with_project_client(azure_openai_unit_test_env: dict[str, str]) -> None:

python/packages/core/tests/core/test_function_invocation_logic.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2700,3 +2700,74 @@ def test_func(arg1: str) -> str:
27002700
assert conversation_ids_received[1] == "stream_conv_after_first", (
27012701
"streaming: conversation_id should be updated in options after receiving new conversation_id from API"
27022702
)
2703+
2704+
2705+
async def test_streaming_function_calling_response_includes_reasoning_and_tool_results(
    chat_client_base: SupportsChatGetResponse,
):
    """Test that the finalized streaming response includes reasoning, function_call,
    function_result, and final text in its messages.

    This is critical for workflow chaining: when one agent's response is passed as
    input to the next agent, the conversation must include all items (reasoning,
    function_call, function_call_output) so the API can validate the history.
    """

    # Minimal tool; "never_require" lets the client invoke it without an approval round-trip.
    @tool(name="search", approval_mode="never_require")
    def search_func(query: str) -> str:
        return f"Found results for {query}"

    # Script the mock client: two streamed responses, one per model round-trip.
    chat_client_base.streaming_responses = [
        [
            # First response: reasoning + function_call
            ChatResponseUpdate(
                contents=[
                    Content.from_text_reasoning(
                        text="Let me search for that",
                        additional_properties={"reasoning_id": "rs_test123", "status": "completed"},
                    )
                ],
                role="assistant",
            ),
            ChatResponseUpdate(
                contents=[
                    Content.from_function_call(
                        call_id="call_1",
                        name="search",
                        arguments='{"query": "test"}',
                        additional_properties={"fc_id": "fc_test456"},
                    )
                ],
                role="assistant",
            ),
        ],
        [
            # Second response: final text
            ChatResponseUpdate(
                contents=[Content.from_text(text="Here are the results")],
                role="assistant",
            ),
        ],
    ]

    stream = chat_client_base.get_response(
        "search for test", options={"tool_choice": "auto", "tools": [search_func]}, stream=True
    )

    # Drain the stream fully before asking for the finalized response.
    updates: list[Any] = []
    async for update in stream:
        updates.append(update)
    response = await stream.get_final_response()

    # Verify all content types are in the response messages
    all_content_types = [c.type for msg in response.messages for c in msg.contents]
    assert "text_reasoning" in all_content_types, "Reasoning must be preserved in response messages"
    assert "function_call" in all_content_types, "Function call must be preserved in response messages"
    assert "function_result" in all_content_types, "Function result must be in response messages for chaining"
    assert "text" in all_content_types, "Final text must be in response messages"

    # Verify reasoning has the reasoning_id preserved
    reasoning_contents = [c for msg in response.messages for c in msg.contents if c.type == "text_reasoning"]
    assert len(reasoning_contents) >= 1
    assert reasoning_contents[0].additional_properties is not None
    assert reasoning_contents[0].additional_properties.get("reasoning_id") == "rs_test123"

python/packages/core/tests/openai/test_openai_assistants_client.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,7 @@ def test_init_validation_fail() -> None:
154154
def test_init_missing_model_id(openai_unit_test_env: dict[str, str]) -> None:
    """Test OpenAIAssistantsClient initialization with missing model ID."""
    # Only the API key is supplied; the missing model ID must make
    # construction raise ServiceInitializationError.
    with pytest.raises(ServiceInitializationError):
        OpenAIAssistantsClient(api_key=openai_unit_test_env.get("OPENAI_API_KEY", "test-key"))
160158

161159

162160
@pytest.mark.parametrize("exclude_list", [["OPENAI_API_KEY"]], indirect=True)

python/packages/core/tests/openai/test_openai_chat_client.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,7 @@ def test_init_base_url_from_settings_env() -> None:
9797
@pytest.mark.parametrize("exclude_list", [["OPENAI_CHAT_MODEL_ID"]], indirect=True)
def test_init_with_empty_model_id(openai_unit_test_env: dict[str, str]) -> None:
    """Chat client construction must fail when OPENAI_CHAT_MODEL_ID is excluded."""
    # Fixture removes OPENAI_CHAT_MODEL_ID from the environment, so the
    # no-argument constructor has no model to resolve.
    with pytest.raises(ServiceInitializationError):
        OpenAIChatClient()
102101

103102

104103
@pytest.mark.parametrize("exclude_list", [["OPENAI_API_KEY"]], indirect=True)

0 commit comments

Comments
 (0)