From eca4e9ded743e9890267026a18aa9b4a6c17b0c3 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Thu, 26 Feb 2026 21:40:35 +0900 Subject: [PATCH] Python: Map file citation annotations from TextDeltaBlock in Assistants API streaming (#4316) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During Assistants API streaming, TextDeltaBlock.text.annotations was ignored when creating Content objects. This caused raw placeholder strings like 【4:0†source】 to pass through to downstream consumers (including AG-UI) instead of being resolved to citation metadata. Map FileCitationDeltaAnnotation and FilePathDeltaAnnotation from delta_block.text.annotations to Annotation objects on the Content, consistent with the existing patterns in _responses_client.py and _chat_client.py. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../openai/_assistants_client.py | 50 +++++++- .../openai/test_openai_assistants_client.py | 116 ++++++++++++++++++ 2 files changed, 165 insertions(+), 1 deletion(-) diff --git a/python/packages/core/agent_framework/openai/_assistants_client.py b/python/packages/core/agent_framework/openai/_assistants_client.py index 42a5e32732..dc05411a52 100644 --- a/python/packages/core/agent_framework/openai/_assistants_client.py +++ b/python/packages/core/agent_framework/openai/_assistants_client.py @@ -16,6 +16,8 @@ from openai import AsyncOpenAI from openai.types.beta.threads import ( + FileCitationDeltaAnnotation, + FilePathDeltaAnnotation, ImageURLContentBlockParam, ImageURLParam, MessageContentPartParam, @@ -39,12 +41,14 @@ normalize_tools, ) from .._types import ( + Annotation, ChatOptions, ChatResponse, ChatResponseUpdate, Content, Message, ResponseStream, + TextSpanRegion, UsageDetails, ) from ..observability import ChatTelemetryLayer @@ -554,9 +558,53 @@ async def _process_stream_events(self, stream: Any, thread_id: str) -> AsyncIter for delta_block in delta.content or []: if isinstance(delta_block, TextDeltaBlock) and delta_block.text and delta_block.text.value: + text_content = Content.from_text(delta_block.text.value) + if delta_block.text.annotations: + text_content.annotations = [] + for annotation in delta_block.text.annotations: + if isinstance(annotation, FileCitationDeltaAnnotation): + ann: Annotation = Annotation( + type="citation", + additional_properties={ + "text": annotation.text, + "index": annotation.index, + }, + raw_representation=annotation, + ) + if annotation.file_citation and annotation.file_citation.file_id: + ann["file_id"] = annotation.file_citation.file_id + if annotation.start_index is not None and annotation.end_index is not None: + ann["annotated_regions"] = [ + TextSpanRegion( + type="text_span", + start_index=annotation.start_index, + end_index=annotation.end_index, + ) + ] + text_content.annotations.append(ann) + elif isinstance(annotation, FilePathDeltaAnnotation): + ann = Annotation( + type="citation", + additional_properties={ + "text": annotation.text, + "index": annotation.index, + }, + raw_representation=annotation, + ) + if annotation.file_path and annotation.file_path.file_id: + ann["file_id"] = annotation.file_path.file_id + if annotation.start_index is not None and annotation.end_index is not None: + ann["annotated_regions"] = [ + TextSpanRegion( + type="text_span", + start_index=annotation.start_index, + end_index=annotation.end_index, + ) + ] + text_content.annotations.append(ann) yield ChatResponseUpdate( role=role, # type: ignore[arg-type] - contents=[Content.from_text(delta_block.text.value)], + contents=[text_content], conversation_id=thread_id, message_id=response_id, raw_representation=response.data, diff --git a/python/packages/core/tests/openai/test_openai_assistants_client.py b/python/packages/core/tests/openai/test_openai_assistants_client.py index cf8d74f959..8f39573006 100644 --- a/python/packages/core/tests/openai/test_openai_assistants_client.py +++ b/python/packages/core/tests/openai/test_openai_assistants_client.py @@ -7,6 +7,8 @@ import pytest from openai.types.beta.threads import MessageDeltaEvent, Run, TextDeltaBlock +from openai.types.beta.threads.file_citation_delta_annotation import FileCitationDeltaAnnotation +from openai.types.beta.threads.file_path_delta_annotation import FilePathDeltaAnnotation from openai.types.beta.threads.runs import RunStep from pydantic import Field @@ -443,6 +445,120 @@ async def async_iterator() -> Any: assert update.raw_representation == mock_message_delta +async def test_process_stream_events_message_delta_text_with_file_citation_annotations( + mock_async_openai: MagicMock, +) -> None: + """Test _process_stream_events maps file citation annotations from TextDeltaBlock.""" + client = create_test_openai_assistants_client(mock_async_openai) + + mock_annotation = FileCitationDeltaAnnotation( + index=0, + type="file_citation", + file_citation={"file_id": "file-abc123"}, + start_index=10, + end_index=24, + text="【4:0†source】", + ) + + mock_delta_block = MagicMock(spec=TextDeltaBlock) + mock_delta_block.text = MagicMock() + mock_delta_block.text.value = "Some text 【4:0†source】 more text" + mock_delta_block.text.annotations = [mock_annotation] + + mock_delta = MagicMock() + mock_delta.role = "assistant" + mock_delta.content = [mock_delta_block] + + mock_message_delta = MagicMock(spec=MessageDeltaEvent) + mock_message_delta.delta = mock_delta + + mock_response = MagicMock() + mock_response.event = "thread.message.delta" + mock_response.data = mock_message_delta + + async def async_iterator() -> Any: + yield mock_response + + mock_stream = MagicMock() + mock_stream.__aenter__ = AsyncMock(return_value=async_iterator()) + mock_stream.__aexit__ = AsyncMock(return_value=None) + + thread_id = "thread-789" + updates: list[ChatResponseUpdate] = [] + async for update in client._process_stream_events(mock_stream, thread_id): # type: ignore + updates.append(update) + + assert len(updates) == 1 + update = updates[0] + assert update.text == "Some text 【4:0†source】 more text" + assert update.contents is not None + content = update.contents[0] + assert content.annotations is not None + assert len(content.annotations) == 1 + ann = content.annotations[0] + assert ann["type"] == "citation" + assert ann["file_id"] == "file-abc123" + assert ann["annotated_regions"] is not None + assert ann["annotated_regions"][0]["start_index"] == 10 + assert ann["annotated_regions"][0]["end_index"] == 24 + assert ann["additional_properties"]["text"] == "【4:0†source】" + + +async def test_process_stream_events_message_delta_text_with_file_path_annotations( + mock_async_openai: MagicMock, +) -> None: + """Test _process_stream_events maps file path annotations from TextDeltaBlock.""" + client = create_test_openai_assistants_client(mock_async_openai) + + mock_annotation = FilePathDeltaAnnotation( + index=0, + type="file_path", + file_path={"file_id": "file-xyz789"}, + start_index=5, + end_index=20, + text="sandbox:/path/to/file", + ) + + mock_delta_block = MagicMock(spec=TextDeltaBlock) + mock_delta_block.text = MagicMock() + mock_delta_block.text.value = "Here sandbox:/path/to/file is the file" + mock_delta_block.text.annotations = [mock_annotation] + + mock_delta = MagicMock() + mock_delta.role = "assistant" + mock_delta.content = [mock_delta_block] + + mock_message_delta = MagicMock(spec=MessageDeltaEvent) + mock_message_delta.delta = mock_delta + + mock_response = MagicMock() + mock_response.event = "thread.message.delta" + mock_response.data = mock_message_delta + + async def async_iterator() -> Any: + yield mock_response + + mock_stream = MagicMock() + mock_stream.__aenter__ = AsyncMock(return_value=async_iterator()) + mock_stream.__aexit__ = AsyncMock(return_value=None) + + thread_id = "thread-annotation" + updates: list[ChatResponseUpdate] = [] + async for update in client._process_stream_events(mock_stream, thread_id): # type: ignore + updates.append(update) + + assert len(updates) == 1 + content = updates[0].contents[0] + assert content.annotations is not None + assert len(content.annotations) == 1 + ann = content.annotations[0] + assert ann["type"] == "citation" + assert ann["file_id"] == "file-xyz789" + assert ann["annotated_regions"] is not None + assert ann["annotated_regions"][0]["start_index"] == 5 + assert ann["annotated_regions"][0]["end_index"] == 20 + + async def test_process_stream_events_requires_action(mock_async_openai: MagicMock) -> None: """Test _process_stream_events with thread.run.requires_action event.""" client = create_test_openai_assistants_client(mock_async_openai)