From 6e2837fc290cd064132e3ed54f31fdb2af841c93 Mon Sep 17 00:00:00 2001 From: Dantong Li Date: Wed, 20 May 2026 15:18:41 +0100 Subject: [PATCH 1/6] feat(model-settings): add background and background_poll_interval_seconds fields Append two optional fields to ModelSettings to opt into Responses API background mode. background=True submits via responses.create(background=True) and adaptively polls responses.retrieve(id) until terminal; the optional poll_interval_seconds pins the cadence or defers to the openai-poll-after-ms response header. Fields are appended at the end of the dataclass per AGENTS.md's positional compatibility rule. background is added to _TRACEABLE_MODEL_SETTING_FIELDS so the flag is captured in spans; the interval is operational metadata and is intentionally excluded. --- src/agents/model_settings.py | 24 ++++++++++++++++++++++ tests/model_settings/test_serialization.py | 2 ++ 2 files changed, 26 insertions(+) diff --git a/src/agents/model_settings.py b/src/agents/model_settings.py index 1ef9822f52..71e3b04ffd 100644 --- a/src/agents/model_settings.py +++ b/src/agents/model_settings.py @@ -79,6 +79,7 @@ class MCPToolChoice: "top_logprobs", "retry", "context_management", + "background", ) @@ -191,6 +192,29 @@ class ModelSettings: to enable server-side compaction when the rendered context crosses a token threshold. """ + background: bool | None = None + """Whether to run the model response in the background. + + When ``True``, the SDK submits via ``client.responses.create(background=True)`` + and polls ``client.responses.retrieve(...)`` until the response reaches a + terminal state. Background mode lets long single-turn calls (reasoning models, + deep-research workloads) survive HTTP / proxy / serverless timeouts. + + Only supported by ``OpenAIResponsesModel`` (HTTP transport). Setting this on + ``OpenAIResponsesWSModel`` or ``OpenAIChatCompletionsModel`` raises ``UserError``. 
+ Background mode is not ZDR-compatible and response data is retained server-side + for ~10 minutes. + `Learn more <https://platform.openai.com/docs/guides/background>`_. + """ + + background_poll_interval_seconds: float | None = None + """Polling interval (seconds) when ``background=True``. + + When unset, the SDK honors the ``openai-poll-after-ms`` response header from + the most recent ``retrieve()``; falls back to 1.0 second when the header is + absent. Ignored when ``background`` is not enabled. + """ + def resolve(self, override: ModelSettings | None) -> ModelSettings: """Produce a new ModelSettings by overlaying any non-None values from the override on top of this instance.""" diff --git a/tests/model_settings/test_serialization.py b/tests/model_settings/test_serialization.py index 2e1cde6466..abf1e43b2e 100644 --- a/tests/model_settings/test_serialization.py +++ b/tests/model_settings/test_serialization.py @@ -76,6 +76,8 @@ def test_all_fields_serialization() -> None: ), ), context_management=[{"type": "compaction", "compact_threshold": 200000}], + background=True, + background_poll_interval_seconds=0.5, ) # Verify that every single field is set to a non-None value From 26ebadffb391e2cd5dcd2f50e895172c1dddfed3 Mon Sep 17 00:00:00 2001 From: Dantong Li Date: Wed, 20 May 2026 15:19:06 +0100 Subject: [PATCH 2/6] feat(openai-responses-model): submit + adaptive-poll loop for background mode When ModelSettings.background is True, OpenAIResponsesModel.get_response now submits via responses.create(background=True), then polls responses.retrieve(id) until the response reaches a terminal status (completed | failed | cancelled | incomplete). Streaming pass-through is unchanged: stream_response forwards background=True to responses.create(stream=True, background=True) for server-side durability without client-side auto-resume. 
Polling honors the openai-poll-after-ms response header for adaptive intervals (matches openai-python's create_and_poll pattern); an explicit background_poll_interval_seconds overrides the header; the fallback is 1.0s. On asyncio.CancelledError or a non-recoverable error mid-poll, the SDK schedules a fire-and-forget responses.cancel(id) so server-side compute is not leaked, then re-raises. Non-completed terminal states raise the existing response_terminal_failure_error helper. background is plumbed through _build_response_create_kwargs alongside store and prompt_cache_retention, so the existing extra_args duplicate-key check catches accidental double-spec. --- src/agents/models/openai_responses.py | 112 +++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py index 3af75481bf..8d4d0e1413 100644 --- a/src/agents/models/openai_responses.py +++ b/src/agents/models/openai_responses.py @@ -92,6 +92,32 @@ value for value in get_args(ResponseIncludable) if isinstance(value, str) ) +# Terminal `Response.status` values per the OpenAI Responses API. Mirrors the +# `ResponseStatus` literal type in `openai-python`. A response whose status is +# absent from this set (`queued` / `in_progress`) is still being generated and +# must be polled. +_RESPONSE_TERMINAL_STATUSES: frozenset[str] = frozenset( + {"completed", "failed", "cancelled", "incomplete"} +) + +# Default polling interval when `background=True` and no explicit interval or +# server header is available. Matches the fallback used by openai-python's +# `create_and_poll` helpers. +_DEFAULT_BACKGROUND_POLL_INTERVAL_SECONDS = 1.0 + +# Server-sent hint header advising the next poll delay (in milliseconds). When +# the caller has not pinned an explicit `background_poll_interval_seconds`, we +# honor this header so the loop adapts to server backpressure. 
+_BACKGROUND_POLL_AFTER_HEADER = "openai-poll-after-ms" + + +def _is_response_terminal_status(status: str | None) -> bool: + """True when `status` is a terminal value (or unset, which we treat as + terminal to avoid spinning on unexpected payloads).""" + if status is None: + return True + return status in _RESPONSE_TERMINAL_STATUSES + class _NamespaceToolParam(TypedDict): type: Literal["namespace"] @@ -444,6 +470,82 @@ def _consume_background_cleanup_task_result(task: asyncio.Task[Any]) -> None: except Exception as exc: logger.debug(f"Background stream cleanup failed after cancellation: {exc}") + def _schedule_background_response_cancel(self, client: AsyncOpenAI, response_id: str) -> None: + """Best-effort fire-and-forget cancel of an in-flight background response. + + Invoked when the poll loop is cancelled or hits a non-recoverable error + before reaching a terminal state, so that server-side compute is not + leaked. Failures from the cancel call itself are swallowed. + """ + + async def _do_cancel() -> None: + try: + await client.responses.cancel(response_id) + except Exception as exc: + logger.debug( + f"Background response cancel for {response_id} failed (ignored): {exc}" + ) + + try: + task = asyncio.create_task(_do_cancel()) + except RuntimeError: + # No running loop available (e.g. interpreter shutdown). Nothing we + # can do here; the server response will time out on its own. + return + task.add_done_callback(self._consume_background_cleanup_task_result) + + async def _poll_background_response_until_terminal( + self, + *, + client: AsyncOpenAI, + response: Response, + poll_interval_seconds: float | None, + ) -> Response: + """Poll `responses.retrieve(id)` until the response reaches a terminal status. + + When `poll_interval_seconds` is provided it pins the cadence; otherwise the + loop honors the `openai-poll-after-ms` response header and falls back to + ``_DEFAULT_BACKGROUND_POLL_INTERVAL_SECONDS`` when no header is present. 
+ Mirrors the adaptive-polling pattern used by `openai-python`'s + `create_and_poll` helpers. + + On cancellation or unexpected error mid-poll, the in-flight server-side + response is cancelled best-effort via + `_schedule_background_response_cancel` so compute is not leaked. + Reaching a non-`completed` terminal state (`failed` / `cancelled` / + `incomplete`) raises `ModelBehaviorError`. + """ + response_id = response.id + explicit_interval = poll_interval_seconds + interval = ( + explicit_interval + if explicit_interval is not None + else _DEFAULT_BACKGROUND_POLL_INTERVAL_SECONDS + ) + try: + while not _is_response_terminal_status(response.status): + await asyncio.sleep(interval) + raw = await client.responses.with_raw_response.retrieve(response_id) + response = raw.parse() + if explicit_interval is None: + header_value = raw.headers.get(_BACKGROUND_POLL_AFTER_HEADER) + if header_value is not None: + try: + interval = float(header_value) / 1000.0 + except (TypeError, ValueError): + # Server sent a malformed header; keep current interval. + pass + except BaseException: + self._schedule_background_response_cancel(client, response_id) + raise + + if response.status != "completed": + # Non-`completed` terminal status; the server has already finished + # so we don't need to cancel. Raise a model-error so callers see a + # consistent failure type. 
+ raise response_terminal_failure_error(f"response.{response.status}", response) + return response + async def get_response( self, system_instructions: str | None, @@ -693,7 +795,14 @@ async def _fetch_response( if not stream: response = await client.responses.create(**create_kwargs) - return cast(Response, response) + response = cast(Response, response) + if model_settings.background and not _is_response_terminal_status(response.status): + response = await self._poll_background_response_until_terminal( + client=client, + response=response, + poll_interval_seconds=model_settings.background_poll_interval_seconds, + ) + return response streaming_response = getattr(client.responses, "with_streaming_response", None) stream_create = getattr(streaming_response, "create", None) @@ -849,6 +958,7 @@ def _build_response_create_kwargs( "extra_body": model_settings.extra_body, "text": response_format, "store": self._non_null_or_omit(model_settings.store), + "background": self._non_null_or_omit(model_settings.background), "prompt_cache_retention": self._non_null_or_omit(model_settings.prompt_cache_retention), "reasoning": self._non_null_or_omit(model_settings.reasoning), "metadata": self._non_null_or_omit(model_settings.metadata), From 0fe57a8b17727b6a10f3037c7fa2eafba7bb8b75 Mon Sep 17 00:00:00 2001 From: Dantong Li Date: Wed, 20 May 2026 15:19:28 +0100 Subject: [PATCH 3/6] feat(openai-responses-model): reject background=True on WS and Chat Completions adapters Setting ModelSettings.background=True on an adapter that cannot honor it must fail loudly rather than silently drop the durability guarantee the caller opted into: - OpenAIResponsesWSModel: the WebSocket transport always streams and cannot decouple submit from poll. Raise UserError in the overridden _fetch_response so both get_response and stream_response paths are covered. - OpenAIChatCompletionsModel: the Chat Completions API has no background parameter. 
Add _handle_unsupported_background and call it at the top of get_response and stream_response, mirroring the existing _handle_unsupported_prompt pattern. --- src/agents/models/openai_chatcompletions.py | 11 +++++++++++ src/agents/models/openai_responses.py | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py index cba01163e9..b3b53f46b1 100644 --- a/src/agents/models/openai_chatcompletions.py +++ b/src/agents/models/openai_chatcompletions.py @@ -71,6 +71,15 @@ def _non_null_or_omit(self, value: Any) -> Any: def _supports_default_prompt_cache_key(self) -> bool: return ChatCmplHelpers.is_openai(self._get_client()) + @staticmethod + def _handle_unsupported_background(model_settings: ModelSettings) -> None: + if model_settings.background: + raise UserError( + "ModelSettings.background=True is not supported by " + "OpenAIChatCompletionsModel; the Chat Completions API has no " + "background-mode equivalent. Use OpenAIResponsesModel instead." + ) + def _handle_unsupported_prompt(self, prompt: ResponsePromptParam | None) -> None: if prompt is None: return @@ -140,6 +149,7 @@ async def get_response( conversation_id: str | None = None, prompt: ResponsePromptParam | None = None, ) -> ModelResponse: + self._handle_unsupported_background(model_settings) self._handle_unsupported_server_managed_conversation_state( previous_response_id=previous_response_id, conversation_id=conversation_id, @@ -274,6 +284,7 @@ async def stream_response( """ Yields a partial message as it is generated, as well as the usage information. 
""" + self._handle_unsupported_background(model_settings) self._handle_unsupported_server_managed_conversation_state( previous_response_id=previous_response_id, conversation_id=conversation_id, diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py index 8d4d0e1413..2bb88e30aa 100644 --- a/src/agents/models/openai_responses.py +++ b/src/agents/models/openai_responses.py @@ -1192,6 +1192,13 @@ async def _fetch_response( stream: Literal[True] | Literal[False] = False, prompt: ResponsePromptParam | None = None, ) -> Response | AsyncIterator[ResponseStreamEvent]: + if model_settings.background: + raise UserError( + "ModelSettings.background=True is not supported by " + "OpenAIResponsesWSModel; the WebSocket transport always streams " + "and cannot decouple submit from poll. Use OpenAIResponsesModel " + "(HTTP transport) instead." + ) create_kwargs = self._build_response_create_kwargs( system_instructions=system_instructions, input=input, From fb14a5b3837ce55e71e3ca63323fd9fdcf86849f Mon Sep 17 00:00:00 2001 From: Dantong Li Date: Wed, 20 May 2026 15:19:41 +0100 Subject: [PATCH 4/6] test(openai-responses-model): cover background polling, cancellation, and rejections Add 15 tests for the new background mode: - terminal-on-first-response (no poll triggered) - multi-poll until completed - terminal failures (failed | cancelled | incomplete) raise ModelBehaviorError - openai-poll-after-ms header drives the next sleep interval - explicit background_poll_interval_seconds overrides the header - asyncio.CancelledError mid-poll schedules a fire-and-forget responses.cancel(id) and re-raises (uses a real-sleep handle captured pre-monkeypatch to avoid re-tripping the cancel after the test undoes the patch) - background=True is plumbed into the responses.create() kwargs - extra_args={"background": True} + ModelSettings.background=True surfaces the existing duplicate-key TypeError - streaming + background passes through unchanged - 
OpenAIResponsesWSModel rejects background=True from both get_response and stream_response - OpenAIChatCompletionsModel rejects background=True from both get_response and stream_response Update test_all_fields_serialization to set the two new ModelSettings fields so the "every field non-None" invariant still holds. --- tests/models/test_openai_chatcompletions.py | 53 +++ tests/models/test_openai_responses.py | 437 ++++++++++++++++++++ 2 files changed, 490 insertions(+) diff --git a/tests/models/test_openai_chatcompletions.py b/tests/models/test_openai_chatcompletions.py index 0f8066b2e6..882aebad46 100644 --- a/tests/models/test_openai_chatcompletions.py +++ b/tests/models/test_openai_chatcompletions.py @@ -320,6 +320,59 @@ async def patched_fetch_response(self, *args, **kwargs): ) +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_get_response_rejects_background_mode(monkeypatch) -> None: + """`background=True` is a Responses-API feature; Chat Completions must fail + loudly so the user-opted durability guarantee isn't silently demoted.""" + + async def patched_fetch_response(self, *args, **kwargs): + raise AssertionError("_fetch_response should not run when background=True") + + monkeypatch.setattr(OpenAIChatCompletionsModel, "_fetch_response", patched_fetch_response) + model = OpenAIProvider(use_responses=False).get_model("gpt-4") + + with pytest.raises(UserError, match="background=True"): + await model.get_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + conversation_id=None, + prompt=None, + ) + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_stream_response_rejects_background_mode(monkeypatch) -> None: + async def patched_fetch_response(self, *args, **kwargs): + raise AssertionError("_fetch_response should not run when background=True") + + 
monkeypatch.setattr(OpenAIChatCompletionsModel, "_fetch_response", patched_fetch_response) + model = OpenAIProvider(use_responses=False).get_model("gpt-4") + + stream = model.stream_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + conversation_id=None, + prompt=None, + ) + with pytest.raises(UserError, match="background=True"): + async for _ in stream: + pass + + @pytest.mark.allow_call_model_methods @pytest.mark.asyncio async def test_get_response_rejects_non_text_tool_output_in_strict_mode() -> None: diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index 7d329da6f8..60a6a1a666 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -3819,3 +3819,440 @@ def test_websocket_pre_event_disconnect_retry_respects_websocket_retry_disable() with websocket_pre_event_retries_disabled(True): assert _should_retry_pre_event_websocket_disconnect() is False + + +# --- Background + poll mode ------------------------------------------------- + + +def _make_status_response( + status: str, + response_id: str = "resp-bg-1", + output: list[Any] | None = None, +) -> Any: + """Build a `Response` stub with the requested `status` field set. + + The default `get_response_obj` helper leaves `status=None`, which the + background-poll loop treats as terminal. Tests that exercise the loop need + a non-terminal status, so we patch the field after construction (the + pydantic model accepts assignment). 
+ """ + response = get_response_obj(output or [], response_id=response_id) + response.status = cast(Any, status) + return response + + +class _DummyRawResponse: + """Mimics `openai-python`'s `LegacyAPIResponse` — sync `.parse()` + `.headers`.""" + + def __init__(self, response: Any, headers: dict[str, str] | None = None) -> None: + self._response = response + self.headers = headers or {} + + def parse(self) -> Any: + return self._response + + +class _DummyWithRawResponse: + def __init__(self, retrievals: list[Any]) -> None: + self._retrievals = retrievals + self.calls: list[str] = [] + + async def retrieve(self, response_id: str) -> Any: + self.calls.append(response_id) + if not self._retrievals: + raise AssertionError( + f"retrieve({response_id!r}) called more times than the test queued" + ) + return self._retrievals.pop(0) + + +class _DummyBackgroundResponses: + """Mock for `client.responses` with `create`, `with_raw_response.retrieve`, + `cancel`, and a record of each call's arguments.""" + + def __init__( + self, + create_return: Any, + retrievals: list[Any] | None = None, + cancel_error: Exception | None = None, + ) -> None: + self.create_kwargs: dict[str, Any] = {} + self._create_return = create_return + self.with_raw_response = _DummyWithRawResponse(retrievals or []) + self.cancel_calls: list[str] = [] + self._cancel_error = cancel_error + + async def create(self, **kwargs: Any) -> Any: + self.create_kwargs = kwargs + return self._create_return + + async def cancel(self, response_id: str) -> Any: + self.cancel_calls.append(response_id) + if self._cancel_error is not None: + raise self._cancel_error + return self._create_return + + +class _DummyBackgroundClient: + def __init__(self, responses: _DummyBackgroundResponses) -> None: + self.responses = responses + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_background_terminal_on_first_response_no_poll() -> None: + completed = _make_status_response("completed") + responses = 
_DummyBackgroundResponses(create_return=completed) + model = OpenAIResponsesModel( + model="gpt-4", + openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)), + ) + + result = await model.get_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + ) + + assert result.response_id == "resp-bg-1" + assert responses.create_kwargs.get("background") is True + assert responses.with_raw_response.calls == [] + assert responses.cancel_calls == [] + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_background_polls_until_completed(monkeypatch: pytest.MonkeyPatch) -> None: + queued = _make_status_response("queued") + in_progress = _make_status_response("in_progress") + completed = _make_status_response("completed") + responses = _DummyBackgroundResponses( + create_return=queued, + retrievals=[_DummyRawResponse(in_progress), _DummyRawResponse(completed)], + ) + model = OpenAIResponsesModel( + model="gpt-4", + openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)), + ) + + # Skip real sleeps so the test runs fast. 
+ sleep_durations: list[float] = [] + + async def _fake_sleep(duration: float) -> None: + sleep_durations.append(duration) + + monkeypatch.setattr(asyncio, "sleep", _fake_sleep) + + result = await model.get_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True, background_poll_interval_seconds=0.25), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + ) + + assert result.response_id == "resp-bg-1" + assert responses.with_raw_response.calls == ["resp-bg-1", "resp-bg-1"] + assert responses.cancel_calls == [] + assert sleep_durations == [0.25, 0.25] + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +@pytest.mark.parametrize("terminal_status", ["failed", "cancelled", "incomplete"]) +async def test_background_non_completed_terminal_status_raises( + monkeypatch: pytest.MonkeyPatch, terminal_status: str +) -> None: + queued = _make_status_response("queued") + terminal = _make_status_response(terminal_status) + responses = _DummyBackgroundResponses( + create_return=queued, + retrievals=[_DummyRawResponse(terminal)], + ) + model = OpenAIResponsesModel( + model="gpt-4", + openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)), + ) + + async def _fake_sleep(_duration: float) -> None: + return None + + monkeypatch.setattr(asyncio, "sleep", _fake_sleep) + + with pytest.raises(ModelBehaviorError): + await model.get_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True, background_poll_interval_seconds=0.01), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + ) + + # Server already reached a terminal state on its own, so we do not cancel. 
+ assert responses.cancel_calls == [] + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_background_honors_openai_poll_after_ms_header( + monkeypatch: pytest.MonkeyPatch, +) -> None: + queued = _make_status_response("queued") + in_progress = _make_status_response("in_progress") + completed = _make_status_response("completed") + responses = _DummyBackgroundResponses( + create_return=queued, + retrievals=[ + _DummyRawResponse(in_progress, headers={"openai-poll-after-ms": "250"}), + _DummyRawResponse(completed, headers={"openai-poll-after-ms": "750"}), + ], + ) + model = OpenAIResponsesModel( + model="gpt-4", + openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)), + ) + + sleep_durations: list[float] = [] + + async def _fake_sleep(duration: float) -> None: + sleep_durations.append(duration) + + monkeypatch.setattr(asyncio, "sleep", _fake_sleep) + + await model.get_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True), # no explicit interval + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + ) + + # First sleep uses the fallback (1.0s) because no header has been seen yet. + # Subsequent sleeps adopt the server-hinted interval (250ms -> 0.25s). 
+ assert sleep_durations == [1.0, 0.25] + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_background_explicit_interval_overrides_header( + monkeypatch: pytest.MonkeyPatch, +) -> None: + queued = _make_status_response("queued") + in_progress = _make_status_response("in_progress") + completed = _make_status_response("completed") + responses = _DummyBackgroundResponses( + create_return=queued, + retrievals=[ + _DummyRawResponse(in_progress, headers={"openai-poll-after-ms": "9999"}), + _DummyRawResponse(completed, headers={"openai-poll-after-ms": "9999"}), + ], + ) + model = OpenAIResponsesModel( + model="gpt-4", + openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)), + ) + + sleep_durations: list[float] = [] + + async def _fake_sleep(duration: float) -> None: + sleep_durations.append(duration) + + monkeypatch.setattr(asyncio, "sleep", _fake_sleep) + + await model.get_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True, background_poll_interval_seconds=0.05), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + ) + + # Explicit interval pins the cadence — header value is ignored. + assert sleep_durations == [0.05, 0.05] + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_background_cancelled_error_schedules_response_cancel( + monkeypatch: pytest.MonkeyPatch, +) -> None: + queued = _make_status_response("queued") + responses = _DummyBackgroundResponses(create_return=queued) + model = OpenAIResponsesModel( + model="gpt-4", + openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)), + ) + + # Keep a reference to the unpatched sleep so we can yield to the background + # cancel task after the poll loop raises CancelledError. 
+ real_sleep = asyncio.sleep + + async def _raise_cancel(_duration: float) -> None: + raise asyncio.CancelledError() + + monkeypatch.setattr(asyncio, "sleep", _raise_cancel) + + with pytest.raises(asyncio.CancelledError): + await model.get_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True, background_poll_interval_seconds=0.01), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + ) + + # Best-effort cancel runs in a background task — let it complete via the + # real sleep so we don't trip the monkeypatched CancelledError again. + monkeypatch.undo() + for _ in range(3): + await real_sleep(0) + assert responses.cancel_calls == ["resp-bg-1"] + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_background_passes_through_in_create_kwargs() -> None: + completed = _make_status_response("completed") + responses = _DummyBackgroundResponses(create_return=completed) + model = OpenAIResponsesModel( + model="gpt-4", + openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)), + ) + + await model.get_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + ) + + assert responses.create_kwargs.get("background") is True + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_background_extra_args_conflict_raises_typeerror() -> None: + completed = _make_status_response("completed") + responses = _DummyBackgroundResponses(create_return=completed) + model = OpenAIResponsesModel( + model="gpt-4", + openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)), + ) + + with pytest.raises(TypeError, match="background"): + await model.get_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings( + background=True, + extra_args={"background": True}, + ), + tools=[], + output_schema=None, + 
handoffs=[], + tracing=ModelTracing.DISABLED, + ) + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_background_streaming_passes_through() -> None: + """`background=True` is plumbed into the streaming create call; the existing + SSE consumption path is unchanged.""" + called_kwargs: dict[str, Any] = {} + + class DummyStream: + def __aiter__(self) -> Any: + async def gen() -> Any: + yield ResponseCompletedEvent( + type="response.completed", + response=get_response_obj([]), + sequence_number=0, + ) + + return gen() + + class DummyResponses: + async def create(self, **kwargs: Any) -> Any: + nonlocal called_kwargs + called_kwargs = kwargs + return DummyStream() + + class DummyResponsesClient: + def __init__(self) -> None: + self.responses = DummyResponses() + + model = OpenAIResponsesModel( + model="gpt-4", + openai_client=cast(AsyncOpenAI, DummyResponsesClient()), + ) + + stream = model.stream_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + ) + async for _ in stream: + pass + + assert called_kwargs.get("background") is True + assert called_kwargs.get("stream") is True + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_ws_model_rejects_background_get_response() -> None: + async_client = AsyncOpenAI(api_key="test") + model = OpenAIResponsesWSModel(model="gpt-4o-realtime", openai_client=async_client) + + with pytest.raises(UserError, match="background=True"): + await model.get_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + ) + + +@pytest.mark.allow_call_model_methods +@pytest.mark.asyncio +async def test_ws_model_rejects_background_stream_response() -> None: + async_client = AsyncOpenAI(api_key="test") + model = 
OpenAIResponsesWSModel(model="gpt-4o-realtime", openai_client=async_client) + + stream = model.stream_response( + system_instructions=None, + input="hi", + model_settings=ModelSettings(background=True), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + ) + with pytest.raises(UserError, match="background=True"): + async for _ in stream: + pass From 06822acf446fa82bf86e10b294e13bcaebca37b7 Mon Sep 17 00:00:00 2001 From: Dantong Li Date: Wed, 20 May 2026 15:20:08 +0100 Subject: [PATCH 5/6] docs: add background-mode guide and register in mkdocs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New docs/background.md describes the transparent use through Runner, the streaming pass-through, retrieving a response by id via the underlying AsyncOpenAI client, the cancel-on-CancelledError behavior, supported backends (Responses HTTP only — WS and Chat Completions raise UserError), and the platform limits (~10-minute retention, not ZDR-compatible). Registered under "Background mode" in all four language nav sections in mkdocs.yml. Translated content for ja/ko/zh will be generated by the existing docs translation pipeline. --- docs/background.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 4 +++ 2 files changed, 66 insertions(+) create mode 100644 docs/background.md diff --git a/docs/background.md b/docs/background.md new file mode 100644 index 0000000000..71f3a5a6e1 --- /dev/null +++ b/docs/background.md @@ -0,0 +1,62 @@ +# Background mode + +OpenAI's [Responses API background mode](https://platform.openai.com/docs/guides/background) lets long-running model calls survive client disconnects: the server keeps processing the request and you poll it to completion. This matters for reasoning-heavy single turns (`gpt-5.2-pro`, deep-research models) that can take minutes and otherwise fall foul of HTTP timeouts on Vercel, Cloudflare Workers, corporate proxies, etc. 
+ +The Agents SDK exposes background mode via two new fields on [`ModelSettings`][agents.model_settings.ModelSettings]: + +- `background: bool | None` — opt in to background mode. +- `background_poll_interval_seconds: float | None` — optional fixed poll interval. When unset, the SDK honors the `openai-poll-after-ms` response header and falls back to 1.0 second when the header is absent. + +## Transparent use through `Runner` + +Set the flag on your agent's `ModelSettings` and run as usual. The SDK submits with `background=True`, polls `client.responses.retrieve(id)` adaptively, and returns the terminal response — `Runner.run` and `Runner.run_streamed` need no other changes. + +```python +from agents import Agent, ModelSettings, Runner + +agent = Agent( + name="reasoner", + model="gpt-5.2-pro", + model_settings=ModelSettings(background=True), +) +result = await Runner.run(agent, "Plan a multi-stage research workflow.") +print(result.final_output) +``` + +For streaming, `background=True` is passed through to `responses.create(stream=True, background=True)` so the server keeps generating across client disconnects. Client-side auto-resume via `starting_after` is intentionally not part of this MVP — plain `openai-python` doesn't auto-resume either. + +```python +async for event in Runner.run_streamed(agent, "Stream me a long answer").stream_events(): + print(event) +``` + +## Retrieving a response by id + +If you captured a `response_id` and want to fetch the latest server state from a different process or worker, call `client.responses.retrieve(response_id)` on the underlying `AsyncOpenAI` client directly — the SDK deliberately provides no wrapper of its own, because one would only add API surface without adding capability. 
+ +```python +from openai import AsyncOpenAI + +client = AsyncOpenAI() +response = await client.responses.retrieve(response_id) +print(response.status) +``` + +## Cancellation + +If the surrounding task is cancelled (`asyncio.CancelledError`) while the SDK is polling, the SDK schedules a best-effort `client.responses.cancel(response_id)` so the in-flight server-side response is not leaked. The `CancelledError` then propagates to the caller as usual. + +## Compatibility + +Background mode is **supported only by the HTTP Responses transport** ([`OpenAIResponsesModel`][agents.models.openai_responses.OpenAIResponsesModel]). Setting `background=True` on either of the following adapters raises [`UserError`][agents.exceptions.UserError] so the durability guarantee you opted into is not silently demoted: + +- [`OpenAIResponsesWSModel`][agents.models.openai_responses.OpenAIResponsesWSModel] — the WebSocket transport always streams and cannot decouple submit from poll. +- [`OpenAIChatCompletionsModel`][agents.models.openai_chatcompletions.OpenAIChatCompletionsModel] — the Chat Completions API has no `background` parameter. + +If you're on a non-OpenAI provider via LiteLLM / AnyLLM, the field is accepted on `ModelSettings` but those adapters do not plumb it through explicitly; whether it has any effect depends on the underlying provider. + +## Limits + +- Background responses are retained server-side for **about 10 minutes**. +- Background mode is **not ZDR-compatible**. +- The `Runner` does not impose its own deadline on a background poll. If you need a hard ceiling, wrap your call (e.g. `asyncio.wait_for(Runner.run(agent, ...), timeout=600)`); on timeout, the SDK's cancel-on-CancelledError logic still fires. 
diff --git a/mkdocs.yml b/mkdocs.yml index c38e747653..046e84dd22 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -64,6 +64,7 @@ plugins: - Guardrails: guardrails.md - Running agents: running_agents.md - Streaming: streaming.md + - Background mode: background.md - Agent orchestration: multi_agent.md - Handoffs: handoffs.md - Results: results.md @@ -213,6 +214,7 @@ plugins: - guardrails.md - running_agents.md - streaming.md + - background.md - multi_agent.md - handoffs.md - results.md @@ -256,6 +258,7 @@ plugins: - guardrails.md - running_agents.md - streaming.md + - background.md - multi_agent.md - handoffs.md - results.md @@ -299,6 +302,7 @@ plugins: - guardrails.md - running_agents.md - streaming.md + - background.md - multi_agent.md - handoffs.md - results.md From 503c42c26b1ac41bd28bf8530fc15958c17f5694 Mon Sep 17 00:00:00 2001 From: Dantong Li Date: Wed, 20 May 2026 15:20:17 +0100 Subject: [PATCH 6/6] examples: add background-mode example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit examples/background_mode/main.py runs the same prompt twice — once synchronously, once with ModelSettings(background=True) — to demonstrate that opting into background mode is a one-field change at the Agent level and produces equivalent final output, with the durability win coming from the underlying submit + poll transport rather than from the SDK API. 
--- examples/background_mode/__init__.py | 0 examples/background_mode/main.py | 80 ++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 examples/background_mode/__init__.py create mode 100644 examples/background_mode/main.py diff --git a/examples/background_mode/__init__.py b/examples/background_mode/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/background_mode/main.py b/examples/background_mode/main.py new file mode 100644 index 0000000000..e326042bc2 --- /dev/null +++ b/examples/background_mode/main.py @@ -0,0 +1,80 @@ +"""Example demonstrating Responses API background mode. + +When `ModelSettings(background=True)` is set, the SDK submits the underlying +`client.responses.create()` call with `background=True` and adaptively polls +`client.responses.retrieve(...)` until the response reaches a terminal state. +This lets long-running reasoning calls (gpt-5.2-pro, deep-research-class +workloads) survive HTTP / proxy / serverless timeouts that would otherwise +abort a synchronous call. + +To run this example: + + export OPENAI_API_KEY=... + python -m examples.background_mode.main + +Compare the two runs below: with and without `background=True`. The output +should be equivalent, but only the background variant keeps the server-side +work alive across transient client-side disconnects. +""" + +from __future__ import annotations + +import asyncio +import os + +from agents import Agent, ModelSettings, Runner + +MODEL_NAME = os.getenv("BACKGROUND_MODEL_NAME") or "gpt-5.2-pro" +PROMPT = ( + "Plan a three-stage research workflow for studying the long-term effects " + "of intermittent fasting on cognitive performance. For each stage, list " + "the primary research question, the methods, and one specific risk to " + "external validity." 
+) + + +async def run_synchronous() -> str: + agent = Agent(name="planner", model=MODEL_NAME) + print("\n=== Without background mode (synchronous) ===") + result = await Runner.run(agent, PROMPT) + return str(result.final_output) + + +async def run_background() -> str: + agent = Agent( + name="planner", + model=MODEL_NAME, + model_settings=ModelSettings(background=True), + ) + print("\n=== With background mode (submit + adaptive poll) ===") + result = await Runner.run(agent, PROMPT) + return str(result.final_output) + + +async def main() -> None: + try: + sync_output = await run_synchronous() + print(sync_output) + + bg_output = await run_background() + print(bg_output) + + # The two transports should produce equivalent final output for the + # same prompt (no seed is set, so exact text may differ). Background + # mode's win is durability, not different content. + if sync_output.strip() == bg_output.strip(): + print("\nOutputs match.") + else: + print( + "\nOutputs differ — expected when sampling is non-deterministic, " + "but the background variant survived any transient disconnects." + ) + except Exception as exc: + print(f"Error: {exc}") + print("\nNote: background mode is supported only by the Responses API") + print("HTTP transport. Set OPENAI_API_KEY and try a model that") + print("accepts long-running background requests (e.g. gpt-5.2-pro).") + + +if __name__ == "__main__": + asyncio.run(main())