From 6e2837fc290cd064132e3ed54f31fdb2af841c93 Mon Sep 17 00:00:00 2001
From: Dantong Li <dtlics2000@gmail.com>
Date: Wed, 20 May 2026 15:18:41 +0100
Subject: [PATCH 1/6] feat(model-settings): add background and
 background_poll_interval_seconds fields

Append two optional fields to ModelSettings to opt into Responses API
background mode. background=True submits via responses.create(background=True)
and adaptively polls responses.retrieve(id) until terminal; the optional
background_poll_interval_seconds pins the cadence, and when it is unset the
SDK defers to the openai-poll-after-ms response header.

Fields are appended at the end of the dataclass per AGENTS.md's positional
compatibility rule. background is added to _TRACEABLE_MODEL_SETTING_FIELDS so
the flag is captured in spans; the interval is operational metadata and is
intentionally excluded.
---
 src/agents/model_settings.py               | 24 ++++++++++++++++++++++
 tests/model_settings/test_serialization.py |  2 ++
 2 files changed, 26 insertions(+)

diff --git a/src/agents/model_settings.py b/src/agents/model_settings.py
index 1ef9822f52..71e3b04ffd 100644
--- a/src/agents/model_settings.py
+++ b/src/agents/model_settings.py
@@ -79,6 +79,7 @@ class MCPToolChoice:
     "top_logprobs",
     "retry",
     "context_management",
+    "background",
 )
 
 
@@ -191,6 +192,29 @@ class ModelSettings:
     to enable server-side compaction when the rendered context crosses a token threshold.
     """
 
+    background: bool | None = None
+    """Whether to run the model response in the background.
+
+    When ``True``, the SDK submits via ``client.responses.create(background=True)``
+    and polls ``client.responses.retrieve(...)`` until the response reaches a
+    terminal state. Background mode lets long single-turn calls (reasoning models,
+    deep-research workloads) survive HTTP / proxy / serverless timeouts.
+
+    Only supported by ``OpenAIResponsesModel`` (HTTP transport). Setting this on
+    ``OpenAIResponsesWSModel`` or ``OpenAIChatCompletionsModel`` raises ``UserError``.
+    Background mode is not ZDR-compatible and response data is retained server-side
+    for ~10 minutes.
+    `Learn more <https://platform.openai.com/docs/guides/background>`_.
+    """
+
+    background_poll_interval_seconds: float | None = None
+    """Polling interval (seconds) when ``background=True``.
+
+    When unset, the first poll waits 1.0 second and later polls honor the
+    ``openai-poll-after-ms`` header from the most recent ``retrieve()``, if any.
+    Ignored when ``background`` is not enabled.
+    """
+
     def resolve(self, override: ModelSettings | None) -> ModelSettings:
         """Produce a new ModelSettings by overlaying any non-None values from the
         override on top of this instance."""
diff --git a/tests/model_settings/test_serialization.py b/tests/model_settings/test_serialization.py
index 2e1cde6466..abf1e43b2e 100644
--- a/tests/model_settings/test_serialization.py
+++ b/tests/model_settings/test_serialization.py
@@ -76,6 +76,8 @@ def test_all_fields_serialization() -> None:
             ),
         ),
         context_management=[{"type": "compaction", "compact_threshold": 200000}],
+        background=True,
+        background_poll_interval_seconds=0.5,
     )
 
     # Verify that every single field is set to a non-None value

From 26ebadffb391e2cd5dcd2f50e895172c1dddfed3 Mon Sep 17 00:00:00 2001
From: Dantong Li <dtlics2000@gmail.com>
Date: Wed, 20 May 2026 15:19:06 +0100
Subject: [PATCH 2/6] feat(openai-responses-model): submit + adaptive-poll loop
 for background mode

When ModelSettings.background is True, OpenAIResponsesModel.get_response now
submits via responses.create(background=True), then polls responses.retrieve(id)
until the response reaches a terminal status (completed | failed | cancelled |
incomplete). Streaming pass-through is unchanged: stream_response forwards
background=True to responses.create(stream=True, background=True) for
server-side durability without client-side auto-resume.

Polling honors the openai-poll-after-ms response header for adaptive intervals
(matches openai-python's create_and_poll pattern); an explicit
background_poll_interval_seconds overrides the header; the fallback is 1.0s.

On asyncio.CancelledError or any other exception raised mid-poll, the SDK
schedules a fire-and-forget responses.cancel(id) so server-side compute is
not leaked, then re-raises. Non-completed terminal states raise the error
built by the existing response_terminal_failure_error helper.

background is plumbed through _build_response_create_kwargs alongside store
and prompt_cache_retention, so the existing extra_args duplicate-key check
catches accidental double-spec.
---
 src/agents/models/openai_responses.py | 112 +++++++++++++++++++++++++-
 1 file changed, 111 insertions(+), 1 deletion(-)

diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index 3af75481bf..8d4d0e1413 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -92,6 +92,32 @@
     value for value in get_args(ResponseIncludable) if isinstance(value, str)
 )
 
+# Terminal `Response.status` values per the OpenAI Responses API: the terminal
+# subset of the `ResponseStatus` literal type in `openai-python`. A response
+# whose status is absent from this set (`queued` / `in_progress`) is still being
+# generated and must be polled.
+_RESPONSE_TERMINAL_STATUSES: frozenset[str] = frozenset(
+    {"completed", "failed", "cancelled", "incomplete"}
+)
+
+# Default polling interval when `background=True` and no explicit interval or
+# server header is available. Matches the fallback used by openai-python's
+# `create_and_poll` helpers.
+_DEFAULT_BACKGROUND_POLL_INTERVAL_SECONDS = 1.0
+
+# Server-sent hint header advising the next poll delay (in milliseconds). When
+# the caller has not pinned an explicit `background_poll_interval_seconds`, we
+# honor this header so the loop adapts to server backpressure.
+_BACKGROUND_POLL_AFTER_HEADER = "openai-poll-after-ms"
+
+
+def _is_response_terminal_status(status: str | None) -> bool:
+    """Report whether polling should stop for a response with this `status`.
+
+    A missing status counts as terminal so unexpected payloads cannot spin the loop.
+    """
+    return status is None or status in _RESPONSE_TERMINAL_STATUSES
+
 
 class _NamespaceToolParam(TypedDict):
     type: Literal["namespace"]
@@ -444,6 +470,82 @@ def _consume_background_cleanup_task_result(task: asyncio.Task[Any]) -> None:
         except Exception as exc:
             logger.debug(f"Background stream cleanup failed after cancellation: {exc}")
 
+    def _schedule_background_response_cancel(self, client: AsyncOpenAI, response_id: str) -> None:
+        """Best-effort fire-and-forget cancel of an in-flight background response.
+
+        Invoked when the poll loop is cancelled or fails before a terminal state so
+        server-side compute is not leaked; errors from the cancel call are swallowed.
+        """
+
+        async def _do_cancel() -> None:
+            try:
+                await client.responses.cancel(response_id)
+            except Exception as exc:
+                logger.debug(
+                    f"Background response cancel for {response_id} failed (ignored): {exc}"
+                )
+
+        cancel_coro = _do_cancel()
+        try:
+            task = asyncio.create_task(cancel_coro)
+        except RuntimeError:
+            # No running loop (e.g. interpreter shutdown): close the never-awaited coroutine.
+            cancel_coro.close()
+            return
+        task.add_done_callback(self._consume_background_cleanup_task_result)
+
+    async def _poll_background_response_until_terminal(
+        self,
+        *,
+        client: AsyncOpenAI,
+        response: Response,
+        poll_interval_seconds: float | None,
+    ) -> Response:
+        """Poll `responses.retrieve(id)` until the response reaches a terminal status.
+
+        When `poll_interval_seconds` is provided it pins the cadence; otherwise the
+        loop honors the `openai-poll-after-ms` response header and falls back to
+        ``_DEFAULT_BACKGROUND_POLL_INTERVAL_SECONDS`` when no usable header is present
+        (malformed, non-finite, zero, or negative hints are ignored). Mirrors the
+        adaptive-polling pattern used by `openai-python`'s `create_and_poll` helpers.
+
+        On cancellation or unexpected error mid-poll, the in-flight server-side
+        response is cancelled best-effort via
+        `_schedule_background_response_cancel` so compute is not leaked.
+        Reaching a non-`completed` terminal state (`failed` / `cancelled` /
+        `incomplete`) raises `ModelBehaviorError`.
+        """
+        response_id = response.id
+        interval = (
+            poll_interval_seconds
+            if poll_interval_seconds is not None
+            else _DEFAULT_BACKGROUND_POLL_INTERVAL_SECONDS
+        )
+        try:
+            while not _is_response_terminal_status(response.status):
+                await asyncio.sleep(interval)
+                raw = await client.responses.with_raw_response.retrieve(response_id)
+                response = raw.parse()
+                if poll_interval_seconds is None:
+                    header_value = raw.headers.get(_BACKGROUND_POLL_AFTER_HEADER)
+                    if header_value is not None:
+                        try:
+                            hinted = float(header_value) / 1000.0
+                        except (TypeError, ValueError):
+                            hinted = 0.0  # Malformed header; the check below discards it.
+                        if 0 < hinted < float("inf"):  # Rejects NaN, inf, and non-positive hints.
+                            interval = hinted
+        except BaseException:
+            self._schedule_background_response_cancel(client, response_id)
+            raise
+
+        if response.status != "completed":
+            # Non-`completed` terminal status; the server has already finished
+            # so we don't need to cancel. Raise a model-error so callers see a
+            # consistent failure type.
+            raise response_terminal_failure_error(f"response.{response.status}", response)
+        return response
+
     async def get_response(
         self,
         system_instructions: str | None,
@@ -693,7 +795,14 @@ async def _fetch_response(
 
         if not stream:
             response = await client.responses.create(**create_kwargs)
-            return cast(Response, response)
+            response = cast(Response, response)
+            if model_settings.background and not _is_response_terminal_status(response.status):
+                response = await self._poll_background_response_until_terminal(
+                    client=client,
+                    response=response,
+                    poll_interval_seconds=model_settings.background_poll_interval_seconds,
+                )
+            return response
 
         streaming_response = getattr(client.responses, "with_streaming_response", None)
         stream_create = getattr(streaming_response, "create", None)
@@ -849,6 +958,7 @@ def _build_response_create_kwargs(
             "extra_body": model_settings.extra_body,
             "text": response_format,
             "store": self._non_null_or_omit(model_settings.store),
+            "background": self._non_null_or_omit(model_settings.background),
             "prompt_cache_retention": self._non_null_or_omit(model_settings.prompt_cache_retention),
             "reasoning": self._non_null_or_omit(model_settings.reasoning),
             "metadata": self._non_null_or_omit(model_settings.metadata),

From 0fe57a8b17727b6a10f3037c7fa2eafba7bb8b75 Mon Sep 17 00:00:00 2001
From: Dantong Li <dtlics2000@gmail.com>
Date: Wed, 20 May 2026 15:19:28 +0100
Subject: [PATCH 3/6] feat(openai-responses-model): reject background=True on
 WS and Chat Completions adapters

Setting ModelSettings.background=True on an adapter that cannot honor it
must fail loudly rather than silently drop the durability guarantee the
caller opted into:

- OpenAIResponsesWSModel: the WebSocket transport always streams and cannot
  decouple submit from poll. Raise UserError in the overridden
  _fetch_response so both get_response and stream_response paths are covered.

- OpenAIChatCompletionsModel: the Chat Completions API has no background
  parameter. Add _handle_unsupported_background and call it at the top of
  get_response and stream_response, mirroring the existing
  _handle_unsupported_prompt pattern.
---
 src/agents/models/openai_chatcompletions.py | 11 +++++++++++
 src/agents/models/openai_responses.py       |  7 +++++++
 2 files changed, 18 insertions(+)

diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index cba01163e9..b3b53f46b1 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -71,6 +71,15 @@ def _non_null_or_omit(self, value: Any) -> Any:
     def _supports_default_prompt_cache_key(self) -> bool:
         return ChatCmplHelpers.is_openai(self._get_client())
 
+    @staticmethod
+    def _handle_unsupported_background(model_settings: ModelSettings) -> None:
+        if model_settings.background:
+            raise UserError(
+                "ModelSettings.background=True is not supported by "
+                "OpenAIChatCompletionsModel; the Chat Completions API has no "
+                "background-mode equivalent. Use OpenAIResponsesModel instead."
+            )
+
     def _handle_unsupported_prompt(self, prompt: ResponsePromptParam | None) -> None:
         if prompt is None:
             return
@@ -140,6 +149,7 @@ async def get_response(
         conversation_id: str | None = None,
         prompt: ResponsePromptParam | None = None,
     ) -> ModelResponse:
+        self._handle_unsupported_background(model_settings)
         self._handle_unsupported_server_managed_conversation_state(
             previous_response_id=previous_response_id,
             conversation_id=conversation_id,
@@ -274,6 +284,7 @@ async def stream_response(
         """
         Yields a partial message as it is generated, as well as the usage information.
         """
+        self._handle_unsupported_background(model_settings)
         self._handle_unsupported_server_managed_conversation_state(
             previous_response_id=previous_response_id,
             conversation_id=conversation_id,
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index 8d4d0e1413..2bb88e30aa 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -1192,6 +1192,13 @@ async def _fetch_response(
         stream: Literal[True] | Literal[False] = False,
         prompt: ResponsePromptParam | None = None,
     ) -> Response | AsyncIterator[ResponseStreamEvent]:
+        if model_settings.background:
+            raise UserError(
+                "ModelSettings.background=True is not supported by "
+                "OpenAIResponsesWSModel; the WebSocket transport always streams "
+                "and cannot decouple submit from poll. Use OpenAIResponsesModel "
+                "(HTTP transport) instead."
+            )
         create_kwargs = self._build_response_create_kwargs(
             system_instructions=system_instructions,
             input=input,

From fb14a5b3837ce55e71e3ca63323fd9fdcf86849f Mon Sep 17 00:00:00 2001
From: Dantong Li <dtlics2000@gmail.com>
Date: Wed, 20 May 2026 15:19:41 +0100
Subject: [PATCH 4/6] test(openai-responses-model): cover background polling,
 cancellation, and rejections

Add 15 tests for the new background mode:

- terminal-on-first-response (no poll triggered)
- multi-poll until completed
- terminal failures (failed | cancelled | incomplete) raise ModelBehaviorError
- openai-poll-after-ms header drives the next sleep interval
- explicit background_poll_interval_seconds overrides the header
- asyncio.CancelledError mid-poll schedules a fire-and-forget responses.cancel(id)
  and re-raises (uses a real-sleep handle captured pre-monkeypatch to avoid
  re-tripping the cancel after the test undoes the patch)
- background=True is plumbed into the responses.create() kwargs
- extra_args={"background": True} + ModelSettings.background=True surfaces
  the existing duplicate-key TypeError
- streaming + background passes through unchanged
- OpenAIResponsesWSModel rejects background=True from both get_response
  and stream_response
- OpenAIChatCompletionsModel rejects background=True from both get_response
  and stream_response

test_all_fields_serialization already sets the two new ModelSettings fields
(patch 1/6), so the "every field non-None" invariant still holds.
---
 tests/models/test_openai_chatcompletions.py |  53 +++
 tests/models/test_openai_responses.py       | 437 ++++++++++++++++++++
 2 files changed, 490 insertions(+)

diff --git a/tests/models/test_openai_chatcompletions.py b/tests/models/test_openai_chatcompletions.py
index 0f8066b2e6..882aebad46 100644
--- a/tests/models/test_openai_chatcompletions.py
+++ b/tests/models/test_openai_chatcompletions.py
@@ -320,6 +320,59 @@ async def patched_fetch_response(self, *args, **kwargs):
         )
 
 
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_get_response_rejects_background_mode(monkeypatch) -> None:
+    """`background=True` is a Responses-API feature; Chat Completions must fail
+    loudly so the user-opted durability guarantee isn't silently demoted."""
+
+    async def patched_fetch_response(self, *args, **kwargs):
+        raise AssertionError("_fetch_response should not run when background=True")
+
+    monkeypatch.setattr(OpenAIChatCompletionsModel, "_fetch_response", patched_fetch_response)
+    model = OpenAIProvider(use_responses=False).get_model("gpt-4")
+
+    with pytest.raises(UserError, match="background=True"):
+        await model.get_response(
+            system_instructions=None,
+            input="hi",
+            model_settings=ModelSettings(background=True),
+            tools=[],
+            output_schema=None,
+            handoffs=[],
+            tracing=ModelTracing.DISABLED,
+            previous_response_id=None,
+            conversation_id=None,
+            prompt=None,
+        )
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_stream_response_rejects_background_mode(monkeypatch) -> None:
+    async def patched_fetch_response(self, *args, **kwargs):
+        raise AssertionError("_fetch_response should not run when background=True")
+
+    monkeypatch.setattr(OpenAIChatCompletionsModel, "_fetch_response", patched_fetch_response)
+    model = OpenAIProvider(use_responses=False).get_model("gpt-4")
+
+    stream = model.stream_response(
+        system_instructions=None,
+        input="hi",
+        model_settings=ModelSettings(background=True),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
+        conversation_id=None,
+        prompt=None,
+    )
+    with pytest.raises(UserError, match="background=True"):
+        async for _ in stream:
+            pass
+
+
 @pytest.mark.allow_call_model_methods
 @pytest.mark.asyncio
 async def test_get_response_rejects_non_text_tool_output_in_strict_mode() -> None:
diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py
index 7d329da6f8..60a6a1a666 100644
--- a/tests/models/test_openai_responses.py
+++ b/tests/models/test_openai_responses.py
@@ -3819,3 +3819,440 @@ def test_websocket_pre_event_disconnect_retry_respects_websocket_retry_disable()
 
     with websocket_pre_event_retries_disabled(True):
         assert _should_retry_pre_event_websocket_disconnect() is False
+
+
+# --- Background + poll mode -------------------------------------------------
+
+
+def _make_status_response(
+    status: str,
+    response_id: str = "resp-bg-1",
+    output: list[Any] | None = None,
+) -> Any:
+    """Build a `Response` stub with the requested `status` field set.
+
+    The default `get_response_obj` helper leaves `status=None`, which the
+    background-poll loop treats as terminal. Tests that exercise the loop need
+    a non-terminal status, so we patch the field after construction (the
+    pydantic model accepts assignment).
+    """
+    response = get_response_obj(output or [], response_id=response_id)
+    response.status = cast(Any, status)
+    return response
+
+
+class _DummyRawResponse:
+    """Mimics `openai-python`'s `LegacyAPIResponse` — sync `.parse()` + `.headers`."""
+
+    def __init__(self, response: Any, headers: dict[str, str] | None = None) -> None:
+        self._response = response
+        self.headers = headers or {}
+
+    def parse(self) -> Any:
+        return self._response
+
+
+class _DummyWithRawResponse:
+    def __init__(self, retrievals: list[Any]) -> None:
+        self._retrievals = retrievals
+        self.calls: list[str] = []
+
+    async def retrieve(self, response_id: str) -> Any:
+        self.calls.append(response_id)
+        if not self._retrievals:
+            raise AssertionError(
+                f"retrieve({response_id!r}) called more times than the test queued"
+            )
+        return self._retrievals.pop(0)
+
+
+class _DummyBackgroundResponses:
+    """Mock for `client.responses` with `create`, `with_raw_response.retrieve`,
+    `cancel`, and a record of each call's arguments."""
+
+    def __init__(
+        self,
+        create_return: Any,
+        retrievals: list[Any] | None = None,
+        cancel_error: Exception | None = None,
+    ) -> None:
+        self.create_kwargs: dict[str, Any] = {}
+        self._create_return = create_return
+        self.with_raw_response = _DummyWithRawResponse(retrievals or [])
+        self.cancel_calls: list[str] = []
+        self._cancel_error = cancel_error
+
+    async def create(self, **kwargs: Any) -> Any:
+        self.create_kwargs = kwargs
+        return self._create_return
+
+    async def cancel(self, response_id: str) -> Any:
+        self.cancel_calls.append(response_id)
+        if self._cancel_error is not None:
+            raise self._cancel_error
+        return self._create_return
+
+
+class _DummyBackgroundClient:
+    def __init__(self, responses: _DummyBackgroundResponses) -> None:
+        self.responses = responses
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_background_terminal_on_first_response_no_poll() -> None:
+    completed = _make_status_response("completed")
+    responses = _DummyBackgroundResponses(create_return=completed)
+    model = OpenAIResponsesModel(
+        model="gpt-4",
+        openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)),
+    )
+
+    result = await model.get_response(
+        system_instructions=None,
+        input="hi",
+        model_settings=ModelSettings(background=True),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+    )
+
+    assert result.response_id == "resp-bg-1"
+    assert responses.create_kwargs.get("background") is True
+    assert responses.with_raw_response.calls == []
+    assert responses.cancel_calls == []
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_background_polls_until_completed(monkeypatch: pytest.MonkeyPatch) -> None:
+    queued = _make_status_response("queued")
+    in_progress = _make_status_response("in_progress")
+    completed = _make_status_response("completed")
+    responses = _DummyBackgroundResponses(
+        create_return=queued,
+        retrievals=[_DummyRawResponse(in_progress), _DummyRawResponse(completed)],
+    )
+    model = OpenAIResponsesModel(
+        model="gpt-4",
+        openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)),
+    )
+
+    # Skip real sleeps so the test runs fast.
+    sleep_durations: list[float] = []
+
+    async def _fake_sleep(duration: float) -> None:
+        sleep_durations.append(duration)
+
+    monkeypatch.setattr(asyncio, "sleep", _fake_sleep)
+
+    result = await model.get_response(
+        system_instructions=None,
+        input="hi",
+        model_settings=ModelSettings(background=True, background_poll_interval_seconds=0.25),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+    )
+
+    assert result.response_id == "resp-bg-1"
+    assert responses.with_raw_response.calls == ["resp-bg-1", "resp-bg-1"]
+    assert responses.cancel_calls == []
+    assert sleep_durations == [0.25, 0.25]
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+@pytest.mark.parametrize("terminal_status", ["failed", "cancelled", "incomplete"])
+async def test_background_non_completed_terminal_status_raises(
+    monkeypatch: pytest.MonkeyPatch, terminal_status: str
+) -> None:
+    queued = _make_status_response("queued")
+    terminal = _make_status_response(terminal_status)
+    responses = _DummyBackgroundResponses(
+        create_return=queued,
+        retrievals=[_DummyRawResponse(terminal)],
+    )
+    model = OpenAIResponsesModel(
+        model="gpt-4",
+        openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)),
+    )
+
+    async def _fake_sleep(_duration: float) -> None:
+        return None
+
+    monkeypatch.setattr(asyncio, "sleep", _fake_sleep)
+
+    with pytest.raises(ModelBehaviorError):
+        await model.get_response(
+            system_instructions=None,
+            input="hi",
+            model_settings=ModelSettings(background=True, background_poll_interval_seconds=0.01),
+            tools=[],
+            output_schema=None,
+            handoffs=[],
+            tracing=ModelTracing.DISABLED,
+        )
+
+    # Server already reached a terminal state on its own, so we do not cancel.
+    assert responses.cancel_calls == []
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_background_honors_openai_poll_after_ms_header(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    queued = _make_status_response("queued")
+    in_progress = _make_status_response("in_progress")
+    completed = _make_status_response("completed")
+    responses = _DummyBackgroundResponses(
+        create_return=queued,
+        retrievals=[
+            _DummyRawResponse(in_progress, headers={"openai-poll-after-ms": "250"}),
+            _DummyRawResponse(completed, headers={"openai-poll-after-ms": "750"}),
+        ],
+    )
+    model = OpenAIResponsesModel(
+        model="gpt-4",
+        openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)),
+    )
+
+    sleep_durations: list[float] = []
+
+    async def _fake_sleep(duration: float) -> None:
+        sleep_durations.append(duration)
+
+    monkeypatch.setattr(asyncio, "sleep", _fake_sleep)
+
+    await model.get_response(
+        system_instructions=None,
+        input="hi",
+        model_settings=ModelSettings(background=True),  # no explicit interval
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+    )
+
+    # First sleep uses the fallback (1.0s) because no header has been seen yet.
+    # Subsequent sleeps adopt the server-hinted interval (250ms -> 0.25s).
+    assert sleep_durations == [1.0, 0.25]
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_background_explicit_interval_overrides_header(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    queued = _make_status_response("queued")
+    in_progress = _make_status_response("in_progress")
+    completed = _make_status_response("completed")
+    responses = _DummyBackgroundResponses(
+        create_return=queued,
+        retrievals=[
+            _DummyRawResponse(in_progress, headers={"openai-poll-after-ms": "9999"}),
+            _DummyRawResponse(completed, headers={"openai-poll-after-ms": "9999"}),
+        ],
+    )
+    model = OpenAIResponsesModel(
+        model="gpt-4",
+        openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)),
+    )
+
+    sleep_durations: list[float] = []
+
+    async def _fake_sleep(duration: float) -> None:
+        sleep_durations.append(duration)
+
+    monkeypatch.setattr(asyncio, "sleep", _fake_sleep)
+
+    await model.get_response(
+        system_instructions=None,
+        input="hi",
+        model_settings=ModelSettings(background=True, background_poll_interval_seconds=0.05),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+    )
+
+    # Explicit interval pins the cadence — header value is ignored.
+    assert sleep_durations == [0.05, 0.05]
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_background_cancelled_error_schedules_response_cancel(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    queued = _make_status_response("queued")
+    responses = _DummyBackgroundResponses(create_return=queued)
+    model = OpenAIResponsesModel(
+        model="gpt-4",
+        openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)),
+    )
+
+    # Keep a reference to the unpatched sleep so we can yield to the background
+    # cancel task after the poll loop raises CancelledError.
+    real_sleep = asyncio.sleep
+
+    async def _raise_cancel(_duration: float) -> None:
+        raise asyncio.CancelledError()
+
+    monkeypatch.setattr(asyncio, "sleep", _raise_cancel)
+
+    with pytest.raises(asyncio.CancelledError):
+        await model.get_response(
+            system_instructions=None,
+            input="hi",
+            model_settings=ModelSettings(background=True, background_poll_interval_seconds=0.01),
+            tools=[],
+            output_schema=None,
+            handoffs=[],
+            tracing=ModelTracing.DISABLED,
+        )
+
+    # Best-effort cancel runs in a background task — let it complete via the
+    # real sleep so we don't trip the monkeypatched CancelledError again.
+    monkeypatch.undo()
+    for _ in range(3):
+        await real_sleep(0)
+    assert responses.cancel_calls == ["resp-bg-1"]
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_background_passes_through_in_create_kwargs() -> None:
+    completed = _make_status_response("completed")
+    responses = _DummyBackgroundResponses(create_return=completed)
+    model = OpenAIResponsesModel(
+        model="gpt-4",
+        openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)),
+    )
+
+    await model.get_response(
+        system_instructions=None,
+        input="hi",
+        model_settings=ModelSettings(background=True),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+    )
+
+    assert responses.create_kwargs.get("background") is True
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_background_extra_args_conflict_raises_typeerror() -> None:
+    completed = _make_status_response("completed")
+    responses = _DummyBackgroundResponses(create_return=completed)
+    model = OpenAIResponsesModel(
+        model="gpt-4",
+        openai_client=cast(AsyncOpenAI, _DummyBackgroundClient(responses)),
+    )
+
+    with pytest.raises(TypeError, match="background"):
+        await model.get_response(
+            system_instructions=None,
+            input="hi",
+            model_settings=ModelSettings(
+                background=True,
+                extra_args={"background": True},
+            ),
+            tools=[],
+            output_schema=None,
+            handoffs=[],
+            tracing=ModelTracing.DISABLED,
+        )
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_background_streaming_passes_through() -> None:
+    """`background=True` is plumbed into the streaming create call; the existing
+    SSE consumption path is unchanged."""
+    called_kwargs: dict[str, Any] = {}
+
+    class DummyStream:
+        def __aiter__(self) -> Any:
+            async def gen() -> Any:
+                yield ResponseCompletedEvent(
+                    type="response.completed",
+                    response=get_response_obj([]),
+                    sequence_number=0,
+                )
+
+            return gen()
+
+    class DummyResponses:
+        async def create(self, **kwargs: Any) -> Any:
+            nonlocal called_kwargs
+            called_kwargs = kwargs
+            return DummyStream()
+
+    class DummyResponsesClient:
+        def __init__(self) -> None:
+            self.responses = DummyResponses()
+
+    model = OpenAIResponsesModel(
+        model="gpt-4",
+        openai_client=cast(AsyncOpenAI, DummyResponsesClient()),
+    )
+
+    stream = model.stream_response(
+        system_instructions=None,
+        input="hi",
+        model_settings=ModelSettings(background=True),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+    )
+    async for _ in stream:
+        pass
+
+    assert called_kwargs.get("background") is True
+    assert called_kwargs.get("stream") is True
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_ws_model_rejects_background_get_response() -> None:
+    async_client = AsyncOpenAI(api_key="test")
+    model = OpenAIResponsesWSModel(model="gpt-4o-realtime", openai_client=async_client)
+
+    with pytest.raises(UserError, match="background=True"):
+        await model.get_response(
+            system_instructions=None,
+            input="hi",
+            model_settings=ModelSettings(background=True),
+            tools=[],
+            output_schema=None,
+            handoffs=[],
+            tracing=ModelTracing.DISABLED,
+        )
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_ws_model_rejects_background_stream_response() -> None:
+    async_client = AsyncOpenAI(api_key="test")
+    model = OpenAIResponsesWSModel(model="gpt-4o-realtime", openai_client=async_client)
+
+    stream = model.stream_response(
+        system_instructions=None,
+        input="hi",
+        model_settings=ModelSettings(background=True),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+    )
+    with pytest.raises(UserError, match="background=True"):
+        async for _ in stream:
+            pass

From 06822acf446fa82bf86e10b294e13bcaebca37b7 Mon Sep 17 00:00:00 2001
From: Dantong Li <dtlics2000@gmail.com>
Date: Wed, 20 May 2026 15:20:08 +0100
Subject: [PATCH 5/6] docs: add background-mode guide and register in mkdocs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New docs/background.md describes the transparent use through Runner, the
streaming pass-through, retrieving a response by id via the underlying
AsyncOpenAI client, the cancel-on-CancelledError behavior, supported
backends (Responses HTTP only — WS and Chat Completions raise UserError),
and the platform limits (~10-minute retention, not ZDR-compatible).

Registered under "Background mode" in all four language nav sections in
mkdocs.yml. Translated content for ja/ko/zh will be generated by the
existing docs translation pipeline.
---
 docs/background.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 mkdocs.yml         |  4 +++
 2 files changed, 66 insertions(+)
 create mode 100644 docs/background.md

diff --git a/docs/background.md b/docs/background.md
new file mode 100644
index 0000000000..71f3a5a6e1
--- /dev/null
+++ b/docs/background.md
@@ -0,0 +1,62 @@
+# Background mode
+
+OpenAI's [Responses API background mode](https://platform.openai.com/docs/guides/background) lets long-running model calls survive client disconnects: the server keeps processing the request and you poll it to completion. This matters for reasoning-heavy single turns (`gpt-5.2-pro`, deep-research models) that can take minutes and otherwise fall foul of HTTP timeouts on Vercel, Cloudflare Workers, corporate proxies, etc.
+
+The Agents SDK exposes background mode via two new fields on [`ModelSettings`][agents.model_settings.ModelSettings]:
+
+- `background: bool | None` — opt in to background mode.
+- `background_poll_interval_seconds: float | None` — optional fixed poll interval. When unset, the SDK uses the interval from the `openai-poll-after-ms` response header when available, and otherwise falls back to 1.0 second.
+
+## Transparent use through `Runner`
+
+Set the flag on your agent's `ModelSettings` and run as usual. The SDK submits with `background=True`, polls `client.responses.retrieve(id)` adaptively, and returns the terminal response — `Runner.run` and `Runner.run_streamed` need no other changes.
+
+```python
+from agents import Agent, ModelSettings, Runner
+
+agent = Agent(
+    name="reasoner",
+    model="gpt-5.2-pro",
+    model_settings=ModelSettings(background=True),
+)
+result = await Runner.run(agent, "Plan a multi-stage research workflow.")
+print(result.final_output)
+```
+
+For streaming, `background=True` is passed through to `responses.create(stream=True, background=True)` so the server keeps generating across client disconnects. Client-side auto-resume via `starting_after` is intentionally not part of this MVP — plain `openai-python` doesn't auto-resume either.
+
+```python
+async for event in Runner.run_streamed(agent, "Stream me a long answer").stream_events():
+    print(event)
+```
+
+## Retrieving a response by id
+
+If you captured a `response_id` and want to fetch the latest server state from a different process or worker, call `client.responses.retrieve(response_id)` on the underlying `AsyncOpenAI` client directly. The SDK deliberately provides no wrapper of its own, because one would only add API surface without adding capability.
+
+```python
+from openai import AsyncOpenAI
+
+client = AsyncOpenAI()
+response = await client.responses.retrieve(response_id)
+print(response.status)
+```
+
+## Cancellation
+
+If the surrounding task is cancelled (`asyncio.CancelledError`) while the SDK is polling, the SDK schedules a best-effort `client.responses.cancel(response_id)` so the in-flight server-side response is not leaked. The `CancelledError` then propagates to the caller as usual.
+
+## Compatibility
+
+Background mode is **supported only by the HTTP Responses transport** ([`OpenAIResponsesModel`][agents.models.openai_responses.OpenAIResponsesModel]). Setting `background=True` on either of the following adapters raises [`UserError`][agents.exceptions.UserError] so the durability guarantee you opted into is not silently downgraded:
+
+- [`OpenAIResponsesWSModel`][agents.models.openai_responses.OpenAIResponsesWSModel] — the WebSocket transport always streams and cannot decouple submit from poll.
+- [`OpenAIChatCompletionsModel`][agents.models.openai_chatcompletions.OpenAIChatCompletionsModel] — the Chat Completions API has no `background` parameter.
+
+If you're on a non-OpenAI provider via LiteLLM / AnyLLM, the field is accepted on `ModelSettings` but is not plumbed through by those adapters; whether it has any effect depends on the underlying provider.
+
+## Limits
+
+- Background responses are retained server-side for **about 10 minutes**.
+- Background mode is **not ZDR-compatible**.
+- The `Runner` does not impose its own deadline on a background poll. If you need a hard ceiling, wrap your call (e.g. `asyncio.wait_for(Runner.run(agent, ...), timeout=600)`); on timeout, `asyncio.wait_for` cancels the wrapped call, so the SDK's cancel-on-`CancelledError` logic still fires.
diff --git a/mkdocs.yml b/mkdocs.yml
index c38e747653..046e84dd22 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -64,6 +64,7 @@ plugins:
                 - Guardrails: guardrails.md
                 - Running agents: running_agents.md
                 - Streaming: streaming.md
+                - Background mode: background.md
                 - Agent orchestration: multi_agent.md
                 - Handoffs: handoffs.md
                 - Results: results.md
@@ -213,6 +214,7 @@ plugins:
                 - guardrails.md
                 - running_agents.md
                 - streaming.md
+                - background.md
                 - multi_agent.md
                 - handoffs.md
                 - results.md
@@ -256,6 +258,7 @@ plugins:
                 - guardrails.md
                 - running_agents.md
                 - streaming.md
+                - background.md
                 - multi_agent.md
                 - handoffs.md
                 - results.md
@@ -299,6 +302,7 @@ plugins:
                 - guardrails.md
                 - running_agents.md
                 - streaming.md
+                - background.md
                 - multi_agent.md
                 - handoffs.md
                 - results.md

From 503c42c26b1ac41bd28bf8530fc15958c17f5694 Mon Sep 17 00:00:00 2001
From: Dantong Li <dtlics2000@gmail.com>
Date: Wed, 20 May 2026 15:20:17 +0100
Subject: [PATCH 6/6] examples: add background-mode example
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

examples/background_mode/main.py runs the same prompt twice — once
synchronously, once with ModelSettings(background=True) — to demonstrate
that opting into background mode is a one-field change at the Agent level
and produces equivalent final output, with the durability win coming from
the underlying submit + poll transport rather than from the SDK API.
---
 examples/background_mode/__init__.py |  0
 examples/background_mode/main.py     | 80 ++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 examples/background_mode/__init__.py
 create mode 100644 examples/background_mode/main.py

diff --git a/examples/background_mode/__init__.py b/examples/background_mode/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/examples/background_mode/main.py b/examples/background_mode/main.py
new file mode 100644
index 0000000000..e326042bc2
--- /dev/null
+++ b/examples/background_mode/main.py
@@ -0,0 +1,80 @@
+"""Example demonstrating Responses API background mode.
+
+When `ModelSettings(background=True)` is set, the SDK submits the underlying
+`client.responses.create()` call with `background=True` and adaptively polls
+`client.responses.retrieve(...)` until the response reaches a terminal state.
+This lets long-running reasoning calls (gpt-5.2-pro, deep-research-class
+workloads) survive HTTP / proxy / serverless timeouts that would otherwise
+abort a synchronous call.
+
+To run this example:
+
+    export OPENAI_API_KEY=...
+    python -m examples.background_mode.main
+
+Compare the two runs below: with and without `background=True`. The output
+should be equivalent, but only the background variant keeps the server-side
+work alive across transient client-side disconnects.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+
+from agents import Agent, ModelSettings, Runner
+
+MODEL_NAME = os.getenv("BACKGROUND_MODEL_NAME") or "gpt-5.2-pro"  # unset or empty -> default
+PROMPT = (
+    "Plan a three-stage research workflow for studying the long-term effects "
+    "of intermittent fasting on cognitive performance. For each stage, list "
+    "the primary research question, the methods, and one specific risk to "
+    "external validity."
+)
+
+
+async def run_synchronous() -> str:
+    agent = Agent(name="planner", model=MODEL_NAME)  # baseline: no ModelSettings passed
+    print("\n=== Without background mode (synchronous) ===")
+    result = await Runner.run(agent, PROMPT)
+    return str(result.final_output)
+
+
+async def run_background() -> str:
+    agent = Agent(
+        name="planner",
+        model=MODEL_NAME,
+        model_settings=ModelSettings(background=True),  # the one-field opt-in
+    )
+    print("\n=== With background mode (submit + adaptive poll) ===")
+    result = await Runner.run(agent, PROMPT)
+    return str(result.final_output)
+
+
+async def main() -> None:
+    """Run the prompt without and then with background mode, and compare the outputs."""
+    try:
+        sync_output = await run_synchronous()
+        print(sync_output)
+
+        bg_output = await run_background()
+        print(bg_output)
+
+        # Both runs send the same prompt, but no seed is fixed, so an exact match is
+        # not guaranteed. Background mode's win is durability, not different content.
+        if sync_output.strip() == bg_output.strip():
+            print("\nOutputs match.")
+        else:
+            print(
+                "\nOutputs differ — expected when sampling is non-deterministic, "
+                "but the background variant survived any transient disconnects."
+            )
+    except Exception as exc:
+        print(f"Error: {exc}")
+        print("\nNote: background mode is supported only by the Responses API")
+        print("HTTP transport. Set OPENAI_API_KEY and try a model that")
+        print("accepts long-running background requests (e.g. gpt-5.2-pro).")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())