From d7c39806b0b4760fc6c23f46d83923d0f352df67 Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Tue, 17 Feb 2026 09:01:43 +0900 Subject: [PATCH 1/2] fix: emit tracing function spans for shell/apply_patch/computer runtime tools --- src/agents/run_internal/tool_actions.py | 563 +++++++++++++--------- src/agents/run_internal/tool_execution.py | 30 +- tests/test_apply_patch_tool.py | 90 +++- tests/test_computer_action.py | 97 ++++ tests/test_shell_tool.py | 82 ++++ 5 files changed, 638 insertions(+), 224 deletions(-) diff --git a/src/agents/run_internal/tool_actions.py b/src/agents/run_internal/tool_actions.py index 29086b4c67..010518dd7d 100644 --- a/src/agents/run_internal/tool_actions.py +++ b/src/agents/run_internal/tool_actions.py @@ -6,7 +6,9 @@ from __future__ import annotations import asyncio +import dataclasses import inspect +import json from typing import TYPE_CHECKING, Any, Literal, cast from openai.types.responses import ResponseComputerToolCall @@ -39,6 +41,7 @@ ShellResult, resolve_computer, ) +from ..tracing import SpanError from ..util import _coro from ..util._approvals import evaluate_needs_approval_setting from .items import apply_patch_rejection_item, shell_rejection_item @@ -47,6 +50,7 @@ coerce_shell_call, extract_apply_patch_call_id, format_shell_error, + get_trace_tool_error, normalize_apply_patch_result, normalize_max_output_length, normalize_shell_output, @@ -56,6 +60,7 @@ resolve_approval_status, serialize_shell_output, truncate_shell_outputs, + with_tool_function_span, ) if TYPE_CHECKING: @@ -75,6 +80,22 @@ ] +def _serialize_trace_payload(payload: Any) -> str: + """Serialize tool payloads for tracing while tolerating non-JSON values.""" + if payload is None: + return "" + if isinstance(payload, str): + return payload + if hasattr(payload, "model_dump") and callable(payload.model_dump): + return json.dumps(payload.model_dump(exclude_none=True)) + if dataclasses.is_dataclass(payload) and not isinstance(payload, type): + return json.dumps(dataclasses.asdict(payload)) + try: + return json.dumps(payload) + except TypeError: + return str(payload) + + class ComputerAction: """Execute computer tool actions and emit screenshot outputs with hooks fired.""" @@ -90,41 +111,76 @@ async def execute( acknowledged_safety_checks: list[ComputerCallOutputAcknowledgedSafetyCheck] | None = None, ) -> RunItem: """Run a computer action, capturing a screenshot and notifying hooks.""" - computer = await resolve_computer(tool=action.computer_tool, run_context=context_wrapper) - agent_hooks = agent.hooks - await asyncio.gather( - hooks.on_tool_start(context_wrapper, agent, action.computer_tool), - ( - agent_hooks.on_tool_start(context_wrapper, agent, action.computer_tool) - if agent_hooks - else _coro.noop_coroutine() - ), - ) - output = await cls._execute_action_and_capture(computer, action.tool_call) + async def _run_action(span: Any | None) -> RunItem: + if span and config.trace_include_sensitive_data: + span.span_data.input = _serialize_trace_payload(action.tool_call.action) - await asyncio.gather( - hooks.on_tool_end(context_wrapper, agent, action.computer_tool, output), - ( - agent_hooks.on_tool_end(context_wrapper, agent, action.computer_tool, output) - if agent_hooks - else _coro.noop_coroutine() - ), - ) + computer = await resolve_computer( + tool=action.computer_tool, run_context=context_wrapper + ) + agent_hooks = agent.hooks + await asyncio.gather( + hooks.on_tool_start(context_wrapper, agent, action.computer_tool), + ( + agent_hooks.on_tool_start(context_wrapper, agent, 
action.computer_tool) + if agent_hooks + else _coro.noop_coroutine() + ), + ) - image_url = f"data:image/png;base64,{output}" - return ToolCallOutputItem( - agent=agent, - output=image_url, - raw_item=ComputerCallOutput( - call_id=action.tool_call.call_id, - output={ - "type": "computer_screenshot", - "image_url": image_url, - }, - type="computer_call_output", - acknowledged_safety_checks=acknowledged_safety_checks, - ), + output = "" + try: + output = await cls._execute_action_and_capture(computer, action.tool_call) + except Exception as exc: + error_text = format_shell_error(exc) + trace_error = get_trace_tool_error( + trace_include_sensitive_data=config.trace_include_sensitive_data, + error_message=error_text, + ) + if span: + span.set_error( + SpanError( + message="Error running tool", + data={ + "tool_name": action.computer_tool.name, + "error": trace_error, + }, + ) + ) + logger.error("Failed to execute computer action: %s", exc, exc_info=True) + + await asyncio.gather( + hooks.on_tool_end(context_wrapper, agent, action.computer_tool, output), + ( + agent_hooks.on_tool_end(context_wrapper, agent, action.computer_tool, output) + if agent_hooks + else _coro.noop_coroutine() + ), + ) + + image_url = f"data:image/png;base64,{output}" if output else "" + if span and config.trace_include_sensitive_data: + span.span_data.output = image_url + + return ToolCallOutputItem( + agent=agent, + output=image_url, + raw_item=ComputerCallOutput( + call_id=action.tool_call.call_id, + output={ + "type": "computer_screenshot", + "image_url": image_url, + }, + type="computer_call_output", + acknowledged_safety_checks=acknowledged_safety_checks, + ), + ) + + return await with_tool_function_span( + config=config, + tool_name=action.computer_tool.name, + fn=_run_action, ) @classmethod @@ -234,134 +290,167 @@ async def execute( shell_tool = call.shell_tool agent_hooks = agent.hooks - needs_approval_result = await evaluate_needs_approval_setting( - shell_tool.needs_approval, context_wrapper, shell_call.action, shell_call.call_id - ) + async def _run_call(span: Any | None) -> RunItem: + if span and config.trace_include_sensitive_data: + span.span_data.input = _serialize_trace_payload( + dataclasses.asdict(shell_call.action) + ) - if needs_approval_result: - approval_status, approval_item = await resolve_approval_status( - tool_name=shell_tool.name, - call_id=shell_call.call_id, - raw_item=call.tool_call, - agent=agent, - context_wrapper=context_wrapper, - on_approval=shell_tool.on_approval, + needs_approval_result = await evaluate_needs_approval_setting( + shell_tool.needs_approval, context_wrapper, shell_call.action, shell_call.call_id ) - if approval_status is False: - rejection_message = await resolve_approval_rejection_message( - context_wrapper=context_wrapper, - run_config=config, - tool_type="shell", + if needs_approval_result: + approval_status, approval_item = await resolve_approval_status( tool_name=shell_tool.name, call_id=shell_call.call_id, - ) - return shell_rejection_item( - agent, - shell_call.call_id, - rejection_message=rejection_message, + raw_item=call.tool_call, + agent=agent, + context_wrapper=context_wrapper, + on_approval=shell_tool.on_approval, ) - if approval_status is not True: - return approval_item - - await asyncio.gather( - hooks.on_tool_start(context_wrapper, agent, shell_tool), - ( - agent_hooks.on_tool_start(context_wrapper, agent, shell_tool) - if agent_hooks - else _coro.noop_coroutine() - ), - ) - request = ShellCommandRequest(ctx_wrapper=context_wrapper, data=shell_call) - 
status: Literal["completed", "failed"] = "completed" - output_text = "" - shell_output_payload: list[dict[str, Any]] | None = None - provider_meta: dict[str, Any] | None = None - max_output_length: int | None = None - requested_max_output_length = normalize_max_output_length( - shell_call.action.max_output_length - ) - - try: - executor = call.shell_tool.executor - if executor is None: - raise ModelBehaviorError("Shell tool has no local executor configured.") - executor_result = executor(request) - result = ( - await executor_result if inspect.isawaitable(executor_result) else executor_result + if approval_status is False: + rejection_message = await resolve_approval_rejection_message( + context_wrapper=context_wrapper, + run_config=config, + tool_type="shell", + tool_name=shell_tool.name, + call_id=shell_call.call_id, + ) + return shell_rejection_item( + agent, + shell_call.call_id, + rejection_message=rejection_message, + ) + + if approval_status is not True: + return approval_item + + await asyncio.gather( + hooks.on_tool_start(context_wrapper, agent, shell_tool), + ( + agent_hooks.on_tool_start(context_wrapper, agent, shell_tool) + if agent_hooks + else _coro.noop_coroutine() + ), + ) + request = ShellCommandRequest(ctx_wrapper=context_wrapper, data=shell_call) + status: Literal["completed", "failed"] = "completed" + output_text = "" + shell_output_payload: list[dict[str, Any]] | None = None + provider_meta: dict[str, Any] | None = None + max_output_length: int | None = None + requested_max_output_length = normalize_max_output_length( + shell_call.action.max_output_length ) - if isinstance(result, ShellResult): - normalized = [normalize_shell_output(entry) for entry in result.output] - result_max_output_length = normalize_max_output_length(result.max_output_length) - if result_max_output_length is None: - max_output_length = requested_max_output_length - elif requested_max_output_length is None: - max_output_length = result_max_output_length + try: + executor = call.shell_tool.executor + if executor is None: + raise ModelBehaviorError("Shell tool has no local executor configured.") + executor_result = executor(request) + result = ( + await executor_result + if inspect.isawaitable(executor_result) + else executor_result + ) + + if isinstance(result, ShellResult): + normalized = [normalize_shell_output(entry) for entry in result.output] + result_max_output_length = normalize_max_output_length(result.max_output_length) + if result_max_output_length is None: + max_output_length = requested_max_output_length + elif requested_max_output_length is None: + max_output_length = result_max_output_length + else: + max_output_length = min( + result_max_output_length, requested_max_output_length + ) + if max_output_length is not None: + normalized = truncate_shell_outputs(normalized, max_output_length) + output_text = render_shell_outputs(normalized) + if max_output_length is not None: + output_text = output_text[:max_output_length] + shell_output_payload = [serialize_shell_output(entry) for entry in normalized] + provider_meta = dict(result.provider_data or {}) else: - max_output_length = min(result_max_output_length, requested_max_output_length) - if max_output_length is not None: - normalized = truncate_shell_outputs(normalized, max_output_length) - output_text = render_shell_outputs(normalized) - if max_output_length is not None: - output_text = output_text[:max_output_length] - shell_output_payload = [serialize_shell_output(entry) for entry in normalized] - provider_meta = 
dict(result.provider_data or {}) - else: - output_text = str(result) + output_text = str(result) + if requested_max_output_length is not None: + max_output_length = requested_max_output_length + output_text = output_text[:max_output_length] + except Exception as exc: + status = "failed" + output_text = format_shell_error(exc) + trace_error = get_trace_tool_error( + trace_include_sensitive_data=config.trace_include_sensitive_data, + error_message=output_text, + ) + if span: + span.set_error( + SpanError( + message="Error running tool", + data={ + "tool_name": shell_tool.name, + "error": trace_error, + }, + ) + ) if requested_max_output_length is not None: max_output_length = requested_max_output_length output_text = output_text[:max_output_length] - except Exception as exc: - status = "failed" - output_text = format_shell_error(exc) - if requested_max_output_length is not None: - max_output_length = requested_max_output_length - output_text = output_text[:max_output_length] - logger.error("Shell executor failed: %s", exc, exc_info=True) - - await asyncio.gather( - hooks.on_tool_end(context_wrapper, agent, call.shell_tool, output_text), - ( - agent_hooks.on_tool_end(context_wrapper, agent, call.shell_tool, output_text) - if agent_hooks - else _coro.noop_coroutine() - ), - ) + logger.error("Shell executor failed: %s", exc, exc_info=True) + + await asyncio.gather( + hooks.on_tool_end(context_wrapper, agent, call.shell_tool, output_text), + ( + agent_hooks.on_tool_end(context_wrapper, agent, call.shell_tool, output_text) + if agent_hooks + else _coro.noop_coroutine() + ), + ) - raw_entries: list[dict[str, Any]] | None = None - if shell_output_payload: - raw_entries = shell_output_payload - elif output_text: - raw_entries = [ - { - "stdout": output_text, - "stderr": "", - "status": status, - "outcome": "success" if status == "completed" else "failure", - } - ] - - structured_output = normalize_shell_output_entries(raw_entries) if raw_entries else [] - - raw_item: dict[str, Any] = { - "type": "shell_call_output", - "call_id": shell_call.call_id, - "output": structured_output, - "status": status, - } - if max_output_length is not None: - raw_item["max_output_length"] = max_output_length - if raw_entries: - raw_item["shell_output"] = raw_entries - if provider_meta: - raw_item["provider_data"] = provider_meta + raw_entries: list[dict[str, Any]] | None = None + if shell_output_payload: + raw_entries = shell_output_payload + elif output_text: + raw_entries = [ + { + "stdout": output_text, + "stderr": "", + "status": status, + "outcome": "success" if status == "completed" else "failure", + } + ] + + structured_output = normalize_shell_output_entries(raw_entries) if raw_entries else [] + + raw_item: dict[str, Any] = { + "type": "shell_call_output", + "call_id": shell_call.call_id, + "output": structured_output, + "status": status, + } + if max_output_length is not None: + raw_item["max_output_length"] = max_output_length + if raw_entries: + raw_item["shell_output"] = raw_entries + if provider_meta: + raw_item["provider_data"] = provider_meta + + if span and config.trace_include_sensitive_data: + span.span_data.output = output_text + + return ToolCallOutputItem( + agent=agent, + output=output_text, + raw_item=raw_item, + ) - return ToolCallOutputItem( - agent=agent, - output=output_text, - raw_item=raw_item, + return await with_tool_function_span( + config=config, + tool_name=shell_tool.name, + fn=_run_call, ) @@ -385,96 +474,128 @@ async def execute( call.tool_call, context_wrapper=context_wrapper, ) - 
call_id = extract_apply_patch_call_id(call.tool_call) - needs_approval_result = await evaluate_needs_approval_setting( - apply_patch_tool.needs_approval, context_wrapper, operation, call_id - ) + async def _run_call(span: Any | None) -> RunItem: + if span and config.trace_include_sensitive_data: + span.span_data.input = _serialize_trace_payload( + { + "type": operation.type, + "path": operation.path, + "diff": operation.diff, + } + ) - if needs_approval_result: - approval_status, approval_item = await resolve_approval_status( - tool_name=apply_patch_tool.name, - call_id=call_id, - raw_item=call.tool_call, - agent=agent, - context_wrapper=context_wrapper, - on_approval=apply_patch_tool.on_approval, + needs_approval_result = await evaluate_needs_approval_setting( + apply_patch_tool.needs_approval, context_wrapper, operation, call_id ) - if approval_status is False: - rejection_message = await resolve_approval_rejection_message( - context_wrapper=context_wrapper, - run_config=config, - tool_type="apply_patch", + if needs_approval_result: + approval_status, approval_item = await resolve_approval_status( tool_name=apply_patch_tool.name, call_id=call_id, - ) - return apply_patch_rejection_item( - agent, - call_id, - rejection_message=rejection_message, + raw_item=call.tool_call, + agent=agent, + context_wrapper=context_wrapper, + on_approval=apply_patch_tool.on_approval, ) - if approval_status is not True: - return approval_item + if approval_status is False: + rejection_message = await resolve_approval_rejection_message( + context_wrapper=context_wrapper, + run_config=config, + tool_type="apply_patch", + tool_name=apply_patch_tool.name, + call_id=call_id, + ) + return apply_patch_rejection_item( + agent, + call_id, + rejection_message=rejection_message, + ) + + if approval_status is not True: + return approval_item + + await asyncio.gather( + hooks.on_tool_start(context_wrapper, agent, apply_patch_tool), + ( + agent_hooks.on_tool_start(context_wrapper, agent, apply_patch_tool) + if agent_hooks + else _coro.noop_coroutine() + ), + ) - await asyncio.gather( - hooks.on_tool_start(context_wrapper, agent, apply_patch_tool), - ( - agent_hooks.on_tool_start(context_wrapper, agent, apply_patch_tool) - if agent_hooks - else _coro.noop_coroutine() - ), - ) + status: Literal["completed", "failed"] = "completed" + output_text = "" + + try: + editor = apply_patch_tool.editor + if operation.type == "create_file": + result = editor.create_file(operation) + elif operation.type == "update_file": + result = editor.update_file(operation) + elif operation.type == "delete_file": + result = editor.delete_file(operation) + else: # pragma: no cover - validated in coerce_apply_patch_operation + raise ModelBehaviorError(f"Unsupported apply_patch operation: {operation.type}") + + awaited = await result if inspect.isawaitable(result) else result + normalized = normalize_apply_patch_result(awaited) + if normalized: + if normalized.status in {"completed", "failed"}: + status = normalized.status + if normalized.output: + output_text = normalized.output + except Exception as exc: + status = "failed" + output_text = format_shell_error(exc) + trace_error = get_trace_tool_error( + trace_include_sensitive_data=config.trace_include_sensitive_data, + error_message=output_text, + ) + if span: + span.set_error( + SpanError( + message="Error running tool", + data={ + "tool_name": apply_patch_tool.name, + "error": trace_error, + }, + ) + ) + logger.error("Apply patch editor failed: %s", exc, exc_info=True) + + await asyncio.gather( + 
hooks.on_tool_end(context_wrapper, agent, apply_patch_tool, output_text), + ( + agent_hooks.on_tool_end(context_wrapper, agent, apply_patch_tool, output_text) + if agent_hooks + else _coro.noop_coroutine() + ), + ) - status: Literal["completed", "failed"] = "completed" - output_text = "" - - try: - editor = apply_patch_tool.editor - if operation.type == "create_file": - result = editor.create_file(operation) - elif operation.type == "update_file": - result = editor.update_file(operation) - elif operation.type == "delete_file": - result = editor.delete_file(operation) - else: # pragma: no cover - validated in coerce_apply_patch_operation - raise ModelBehaviorError(f"Unsupported apply_patch operation: {operation.type}") - - awaited = await result if inspect.isawaitable(result) else result - normalized = normalize_apply_patch_result(awaited) - if normalized: - if normalized.status in {"completed", "failed"}: - status = normalized.status - if normalized.output: - output_text = normalized.output - except Exception as exc: - status = "failed" - output_text = format_shell_error(exc) - logger.error("Apply patch editor failed: %s", exc, exc_info=True) + raw_item: dict[str, Any] = { + "type": "apply_patch_call_output", + "call_id": call_id, + "status": status, + } + if output_text: + raw_item["output"] = output_text - await asyncio.gather( - hooks.on_tool_end(context_wrapper, agent, apply_patch_tool, output_text), - ( - agent_hooks.on_tool_end(context_wrapper, agent, apply_patch_tool, output_text) - if agent_hooks - else _coro.noop_coroutine() - ), - ) + if span and config.trace_include_sensitive_data: + span.span_data.output = output_text - raw_item: dict[str, Any] = { - "type": "apply_patch_call_output", - "call_id": call_id, - "status": status, - } - if output_text: - raw_item["output"] = output_text + return ToolCallOutputItem( + agent=agent, + output=output_text, + raw_item=raw_item, + ) - return ToolCallOutputItem( - agent=agent, - output=output_text, - raw_item=raw_item, + return await with_tool_function_span( + config=config, + tool_name=apply_patch_tool.name, + fn=_run_call, ) diff --git a/src/agents/run_internal/tool_execution.py b/src/agents/run_internal/tool_execution.py index a51955dee2..cdac898a3b 100644 --- a/src/agents/run_internal/tool_execution.py +++ b/src/agents/run_internal/tool_execution.py @@ -10,7 +10,7 @@ import inspect import json from collections.abc import Callable, Mapping, Sequence -from typing import TYPE_CHECKING, Any, Literal, cast +from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast from openai.types.responses import ResponseFunctionToolCall from openai.types.responses.response_input_item_param import ( @@ -62,7 +62,7 @@ ToolOutputGuardrailData, ToolOutputGuardrailResult, ) -from ..tracing import SpanError, function_span +from ..tracing import Span, SpanError, function_span, get_current_trace from ..util import _coro, _error_tracing from ..util._approvals import evaluate_needs_approval_setting from .approvals import append_approval_error_output @@ -104,6 +104,8 @@ "normalize_max_output_length", "normalize_shell_output_entries", "format_shell_error", + "get_trace_tool_error", + "with_tool_function_span", "build_litellm_json_tool_call", "process_hosted_mcp_approvals", "collect_manual_mcp_approvals", @@ -121,6 +123,9 @@ "execute_approved_tools", ] +REDACTED_TOOL_ERROR_MESSAGE = "Tool execution failed. Error details are redacted." 
+TToolSpanResult = TypeVar("TToolSpanResult")
+
 
 # --------------------------
 # Public helpers
@@ -531,6 +536,27 @@ def format_shell_error(error: Exception | BaseException | Any) -> str:
     return repr(error)
 
 
+def get_trace_tool_error(*, trace_include_sensitive_data: bool, error_message: str) -> str:
+    """Return a trace-safe tool error string based on the sensitive-data setting."""
+    return error_message if trace_include_sensitive_data else REDACTED_TOOL_ERROR_MESSAGE
+
+
+async def with_tool_function_span(
+    *,
+    config: RunConfig,
+    tool_name: str,
+    fn: Callable[[Span[Any] | None], Any],
+) -> TToolSpanResult:
+    """Execute a tool callback in a function span when tracing is active."""
+    if config.tracing_disabled or get_current_trace() is None:
+        result = fn(None)
+        return await result if inspect.isawaitable(result) else cast(TToolSpanResult, result)
+
+    with function_span(tool_name) as span:
+        result = fn(span)
+        return await result if inspect.isawaitable(result) else cast(TToolSpanResult, result)
+
+
 def build_litellm_json_tool_call(output: ResponseFunctionToolCall) -> FunctionTool:
     """Wrap a JSON string result in a FunctionTool so LiteLLM can stream it."""
 
diff --git a/tests/test_apply_patch_tool.py b/tests/test_apply_patch_tool.py
index 1c07ab2374..4a7e581cef 100644
--- a/tests/test_apply_patch_tool.py
+++ b/tests/test_apply_patch_tool.py
@@ -1,15 +1,25 @@
 from __future__ import annotations
 
+import json
 from dataclasses import dataclass
 from typing import Any, cast
 
 import pytest
 
-from agents import Agent, ApplyPatchTool, RunConfig, RunContextWrapper, RunHooks
+from agents import (
+    Agent,
+    ApplyPatchTool,
+    RunConfig,
+    RunContextWrapper,
+    RunHooks,
+    set_tracing_disabled,
+    trace,
+)
 from agents.editor import ApplyPatchOperation, ApplyPatchResult
 from agents.items import ToolApprovalItem, ToolCallOutputItem
 from agents.run_internal.run_loop import ApplyPatchAction, ToolRunApplyPatchCall
 
+from .testing_processor import SPAN_PROCESSOR_TESTING
 from .utils.hitl import (
     HITL_REJECTION_MSG,
     make_context_wrapper,
@@ -19,6 +29,19 @@
 )
 
 
+def _get_function_span(tool_name: str) -> dict[str, Any]:
+    for span in SPAN_PROCESSOR_TESTING.get_ordered_spans(including_empty=True):
+        exported = span.export()
+        if not exported:
+            continue
+        span_data = exported.get("span_data")
+        if not isinstance(span_data, dict):
+            continue
+        if span_data.get("type") == "function" and span_data.get("name") == tool_name:
+            return exported
+    raise AssertionError(f"Function span for tool '{tool_name}' not found")
+
+
 def _call(call_id: str, operation: dict[str, Any]) -> DummyApplyPatchCall:
     return DummyApplyPatchCall(type="apply_patch_call", call_id=call_id, operation=operation)
 
@@ -124,6 +147,71 @@ def update_file(self, operation: ApplyPatchOperation) -> ApplyPatchResult:
     assert payload_dict["status"] == "failed"
 
 
+@pytest.mark.asyncio
+async def test_apply_patch_tool_emits_function_span() -> None:
+    editor = RecordingEditor()
+    tool = ApplyPatchTool(editor=editor)
+    agent, context_wrapper, tool_run = build_apply_patch_call(
+        tool, "call_apply_trace", {"type": "update_file", "path": "tasks.md", "diff": "-a\n+b\n"}
+    )
+
+    set_tracing_disabled(False)
+    with trace("apply-patch-span-test"):
+        result = await ApplyPatchAction.execute(
+            agent=agent,
+            call=tool_run,
+            hooks=RunHooks[Any](),
+            context_wrapper=context_wrapper,
+            config=RunConfig(),
+        )
+
+    assert isinstance(result, ToolCallOutputItem)
+    function_span = _get_function_span(tool.name)
+    span_data = cast(dict[str, Any], function_span["span_data"])
+    assert "tasks.md" in cast(str, span_data.get("input", ""))
+    assert "Updated tasks.md" in cast(str, span_data.get("output", ""))
+
+
+@pytest.mark.asyncio
+async def test_apply_patch_tool_redacts_span_error_when_sensitive_data_disabled() -> None:
+    secret_error = "patch secret output"
+
+    class ExplodingEditor(RecordingEditor):
+        def update_file(self, operation: ApplyPatchOperation) -> ApplyPatchResult:
+            raise RuntimeError(secret_error)
+
+    tool = ApplyPatchTool(editor=ExplodingEditor())
+    agent, context_wrapper, tool_run = build_apply_patch_call(
+        tool,
+        "call_apply_trace_redacted",
+        {"type": "update_file", "path": "tasks.md", "diff": "-a\n+b\n"},
+    )
+
+    set_tracing_disabled(False)
+    with trace("apply-patch-span-redaction-test"):
+        result = await ApplyPatchAction.execute(
+            agent=agent,
+            call=tool_run,
+            hooks=RunHooks[Any](),
+            context_wrapper=context_wrapper,
+            config=RunConfig(trace_include_sensitive_data=False),
+        )
+
+    assert isinstance(result, ToolCallOutputItem)
+    function_span = _get_function_span(tool.name)
+    assert function_span.get("error") == {
+        "message": "Error running tool",
+        "data": {
+            "tool_name": tool.name,
+            "error": "Tool execution failed. Error details are redacted.",
+        },
+    }
+    assert secret_error not in json.dumps(function_span)
+    span_data = cast(dict[str, Any], function_span["span_data"])
+    assert span_data.get("input") is None
+    assert span_data.get("output") is None
+
+
 @pytest.mark.asyncio
 async def test_apply_patch_tool_accepts_mapping_call() -> None:
     editor = RecordingEditor()
diff --git a/tests/test_computer_action.py b/tests/test_computer_action.py
index fda553626b..e79a2d27ed 100644
--- a/tests/test_computer_action.py
+++ b/tests/test_computer_action.py
@@ -4,6 +4,7 @@
 that screenshots are taken and wrapped appropriately, and that the execute
 function invokes hooks and returns the expected ToolCallOutputItem."""
 
+import json
 from typing import Any, cast
 
 import pytest
@@ -31,12 +32,29 @@
     RunConfig,
     RunContextWrapper,
     RunHooks,
+    set_tracing_disabled,
+    trace,
 )
 from agents.items import ToolCallOutputItem
 from agents.run_internal import run_loop
 from agents.run_internal.run_loop import ComputerAction, ToolRunComputerAction
 from agents.tool import ComputerToolSafetyCheckData
 
+from .testing_processor import SPAN_PROCESSOR_TESTING
+
+
+def _get_function_span(tool_name: str) -> dict[str, Any]:
+    for span in SPAN_PROCESSOR_TESTING.get_ordered_spans(including_empty=True):
+        exported = span.export()
+        if not exported:
+            continue
+        span_data = exported.get("span_data")
+        if not isinstance(span_data, dict):
+            continue
+        if span_data.get("type") == "function" and span_data.get("name") == tool_name:
+            return exported
+    raise AssertionError(f"Function span for tool '{tool_name}' not found")
+
 
 class LoggingComputer(Computer):
     """A `Computer` implementation that logs calls to its methods for verification in tests."""
@@ -313,6 +331,85 @@ async def test_execute_invokes_hooks_and_returns_tool_call_output() -> None:
     assert raw["output"]["image_url"].endswith("xyz")
 
 
+@pytest.mark.asyncio
+async def test_execute_emits_function_span() -> None:
+    computer = LoggingComputer(screenshot_return="trace_img")
+    comptool = ComputerTool(computer=computer)
+    tool_call = ResponseComputerToolCall(
+        id="tool_trace",
+        type="computer_call",
+        action=ActionScreenshot(type="screenshot"),
+        call_id="tool_trace",
+        pending_safety_checks=[],
+        status="completed",
+    )
+    tool_run = ToolRunComputerAction(tool_call=tool_call, computer_tool=comptool)
+    agent = Agent(name="test_agent_trace", tools=[comptool])
+
+    set_tracing_disabled(False)
+    with trace("computer-span-test"):
+        result = await ComputerAction.execute(
+            agent=agent,
+            action=tool_run,
+            hooks=RunHooks[Any](),
+            context_wrapper=RunContextWrapper(context=None),
+            config=RunConfig(),
+        )
+
+    assert isinstance(result, ToolCallOutputItem)
+    function_span = _get_function_span(comptool.name)
+    span_data = cast(dict[str, Any], function_span["span_data"])
+    assert span_data.get("input") is not None
+    assert cast(str, span_data.get("output", "")).startswith("data:image/png;base64,")
+
+
+@pytest.mark.asyncio
+async def test_execute_redacts_span_error_when_sensitive_data_disabled() -> None:
+    secret_error = "computer secret output"
+
+    class FailingComputer(LoggingComputer):
+        def screenshot(self) -> str:
+            raise RuntimeError(secret_error)
+
+    computer = FailingComputer()
+    comptool = ComputerTool(computer=computer)
+    tool_call = ResponseComputerToolCall(
+        id="tool_trace_error",
+        type="computer_call",
+        action=ActionScreenshot(type="screenshot"),
+        call_id="tool_trace_error",
+        pending_safety_checks=[],
+        status="completed",
+    )
+    tool_run = ToolRunComputerAction(tool_call=tool_call, computer_tool=comptool)
+    agent = Agent(name="test_agent_trace_error", tools=[comptool])
+
+    set_tracing_disabled(False)
+    with trace("computer-span-redaction-test"):
+        result = await ComputerAction.execute(
+            agent=agent,
+            action=tool_run,
+            hooks=RunHooks[Any](),
+            context_wrapper=RunContextWrapper(context=None),
+            config=RunConfig(trace_include_sensitive_data=False),
+        )
+
+    assert isinstance(result, ToolCallOutputItem)
+    assert result.output == ""
+    function_span = _get_function_span(comptool.name)
+    assert function_span.get("error") == {
+        "message": "Error running tool",
+        "data": {
+            "tool_name": comptool.name,
+            "error": "Tool execution failed. Error details are redacted.",
+        },
+    }
+    assert secret_error not in json.dumps(function_span)
+    span_data = cast(dict[str, Any], function_span["span_data"])
+    assert span_data.get("input") is None
+    assert span_data.get("output") is None
+
+
 @pytest.mark.asyncio
 async def test_pending_safety_check_acknowledged() -> None:
     """Safety checks should be acknowledged via the callback."""
diff --git a/tests/test_shell_tool.py b/tests/test_shell_tool.py
index 2ec532f049..b513388d37 100644
--- a/tests/test_shell_tool.py
+++ b/tests/test_shell_tool.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import json
 from typing import Any, cast
 
 import pytest
@@ -14,10 +15,13 @@
     ShellResult,
     ShellTool,
     UserError,
+    set_tracing_disabled,
+    trace,
 )
 from agents.items import ToolApprovalItem, ToolCallOutputItem
 from agents.run_internal.run_loop import ShellAction, ToolRunShellCall, execute_shell_calls
 
+from .testing_processor import SPAN_PROCESSOR_TESTING
 from .utils.hitl import (
     HITL_REJECTION_MSG,
     make_context_wrapper,
@@ -29,6 +33,19 @@
 )
 
 
+def _get_function_span(tool_name: str) -> dict[str, Any]:
+    for span in SPAN_PROCESSOR_TESTING.get_ordered_spans(including_empty=True):
+        exported = span.export()
+        if not exported:
+            continue
+        span_data = exported.get("span_data")
+        if not isinstance(span_data, dict):
+            continue
+        if span_data.get("type") == "function" and span_data.get("name") == tool_name:
+            return exported
+    raise AssertionError(f"Function span for tool '{tool_name}' not found")
+
+
 def _shell_call(call_id: str = "call_shell") -> dict[str, Any]:
     return cast(
         dict[str, Any],
@@ -268,6 +285,71 @@ async def test_shell_tool_structured_output_is_rendered() -> None:
     assert "provider_data" not in payload_dict
 
 
+@pytest.mark.asyncio
+async def test_shell_tool_emits_function_span() -> None:
+    shell_tool = ShellTool(executor=lambda request: "shell span output")
+    tool_run = ToolRunShellCall(tool_call=_shell_call("call_shell_trace"), shell_tool=shell_tool)
+    agent = Agent(name="shell-agent", tools=[shell_tool])
+    context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None)
+
+    set_tracing_disabled(False)
+    with trace("shell-span-test"):
+        result = await ShellAction.execute(
+            agent=agent,
+            call=tool_run,
+            hooks=RunHooks[Any](),
+            context_wrapper=context_wrapper,
+            config=RunConfig(),
+        )
+
+    assert isinstance(result, ToolCallOutputItem)
+    function_span = _get_function_span(shell_tool.name)
+    span_data = cast(dict[str, Any], function_span["span_data"])
+    assert "echo hi" in cast(str, span_data.get("input", ""))
+    assert span_data.get("output") == "shell span output"
+
+
+@pytest.mark.asyncio
+async def test_shell_tool_redacts_span_error_when_sensitive_data_disabled() -> None:
+    secret_error = "shell secret output"
+
+    class ExplodingExecutor:
+        def __call__(self, request):
+            raise RuntimeError(secret_error)
+
+    shell_tool = ShellTool(executor=ExplodingExecutor())
+    tool_run = ToolRunShellCall(
+        tool_call=_shell_call("call_shell_trace_redacted"),
+        shell_tool=shell_tool,
+    )
+    agent = Agent(name="shell-agent", tools=[shell_tool])
+    context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None)
+
+    set_tracing_disabled(False)
+    with trace("shell-span-redaction-test"):
+        result = await ShellAction.execute(
+            agent=agent,
+            call=tool_run,
+            hooks=RunHooks[Any](),
+            context_wrapper=context_wrapper,
+            config=RunConfig(trace_include_sensitive_data=False),
+        )
+
+    assert isinstance(result, ToolCallOutputItem)
+    function_span = _get_function_span(shell_tool.name)
+    assert function_span.get("error") == {
+        "message": "Error running tool",
+        "data": {
+            "tool_name": shell_tool.name,
+            "error": "Tool execution failed. Error details are redacted.",
+        },
+    }
+    assert secret_error not in json.dumps(function_span)
+    span_data = cast(dict[str, Any], function_span["span_data"])
+    assert span_data.get("input") is None
+    assert span_data.get("output") is None
+
+
 @pytest.mark.asyncio
 async def test_shell_tool_executor_failure_returns_error() -> None:
     class ExplodingExecutor:

From 4be943580b2060ae8c338a2c48f1e143485131f7 Mon Sep 17 00:00:00 2001
From: Kazuhiro Sera
Date: Tue, 17 Feb 2026 09:15:14 +0900
Subject: [PATCH 2/2] fix review comment

---
 src/agents/run_internal/tool_actions.py |  2 +-
 tests/test_computer_action.py           | 17 ++++++++---------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/agents/run_internal/tool_actions.py b/src/agents/run_internal/tool_actions.py
index 010518dd7d..41607913fc 100644
--- a/src/agents/run_internal/tool_actions.py
+++ b/src/agents/run_internal/tool_actions.py
@@ -129,7 +129,6 @@ async def _run_action(span: Any | None) -> RunItem:
                 ),
             )
 
-            output = ""
             try:
                 output = await cls._execute_action_and_capture(computer, action.tool_call)
             except Exception as exc:
@@ -149,6 +148,7 @@ async def _run_action(span: Any | None) -> RunItem:
                     )
                 )
                 logger.error("Failed to execute computer action: %s", exc, exc_info=True)
+                raise
 
             await asyncio.gather(
                 hooks.on_tool_end(context_wrapper, agent, action.computer_tool, output),
diff --git a/tests/test_computer_action.py b/tests/test_computer_action.py
index e79a2d27ed..75d3448d60 100644
--- a/tests/test_computer_action.py
+++ b/tests/test_computer_action.py
@@ -386,16 +386,15 @@ def screenshot(self) -> str:
 
     set_tracing_disabled(False)
     with trace("computer-span-redaction-test"):
-        result = await ComputerAction.execute(
-            agent=agent,
-            action=tool_run,
-            hooks=RunHooks[Any](),
-            context_wrapper=RunContextWrapper(context=None),
-            config=RunConfig(trace_include_sensitive_data=False),
-        )
+        with pytest.raises(RuntimeError, match=secret_error):
+            await ComputerAction.execute(
+                agent=agent,
+                action=tool_run,
+                hooks=RunHooks[Any](),
+                context_wrapper=RunContextWrapper(context=None),
+                config=RunConfig(trace_include_sensitive_data=False),
+            )
 
-        assert isinstance(result, ToolCallOutputItem)
-        assert result.output == ""
         function_span = _get_function_span(comptool.name)
         assert function_span.get("error") == {
             "message": "Error running tool",