From e83e6ed03eb66a17bd48b7e71a766b0bf0888e5a Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Sun, 18 Jan 2026 18:04:06 -0800
Subject: [PATCH 1/7] Phase 4: ffmpeg generate video clips;

---
 sentience/failure_artifacts.py       | 178 ++++++++++++++++++++++++++-
 tests/unit/test_failure_artifacts.py | 121 ++++++++++++++++++
 2 files changed, 298 insertions(+), 1 deletion(-)

diff --git a/sentience/failure_artifacts.py b/sentience/failure_artifacts.py
index fd92135..dbf6d1a 100644
--- a/sentience/failure_artifacts.py
+++ b/sentience/failure_artifacts.py
@@ -1,14 +1,34 @@
 from __future__ import annotations
 
 import json
+import logging
 import shutil
+import subprocess
 import tempfile
 import time
 from collections.abc import Callable
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Literal
 
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ClipOptions:
+    """Options for generating video clips from frames."""
+
+    mode: Literal["off", "auto", "on"] = "auto"
+    """Clip generation mode:
+    - "off": Never generate clips
+    - "auto": Generate only if ffmpeg is available on PATH
+    - "on": Always attempt to generate (will warn if ffmpeg missing)
+    """
+    fps: int = 8
+    """Frames per second for the generated video."""
+    seconds: float | None = None
+    """Duration of clip in seconds. If None, uses buffer_seconds."""
+
 
 @dataclass
 class FailureArtifactsOptions:
@@ -19,6 +39,7 @@ class FailureArtifactsOptions:
     output_dir: str = ".sentience/artifacts"
     on_before_persist: Callable[[RedactionContext], RedactionResult] | None = None
     redact_snapshot_values: bool = True
+    clip: ClipOptions = field(default_factory=ClipOptions)
 
 
 @dataclass
@@ -47,6 +68,123 @@ class _FrameRecord:
     path: Path
 
 
+def _is_ffmpeg_available() -> bool:
+    """Check if ffmpeg is available on the system PATH."""
+    try:
+        result = subprocess.run(
+            ["ffmpeg", "-version"],
+            capture_output=True,
+            timeout=5,
+        )
+        return result.returncode == 0
+    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+        return False
+
+
+def _generate_clip_from_frames(
+    frames_dir: Path,
+    output_path: Path,
+    fps: int = 8,
+    frame_pattern: str = "frame_*.png",
+) -> bool:
+    """
+    Generate an MP4 video clip from a directory of frames using ffmpeg.
+
+    Args:
+        frames_dir: Directory containing frame images
+        output_path: Output path for the MP4 file
+        fps: Frames per second for the output video
+        frame_pattern: Glob pattern to match frame files
+
+    Returns:
+        True if clip was generated successfully, False otherwise
+    """
+    # Find all frames and sort by timestamp (extracted from filename)
+    frame_files = sorted(frames_dir.glob(frame_pattern))
+    if not frame_files:
+        # Try jpeg pattern as well
+        frame_files = sorted(frames_dir.glob("frame_*.jpeg"))
+    if not frame_files:
+        frame_files = sorted(frames_dir.glob("frame_*.jpg"))
+    if not frame_files:
+        logger.warning("No frame files found for clip generation")
+        return False
+
+    # Create a temporary file list for ffmpeg concat demuxer
+    # This approach handles arbitrary frame filenames and timing
+    list_file = frames_dir / "frames_list.txt"
+    try:
+        # Calculate frame duration based on FPS
+        frame_duration = 1.0 / fps
+
+        with open(list_file, "w") as f:
+            for frame_path in frame_files:
+                # ffmpeg concat format: file 'path' + duration
+                f.write(f"file '{frame_path.name}'\n")
+                f.write(f"duration {frame_duration}\n")
+            # Add last frame again (ffmpeg concat quirk)
+            if frame_files:
+                f.write(f"file '{frame_files[-1].name}'\n")
+
+        # Run ffmpeg to generate the clip
+        # -y: overwrite output file
+        # -f concat: use concat demuxer
+        # -safe 0: allow unsafe file paths
+        # -i: input file list
+        # -vsync vfr: variable frame rate
+        # -pix_fmt yuv420p: compatibility with most players
+        # -c:v libx264: H.264 codec
+        # -crf 23: quality (lower = better, 23 is default)
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-f",
+            "concat",
+            "-safe",
+            "0",
+            "-i",
+            str(list_file),
+            "-vsync",
+            "vfr",
+            "-pix_fmt",
+            "yuv420p",
+            "-c:v",
+            "libx264",
+            "-crf",
+            "23",
+            str(output_path),
+        ]
+
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            timeout=60,  # 1 minute timeout
+            cwd=str(frames_dir),  # Run from frames dir for relative paths
+        )
+
+        if result.returncode != 0:
+            logger.warning(
+                f"ffmpeg failed with return code {result.returncode}: "
+                f"{result.stderr.decode('utf-8', errors='replace')[:500]}"
+            )
+            return False
+
+        return output_path.exists()
+
+    except subprocess.TimeoutExpired:
+        logger.warning("ffmpeg timed out during clip generation")
+        return False
+    except Exception as e:
+        logger.warning(f"Error generating clip: {e}")
+        return False
+    finally:
+        # Clean up the list file
+        try:
+            list_file.unlink(missing_ok=True)
+        except Exception:
+            pass
+
+
 class FailureArtifactBuffer:
     """
     Ring buffer of screenshots with minimal persistence on failure.
@@ -215,6 +353,42 @@ def persist(
         if diagnostics_payload is not None:
             self._write_json_atomic(run_dir / "diagnostics.json", diagnostics_payload)
 
+        # Generate video clip from frames (optional, requires ffmpeg)
+        clip_generated = False
+        clip_path: Path | None = None
+        clip_options = self.options.clip
+
+        if not drop_frames and len(frame_paths) > 0 and clip_options.mode != "off":
+            should_generate = False
+
+            if clip_options.mode == "auto":
+                # Only generate if ffmpeg is available
+                should_generate = _is_ffmpeg_available()
+                if not should_generate:
+                    logger.debug("ffmpeg not available, skipping clip generation (mode=auto)")
+            elif clip_options.mode == "on":
+                # Always attempt to generate
+                should_generate = True
+                if not _is_ffmpeg_available():
+                    logger.warning(
+                        "ffmpeg not found on PATH but clip.mode='on'. "
+                        "Install ffmpeg to generate video clips."
+                    )
+                    should_generate = False
+
+            if should_generate:
+                clip_path = run_dir / "failure.mp4"
+                clip_generated = _generate_clip_from_frames(
+                    frames_dir=frames_out,
+                    output_path=clip_path,
+                    fps=clip_options.fps,
+                )
+                if clip_generated:
+                    logger.info(f"Generated failure clip: {clip_path}")
+                else:
+                    logger.warning("Failed to generate video clip")
+                    clip_path = None
+
         manifest = {
             "run_id": self.run_id,
             "created_at_ms": ts,
@@ -227,6 +401,8 @@ def persist(
             ),
             "snapshot": "snapshot.json" if snapshot_payload is not None else None,
             "diagnostics": "diagnostics.json" if diagnostics_payload is not None else None,
+            "clip": "failure.mp4" if clip_generated else None,
+            "clip_fps": clip_options.fps if clip_generated else None,
             "metadata": metadata or {},
             "frames_redacted": not drop_frames and self.options.on_before_persist is not None,
             "frames_dropped": drop_frames,
diff --git a/tests/unit/test_failure_artifacts.py b/tests/unit/test_failure_artifacts.py
index 0122bba..4291338 100644
--- a/tests/unit/test_failure_artifacts.py
+++ b/tests/unit/test_failure_artifacts.py
@@ -1,12 +1,15 @@
 from __future__ import annotations
 
 import json
+from unittest.mock import patch
 
 from sentience.failure_artifacts import (
+    ClipOptions,
     FailureArtifactBuffer,
     FailureArtifactsOptions,
     RedactionContext,
     RedactionResult,
+    _is_ffmpeg_available,
 )
 
 
@@ -90,3 +93,121 @@ def redactor(ctx: RedactionContext) -> RedactionResult:
     manifest = json.loads((run_dir / "manifest.json").read_text())
     assert manifest["frame_count"] == 0
     assert manifest["frames_dropped"] is True
+
+
+# -------------------- Phase 4: Clip generation tests --------------------
+
+
+def test_clip_mode_off_skips_generation(tmp_path) -> None:
+    """When clip.mode='off', no clip generation is attempted."""
+    opts = FailureArtifactsOptions(
+        output_dir=str(tmp_path),
+        clip=ClipOptions(mode="off"),
+    )
+    buf = FailureArtifactBuffer(run_id="run-clip-off", options=opts)
+    buf.add_frame(b"frame")
+
+    run_dir = buf.persist(reason="fail", status="failure")
+    assert run_dir is not None
+    manifest = json.loads((run_dir / "manifest.json").read_text())
+    assert manifest["clip"] is None
+    assert manifest["clip_fps"] is None
+
+
+def test_clip_mode_auto_skips_when_ffmpeg_missing(tmp_path) -> None:
+    """When clip.mode='auto' and ffmpeg is not available, skip silently."""
+    with patch("sentience.failure_artifacts._is_ffmpeg_available", return_value=False):
+        opts = FailureArtifactsOptions(
+            output_dir=str(tmp_path),
+            clip=ClipOptions(mode="auto", fps=10),
+        )
+        buf = FailureArtifactBuffer(run_id="run-clip-auto", options=opts)
+        buf.add_frame(b"frame")
+
+        run_dir = buf.persist(reason="fail", status="failure")
+        assert run_dir is not None
+        manifest = json.loads((run_dir / "manifest.json").read_text())
+        assert manifest["clip"] is None
+        assert manifest["clip_fps"] is None
+
+
+def test_clip_mode_on_warns_when_ffmpeg_missing(tmp_path) -> None:
+    """When clip.mode='on' and ffmpeg is not available, log warning but don't fail."""
+    with patch("sentience.failure_artifacts._is_ffmpeg_available", return_value=False):
+        opts = FailureArtifactsOptions(
+            output_dir=str(tmp_path),
+            clip=ClipOptions(mode="on"),
+        )
+        buf = FailureArtifactBuffer(run_id="run-clip-on-missing", options=opts)
+        buf.add_frame(b"frame")
+
+        run_dir = buf.persist(reason="fail", status="failure")
+        assert run_dir is not None
+        manifest = json.loads((run_dir / "manifest.json").read_text())
+        # Should not have clip since ffmpeg is not available
+        assert manifest["clip"] is None
+
+
+def test_clip_generation_with_mock_ffmpeg(tmp_path) -> None:
+    """Test clip generation logic with mocked ffmpeg subprocess."""
+    with patch("sentience.failure_artifacts._is_ffmpeg_available", return_value=True):
+        with patch("sentience.failure_artifacts._generate_clip_from_frames") as mock_gen:
+            mock_gen.return_value = True  # Simulate successful clip generation
+
+            opts = FailureArtifactsOptions(
+                output_dir=str(tmp_path),
+                clip=ClipOptions(mode="on", fps=12),
+            )
+            buf = FailureArtifactBuffer(run_id="run-clip-mock", options=opts)
+            buf.add_frame(b"frame1")
+            buf.add_frame(b"frame2")
+
+            run_dir = buf.persist(reason="fail", status="failure")
+            assert run_dir is not None
+
+            # Verify _generate_clip_from_frames was called with correct args
+            assert mock_gen.called
+            call_args = mock_gen.call_args
+            assert call_args.kwargs["fps"] == 12
+
+            manifest = json.loads((run_dir / "manifest.json").read_text())
+            assert manifest["clip"] == "failure.mp4"
+            assert manifest["clip_fps"] == 12
+
+
+def test_clip_not_generated_when_frames_dropped(tmp_path) -> None:
+    """Clip should not be generated when frames are dropped by redaction."""
+    with patch("sentience.failure_artifacts._is_ffmpeg_available", return_value=True):
+        with patch("sentience.failure_artifacts._generate_clip_from_frames") as mock_gen:
+            opts = FailureArtifactsOptions(
+                output_dir=str(tmp_path),
+                clip=ClipOptions(mode="on"),
+                on_before_persist=lambda ctx: RedactionResult(drop_frames=True),
+            )
+            buf = FailureArtifactBuffer(run_id="run-clip-dropped", options=opts)
+            buf.add_frame(b"frame")
+
+            run_dir = buf.persist(reason="fail", status="failure")
+            assert run_dir is not None
+
+            # Should not call clip generation when frames are dropped
+            assert not mock_gen.called
+            manifest = json.loads((run_dir / "manifest.json").read_text())
+            assert manifest["clip"] is None
+            assert manifest["frames_dropped"] is True
+
+
+def test_is_ffmpeg_available_with_missing_binary() -> None:
+    """Test _is_ffmpeg_available returns False when ffmpeg is not found."""
+    with patch("sentience.failure_artifacts.subprocess.run") as mock_run:
+        mock_run.side_effect = FileNotFoundError("ffmpeg not found")
+        assert _is_ffmpeg_available() is False
+
+
+def test_is_ffmpeg_available_with_timeout() -> None:
+    """Test _is_ffmpeg_available returns False on timeout."""
+    import subprocess
+
+    with patch("sentience.failure_artifacts.subprocess.run") as mock_run:
+        mock_run.side_effect = subprocess.TimeoutExpired(cmd="ffmpeg", timeout=5)
+        assert _is_ffmpeg_available() is False

From 00009b9b28970f868a541f20963ce29be0f18a09 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Sun, 18 Jan 2026 19:24:28 -0800
Subject: [PATCH 2/7] upload assert failure artifacts

---
 sentience/failure_artifacts.py       | 432 ++++++++++++++++++++++++++-
 tests/unit/test_failure_artifacts.py | 165 ++++++++++
 2 files changed, 596 insertions(+), 1 deletion(-)

diff --git a/sentience/failure_artifacts.py b/sentience/failure_artifacts.py
index dbf6d1a..a2d512b 100644
--- a/sentience/failure_artifacts.py
+++ b/sentience/failure_artifacts.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import gzip
 import json
 import logging
 import shutil
@@ -7,13 +8,34 @@
 import tempfile
 import time
 from collections.abc import Callable
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Literal
+from typing import Any, Literal, Protocol
+
+import requests
+
+from sentience.constants import SENTIENCE_API_URL
 
 logger = logging.getLogger(__name__)
 
 
+class SentienceLogger(Protocol):
+    """Protocol for optional logger interface."""
+
+    def info(self, message: str) -> None:
+        """Log info message."""
+        ...
+
+    def warning(self, message: str) -> None:
+        """Log warning message."""
+        ...
+
+    def error(self, message: str) -> None:
+        """Log error message."""
+        ...
+
+
 @dataclass
 class ClipOptions:
     """Options for generating video clips from frames."""
@@ -415,3 +437,411 @@ def persist(
     def cleanup(self) -> None:
         if self._temp_dir.exists():
             shutil.rmtree(self._temp_dir, ignore_errors=True)
+
+    def upload_to_cloud(
+        self,
+        *,
+        api_key: str,
+        api_url: str | None = None,
+        persisted_dir: Path | None = None,
+        ext_logger: SentienceLogger | None = None,
+    ) -> str | None:
+        """
+        Upload persisted artifacts to cloud storage.
+
+        This method uploads all artifacts from a persisted directory to cloud storage
+        using presigned URLs from the gateway. It follows the same pattern as trace
+        screenshot uploads.
+
+        Args:
+            api_key: Sentience API key for authentication
+            api_url: Sentience API base URL (default: https://api.sentienceapi.com)
+            persisted_dir: Path to persisted artifacts directory. If None, uses the
+                          most recent persist() output directory.
+            ext_logger: Optional logger for progress/error messages
+
+        Returns:
+            artifact_index_key on success, None on failure
+
+        Example:
+            >>> buf = FailureArtifactBuffer(run_id="run-123", options=options)
+            >>> buf.add_frame(screenshot_bytes)
+            >>> run_dir = buf.persist(reason="assertion failed", status="failure")
+            >>> artifact_key = buf.upload_to_cloud(api_key="sk-...")
+            >>> # artifact_key can be passed to /v1/traces/complete
+        """
+        base_url = api_url or SENTIENCE_API_URL
+
+        # Determine which directory to upload
+        if persisted_dir is None:
+            # Find most recent persisted directory
+            output_dir = Path(self.options.output_dir)
+            if not output_dir.exists():
+                if ext_logger:
+                    ext_logger.warning("No artifacts directory found")
+                return None
+
+            # Look for directories matching run_id pattern
+            matching_dirs = sorted(
+                [d for d in output_dir.iterdir() if d.is_dir() and d.name.startswith(self.run_id)],
+                key=lambda p: p.stat().st_mtime,
+                reverse=True,
+            )
+            if not matching_dirs:
+                if ext_logger:
+                    ext_logger.warning(f"No persisted artifacts found for run_id={self.run_id}")
+                return None
+            persisted_dir = matching_dirs[0]
+
+        if not persisted_dir.exists():
+            if ext_logger:
+                ext_logger.warning(f"Artifacts directory not found: {persisted_dir}")
+            return None
+
+        # Read manifest to understand what files need uploading
+        manifest_path = persisted_dir / "manifest.json"
+        if not manifest_path.exists():
+            if ext_logger:
+                ext_logger.warning("manifest.json not found in artifacts directory")
+            return None
+
+        with open(manifest_path, encoding="utf-8") as f:
+            manifest = json.load(f)
+
+        # Build list of artifacts to upload
+        artifacts = self._collect_artifacts_for_upload(persisted_dir, manifest)
+        if not artifacts:
+            if ext_logger:
+                ext_logger.warning("No artifacts to upload")
+            return None
+
+        if ext_logger:
+            ext_logger.info(f"Uploading {len(artifacts)} artifact(s) to cloud")
+
+        # Request presigned URLs from gateway
+        upload_urls = self._request_artifact_urls(
+            api_key=api_key,
+            api_url=base_url,
+            artifacts=artifacts,
+            ext_logger=ext_logger,
+        )
+        if not upload_urls:
+            return None
+
+        # Upload artifacts in parallel
+        artifact_index_key = self._upload_artifacts(
+            artifacts=artifacts,
+            upload_urls=upload_urls,
+            ext_logger=ext_logger,
+        )
+
+        if artifact_index_key:
+            # Report completion to gateway
+            self._complete_artifacts(
+                api_key=api_key,
+                api_url=base_url,
+                artifact_index_key=artifact_index_key,
+                artifacts=artifacts,
+                ext_logger=ext_logger,
+            )
+
+        return artifact_index_key
+
+    def _collect_artifacts_for_upload(
+        self, persisted_dir: Path, manifest: dict[str, Any]
+    ) -> list[dict[str, Any]]:
+        """Collect list of artifacts with their metadata for upload."""
+        artifacts: list[dict[str, Any]] = []
+
+        # Core JSON artifacts
+        json_files = ["manifest.json", "steps.json"]
+        if manifest.get("snapshot"):
+            json_files.append("snapshot.json")
+        if manifest.get("diagnostics"):
+            json_files.append("diagnostics.json")
+
+        for filename in json_files:
+            file_path = persisted_dir / filename
+            if file_path.exists():
+                artifacts.append(
+                    {
+                        "name": filename,
+                        "size_bytes": file_path.stat().st_size,
+                        "content_type": "application/json",
+                        "path": file_path,
+                    }
+                )
+
+        # Video clip
+        if manifest.get("clip"):
+            clip_path = persisted_dir / "failure.mp4"
+            if clip_path.exists():
+                artifacts.append(
+                    {
+                        "name": "failure.mp4",
+                        "size_bytes": clip_path.stat().st_size,
+                        "content_type": "video/mp4",
+                        "path": clip_path,
+                    }
+                )
+
+        # Frames
+        frames_dir = persisted_dir / "frames"
+        if frames_dir.exists():
+            for frame_file in sorted(frames_dir.iterdir()):
+                if frame_file.is_file() and frame_file.suffix in {".jpeg", ".jpg", ".png"}:
+                    content_type = (
+                        "image/jpeg" if frame_file.suffix in {".jpeg", ".jpg"} else "image/png"
+                    )
+                    artifacts.append(
+                        {
+                            "name": f"frames/{frame_file.name}",
+                            "size_bytes": frame_file.stat().st_size,
+                            "content_type": content_type,
+                            "path": frame_file,
+                        }
+                    )
+
+        return artifacts
+
+    def _request_artifact_urls(
+        self,
+        *,
+        api_key: str,
+        api_url: str,
+        artifacts: list[dict[str, Any]],
+        ext_logger: SentienceLogger | None,
+    ) -> dict[str, Any] | None:
+        """Request presigned upload URLs from gateway."""
+        try:
+            # Prepare request payload (exclude local path)
+            artifacts_payload = [
+                {
+                    "name": a["name"],
+                    "size_bytes": a["size_bytes"],
+                    "content_type": a["content_type"],
+                }
+                for a in artifacts
+            ]
+
+            response = requests.post(
+                f"{api_url}/v1/traces/artifacts/init",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={
+                    "run_id": self.run_id,
+                    "artifacts": artifacts_payload,
+                },
+                timeout=30,
+            )
+
+            if response.status_code != 200:
+                if ext_logger:
+                    ext_logger.warning(
+                        f"Failed to get artifact upload URLs: HTTP {response.status_code}"
+                    )
+                return None
+
+            return response.json()
+
+        except Exception as e:
+            if ext_logger:
+                ext_logger.error(f"Error requesting artifact upload URLs: {e}")
+            return None
+
+    def _upload_artifacts(
+        self,
+        *,
+        artifacts: list[dict[str, Any]],
+        upload_urls: dict[str, Any],
+        ext_logger: SentienceLogger | None,
+    ) -> str | None:
+        """Upload artifacts to cloud storage using presigned URLs."""
+        url_map = {item["name"]: item for item in upload_urls.get("upload_urls", [])}
+        index_upload = upload_urls.get("artifact_index_upload")
+
+        uploaded_count = 0
+        failed_names: list[str] = []
+        storage_keys: dict[str, str] = {}
+
+        def upload_one(artifact: dict[str, Any]) -> bool:
+            """Upload a single artifact. Returns True if successful."""
+            name = artifact["name"]
+            url_info = url_map.get(name)
+            if not url_info:
+                return False
+
+            try:
+                file_path = artifact["path"]
+                with open(file_path, "rb") as f:
+                    data = f.read()
+
+                response = requests.put(
+                    url_info["upload_url"],
+                    data=data,
+                    headers={"Content-Type": artifact["content_type"]},
+                    timeout=60,
+                )
+
+                if response.status_code == 200:
+                    storage_keys[name] = url_info.get("storage_key", "")
+                    return True
+                else:
+                    if ext_logger:
+                        ext_logger.warning(
+                            f"Artifact {name} upload failed: HTTP {response.status_code}"
+                        )
+                    return False
+
+            except Exception as e:
+                if ext_logger:
+                    ext_logger.warning(f"Artifact {name} upload error: {e}")
+                return False
+
+        # Upload in parallel (max 10 concurrent)
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            futures = {executor.submit(upload_one, a): a["name"] for a in artifacts}
+
+            for future in as_completed(futures):
+                name = futures[future]
+                if future.result():
+                    uploaded_count += 1
+                else:
+                    failed_names.append(name)
+
+        if ext_logger:
+            if uploaded_count == len(artifacts):
+                ext_logger.info(f"All {uploaded_count} artifacts uploaded successfully")
+            else:
+                ext_logger.warning(
+                    f"Uploaded {uploaded_count}/{len(artifacts)} artifacts. "
+                    f"Failed: {failed_names}"
+                )
+
+        # Upload artifact index file
+        if index_upload and uploaded_count > 0:
+            artifact_index_key = self._upload_artifact_index(
+                artifacts=artifacts,
+                storage_keys=storage_keys,
+                index_upload=index_upload,
+                ext_logger=ext_logger,
+            )
+            return artifact_index_key
+
+        return None
+
+    def _upload_artifact_index(
+        self,
+        *,
+        artifacts: list[dict[str, Any]],
+        storage_keys: dict[str, str],
+        index_upload: dict[str, Any],
+        ext_logger: SentienceLogger | None,
+    ) -> str | None:
+        """Create and upload artifact index file."""
+        try:
+            # Build index content
+            index_data = {
+                "run_id": self.run_id,
+                "created_at_ms": int(time.time() * 1000),
+                "artifacts": [
+                    {
+                        "name": a["name"],
+                        "storage_key": storage_keys.get(a["name"], ""),
+                        "content_type": a["content_type"],
+                    }
+                    for a in artifacts
+                    if a["name"] in storage_keys
+                ],
+            }
+
+            # Compress and upload
+            index_json = json.dumps(index_data, indent=2).encode("utf-8")
+            compressed = gzip.compress(index_json)
+
+            response = requests.put(
+                index_upload["upload_url"],
+                data=compressed,
+                headers={
+                    "Content-Type": "application/json",
+                    "Content-Encoding": "gzip",
+                },
+                timeout=30,
+            )
+
+            if response.status_code == 200:
+                artifact_index_key = index_upload.get("storage_key", "")
+                if ext_logger:
+                    ext_logger.info("Artifact index uploaded successfully")
+                return artifact_index_key
+            else:
+                if ext_logger:
+                    ext_logger.warning(f"Artifact index upload failed: HTTP {response.status_code}")
+                return None
+
+        except Exception as e:
+            if ext_logger:
+                ext_logger.warning(f"Error uploading artifact index: {e}")
+            return None
+
+    def _complete_artifacts(
+        self,
+        *,
+        api_key: str,
+        api_url: str,
+        artifact_index_key: str,
+        artifacts: list[dict[str, Any]],
+        ext_logger: SentienceLogger | None,
+    ) -> None:
+        """Report artifact upload completion to gateway."""
+        try:
+            # Calculate stats
+            total_size = sum(a["size_bytes"] for a in artifacts)
+            frames_artifacts = [a for a in artifacts if a["name"].startswith("frames/")]
+            frames_total = sum(a["size_bytes"] for a in frames_artifacts)
+
+            # Get individual file sizes
+            manifest_size = next(
+                (a["size_bytes"] for a in artifacts if a["name"] == "manifest.json"), 0
+            )
+            snapshot_size = next(
+                (a["size_bytes"] for a in artifacts if a["name"] == "snapshot.json"), 0
+            )
+            diagnostics_size = next(
+                (a["size_bytes"] for a in artifacts if a["name"] == "diagnostics.json"), 0
+            )
+            steps_size = next((a["size_bytes"] for a in artifacts if a["name"] == "steps.json"), 0)
+            clip_size = next((a["size_bytes"] for a in artifacts if a["name"] == "failure.mp4"), 0)
+
+            response = requests.post(
+                f"{api_url}/v1/traces/artifacts/complete",
+                headers={"Authorization": f"Bearer {api_key}"},
+                json={
+                    "run_id": self.run_id,
+                    "artifact_index_key": artifact_index_key,
+                    "stats": {
+                        "manifest_size_bytes": manifest_size,
+                        "snapshot_size_bytes": snapshot_size,
+                        "diagnostics_size_bytes": diagnostics_size,
+                        "steps_size_bytes": steps_size,
+                        "clip_size_bytes": clip_size,
+                        "frames_total_size_bytes": frames_total,
+                        "frames_count": len(frames_artifacts),
+                        "total_artifact_size_bytes": total_size,
+                    },
+                },
+                timeout=10,
+            )
+
+            if response.status_code == 200:
+                if ext_logger:
+                    ext_logger.info("Artifact completion reported to gateway")
+            else:
+                if ext_logger:
+                    ext_logger.warning(
+                        f"Failed to report artifact completion: HTTP {response.status_code}"
+                    )
+
+        except Exception as e:
+            # Best-effort - log but don't fail
+            if ext_logger:
+                ext_logger.warning(f"Error reporting artifact completion: {e}")
diff --git a/tests/unit/test_failure_artifacts.py b/tests/unit/test_failure_artifacts.py
index 4291338..cd3b6f0 100644
--- a/tests/unit/test_failure_artifacts.py
+++ b/tests/unit/test_failure_artifacts.py
@@ -211,3 +211,168 @@ def test_is_ffmpeg_available_with_timeout() -> None:
     with patch("sentience.failure_artifacts.subprocess.run") as mock_run:
         mock_run.side_effect = subprocess.TimeoutExpired(cmd="ffmpeg", timeout=5)
         assert _is_ffmpeg_available() is False
+
+
+# -------------------- Phase 5: Cloud upload tests --------------------
+
+
+def test_upload_to_cloud_returns_none_when_no_artifacts_dir(tmp_path) -> None:
+    """upload_to_cloud returns None when artifacts directory doesn't exist."""
+    opts = FailureArtifactsOptions(output_dir=str(tmp_path / "nonexistent"))
+    buf = FailureArtifactBuffer(run_id="run-upload-1", options=opts)
+
+    result = buf.upload_to_cloud(api_key="test-key")
+    assert result is None
+
+
+def test_upload_to_cloud_returns_none_when_no_manifest(tmp_path) -> None:
+    """upload_to_cloud returns None when manifest.json is missing."""
+    # Create a directory but no manifest
+    run_dir = tmp_path / "run-upload-2-123"
+    run_dir.mkdir(parents=True)
+
+    opts = FailureArtifactsOptions(output_dir=str(tmp_path))
+    buf = FailureArtifactBuffer(run_id="run-upload-2", options=opts)
+
+    result = buf.upload_to_cloud(api_key="test-key", persisted_dir=run_dir)
+    assert result is None
+
+
+def test_collect_artifacts_for_upload(tmp_path) -> None:
+    """Test that _collect_artifacts_for_upload collects correct files."""
+    opts = FailureArtifactsOptions(output_dir=str(tmp_path))
+    buf = FailureArtifactBuffer(run_id="run-collect", options=opts)
+    buf.add_frame(b"frame1")
+
+    run_dir = buf.persist(
+        reason="fail",
+        status="failure",
+        snapshot={"status": "success"},
+        diagnostics={"confidence": 0.9},
+    )
+    assert run_dir is not None
+
+    # Read manifest
+    manifest = json.loads((run_dir / "manifest.json").read_text())
+
+    # Collect artifacts
+    artifacts = buf._collect_artifacts_for_upload(run_dir, manifest)
+
+    # Should have: manifest.json, steps.json, snapshot.json, diagnostics.json, and 1 frame
+    artifact_names = [a["name"] for a in artifacts]
+    assert "manifest.json" in artifact_names
+    assert "steps.json" in artifact_names
+    assert "snapshot.json" in artifact_names
+    assert "diagnostics.json" in artifact_names
+    assert any(a.startswith("frames/") for a in artifact_names)
+
+    # Verify all files exist
+    for artifact in artifacts:
+        assert artifact["path"].exists()
+        assert artifact["size_bytes"] > 0
+        assert artifact["content_type"] in ["application/json", "image/png", "image/jpeg"]
+
+
+def test_upload_to_cloud_with_mocked_gateway(tmp_path) -> None:
+    """Test full upload flow with mocked HTTP requests."""
+    from unittest.mock import MagicMock
+
+    opts = FailureArtifactsOptions(output_dir=str(tmp_path))
+    buf = FailureArtifactBuffer(run_id="run-mock-upload", options=opts)
+    buf.add_frame(b"frame1")
+
+    run_dir = buf.persist(
+        reason="fail",
+        status="failure",
+        snapshot={"status": "success"},
+    )
+    assert run_dir is not None
+
+    # Mock the HTTP requests
+    mock_response_init = MagicMock()
+    mock_response_init.status_code = 200
+    mock_response_init.json.return_value = {
+        "upload_urls": [
+            {
+                "name": "manifest.json",
+                "upload_url": "https://mock.com/manifest",
+                "storage_key": "test/manifest.json",
+            },
+            {
+                "name": "steps.json",
+                "upload_url": "https://mock.com/steps",
+                "storage_key": "test/steps.json",
+            },
+            {
+                "name": "snapshot.json",
+                "upload_url": "https://mock.com/snapshot",
+                "storage_key": "test/snapshot.json",
+            },
+        ],
+        "artifact_index_upload": {
+            "upload_url": "https://mock.com/index",
+            "storage_key": "test/index.json",
+        },
+    }
+
+    mock_response_upload = MagicMock()
+    mock_response_upload.status_code = 200
+
+    mock_response_complete = MagicMock()
+    mock_response_complete.status_code = 200
+
+    with patch("sentience.failure_artifacts.requests.post") as mock_post:
+        with patch("sentience.failure_artifacts.requests.put") as mock_put:
+            mock_post.side_effect = [mock_response_init, mock_response_complete]
+            mock_put.return_value = mock_response_upload
+
+            result = buf.upload_to_cloud(api_key="test-key", persisted_dir=run_dir)
+
+            # Should return artifact index key
+            assert result == "test/index.json"
+
+            # Verify POST calls were made
+            assert mock_post.call_count == 2
+
+            # Verify PUT calls were made for each artifact + index
+            assert mock_put.call_count >= 3  # At least manifest, steps, snapshot
+
+
+def test_upload_to_cloud_handles_gateway_error(tmp_path) -> None:
+    """Test that upload_to_cloud handles gateway errors gracefully."""
+    from unittest.mock import MagicMock
+
+    opts = FailureArtifactsOptions(output_dir=str(tmp_path))
+    buf = FailureArtifactBuffer(run_id="run-error", options=opts)
+    buf.add_frame(b"frame1")
+
+    run_dir = buf.persist(reason="fail", status="failure")
+    assert run_dir is not None
+
+    # Mock gateway returning error
+    mock_response = MagicMock()
+    mock_response.status_code = 500
+
+    with patch("sentience.failure_artifacts.requests.post") as mock_post:
+        mock_post.return_value = mock_response
+
+        result = buf.upload_to_cloud(api_key="test-key", persisted_dir=run_dir)
+        assert result is None
+
+
+def test_upload_to_cloud_handles_network_error(tmp_path) -> None:
+    """Test that upload_to_cloud handles network errors gracefully."""
+    import requests
+
+    opts = FailureArtifactsOptions(output_dir=str(tmp_path))
+    buf = FailureArtifactBuffer(run_id="run-network-error", options=opts)
+    buf.add_frame(b"frame1")
+
+    run_dir = buf.persist(reason="fail", status="failure")
+    assert run_dir is not None
+
+    with patch("sentience.failure_artifacts.requests.post") as mock_post:
+        mock_post.side_effect = requests.exceptions.ConnectionError("Network error")
+
+        result = buf.upload_to_cloud(api_key="test-key", persisted_dir=run_dir)
+        assert result is None

From de7d9bc26780dd74889e39953cb52bd16ff4df11 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Sun, 18 Jan 2026 19:54:00 -0800
Subject: [PATCH 3/7] P6: passthrough fields for captcha detection

---
 sentience/models.py   | 19 ++++++++++++++++++-
 sentience/snapshot.py |  8 ++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/sentience/models.py b/sentience/models.py
index 74560ea..3fe323b 100644
--- a/sentience/models.py
+++ b/sentience/models.py
@@ -290,12 +290,29 @@ class SnapshotDiagnosticsMetrics(BaseModel):
     raw_elements_count: int | None = None
 
 
+class CaptchaEvidence(BaseModel):
+    text_hits: list[str] = Field(default_factory=list)
+    selector_hits: list[str] = Field(default_factory=list)
+    iframe_src_hits: list[str] = Field(default_factory=list)
+    url_hits: list[str] = Field(default_factory=list)
+
+
+class CaptchaDiagnostics(BaseModel):
+    """Detection-only CAPTCHA signal (no solving/bypass)."""
+
+    detected: bool = False
+    provider_hint: str | None = None
+    confidence: float = 0.0
+    evidence: CaptchaEvidence = Field(default_factory=CaptchaEvidence)
+
+
 class SnapshotDiagnostics(BaseModel):
     """Runtime stability/debug information (reserved for diagnostics, not ML metadata)."""
 
     confidence: float | None = None
-    reasons: list[str] = []
+    reasons: list[str] = Field(default_factory=list)
     metrics: SnapshotDiagnosticsMetrics | None = None
+    captcha: CaptchaDiagnostics | None = None
 
     def get_grid_bounds(self, grid_id: int | None = None) -> list[GridInfo]:
         """
diff --git a/sentience/snapshot.py b/sentience/snapshot.py
index 8cf75cf..34b1960 100644
--- a/sentience/snapshot.py
+++ b/sentience/snapshot.py
@@ -103,10 +103,17 @@ def _build_snapshot_payload(
     """
     diagnostics = raw_result.get("diagnostics") or {}
     client_metrics = None
+    client_diagnostics = None
     try:
         client_metrics = diagnostics.get("metrics")
     except Exception:
         client_metrics = None
+    try:
+        captcha = diagnostics.get("captcha")
+        if captcha is not None:
+            client_diagnostics = {"captcha": captcha}
+    except Exception:
+        client_diagnostics = None
 
     return {
         "raw_elements": raw_result.get("raw_elements", []),
@@ -118,6 +125,7 @@ def _build_snapshot_payload(
             "filter": options.filter.model_dump() if options.filter else None,
         },
         "client_metrics": client_metrics,
+        "client_diagnostics": client_diagnostics,
     }
 
 

From a8bb166fee7b0284b319b31acb3a9f1a6757a6cd Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Sun, 18 Jan 2026 20:33:03 -0800
Subject: [PATCH 4/7] P7: runtime safety net testing

---
 .../test_agent_runtime_regression_safety.py   | 175 ++++++++++++++++++
 1 file changed, 175 insertions(+)
 create mode 100644 tests/unit/test_agent_runtime_regression_safety.py

diff --git a/tests/unit/test_agent_runtime_regression_safety.py b/tests/unit/test_agent_runtime_regression_safety.py
new file mode 100644
index 0000000..f807c1e
--- /dev/null
+++ b/tests/unit/test_agent_runtime_regression_safety.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from sentience.agent_runtime import AgentRuntime
+from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues
+from sentience.verification import AssertContext, AssertOutcome, is_checked, is_disabled, is_enabled, value_contains
+
+
+class MockBackend:
+    async def screenshot_png(self) -> bytes:
+        return b""
+
+
+class MockTracer:
+    def __init__(self) -> None:
+        self.events: list[dict] = []
+
+    def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None:
+        self.events.append({"type": event_type, "data": data, "step_id": step_id})
+
+
+def make_element(
+    element_id: int,
+    *,
+    role: str,
+    text: str | None,
+    disabled: bool | None = None,
+    checked: bool | None = None,
+    value: str | None = None,
+    input_type: str | None = None,
+) -> Element:
+    return Element(
+        id=element_id,
+        role=role,
+        text=text,
+        importance=10,
+        bbox=BBox(x=0, y=0, width=100, height=40),
+        visual_cues=VisualCues(is_primary=False, is_clickable=True, background_color_name=None),
+        in_viewport=True,
+        is_occluded=False,
+        disabled=disabled,
+        checked=checked,
+        value=value,
+        input_type=input_type,
+    )
+
+
+def make_snapshot(elements: list[Element], url: str) -> Snapshot:
+    return Snapshot(
+        status="success",
+        url=url,
+        elements=elements,
+        viewport=Viewport(width=1280, height=720),
+    )
+
+
+def test_v1_state_assertions_enabled_disabled_checked_value() -> None:
+    runtime = AgentRuntime(backend=MockBackend(), tracer=MockTracer())
+    runtime.begin_step(goal="Test")
+
+    elements = [
+        make_element(1, role="button", text="Submit", disabled=False),
+        make_element(2, role="checkbox", text=None, checked=True),
+        make_element(3, role="textbox", text=None, value="hello", input_type="text"),
+        make_element(4, role="button", text="Disabled", disabled=True),
+    ]
+    runtime.last_snapshot = make_snapshot(elements, url="https://example.com")
+
+    assert runtime.assert_(is_enabled("text~'Submit'"), label="enabled") is True
+    assert runtime.assert_(is_disabled("text~'Disabled'"), label="disabled") is True
+    assert runtime.assert_(is_checked("role=checkbox"), label="checked") is True
+    assert runtime.assert_(value_contains("role=textbox", "hello"), label="value") is True
+
+
+@pytest.mark.asyncio
+async def test_eventually_retry_loop_succeeds() -> None:
+    tracer = MockTracer()
+    runtime = AgentRuntime(backend=MockBackend(), tracer=tracer)
+    runtime.begin_step(goal="Test")
+
+    snaps = [
+        make_snapshot([], url="https://example.com"),
+        make_snapshot([], url="https://example.com"),
+        make_snapshot([], url="https://example.com/done"),
+    ]
+
+    async def fake_snapshot(**_kwargs):
+        runtime.last_snapshot = snaps.pop(0)
+        return runtime.last_snapshot
+
+    runtime.snapshot = AsyncMock(side_effect=fake_snapshot)  # type: ignore[method-assign]
+
+    def pred(ctx: AssertContext) -> AssertOutcome:
+        ok = (ctx.url or "").endswith("/done")
+        return AssertOutcome(passed=ok, reason="" if ok else "not done", details={})
+
+    ok = await runtime.check(pred, label="eventually_done").eventually(timeout_s=2.0, poll_s=0.0)
+    assert ok is True
+
+
+@pytest.mark.asyncio
+async def test_min_confidence_snapshot_exhausted() -> None:
+    tracer = MockTracer()
+    runtime = AgentRuntime(backend=MockBackend(), tracer=tracer)
+    runtime.begin_step(goal="Test")
+
+    low_diag = MagicMock()
+    low_diag.confidence = 0.1
+    low_diag.model_dump = lambda: {"confidence": 0.1}
+
+    snaps = [
+        MagicMock(url="https://example.com", elements=[], diagnostics=low_diag),
+        MagicMock(url="https://example.com", elements=[], diagnostics=low_diag),
+    ]
+
+    async def fake_snapshot(**_kwargs):
+        runtime.last_snapshot = snaps.pop(0)
+        return runtime.last_snapshot
+
+    runtime.snapshot = AsyncMock(side_effect=fake_snapshot)  # type: ignore[method-assign]
+
+    def pred(_ctx: AssertContext) -> AssertOutcome:
+        return AssertOutcome(passed=True, reason="would pass", details={})
+
+    ok = await runtime.check(pred, label="min_confidence_gate").eventually(
+        timeout_s=2.0,
+        poll_s=0.0,
+        min_confidence=0.7,
+        max_snapshot_attempts=2,
+    )
+    assert ok is False
+    details = runtime._assertions_this_step[0]["details"]
+    assert details["reason_code"] == "snapshot_exhausted"
+
+
+@pytest.mark.asyncio
+async def test_golden_flow_same_snapshots_actions_no_captcha() -> None:
+    tracer = MockTracer()
+    runtime = AgentRuntime(backend=MockBackend(), tracer=tracer)
+    runtime.begin_step(goal="Test")
+
+    class FakeActionExecutor:
+        def __init__(self) -> None:
+            self.actions: list[str] = []
+
+        def execute(self, action: str) -> dict:
+            self.actions.append(action)
+            return {"success": True}
+
+    executor = FakeActionExecutor()
+    executor.execute("CLICK(1)")
+    executor.execute('TYPE(2, "hello")')
+    assert executor.actions == ["CLICK(1)", 'TYPE(2, "hello")']
+
+    snaps = [
+        make_snapshot([], url="https://example.com"),
+        make_snapshot([], url="https://example.com/after"),
+        make_snapshot([], url="https://example.com/done"),
+    ]
+
+    async def fake_snapshot(**_kwargs):
+        runtime.last_snapshot = snaps.pop(0)
+        return runtime.last_snapshot
+
+    runtime.snapshot = AsyncMock(side_effect=fake_snapshot)  # type: ignore[method-assign]
+
+    def pred(ctx: AssertContext) -> AssertOutcome:
+        ok = (ctx.url or "").endswith("/done")
+        return AssertOutcome(passed=ok, reason="" if ok else "not done", details={})
+
+    ok = await runtime.check(pred, label="golden_flow").eventually(timeout_s=2.0, poll_s=0.0)
+    assert ok is True

From 2a8b01627586977d3372a2462292af47c41ae530 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Sun, 18 Jan 2026 20:48:53 -0800
Subject: [PATCH 5/7] implement policy support for captcha handler with
 examples

---
 README.md                                     |  27 +++-
 examples/agent_runtime_captcha_strategies.py  |  53 ++++++++
 sentience/__init__.py                         |   2 +
 sentience/agent_runtime.py                    | 124 ++++++++++++++++++
 sentience/captcha.py                          |  53 ++++++++
 sentience/captcha_strategies.py               |  67 ++++++++++
 .../test_agent_runtime_regression_safety.py   |   9 +-
 7 files changed, 333 insertions(+), 2 deletions(-)
 create mode 100644 examples/agent_runtime_captcha_strategies.py
 create mode 100644 sentience/captcha.py
 create mode 100644 sentience/captcha_strategies.py

diff --git a/README.md b/README.md
index 66e1a67..f0eab6d 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ Use `AgentRuntime` to add Jest-style assertions to your agent loops. Verify brow
 
 ```python
 import asyncio
-from sentience import AsyncSentienceBrowser, AgentRuntime
+from sentience import AsyncSentienceBrowser, AgentRuntime, CaptchaOptions, HumanHandoffSolver
 from sentience.verification import (
     url_contains,
     exists,
@@ -80,6 +80,11 @@ async def main():
         ).eventually(timeout_s=10.0, poll_s=0.25, min_confidence=0.7, max_snapshot_attempts=3)
         print("eventually() result:", ok)
 
+        # CAPTCHA handling (detection + handoff + verify)
+        runtime.set_captcha_options(
+            CaptchaOptions(policy="callback", handler=HumanHandoffSolver())
+        )
+
         # Check task completion
         if runtime.assert_done(exists("text~'Example'"), label="task_complete"):
             print("✅ Task completed!")
@@ -89,6 +94,26 @@ async def main():
 asyncio.run(main())
 ```
 
+#### CAPTCHA strategies (Batteries Included)
+
+```python
+from sentience import CaptchaOptions, ExternalSolver, HumanHandoffSolver, VisionSolver
+
+# Human-in-loop
+runtime.set_captcha_options(CaptchaOptions(policy="callback", handler=HumanHandoffSolver()))
+
+# Vision verification only
+runtime.set_captcha_options(CaptchaOptions(policy="callback", handler=VisionSolver()))
+
+# External system/webhook
+runtime.set_captcha_options(
+    CaptchaOptions(
+        policy="callback",
+        handler=ExternalSolver(lambda ctx: notify_webhook(ctx)),
+    )
+)
+```
+
 ### Failure Artifact Buffer (Phase 1)
 
 Capture a short ring buffer of screenshots and persist them when a required assertion fails.
diff --git a/examples/agent_runtime_captcha_strategies.py b/examples/agent_runtime_captcha_strategies.py
new file mode 100644
index 0000000..932a0a5
--- /dev/null
+++ b/examples/agent_runtime_captcha_strategies.py
@@ -0,0 +1,53 @@
+import asyncio
+import os
+
+from sentience import (
+    AgentRuntime,
+    AsyncSentienceBrowser,
+    CaptchaOptions,
+    ExternalSolver,
+    HumanHandoffSolver,
+    VisionSolver,
+)
+from sentience.tracing import JsonlTraceSink, Tracer
+
+
+async def notify_webhook(ctx) -> None:
+    # Example hook: send context to your system (no solver logic in Sentience).
+    # Replace with your own client / queue / webhook call.
+    print(f"[captcha] external resolver notified: url={ctx.url} run_id={ctx.run_id}")
+
+
+async def main() -> None:
+    tracer = Tracer(run_id="captcha-demo", sink=JsonlTraceSink("trace.jsonl"))
+
+    async with AsyncSentienceBrowser() as browser:
+        page = await browser.new_page()
+        runtime = await AgentRuntime.from_sentience_browser(
+            browser=browser,
+            page=page,
+            tracer=tracer,
+        )
+
+        # Option 1: Human-in-loop
+        runtime.set_captcha_options(
+            CaptchaOptions(policy="callback", handler=HumanHandoffSolver())
+        )
+
+        # Option 2: Vision-only verification (no actions)
+        runtime.set_captcha_options(
+            CaptchaOptions(policy="callback", handler=VisionSolver())
+        )
+
+        # Option 3: External resolver orchestration
+        runtime.set_captcha_options(
+            CaptchaOptions(policy="callback", handler=ExternalSolver(lambda ctx: notify_webhook(ctx)))
+        )
+
+        await page.goto(os.environ.get("CAPTCHA_TEST_URL", "https://example.com"))
+        runtime.begin_step("Captcha-aware snapshot")
+        await runtime.snapshot()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/sentience/__init__.py b/sentience/__init__.py
index 4f8c65d..22ddc06 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -39,6 +39,8 @@
 # Agent Layer (Phase 1 & 2)
 from .base_agent import BaseAgent
 from .browser import SentienceBrowser
+from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
+from .captcha_strategies import ExternalSolver, HumanHandoffSolver, VisionSolver
 
 # Tracing (v0.12.0+)
 from .cloud_tracing import CloudTraceSink, SentienceLogger
diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
index 668ed58..ee064d1 100644
--- a/sentience/agent_runtime.py
+++ b/sentience/agent_runtime.py
@@ -70,6 +70,7 @@
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
 
+from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
 from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions
 from .models import Snapshot, SnapshotOptions
 from .verification import AssertContext, AssertOutcome, Predicate
@@ -153,6 +154,10 @@ def __init__(
         self._task_done: bool = False
         self._task_done_label: str | None = None
 
+        # CAPTCHA handling (optional, disabled by default)
+        self._captcha_options: CaptchaOptions | None = None
+        self._captcha_retry_count: int = 0
+
     @classmethod
     async def from_sentience_browser(
         cls,
@@ -248,13 +253,132 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
         from .backends.snapshot import snapshot as backend_snapshot
 
         # Merge default options with call-specific kwargs
+        skip_captcha_handling = bool(kwargs.pop("_skip_captcha_handling", False))
         options_dict = self._snapshot_options.model_dump(exclude_none=True)
         options_dict.update(kwargs)
         options = SnapshotOptions(**options_dict)
 
         self.last_snapshot = await backend_snapshot(self.backend, options=options)
+        if not skip_captcha_handling:
+            await self._handle_captcha_if_needed(self.last_snapshot, source="gateway")
         return self.last_snapshot
 
+    def set_captcha_options(self, options: CaptchaOptions) -> None:
+        """
+        Configure CAPTCHA handling (disabled by default unless set).
+        """
+        self._captcha_options = options
+        self._captcha_retry_count = 0
+
+    def _is_captcha_detected(self, snapshot: Snapshot) -> bool:
+        if not self._captcha_options:
+            return False
+        captcha = getattr(snapshot.diagnostics, "captcha", None) if snapshot.diagnostics else None
+        if not captcha or not getattr(captcha, "detected", False):
+            return False
+        confidence = getattr(captcha, "confidence", 0.0)
+        return confidence >= self._captcha_options.min_confidence
+
+    def _build_captcha_context(self, snapshot: Snapshot, source: str) -> CaptchaContext:
+        captcha = getattr(snapshot.diagnostics, "captcha", None)
+        return CaptchaContext(
+            run_id=self.tracer.run_id,
+            step_index=self.step_index,
+            url=snapshot.url,
+            source=source,  # type: ignore[arg-type]
+            captcha=captcha,
+        )
+
+    def _emit_captcha_event(self, reason_code: str, details: dict[str, Any] | None = None) -> None:
+        payload = {
+            "kind": "captcha",
+            "passed": False,
+            "label": reason_code,
+            "details": {"reason_code": reason_code, **(details or {})},
+        }
+        self.tracer.emit("verification", data=payload, step_id=self.step_id)
+
+    async def _handle_captcha_if_needed(self, snapshot: Snapshot, source: str) -> None:
+        if not self._captcha_options:
+            return
+        if not self._is_captcha_detected(snapshot):
+            return
+
+        captcha = getattr(snapshot.diagnostics, "captcha", None)
+        self._emit_captcha_event(
+            "captcha_detected",
+            {"captcha": getattr(captcha, "model_dump", lambda: captcha)()},
+        )
+
+        resolution: CaptchaResolution
+        if self._captcha_options.policy == "callback":
+            if not self._captcha_options.handler:
+                self._emit_captcha_event("captcha_handler_error")
+                raise CaptchaHandlingError(
+                    "captcha_handler_error",
+                    'Captcha handler is required for policy="callback".',
+                )
+            try:
+                resolution = await self._captcha_options.handler(
+                    self._build_captcha_context(snapshot, source)
+                )
+            except Exception as exc:  # pragma: no cover - defensive
+                self._emit_captcha_event("captcha_handler_error", {"error": str(exc)})
+                raise CaptchaHandlingError(
+                    "captcha_handler_error", "Captcha handler failed."
+                ) from exc
+        else:
+            resolution = CaptchaResolution(action="abort")
+
+        await self._apply_captcha_resolution(resolution, snapshot, source)
+
+    async def _apply_captcha_resolution(
+        self,
+        resolution: CaptchaResolution,
+        snapshot: Snapshot,
+        source: str,
+    ) -> None:
+        if resolution.action == "abort":
+            self._emit_captcha_event("captcha_policy_abort", {"message": resolution.message})
+            raise CaptchaHandlingError(
+                "captcha_policy_abort",
+                resolution.message or "Captcha detected. Aborting per policy.",
+            )
+
+        if resolution.action == "retry_new_session":
+            self._captcha_retry_count += 1
+            self._emit_captcha_event("captcha_retry_new_session")
+            if self._captcha_retry_count > self._captcha_options.max_retries_new_session:
+                self._emit_captcha_event("captcha_retry_exhausted")
+                raise CaptchaHandlingError(
+                    "captcha_retry_exhausted",
+                    "Captcha retry_new_session exhausted.",
+                )
+            if not self._captcha_options.reset_session:
+                raise CaptchaHandlingError(
+                    "captcha_retry_new_session",
+                    "reset_session callback is required for retry_new_session.",
+                )
+            await self._captcha_options.reset_session()
+            return
+
+        if resolution.action == "wait_until_cleared":
+            timeout_ms = resolution.timeout_ms or self._captcha_options.timeout_ms
+            poll_ms = resolution.poll_ms or self._captcha_options.poll_ms
+            await self._wait_until_cleared(timeout_ms=timeout_ms, poll_ms=poll_ms, source=source)
+            self._emit_captcha_event("captcha_resumed")
+
+    async def _wait_until_cleared(self, *, timeout_ms: int, poll_ms: int, source: str) -> None:
+        deadline = time.time() + timeout_ms / 1000.0
+        while time.time() <= deadline:
+            await asyncio.sleep(poll_ms / 1000.0)
+            snap = await self.snapshot(_skip_captcha_handling=True)
+            if not self._is_captcha_detected(snap):
+                self._emit_captcha_event("captcha_cleared", {"source": source})
+                return
+        self._emit_captcha_event("captcha_wait_timeout", {"timeout_ms": timeout_ms})
+        raise CaptchaHandlingError("captcha_wait_timeout", "Captcha wait_until_cleared timed out.")
+
     async def enable_failure_artifacts(
         self,
         options: FailureArtifactsOptions | None = None,
diff --git a/sentience/captcha.py b/sentience/captcha.py
new file mode 100644
index 0000000..6bdc68c
--- /dev/null
+++ b/sentience/captcha.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Awaitable, Callable, Literal, Optional
+
+from .models import CaptchaDiagnostics
+
+CaptchaPolicy = Literal["abort", "callback"]
+CaptchaAction = Literal["abort", "retry_new_session", "wait_until_cleared"]
+CaptchaSource = Literal["extension", "gateway", "runtime"]
+
+
+@dataclass
+class CaptchaContext:
+    run_id: str
+    step_index: int
+    url: str
+    source: CaptchaSource
+    captcha: CaptchaDiagnostics
+    screenshot_path: Optional[str] = None
+    frames_dir: Optional[str] = None
+    snapshot_path: Optional[str] = None
+    live_session_url: Optional[str] = None
+    meta: Optional[dict[str, str]] = None
+
+
+@dataclass
+class CaptchaResolution:
+    action: CaptchaAction
+    message: Optional[str] = None
+    handled_by: Optional[Literal["human", "customer_system", "unknown"]] = None
+    timeout_ms: Optional[int] = None
+    poll_ms: Optional[int] = None
+
+
+CaptchaHandler = Callable[[CaptchaContext], CaptchaResolution | Awaitable[CaptchaResolution]]
+
+
+@dataclass
+class CaptchaOptions:
+    policy: CaptchaPolicy = "abort"
+    min_confidence: float = 0.7
+    timeout_ms: int = 120_000
+    poll_ms: int = 1_000
+    max_retries_new_session: int = 1
+    handler: Optional[CaptchaHandler] = None
+    reset_session: Optional[Callable[[], Awaitable[None]]] = None
+
+
+class CaptchaHandlingError(RuntimeError):
+    def __init__(self, reason_code: str, message: str) -> None:
+        super().__init__(message)
+        self.reason_code = reason_code
diff --git a/sentience/captcha_strategies.py b/sentience/captcha_strategies.py
new file mode 100644
index 0000000..f0a9cb1
--- /dev/null
+++ b/sentience/captcha_strategies.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+import inspect
+from typing import Callable
+
+from .captcha import CaptchaContext, CaptchaHandler, CaptchaResolution
+
+
+def HumanHandoffSolver(
+    *,
+    message: str | None = None,
+    handled_by: str | None = "human",
+    timeout_ms: int | None = None,
+    poll_ms: int | None = None,
+) -> CaptchaHandler:
+    async def _handler(_ctx: CaptchaContext) -> CaptchaResolution:
+        return CaptchaResolution(
+            action="wait_until_cleared",
+            message=message or "Solve CAPTCHA in the live session, then resume.",
+            handled_by=handled_by,
+            timeout_ms=timeout_ms,
+            poll_ms=poll_ms,
+        )
+
+    return _handler
+
+
+def VisionSolver(
+    *,
+    message: str | None = None,
+    handled_by: str | None = "customer_system",
+    timeout_ms: int | None = None,
+    poll_ms: int | None = None,
+) -> CaptchaHandler:
+    async def _handler(_ctx: CaptchaContext) -> CaptchaResolution:
+        return CaptchaResolution(
+            action="wait_until_cleared",
+            message=message or "Waiting for CAPTCHA to clear (vision verification).",
+            handled_by=handled_by,
+            timeout_ms=timeout_ms,
+            poll_ms=poll_ms,
+        )
+
+    return _handler
+
+
+def ExternalSolver(
+    resolver: Callable[[CaptchaContext], None | bool | dict],
+    *,
+    message: str | None = None,
+    handled_by: str | None = "customer_system",
+    timeout_ms: int | None = None,
+    poll_ms: int | None = None,
+) -> CaptchaHandler:
+    async def _handler(ctx: CaptchaContext) -> CaptchaResolution:
+        result = resolver(ctx)
+        if inspect.isawaitable(result):
+            await result
+        return CaptchaResolution(
+            action="wait_until_cleared",
+            message=message or "External solver invoked; waiting for clearance.",
+            handled_by=handled_by,
+            timeout_ms=timeout_ms,
+            poll_ms=poll_ms,
+        )
+
+    return _handler
diff --git a/tests/unit/test_agent_runtime_regression_safety.py b/tests/unit/test_agent_runtime_regression_safety.py
index f807c1e..c82a3fe 100644
--- a/tests/unit/test_agent_runtime_regression_safety.py
+++ b/tests/unit/test_agent_runtime_regression_safety.py
@@ -6,7 +6,14 @@
 
 from sentience.agent_runtime import AgentRuntime
 from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues
-from sentience.verification import AssertContext, AssertOutcome, is_checked, is_disabled, is_enabled, value_contains
+from sentience.verification import (
+    AssertContext,
+    AssertOutcome,
+    is_checked,
+    is_disabled,
+    is_enabled,
+    value_contains,
+)
 
 
 class MockBackend:

From 9576f5ecd061e711ea1e15d190829616f47181d9 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Sun, 18 Jan 2026 21:16:24 -0800
Subject: [PATCH 6/7] updated trace_v1 schema

---
 sentience/schemas/trace_v1.json | 42 ++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/sentience/schemas/trace_v1.json b/sentience/schemas/trace_v1.json
index 37c28cb..25c3d20 100644
--- a/sentience/schemas/trace_v1.json
+++ b/sentience/schemas/trace_v1.json
@@ -73,6 +73,46 @@
             "url": {"type": ["string", "null"]},
             "element_count": {"type": "integer"},
             "timestamp": {"type": ["string", "null"]},
+            "diagnostics": {
+              "type": ["object", "null"],
+              "properties": {
+                "confidence": {"type": ["number", "null"]},
+                "reasons": {"type": "array", "items": {"type": "string"}},
+                "metrics": {
+                  "type": ["object", "null"],
+                  "properties": {
+                    "ready_state": {"type": ["string", "null"]},
+                    "quiet_ms": {"type": ["number", "null"]},
+                    "node_count": {"type": ["integer", "null"]},
+                    "interactive_count": {"type": ["integer", "null"]},
+                    "raw_elements_count": {"type": ["integer", "null"]}
+                  },
+                  "additionalProperties": true
+                },
+                "captcha": {
+                  "type": ["object", "null"],
+                  "properties": {
+                    "detected": {"type": "boolean"},
+                    "provider_hint": {
+                      "type": ["string", "null"],
+                      "enum": ["recaptcha", "hcaptcha", "turnstile", "arkose", "awswaf", "unknown", null]
+                    },
+                    "confidence": {"type": "number"},
+                    "evidence": {
+                      "type": "object",
+                      "properties": {
+                        "text_hits": {"type": "array", "items": {"type": "string"}},
+                        "selector_hits": {"type": "array", "items": {"type": "string"}},
+                        "iframe_src_hits": {"type": "array", "items": {"type": "string"}},
+                        "url_hits": {"type": "array", "items": {"type": "string"}}
+                      }
+                    }
+                  },
+                  "required": ["detected", "confidence", "evidence"]
+                }
+              },
+              "additionalProperties": true
+            },
             "elements": {
               "type": "array",
               "items": {
@@ -289,7 +329,7 @@
             "passed": {"type": "boolean"},
             "kind": {
               "type": "string",
-              "enum": ["assert", "task_done"],
+              "enum": ["assert", "task_done", "captcha"],
               "description": "Type of verification event"
             },
             "label": {"type": "string", "description": "Human-readable label for the assertion"},

From 908d3ef9d096fc3c17e14cdc52aabc7d423a6866 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Mon, 19 Jan 2026 10:10:50 -0800
Subject: [PATCH 7/7] updated trace schema and fix

---
 sentience/agent_runtime.py      | 2 +-
 sentience/schemas/trace_v1.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
index ee064d1..272bc83 100644
--- a/sentience/agent_runtime.py
+++ b/sentience/agent_runtime.py
@@ -579,7 +579,7 @@ def assert_done(
             True if task is complete (assertion passed), False otherwise
         """
         # Convenience wrapper for assert_ with required=True
-        ok = self.assertTrue(predicate, label=label, required=True)
+        ok = self.assert_(predicate, label=label, required=True)
         if ok:
             self._task_done = True
             self._task_done_label = label
diff --git a/sentience/schemas/trace_v1.json b/sentience/schemas/trace_v1.json
index 25c3d20..392dda1 100644
--- a/sentience/schemas/trace_v1.json
+++ b/sentience/schemas/trace_v1.json
@@ -220,7 +220,7 @@
               "type": "object",
               "required": ["kind"],
               "properties": {
-                "kind": {"type": "string", "enum": ["click", "type", "press", "finish", "navigate"]},
+                "kind": {"type": "string", "enum": ["click", "type", "press", "finish", "navigate", "scroll"]},
                 "element_id": {"type": "integer"},
                 "text": {"type": "string"},
                 "key": {"type": "string"},