From e83e6ed03eb66a17bd48b7e71a766b0bf0888e5a Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 18 Jan 2026 18:04:06 -0800 Subject: [PATCH 1/7] Phase 4: ffmpeg generate video clips; --- sentience/failure_artifacts.py | 178 ++++++++++++++++++++++++++- tests/unit/test_failure_artifacts.py | 121 ++++++++++++++++++ 2 files changed, 298 insertions(+), 1 deletion(-) diff --git a/sentience/failure_artifacts.py b/sentience/failure_artifacts.py index fd92135..dbf6d1a 100644 --- a/sentience/failure_artifacts.py +++ b/sentience/failure_artifacts.py @@ -1,14 +1,34 @@ from __future__ import annotations import json +import logging import shutil +import subprocess import tempfile import time from collections.abc import Callable -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Any, Literal +logger = logging.getLogger(__name__) + + +@dataclass +class ClipOptions: + """Options for generating video clips from frames.""" + + mode: Literal["off", "auto", "on"] = "auto" + """Clip generation mode: + - "off": Never generate clips + - "auto": Generate only if ffmpeg is available on PATH + - "on": Always attempt to generate (will warn if ffmpeg missing) + """ + fps: int = 8 + """Frames per second for the generated video.""" + seconds: float | None = None + """Duration of clip in seconds. If None, uses buffer_seconds.""" + @dataclass class FailureArtifactsOptions: @@ -19,6 +39,7 @@ class FailureArtifactsOptions: output_dir: str = ".sentience/artifacts" on_before_persist: Callable[[RedactionContext], RedactionResult] | None = None redact_snapshot_values: bool = True + clip: ClipOptions = field(default_factory=ClipOptions) @dataclass @@ -47,6 +68,123 @@ class _FrameRecord: path: Path +def _is_ffmpeg_available() -> bool: + """Check if ffmpeg is available on the system PATH.""" + try: + result = subprocess.run( + ["ffmpeg", "-version"], + capture_output=True, + timeout=5, + ) + return result.returncode == 0 + except (subprocess.TimeoutExpired, FileNotFoundError, OSError): + return False + + +def _generate_clip_from_frames( + frames_dir: Path, + output_path: Path, + fps: int = 8, + frame_pattern: str = "frame_*.png", +) -> bool: + """ + Generate an MP4 video clip from a directory of frames using ffmpeg. + + Args: + frames_dir: Directory containing frame images + output_path: Output path for the MP4 file + fps: Frames per second for the output video + frame_pattern: Glob pattern to match frame files + + Returns: + True if clip was generated successfully, False otherwise + """ + # Find all frames and sort by timestamp (extracted from filename) + frame_files = sorted(frames_dir.glob(frame_pattern)) + if not frame_files: + # Try jpeg pattern as well + frame_files = sorted(frames_dir.glob("frame_*.jpeg")) + if not frame_files: + frame_files = sorted(frames_dir.glob("frame_*.jpg")) + if not frame_files: + logger.warning("No frame files found for clip generation") + return False + + # Create a temporary file list for ffmpeg concat demuxer + # This approach handles arbitrary frame filenames and timing + list_file = frames_dir / "frames_list.txt" + try: + # Calculate frame duration based on FPS + frame_duration = 1.0 / fps + + with open(list_file, "w") as f: + for frame_path in frame_files: + # ffmpeg concat format: file 'path' + duration + f.write(f"file '{frame_path.name}'\n") + f.write(f"duration {frame_duration}\n") + # Add last frame again (ffmpeg concat quirk) + if frame_files: + f.write(f"file '{frame_files[-1].name}'\n") + + # Run ffmpeg to generate the clip + # -y: overwrite output file + # -f concat: use concat demuxer + # -safe 0: allow unsafe file paths + # -i: input file list + # -vsync vfr: variable frame rate + # -pix_fmt yuv420p: compatibility with most players + # -c:v libx264: H.264 codec + # -crf 23: quality (lower = better, 23 is default) + cmd = [ + "ffmpeg", + "-y", + "-f", + "concat", + "-safe", + "0", + "-i", + str(list_file), + "-vsync", + "vfr", + "-pix_fmt", + "yuv420p", + "-c:v", + "libx264", + "-crf", + "23", + str(output_path), + ] + + result = subprocess.run( + cmd, + capture_output=True, + timeout=60, # 1 minute timeout + cwd=str(frames_dir), # Run from frames dir for relative paths + ) + + if result.returncode != 0: + logger.warning( + f"ffmpeg failed with return code {result.returncode}: " + f"{result.stderr.decode('utf-8', errors='replace')[:500]}" + ) + return False + + return output_path.exists() + + except subprocess.TimeoutExpired: + logger.warning("ffmpeg timed out during clip generation") + return False + except Exception as e: + logger.warning(f"Error generating clip: {e}") + return False + finally: + # Clean up the list file + try: + list_file.unlink(missing_ok=True) + except Exception: + pass + + class FailureArtifactBuffer: """ Ring buffer of screenshots with minimal persistence on failure. @@ -215,6 +353,42 @@ def persist( if diagnostics_payload is not None: self._write_json_atomic(run_dir / "diagnostics.json", diagnostics_payload) + # Generate video clip from frames (optional, requires ffmpeg) + clip_generated = False + clip_path: Path | None = None + clip_options = self.options.clip + + if not drop_frames and len(frame_paths) > 0 and clip_options.mode != "off": + should_generate = False + + if clip_options.mode == "auto": + # Only generate if ffmpeg is available + should_generate = _is_ffmpeg_available() + if not should_generate: + logger.debug("ffmpeg not available, skipping clip generation (mode=auto)") + elif clip_options.mode == "on": + # Always attempt to generate + should_generate = True + if not _is_ffmpeg_available(): + logger.warning( + "ffmpeg not found on PATH but clip.mode='on'. " + "Install ffmpeg to generate video clips." + ) + should_generate = False + + if should_generate: + clip_path = run_dir / "failure.mp4" + clip_generated = _generate_clip_from_frames( + frames_dir=frames_out, + output_path=clip_path, + fps=clip_options.fps, + ) + if clip_generated: + logger.info(f"Generated failure clip: {clip_path}") + else: + logger.warning("Failed to generate video clip") + clip_path = None + manifest = { "run_id": self.run_id, "created_at_ms": ts, @@ -227,6 +401,8 @@ def persist( ), "snapshot": "snapshot.json" if snapshot_payload is not None else None, "diagnostics": "diagnostics.json" if diagnostics_payload is not None else None, + "clip": "failure.mp4" if clip_generated else None, + "clip_fps": clip_options.fps if clip_generated else None, "metadata": metadata or {}, "frames_redacted": not drop_frames and self.options.on_before_persist is not None, "frames_dropped": drop_frames, diff --git a/tests/unit/test_failure_artifacts.py b/tests/unit/test_failure_artifacts.py index 0122bba..4291338 100644 --- a/tests/unit/test_failure_artifacts.py +++ b/tests/unit/test_failure_artifacts.py @@ -1,12 +1,15 @@ from __future__ import annotations import json +from unittest.mock import patch from sentience.failure_artifacts import ( + ClipOptions, FailureArtifactBuffer, FailureArtifactsOptions, RedactionContext, RedactionResult, + _is_ffmpeg_available, ) @@ -90,3 +93,121 @@ def redactor(ctx: RedactionContext) -> RedactionResult: manifest = json.loads((run_dir / "manifest.json").read_text()) assert manifest["frame_count"] == 0 assert manifest["frames_dropped"] is True + + +# -------------------- Phase 4: Clip generation tests -------------------- + + +def test_clip_mode_off_skips_generation(tmp_path) -> None: + """When clip.mode='off', no clip generation is attempted.""" + opts = FailureArtifactsOptions( + output_dir=str(tmp_path), + clip=ClipOptions(mode="off"), + ) + buf = FailureArtifactBuffer(run_id="run-clip-off", options=opts) + buf.add_frame(b"frame") + + run_dir = buf.persist(reason="fail", status="failure") + assert run_dir is not None + manifest = json.loads((run_dir / "manifest.json").read_text()) + assert manifest["clip"] is None + assert manifest["clip_fps"] is None + + +def test_clip_mode_auto_skips_when_ffmpeg_missing(tmp_path) -> None: + """When clip.mode='auto' and ffmpeg is not available, skip silently.""" + with patch("sentience.failure_artifacts._is_ffmpeg_available", return_value=False): + opts = FailureArtifactsOptions( + output_dir=str(tmp_path), + clip=ClipOptions(mode="auto", fps=10), + ) + buf = FailureArtifactBuffer(run_id="run-clip-auto", options=opts) + buf.add_frame(b"frame") + + run_dir = buf.persist(reason="fail", status="failure") + assert run_dir is not None + manifest = json.loads((run_dir / "manifest.json").read_text()) + assert manifest["clip"] is None + assert manifest["clip_fps"] is None + + +def test_clip_mode_on_warns_when_ffmpeg_missing(tmp_path) -> None: + """When clip.mode='on' and ffmpeg is not available, log warning but don't fail.""" + with patch("sentience.failure_artifacts._is_ffmpeg_available", return_value=False): + opts = FailureArtifactsOptions( + output_dir=str(tmp_path), + clip=ClipOptions(mode="on"), + ) + buf = FailureArtifactBuffer(run_id="run-clip-on-missing", options=opts) + buf.add_frame(b"frame") + + run_dir = buf.persist(reason="fail", status="failure") + assert run_dir is not None + manifest = json.loads((run_dir / "manifest.json").read_text()) + # Should not have clip since ffmpeg is not available + assert manifest["clip"] is None + + +def test_clip_generation_with_mock_ffmpeg(tmp_path) -> None: + """Test clip generation logic with mocked ffmpeg subprocess.""" + with patch("sentience.failure_artifacts._is_ffmpeg_available", return_value=True): + with patch("sentience.failure_artifacts._generate_clip_from_frames") as mock_gen: + mock_gen.return_value = True # Simulate successful clip generation + + opts = FailureArtifactsOptions( + output_dir=str(tmp_path), + clip=ClipOptions(mode="on", fps=12), + ) + buf = FailureArtifactBuffer(run_id="run-clip-mock", options=opts) + buf.add_frame(b"frame1") + buf.add_frame(b"frame2") + + run_dir = buf.persist(reason="fail", status="failure") + assert run_dir is not None + + # Verify _generate_clip_from_frames was called with correct args + assert mock_gen.called + call_args = mock_gen.call_args + assert call_args.kwargs["fps"] == 12 + + manifest = json.loads((run_dir / "manifest.json").read_text()) + assert manifest["clip"] == "failure.mp4" + assert manifest["clip_fps"] == 12 + + +def test_clip_not_generated_when_frames_dropped(tmp_path) -> None: + """Clip should not be generated when frames are dropped by redaction.""" + with patch("sentience.failure_artifacts._is_ffmpeg_available", return_value=True): + with patch("sentience.failure_artifacts._generate_clip_from_frames") as mock_gen: + opts = FailureArtifactsOptions( + output_dir=str(tmp_path), + clip=ClipOptions(mode="on"), + on_before_persist=lambda ctx: RedactionResult(drop_frames=True), + ) + buf = FailureArtifactBuffer(run_id="run-clip-dropped", options=opts) + buf.add_frame(b"frame") + + run_dir = buf.persist(reason="fail", status="failure") + assert run_dir is not None + + # Should not call clip generation when frames are dropped + assert not mock_gen.called + manifest = json.loads((run_dir / "manifest.json").read_text()) + assert manifest["clip"] is None + assert manifest["frames_dropped"] is True + + +def test_is_ffmpeg_available_with_missing_binary() -> None: + """Test _is_ffmpeg_available returns False when ffmpeg is not found.""" + with patch("sentience.failure_artifacts.subprocess.run") as mock_run: + mock_run.side_effect = FileNotFoundError("ffmpeg not found") + assert _is_ffmpeg_available() is False + + +def test_is_ffmpeg_available_with_timeout() -> None: + """Test _is_ffmpeg_available returns False on timeout.""" + import subprocess + + with patch("sentience.failure_artifacts.subprocess.run") as mock_run: + mock_run.side_effect = subprocess.TimeoutExpired(cmd="ffmpeg", timeout=5) + assert _is_ffmpeg_available() is False From 00009b9b28970f868a541f20963ce29be0f18a09 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 18 Jan 2026 19:24:28 -0800 Subject: [PATCH 2/7] upload assert failure artifacts --- sentience/failure_artifacts.py | 432 ++++++++++++++++++++++++++- tests/unit/test_failure_artifacts.py | 165 ++++++++++ 2 files changed, 596 insertions(+), 1 deletion(-) diff --git a/sentience/failure_artifacts.py b/sentience/failure_artifacts.py index dbf6d1a..a2d512b 100644 --- a/sentience/failure_artifacts.py +++ b/sentience/failure_artifacts.py @@ -1,5 +1,6 @@ from __future__ import annotations +import gzip import json import logging import shutil @@ -7,13 +8,34 @@ import tempfile import time from collections.abc import Callable +from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, Protocol + +import requests + +from sentience.constants import SENTIENCE_API_URL logger = logging.getLogger(__name__) +class SentienceLogger(Protocol): + """Protocol for optional logger interface.""" + + def info(self, message: str) -> None: + """Log info message.""" + ... + + def warning(self, message: str) -> None: + """Log warning message.""" + ... + + def error(self, message: str) -> None: + """Log error message.""" + ... + + @dataclass class ClipOptions: """Options for generating video clips from frames.""" @@ -415,3 +437,411 @@ def persist( def cleanup(self) -> None: if self._temp_dir.exists(): shutil.rmtree(self._temp_dir, ignore_errors=True) + + def upload_to_cloud( + self, + *, + api_key: str, + api_url: str | None = None, + persisted_dir: Path | None = None, + ext_logger: SentienceLogger | None = None, + ) -> str | None: + """ + Upload persisted artifacts to cloud storage. + + This method uploads all artifacts from a persisted directory to cloud storage + using presigned URLs from the gateway. It follows the same pattern as trace + screenshot uploads. + + Args: + api_key: Sentience API key for authentication + api_url: Sentience API base URL (default: https://api.sentienceapi.com) + persisted_dir: Path to persisted artifacts directory. If None, uses the + most recent persist() output directory. + ext_logger: Optional logger for progress/error messages + + Returns: + artifact_index_key on success, None on failure + + Example: + >>> buf = FailureArtifactBuffer(run_id="run-123", options=options) + >>> buf.add_frame(screenshot_bytes) + >>> run_dir = buf.persist(reason="assertion failed", status="failure") + >>> artifact_key = buf.upload_to_cloud(api_key="sk-...") + >>> # artifact_key can be passed to /v1/traces/complete + """ + base_url = api_url or SENTIENCE_API_URL + + # Determine which directory to upload + if persisted_dir is None: + # Find most recent persisted directory + output_dir = Path(self.options.output_dir) + if not output_dir.exists(): + if ext_logger: + ext_logger.warning("No artifacts directory found") + return None + + # Look for directories matching run_id pattern + matching_dirs = sorted( + [d for d in output_dir.iterdir() if d.is_dir() and d.name.startswith(self.run_id)], + key=lambda p: p.stat().st_mtime, + reverse=True, + ) + if not matching_dirs: + if ext_logger: + ext_logger.warning(f"No persisted artifacts found for run_id={self.run_id}") + return None + persisted_dir = matching_dirs[0] + + if not persisted_dir.exists(): + if ext_logger: + ext_logger.warning(f"Artifacts directory not found: {persisted_dir}") + return None + + # Read manifest to understand what files need uploading + manifest_path = persisted_dir / "manifest.json" + if not manifest_path.exists(): + if ext_logger: + ext_logger.warning("manifest.json not found in artifacts directory") + return None + + with open(manifest_path, encoding="utf-8") as f: + manifest = json.load(f) + + # Build list of artifacts to upload + artifacts = self._collect_artifacts_for_upload(persisted_dir, manifest) + if not artifacts: + if ext_logger: + ext_logger.warning("No artifacts to upload") + return None + + if ext_logger: + ext_logger.info(f"Uploading {len(artifacts)} artifact(s) to cloud") + + # Request presigned URLs from gateway + upload_urls = self._request_artifact_urls( + api_key=api_key, + api_url=base_url, + artifacts=artifacts, + ext_logger=ext_logger, + ) + if not upload_urls: + return None + + # Upload artifacts in parallel + artifact_index_key = self._upload_artifacts( + artifacts=artifacts, + upload_urls=upload_urls, + ext_logger=ext_logger, + ) + + if artifact_index_key: + # Report completion to gateway + self._complete_artifacts( + api_key=api_key, + api_url=base_url, + artifact_index_key=artifact_index_key, + artifacts=artifacts, + ext_logger=ext_logger, + ) + + return artifact_index_key + + def _collect_artifacts_for_upload( + self, persisted_dir: Path, manifest: dict[str, Any] + ) -> list[dict[str, Any]]: + """Collect list of artifacts with their metadata for upload.""" + artifacts: list[dict[str, Any]] = [] + + # Core JSON artifacts + json_files = ["manifest.json", "steps.json"] + if manifest.get("snapshot"): + json_files.append("snapshot.json") + if manifest.get("diagnostics"): + json_files.append("diagnostics.json") + + for filename in json_files: + file_path = persisted_dir / filename + if file_path.exists(): + artifacts.append( + { + "name": filename, + "size_bytes": file_path.stat().st_size, + "content_type": "application/json", + "path": file_path, + } + ) + + # Video clip + if manifest.get("clip"): + clip_path = persisted_dir / "failure.mp4" + if clip_path.exists(): + artifacts.append( + { + "name": "failure.mp4", + "size_bytes": clip_path.stat().st_size, + "content_type": "video/mp4", + "path": clip_path, + } + ) + + # Frames + frames_dir = persisted_dir / "frames" + if frames_dir.exists(): + for frame_file in sorted(frames_dir.iterdir()): + if frame_file.is_file() and frame_file.suffix in {".jpeg", ".jpg", ".png"}: + content_type = ( + "image/jpeg" if frame_file.suffix in {".jpeg", ".jpg"} else "image/png" + ) + artifacts.append( + { + "name": f"frames/{frame_file.name}", + "size_bytes": frame_file.stat().st_size, + "content_type": content_type, + "path": frame_file, + } + ) + + return artifacts + + def _request_artifact_urls( + self, + *, + api_key: str, + api_url: str, + artifacts: list[dict[str, Any]], + ext_logger: SentienceLogger | None, + ) -> dict[str, Any] | None: + """Request presigned upload URLs from gateway.""" + try: + # Prepare request payload (exclude local path) + artifacts_payload = [ + { + "name": a["name"], + "size_bytes": a["size_bytes"], + "content_type": a["content_type"], + } + for a in artifacts + ] + + response = requests.post( + f"{api_url}/v1/traces/artifacts/init", + headers={"Authorization": f"Bearer {api_key}"}, + json={ + "run_id": self.run_id, + "artifacts": artifacts_payload, + }, + timeout=30, + ) + + if response.status_code != 200: + if ext_logger: + ext_logger.warning( + f"Failed to get artifact upload URLs: HTTP {response.status_code}" + ) + return None + + return response.json() + + except Exception as e: + if ext_logger: + ext_logger.error(f"Error requesting artifact upload URLs: {e}") + return None + + def _upload_artifacts( + self, + *, + artifacts: list[dict[str, Any]], + upload_urls: dict[str, Any], + ext_logger: SentienceLogger | None, + ) -> str | None: + """Upload artifacts to cloud storage using presigned URLs.""" + url_map = {item["name"]: item for item in upload_urls.get("upload_urls", [])} + index_upload = upload_urls.get("artifact_index_upload") + + uploaded_count = 0 + failed_names: list[str] = [] + storage_keys: dict[str, str] = {} + + def upload_one(artifact: dict[str, Any]) -> bool: + """Upload a single artifact. Returns True if successful.""" + name = artifact["name"] + url_info = url_map.get(name) + if not url_info: + return False + + try: + file_path = artifact["path"] + with open(file_path, "rb") as f: + data = f.read() + + response = requests.put( + url_info["upload_url"], + data=data, + headers={"Content-Type": artifact["content_type"]}, + timeout=60, + ) + + if response.status_code == 200: + storage_keys[name] = url_info.get("storage_key", "") + return True + else: + if ext_logger: + ext_logger.warning( + f"Artifact {name} upload failed: HTTP {response.status_code}" + ) + return False + + except Exception as e: + if ext_logger: + ext_logger.warning(f"Artifact {name} upload error: {e}") + return False + + # Upload in parallel (max 10 concurrent) + with ThreadPoolExecutor(max_workers=10) as executor: + futures = {executor.submit(upload_one, a): a["name"] for a in artifacts} + + for future in as_completed(futures): + name = futures[future] + if future.result(): + uploaded_count += 1 + else: + failed_names.append(name) + + if ext_logger: + if uploaded_count == len(artifacts): + ext_logger.info(f"All {uploaded_count} artifacts uploaded successfully") + else: + ext_logger.warning( + f"Uploaded {uploaded_count}/{len(artifacts)} artifacts. " + f"Failed: {failed_names}" + ) + + # Upload artifact index file + if index_upload and uploaded_count > 0: + artifact_index_key = self._upload_artifact_index( + artifacts=artifacts, + storage_keys=storage_keys, + index_upload=index_upload, + ext_logger=ext_logger, + ) + return artifact_index_key + + return None + + def _upload_artifact_index( + self, + *, + artifacts: list[dict[str, Any]], + storage_keys: dict[str, str], + index_upload: dict[str, Any], + ext_logger: SentienceLogger | None, + ) -> str | None: + """Create and upload artifact index file.""" + try: + # Build index content + index_data = { + "run_id": self.run_id, + "created_at_ms": int(time.time() * 1000), + "artifacts": [ + { + "name": a["name"], + "storage_key": storage_keys.get(a["name"], ""), + "content_type": a["content_type"], + } + for a in artifacts + if a["name"] in storage_keys + ], + } + + # Compress and upload + index_json = json.dumps(index_data, indent=2).encode("utf-8") + compressed = gzip.compress(index_json) + + response = requests.put( + index_upload["upload_url"], + data=compressed, + headers={ + "Content-Type": "application/json", + "Content-Encoding": "gzip", + }, + timeout=30, + ) + + if response.status_code == 200: + artifact_index_key = index_upload.get("storage_key", "") + if ext_logger: + ext_logger.info("Artifact index uploaded successfully") + return artifact_index_key + else: + if ext_logger: + ext_logger.warning(f"Artifact index upload failed: HTTP {response.status_code}") + return None + + except Exception as e: + if ext_logger: + ext_logger.warning(f"Error uploading artifact index: {e}") + return None + + def _complete_artifacts( + self, + *, + api_key: str, + api_url: str, + artifact_index_key: str, + artifacts: list[dict[str, Any]], + ext_logger: SentienceLogger | None, + ) -> None: + """Report artifact upload completion to gateway.""" + try: + # Calculate stats + total_size = sum(a["size_bytes"] for a in artifacts) + frames_artifacts = [a for a in artifacts if a["name"].startswith("frames/")] + frames_total = sum(a["size_bytes"] for a in frames_artifacts) + + # Get individual file sizes + manifest_size = next( + (a["size_bytes"] for a in artifacts if a["name"] == "manifest.json"), 0 + ) + snapshot_size = next( + (a["size_bytes"] for a in artifacts if a["name"] == "snapshot.json"), 0 + ) + diagnostics_size = next( + (a["size_bytes"] for a in artifacts if a["name"] == "diagnostics.json"), 0 + ) + steps_size = next((a["size_bytes"] for a in artifacts if a["name"] == "steps.json"), 0) + clip_size = next((a["size_bytes"] for a in artifacts if a["name"] == "failure.mp4"), 0) + + response = requests.post( + f"{api_url}/v1/traces/artifacts/complete", + headers={"Authorization": f"Bearer {api_key}"}, + json={ + "run_id": self.run_id, + "artifact_index_key": artifact_index_key, + "stats": { + "manifest_size_bytes": manifest_size, + "snapshot_size_bytes": snapshot_size, + "diagnostics_size_bytes": diagnostics_size, + "steps_size_bytes": steps_size, + "clip_size_bytes": clip_size, + "frames_total_size_bytes": frames_total, + "frames_count": len(frames_artifacts), + "total_artifact_size_bytes": total_size, + }, + }, + timeout=10, + ) + + if response.status_code == 200: + if ext_logger: + ext_logger.info("Artifact completion reported to gateway") + else: + if ext_logger: + ext_logger.warning( + f"Failed to report artifact completion: HTTP {response.status_code}" + ) + + except Exception as e: + # Best-effort - log but don't fail + if ext_logger: + ext_logger.warning(f"Error reporting artifact completion: {e}") diff --git a/tests/unit/test_failure_artifacts.py b/tests/unit/test_failure_artifacts.py index 4291338..cd3b6f0 100644 --- a/tests/unit/test_failure_artifacts.py +++ b/tests/unit/test_failure_artifacts.py @@ -211,3 +211,168 @@ def test_is_ffmpeg_available_with_timeout() -> None: with patch("sentience.failure_artifacts.subprocess.run") as mock_run: mock_run.side_effect = subprocess.TimeoutExpired(cmd="ffmpeg", timeout=5) assert _is_ffmpeg_available() is False + + +# -------------------- Phase 5: Cloud upload tests -------------------- + + +def test_upload_to_cloud_returns_none_when_no_artifacts_dir(tmp_path) -> None: + """upload_to_cloud returns None when artifacts directory doesn't exist.""" + opts = FailureArtifactsOptions(output_dir=str(tmp_path / "nonexistent")) + buf = FailureArtifactBuffer(run_id="run-upload-1", options=opts) + + result = buf.upload_to_cloud(api_key="test-key") + assert result is None + + +def test_upload_to_cloud_returns_none_when_no_manifest(tmp_path) -> None: + """upload_to_cloud returns None when manifest.json is missing.""" + # Create a directory but no manifest + run_dir = tmp_path / "run-upload-2-123" + run_dir.mkdir(parents=True) + + opts = FailureArtifactsOptions(output_dir=str(tmp_path)) + buf = FailureArtifactBuffer(run_id="run-upload-2", options=opts) + + result = buf.upload_to_cloud(api_key="test-key", persisted_dir=run_dir) + assert result is None + + +def test_collect_artifacts_for_upload(tmp_path) -> None: + """Test that _collect_artifacts_for_upload collects correct files.""" + opts = FailureArtifactsOptions(output_dir=str(tmp_path)) + buf = FailureArtifactBuffer(run_id="run-collect", options=opts) + buf.add_frame(b"frame1") + + run_dir = buf.persist( + reason="fail", + status="failure", + snapshot={"status": "success"}, + diagnostics={"confidence": 0.9}, + ) + assert run_dir is not None + + # Read manifest + manifest = json.loads((run_dir / "manifest.json").read_text()) + + # Collect artifacts + artifacts = buf._collect_artifacts_for_upload(run_dir, manifest) + + # Should have: manifest.json, steps.json, snapshot.json, diagnostics.json, and 1 frame + artifact_names = [a["name"] for a in artifacts] + assert "manifest.json" in artifact_names + assert "steps.json" in artifact_names + assert "snapshot.json" in artifact_names + assert "diagnostics.json" in artifact_names + assert any(a.startswith("frames/") for a in artifact_names) + + # Verify all files exist + for artifact in artifacts: + assert artifact["path"].exists() + assert artifact["size_bytes"] > 0 + assert artifact["content_type"] in ["application/json", "image/png", "image/jpeg"] + + +def test_upload_to_cloud_with_mocked_gateway(tmp_path) -> None: + """Test full upload flow with mocked HTTP requests.""" + from unittest.mock import MagicMock + + opts = FailureArtifactsOptions(output_dir=str(tmp_path)) + buf = FailureArtifactBuffer(run_id="run-mock-upload", options=opts) + buf.add_frame(b"frame1") + + run_dir = buf.persist( + reason="fail", + status="failure", + snapshot={"status": "success"}, + ) + assert run_dir is not None + + # Mock the HTTP requests + mock_response_init = MagicMock() + mock_response_init.status_code = 200 + mock_response_init.json.return_value = { + "upload_urls": [ + { + "name": "manifest.json", + "upload_url": "https://mock.com/manifest", + "storage_key": "test/manifest.json", + }, + { + "name": "steps.json", + "upload_url": "https://mock.com/steps", + "storage_key": "test/steps.json", + }, + { + "name": "snapshot.json", + "upload_url": "https://mock.com/snapshot", + "storage_key": "test/snapshot.json", + }, + ], + "artifact_index_upload": { + "upload_url": "https://mock.com/index", + "storage_key": "test/index.json", + }, + } + + mock_response_upload = MagicMock() + mock_response_upload.status_code = 200 + + mock_response_complete = MagicMock() + mock_response_complete.status_code = 200 + + with patch("sentience.failure_artifacts.requests.post") as mock_post: + with patch("sentience.failure_artifacts.requests.put") as mock_put: + mock_post.side_effect = [mock_response_init, mock_response_complete] + mock_put.return_value = mock_response_upload + + result = buf.upload_to_cloud(api_key="test-key", persisted_dir=run_dir) + + # Should return artifact index key + assert result == "test/index.json" + + # Verify POST calls were made + assert mock_post.call_count == 2 + + # Verify PUT calls were made for each artifact + index + assert mock_put.call_count >= 3 # At least manifest, steps, snapshot + + +def test_upload_to_cloud_handles_gateway_error(tmp_path) -> None: + """Test that upload_to_cloud handles gateway errors gracefully.""" + from unittest.mock import MagicMock + + opts = FailureArtifactsOptions(output_dir=str(tmp_path)) + buf = FailureArtifactBuffer(run_id="run-error", options=opts) + buf.add_frame(b"frame1") + + run_dir = buf.persist(reason="fail", status="failure") + assert run_dir is not None + + # Mock gateway returning error + mock_response = MagicMock() + mock_response.status_code = 500 + + with patch("sentience.failure_artifacts.requests.post") as mock_post: + mock_post.return_value = mock_response + + result = buf.upload_to_cloud(api_key="test-key", persisted_dir=run_dir) + assert result is None + + +def test_upload_to_cloud_handles_network_error(tmp_path) -> None: + """Test that upload_to_cloud handles network errors gracefully.""" + import requests + + opts = FailureArtifactsOptions(output_dir=str(tmp_path)) + buf = FailureArtifactBuffer(run_id="run-network-error", options=opts) + buf.add_frame(b"frame1") + + run_dir = buf.persist(reason="fail", status="failure") + assert run_dir is not None + + with patch("sentience.failure_artifacts.requests.post") as mock_post: + mock_post.side_effect = requests.exceptions.ConnectionError("Network error") + + result = buf.upload_to_cloud(api_key="test-key", persisted_dir=run_dir) + assert result is None From de7d9bc26780dd74889e39953cb52bd16ff4df11 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 18 Jan 2026 19:54:00 -0800 Subject: [PATCH 3/7] P6: passthrough fields for captcha detection --- sentience/models.py | 19 ++++++++++++++++++- sentience/snapshot.py | 8 ++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/sentience/models.py b/sentience/models.py index 74560ea..3fe323b 100644 --- a/sentience/models.py +++ b/sentience/models.py @@ -290,12 +290,29 @@ class SnapshotDiagnosticsMetrics(BaseModel): raw_elements_count: int | None = None +class CaptchaEvidence(BaseModel): + text_hits: list[str] = Field(default_factory=list) + selector_hits: list[str] = Field(default_factory=list) + iframe_src_hits: list[str] = Field(default_factory=list) + url_hits: list[str] = Field(default_factory=list) + + +class CaptchaDiagnostics(BaseModel): + """Detection-only CAPTCHA signal (no solving/bypass).""" + + detected: bool = False + provider_hint: str | None = None + confidence: float = 0.0 + evidence: CaptchaEvidence = Field(default_factory=CaptchaEvidence) + + class SnapshotDiagnostics(BaseModel): """Runtime stability/debug information (reserved for diagnostics, not ML metadata).""" confidence: float | None = None - reasons: list[str] = [] + reasons: list[str] = Field(default_factory=list) metrics: SnapshotDiagnosticsMetrics | None = None + captcha: CaptchaDiagnostics | None = None def get_grid_bounds(self, grid_id: int | None = None) -> list[GridInfo]: """ diff --git a/sentience/snapshot.py b/sentience/snapshot.py index 8cf75cf..34b1960 100644 --- a/sentience/snapshot.py +++ b/sentience/snapshot.py @@ -103,10 +103,17 @@ def _build_snapshot_payload( """ diagnostics = raw_result.get("diagnostics") or {} client_metrics = None + client_diagnostics = None try: client_metrics = diagnostics.get("metrics") except Exception: client_metrics = None + try: + captcha = diagnostics.get("captcha") + if captcha is not None: + client_diagnostics = {"captcha": captcha} + except Exception: + client_diagnostics = None return { "raw_elements": raw_result.get("raw_elements", []), @@ -118,6 +125,7 @@ def _build_snapshot_payload( "filter": options.filter.model_dump() if options.filter else None, }, "client_metrics": client_metrics, + "client_diagnostics": client_diagnostics, } From a8bb166fee7b0284b319b31acb3a9f1a6757a6cd Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 18 Jan 2026 20:33:03 -0800 Subject: [PATCH 4/7] P7: runtime safety net testing --- .../test_agent_runtime_regression_safety.py | 175 ++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 tests/unit/test_agent_runtime_regression_safety.py diff --git a/tests/unit/test_agent_runtime_regression_safety.py b/tests/unit/test_agent_runtime_regression_safety.py new file mode 100644 index 0000000..f807c1e --- /dev/null +++ b/tests/unit/test_agent_runtime_regression_safety.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from sentience.agent_runtime import AgentRuntime +from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues +from sentience.verification import AssertContext, AssertOutcome, is_checked, is_disabled, is_enabled, value_contains + + +class MockBackend: + async def screenshot_png(self) -> bytes: + return b"" + + +class MockTracer: + def __init__(self) -> None: + self.events: list[dict] = [] + + def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None: + self.events.append({"type": event_type, "data": data, "step_id": step_id}) + + +def make_element( + element_id: int, + *, + role: str, + text: str | None, + disabled: bool | None = None, + checked: bool | None = None, + value: str | None = None, + input_type: str | None = None, +) -> Element: + return Element( + id=element_id, + role=role, + text=text, + importance=10, + bbox=BBox(x=0, y=0, width=100, height=40), + visual_cues=VisualCues(is_primary=False, is_clickable=True, background_color_name=None), + in_viewport=True, + is_occluded=False, + disabled=disabled, + checked=checked, + value=value, + input_type=input_type, + ) + + +def make_snapshot(elements: list[Element], url: str) -> Snapshot: + return Snapshot( + status="success", + url=url, + elements=elements, + viewport=Viewport(width=1280, height=720), + ) + + +def test_v1_state_assertions_enabled_disabled_checked_value() -> None: + runtime = AgentRuntime(backend=MockBackend(), tracer=MockTracer()) + runtime.begin_step(goal="Test") + + elements = [ + make_element(1, role="button", text="Submit", disabled=False), + make_element(2, role="checkbox", text=None, checked=True), + make_element(3, role="textbox", text=None, value="hello", input_type="text"), + make_element(4, role="button", text="Disabled", disabled=True), + ] + runtime.last_snapshot = make_snapshot(elements, url="https://example.com") + + assert runtime.assert_(is_enabled("text~'Submit'"), label="enabled") is True + assert runtime.assert_(is_disabled("text~'Disabled'"), label="disabled") is True + assert runtime.assert_(is_checked("role=checkbox"), label="checked") is True + assert runtime.assert_(value_contains("role=textbox", "hello"), label="value") is True + + +@pytest.mark.asyncio +async def test_eventually_retry_loop_succeeds() -> None: + tracer = MockTracer() + runtime = AgentRuntime(backend=MockBackend(), tracer=tracer) + runtime.begin_step(goal="Test") + + snaps = [ + make_snapshot([], url="https://example.com"), + make_snapshot([], url="https://example.com"), + make_snapshot([], url="https://example.com/done"), + ] + + async def fake_snapshot(**_kwargs): + runtime.last_snapshot = snaps.pop(0) + return runtime.last_snapshot + + runtime.snapshot = AsyncMock(side_effect=fake_snapshot) # type: ignore[method-assign] + + def pred(ctx: AssertContext) -> AssertOutcome: + ok = (ctx.url or "").endswith("/done") + return AssertOutcome(passed=ok, reason="" if ok else "not done", details={}) + + ok = await runtime.check(pred, label="eventually_done").eventually(timeout_s=2.0, poll_s=0.0) + assert ok is True + + +@pytest.mark.asyncio +async def test_min_confidence_snapshot_exhausted() -> None: + tracer = MockTracer() + runtime = AgentRuntime(backend=MockBackend(), tracer=tracer) + runtime.begin_step(goal="Test") + + low_diag = MagicMock() + low_diag.confidence = 0.1 + low_diag.model_dump = lambda: {"confidence": 0.1} + + snaps = [ + MagicMock(url="https://example.com", elements=[], diagnostics=low_diag), + MagicMock(url="https://example.com", elements=[], diagnostics=low_diag), + ] + + async def fake_snapshot(**_kwargs): + runtime.last_snapshot = snaps.pop(0) + return runtime.last_snapshot + + runtime.snapshot = AsyncMock(side_effect=fake_snapshot) # type: ignore[method-assign] + + def pred(_ctx: AssertContext) -> AssertOutcome: + return AssertOutcome(passed=True, reason="would pass", details={}) + + ok = await runtime.check(pred, label="min_confidence_gate").eventually( + timeout_s=2.0, + poll_s=0.0, + min_confidence=0.7, + max_snapshot_attempts=2, + ) + assert ok is False + details = runtime._assertions_this_step[0]["details"] + assert details["reason_code"] == "snapshot_exhausted" + + +@pytest.mark.asyncio +async def test_golden_flow_same_snapshots_actions_no_captcha() -> None: + tracer = MockTracer() + runtime = AgentRuntime(backend=MockBackend(), tracer=tracer) + runtime.begin_step(goal="Test") + + class FakeActionExecutor: + def __init__(self) -> None: + self.actions: list[str] = [] + + def execute(self, action: str) -> dict: + self.actions.append(action) + return {"success": True} + + executor = FakeActionExecutor() + executor.execute("CLICK(1)") + executor.execute('TYPE(2, "hello")') + assert executor.actions == ["CLICK(1)", 'TYPE(2, "hello")'] + + snaps = [ + make_snapshot([], url="https://example.com"), + make_snapshot([], url="https://example.com/after"), + make_snapshot([], url="https://example.com/done"), + ] + + async def fake_snapshot(**_kwargs): + runtime.last_snapshot = snaps.pop(0) + return runtime.last_snapshot + + runtime.snapshot = AsyncMock(side_effect=fake_snapshot) # type: ignore[method-assign] + + def pred(ctx: AssertContext) -> AssertOutcome: + ok = (ctx.url or "").endswith("/done") + return AssertOutcome(passed=ok, reason="" if ok else "not done", details={}) + + ok = await runtime.check(pred, label="golden_flow").eventually(timeout_s=2.0, poll_s=0.0) + assert ok is True From 2a8b01627586977d3372a2462292af47c41ae530 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 18 Jan 2026 20:48:53 -0800 Subject: [PATCH 5/7] implement policy support for captcha handler with examples --- README.md | 27 +++- examples/agent_runtime_captcha_strategies.py | 53 ++++++++ sentience/__init__.py | 2 + sentience/agent_runtime.py | 124 ++++++++++++++++++ sentience/captcha.py | 53 ++++++++ sentience/captcha_strategies.py | 67 ++++++++++ .../test_agent_runtime_regression_safety.py | 9 +- 7 files changed, 333 insertions(+), 2 deletions(-) create mode 100644 examples/agent_runtime_captcha_strategies.py create mode 100644 sentience/captcha.py create mode 100644 sentience/captcha_strategies.py diff --git a/README.md b/README.md index 66e1a67..f0eab6d 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Use `AgentRuntime` to add Jest-style assertions to your agent loops. Verify brow ```python import asyncio -from sentience import AsyncSentienceBrowser, AgentRuntime +from sentience import AsyncSentienceBrowser, AgentRuntime, CaptchaOptions, HumanHandoffSolver from sentience.verification import ( url_contains, exists, @@ -80,6 +80,11 @@ async def main(): ).eventually(timeout_s=10.0, poll_s=0.25, min_confidence=0.7, max_snapshot_attempts=3) print("eventually() result:", ok) + # CAPTCHA handling (detection + handoff + verify) + runtime.set_captcha_options( + CaptchaOptions(policy="callback", handler=HumanHandoffSolver()) + ) + # Check task completion if runtime.assert_done(exists("text~'Example'"), label="task_complete"): print("✅ Task completed!") @@ -89,6 +94,26 @@ async def main(): asyncio.run(main()) ``` +#### CAPTCHA strategies (Batteries Included) + +```python +from sentience import CaptchaOptions, ExternalSolver, HumanHandoffSolver, VisionSolver + +# Human-in-loop +runtime.set_captcha_options(CaptchaOptions(policy="callback", handler=HumanHandoffSolver())) + +# Vision verification only +runtime.set_captcha_options(CaptchaOptions(policy="callback", handler=VisionSolver())) + +# External system/webhook +runtime.set_captcha_options( + CaptchaOptions( + policy="callback", + handler=ExternalSolver(lambda ctx: notify_webhook(ctx)), + ) +) +``` + ### Failure Artifact Buffer (Phase 1) Capture a short ring buffer of screenshots and persist them when a required assertion fails. diff --git a/examples/agent_runtime_captcha_strategies.py b/examples/agent_runtime_captcha_strategies.py new file mode 100644 index 0000000..932a0a5 --- /dev/null +++ b/examples/agent_runtime_captcha_strategies.py @@ -0,0 +1,53 @@ +import asyncio +import os + +from sentience import ( + AgentRuntime, + AsyncSentienceBrowser, + CaptchaOptions, + ExternalSolver, + HumanHandoffSolver, + VisionSolver, +) +from sentience.tracing import JsonlTraceSink, Tracer + + +async def notify_webhook(ctx) -> None: + # Example hook: send context to your system (no solver logic in Sentience). + # Replace with your own client / queue / webhook call. + print(f"[captcha] external resolver notified: url={ctx.url} run_id={ctx.run_id}") + + +async def main() -> None: + tracer = Tracer(run_id="captcha-demo", sink=JsonlTraceSink("trace.jsonl")) + + async with AsyncSentienceBrowser() as browser: + page = await browser.new_page() + runtime = await AgentRuntime.from_sentience_browser( + browser=browser, + page=page, + tracer=tracer, + ) + + # Option 1: Human-in-loop + runtime.set_captcha_options( + CaptchaOptions(policy="callback", handler=HumanHandoffSolver()) + ) + + # Option 2: Vision-only verification (no actions) + runtime.set_captcha_options( + CaptchaOptions(policy="callback", handler=VisionSolver()) + ) + + # Option 3: External resolver orchestration + runtime.set_captcha_options( + CaptchaOptions(policy="callback", handler=ExternalSolver(lambda ctx: notify_webhook(ctx))) + ) + + await page.goto(os.environ.get("CAPTCHA_TEST_URL", "https://example.com")) + runtime.begin_step("Captcha-aware snapshot") + await runtime.snapshot() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/sentience/__init__.py b/sentience/__init__.py index 4f8c65d..22ddc06 100644 --- a/sentience/__init__.py +++ b/sentience/__init__.py @@ -39,6 +39,8 @@ # Agent Layer (Phase 1 & 2) from .base_agent import BaseAgent from .browser import SentienceBrowser +from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution +from .captcha_strategies import ExternalSolver, HumanHandoffSolver, VisionSolver # Tracing (v0.12.0+) from .cloud_tracing import CloudTraceSink, SentienceLogger diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index 668ed58..ee064d1 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -70,6 +70,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Any +from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions from .models import Snapshot, SnapshotOptions from .verification import AssertContext, AssertOutcome, Predicate @@ -153,6 +154,10 @@ def __init__( self._task_done: bool = False self._task_done_label: str | None = None + # CAPTCHA handling (optional, disabled by default) + self._captcha_options: CaptchaOptions | None = None + self._captcha_retry_count: int = 0 + @classmethod async def from_sentience_browser( cls, @@ -248,13 +253,132 @@ async def snapshot(self, **kwargs: Any) -> Snapshot: from .backends.snapshot import snapshot as backend_snapshot # Merge default options with call-specific kwargs + skip_captcha_handling = bool(kwargs.pop("_skip_captcha_handling", False)) options_dict = self._snapshot_options.model_dump(exclude_none=True) options_dict.update(kwargs) options = SnapshotOptions(**options_dict) self.last_snapshot = await backend_snapshot(self.backend, options=options) + if not skip_captcha_handling: + await self._handle_captcha_if_needed(self.last_snapshot, source="gateway") return self.last_snapshot + def set_captcha_options(self, options: CaptchaOptions) -> None: + """ + Configure CAPTCHA handling (disabled by default unless set). + """ + self._captcha_options = options + self._captcha_retry_count = 0 + + def _is_captcha_detected(self, snapshot: Snapshot) -> bool: + if not self._captcha_options: + return False + captcha = getattr(snapshot.diagnostics, "captcha", None) if snapshot.diagnostics else None + if not captcha or not getattr(captcha, "detected", False): + return False + confidence = getattr(captcha, "confidence", 0.0) + return confidence >= self._captcha_options.min_confidence + + def _build_captcha_context(self, snapshot: Snapshot, source: str) -> CaptchaContext: + captcha = getattr(snapshot.diagnostics, "captcha", None) + return CaptchaContext( + run_id=self.tracer.run_id, + step_index=self.step_index, + url=snapshot.url, + source=source, # type: ignore[arg-type] + captcha=captcha, + ) + + def _emit_captcha_event(self, reason_code: str, details: dict[str, Any] | None = None) -> None: + payload = { + "kind": "captcha", + "passed": False, + "label": reason_code, + "details": {"reason_code": reason_code, **(details or {})}, + } + self.tracer.emit("verification", data=payload, step_id=self.step_id) + + async def _handle_captcha_if_needed(self, snapshot: Snapshot, source: str) -> None: + if not self._captcha_options: + return + if not self._is_captcha_detected(snapshot): + return + + captcha = getattr(snapshot.diagnostics, "captcha", None) + self._emit_captcha_event( + "captcha_detected", + {"captcha": getattr(captcha, "model_dump", lambda: captcha)()}, + ) + + resolution: CaptchaResolution + if self._captcha_options.policy == "callback": + if not self._captcha_options.handler: + self._emit_captcha_event("captcha_handler_error") + raise CaptchaHandlingError( + "captcha_handler_error", + 'Captcha handler is required for policy="callback".', + ) + try: + resolution = await self._captcha_options.handler( + self._build_captcha_context(snapshot, source) + ) + except Exception as exc: # pragma: no cover - defensive + self._emit_captcha_event("captcha_handler_error", {"error": str(exc)}) + raise CaptchaHandlingError( + "captcha_handler_error", "Captcha handler failed." + ) from exc + else: + resolution = CaptchaResolution(action="abort") + + await self._apply_captcha_resolution(resolution, snapshot, source) + + async def _apply_captcha_resolution( + self, + resolution: CaptchaResolution, + snapshot: Snapshot, + source: str, + ) -> None: + if resolution.action == "abort": + self._emit_captcha_event("captcha_policy_abort", {"message": resolution.message}) + raise CaptchaHandlingError( + "captcha_policy_abort", + resolution.message or "Captcha detected. Aborting per policy.", + ) + + if resolution.action == "retry_new_session": + self._captcha_retry_count += 1 + self._emit_captcha_event("captcha_retry_new_session") + if self._captcha_retry_count > self._captcha_options.max_retries_new_session: + self._emit_captcha_event("captcha_retry_exhausted") + raise CaptchaHandlingError( + "captcha_retry_exhausted", + "Captcha retry_new_session exhausted.", + ) + if not self._captcha_options.reset_session: + raise CaptchaHandlingError( + "captcha_retry_new_session", + "reset_session callback is required for retry_new_session.", + ) + await self._captcha_options.reset_session() + return + + if resolution.action == "wait_until_cleared": + timeout_ms = resolution.timeout_ms or self._captcha_options.timeout_ms + poll_ms = resolution.poll_ms or self._captcha_options.poll_ms + await self._wait_until_cleared(timeout_ms=timeout_ms, poll_ms=poll_ms, source=source) + self._emit_captcha_event("captcha_resumed") + + async def _wait_until_cleared(self, *, timeout_ms: int, poll_ms: int, source: str) -> None: + deadline = time.time() + timeout_ms / 1000.0 + while time.time() <= deadline: + await asyncio.sleep(poll_ms / 1000.0) + snap = await self.snapshot(_skip_captcha_handling=True) + if not self._is_captcha_detected(snap): + self._emit_captcha_event("captcha_cleared", {"source": source}) + return + self._emit_captcha_event("captcha_wait_timeout", {"timeout_ms": timeout_ms}) + raise CaptchaHandlingError("captcha_wait_timeout", "Captcha wait_until_cleared timed out.") + async def enable_failure_artifacts( self, options: FailureArtifactsOptions | None = None, diff --git a/sentience/captcha.py b/sentience/captcha.py new file mode 100644 index 0000000..6bdc68c --- /dev/null +++ b/sentience/captcha.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Awaitable, Callable, Literal, Optional + +from .models import CaptchaDiagnostics + +CaptchaPolicy = Literal["abort", "callback"] +CaptchaAction = Literal["abort", "retry_new_session", "wait_until_cleared"] +CaptchaSource = Literal["extension", "gateway", "runtime"] + + +@dataclass +class CaptchaContext: + run_id: str + step_index: int + url: str + source: CaptchaSource + captcha: CaptchaDiagnostics + screenshot_path: Optional[str] = None + frames_dir: Optional[str] = None + snapshot_path: Optional[str] = None + live_session_url: Optional[str] = None + meta: Optional[dict[str, str]] = None + + +@dataclass +class CaptchaResolution: + action: CaptchaAction + message: Optional[str] = None + handled_by: Optional[Literal["human", "customer_system", "unknown"]] = None + timeout_ms: Optional[int] = None + poll_ms: Optional[int] = None + + +CaptchaHandler = Callable[[CaptchaContext], CaptchaResolution | Awaitable[CaptchaResolution]] + + +@dataclass +class CaptchaOptions: + policy: CaptchaPolicy = "abort" + min_confidence: float = 0.7 + timeout_ms: int = 120_000 + poll_ms: int = 1_000 + max_retries_new_session: int = 1 + handler: Optional[CaptchaHandler] = None + reset_session: Optional[Callable[[], Awaitable[None]]] = None + + +class CaptchaHandlingError(RuntimeError): + def __init__(self, reason_code: str, message: str) -> None: + super().__init__(message) + self.reason_code = reason_code diff --git a/sentience/captcha_strategies.py b/sentience/captcha_strategies.py new file mode 100644 index 0000000..f0a9cb1 --- /dev/null +++ b/sentience/captcha_strategies.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import inspect +from typing import Callable + +from .captcha import CaptchaContext, CaptchaHandler, CaptchaResolution + + +def HumanHandoffSolver( + *, + message: str | None = None, + handled_by: str | None = "human", + timeout_ms: int | None = None, + poll_ms: int | None = None, +) -> CaptchaHandler: + async def _handler(_ctx: CaptchaContext) -> CaptchaResolution: + return CaptchaResolution( + action="wait_until_cleared", + message=message or "Solve CAPTCHA in the live session, then resume.", + handled_by=handled_by, + timeout_ms=timeout_ms, + poll_ms=poll_ms, + ) + + return _handler + + +def VisionSolver( + *, + message: str | None = None, + handled_by: str | None = "customer_system", + timeout_ms: int | None = None, + poll_ms: int | None = None, +) -> CaptchaHandler: + async def _handler(_ctx: CaptchaContext) -> CaptchaResolution: + return CaptchaResolution( + action="wait_until_cleared", + message=message or "Waiting for CAPTCHA to clear (vision verification).", + handled_by=handled_by, + timeout_ms=timeout_ms, + poll_ms=poll_ms, + ) + + return _handler + + +def ExternalSolver( + resolver: Callable[[CaptchaContext], None | bool | dict], + *, + message: str | None = None, + handled_by: str | None = "customer_system", + timeout_ms: int | None = None, + poll_ms: int | None = None, +) -> CaptchaHandler: + async def _handler(ctx: CaptchaContext) -> CaptchaResolution: + result = resolver(ctx) + if inspect.isawaitable(result): + await result + return CaptchaResolution( + action="wait_until_cleared", + message=message or "External solver invoked; waiting for clearance.", + handled_by=handled_by, + timeout_ms=timeout_ms, + poll_ms=poll_ms, + ) + + return _handler diff --git a/tests/unit/test_agent_runtime_regression_safety.py b/tests/unit/test_agent_runtime_regression_safety.py index f807c1e..c82a3fe 100644 --- a/tests/unit/test_agent_runtime_regression_safety.py +++ b/tests/unit/test_agent_runtime_regression_safety.py @@ -6,7 +6,14 @@ from sentience.agent_runtime import AgentRuntime from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues -from sentience.verification import AssertContext, AssertOutcome, is_checked, is_disabled, is_enabled, value_contains +from sentience.verification import ( + AssertContext, + AssertOutcome, + is_checked, + is_disabled, + is_enabled, + value_contains, +) class MockBackend: From 9576f5ecd061e711ea1e15d190829616f47181d9 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 18 Jan 2026 21:16:24 -0800 Subject: [PATCH 6/7] updated trace_v1 schema --- sentience/schemas/trace_v1.json | 42 ++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/sentience/schemas/trace_v1.json b/sentience/schemas/trace_v1.json index 37c28cb..25c3d20 100644 --- a/sentience/schemas/trace_v1.json +++ b/sentience/schemas/trace_v1.json @@ -73,6 +73,46 @@ "url": {"type": ["string", "null"]}, "element_count": {"type": "integer"}, "timestamp": {"type": ["string", "null"]}, + "diagnostics": { + "type": ["object", "null"], + "properties": { + "confidence": {"type": ["number", "null"]}, + "reasons": {"type": "array", "items": {"type": "string"}}, + "metrics": { + "type": ["object", "null"], + "properties": { + "ready_state": {"type": ["string", "null"]}, + "quiet_ms": {"type": ["number", "null"]}, + "node_count": {"type": ["integer", "null"]}, + "interactive_count": {"type": ["integer", "null"]}, + "raw_elements_count": {"type": ["integer", "null"]} + }, + "additionalProperties": true + }, + "captcha": { + "type": ["object", "null"], + "properties": { + "detected": {"type": "boolean"}, + "provider_hint": { + "type": ["string", "null"], + "enum": ["recaptcha", "hcaptcha", "turnstile", "arkose", "awswaf", "unknown", null] + }, + "confidence": {"type": "number"}, + "evidence": { + "type": "object", + "properties": { + "text_hits": {"type": "array", "items": {"type": "string"}}, + "selector_hits": {"type": "array", "items": {"type": "string"}}, + "iframe_src_hits": {"type": "array", "items": {"type": "string"}}, + "url_hits": {"type": "array", "items": {"type": "string"}} + } + } + }, + "required": ["detected", "confidence", "evidence"] + } + }, + "additionalProperties": true + }, "elements": { "type": "array", "items": { @@ -289,7 +329,7 @@ "passed": {"type": "boolean"}, "kind": { "type": "string", - "enum": ["assert", "task_done"], + "enum": ["assert", "task_done", "captcha"], "description": "Type of verification event" }, "label": {"type": "string", "description": "Human-readable label for the assertion"}, From 908d3ef9d096fc3c17e14cdc52aabc7d423a6866 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Mon, 19 Jan 2026 10:10:50 -0800 Subject: [PATCH 7/7] updated trace schema and fix --- sentience/agent_runtime.py | 2 +- sentience/schemas/trace_v1.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index ee064d1..272bc83 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -579,7 +579,7 @@ def assert_done( True if task is complete (assertion passed), False otherwise """ # Convenience wrapper for assert_ with required=True - ok = self.assertTrue(predicate, label=label, required=True) + ok = self.assert_(predicate, label=label, required=True) if ok: self._task_done = True self._task_done_label = label diff --git a/sentience/schemas/trace_v1.json b/sentience/schemas/trace_v1.json index 25c3d20..392dda1 100644 --- a/sentience/schemas/trace_v1.json +++ b/sentience/schemas/trace_v1.json @@ -220,7 +220,7 @@ "type": "object", "required": ["kind"], "properties": { - "kind": {"type": "string", "enum": ["click", "type", "press", "finish", "navigate"]}, + "kind": {"type": "string", "enum": ["click", "type", "press", "finish", "navigate", "scroll"]}, "element_id": {"type": "integer"}, "text": {"type": "string"}, "key": {"type": "string"},