From 92a89a67ee0efd9ffdf78892ce2b4ea98892d5c7 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 18 Jan 2026 09:54:19 -0800 Subject: [PATCH 1/7] add baseline safety net testing --- .github/workflows/test.yml | 9 +++ tests/unit/test_agent_runtime_phase0.py | 81 +++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 tests/unit/test_agent_runtime_phase0.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9277f45..219fe8a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -373,6 +373,15 @@ jobs: print('WARNING: Could not find assert_ method call in assert_done') sys.exit(1) PYEOF + + - name: Phase 0 regression safety net (unit) + shell: bash + run: | + pytest tests/unit/test_agent_runtime_phase0.py -v + + - name: Run full test suite + shell: bash + run: | pytest tests/ -v env: CI: true diff --git a/tests/unit/test_agent_runtime_phase0.py b/tests/unit/test_agent_runtime_phase0.py new file mode 100644 index 0000000..06a5ae4 --- /dev/null +++ b/tests/unit/test_agent_runtime_phase0.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +from unittest.mock import MagicMock + +from sentience.agent_runtime import AgentRuntime +from sentience.models import BBox, Element, VisualCues +from sentience.verification import is_disabled, is_enabled, value_equals + + +class MockBackend: + """Mock BrowserBackend implementation for unit tests.""" + + async def get_url(self) -> str: + return "https://example.com" + + async def refresh_page_info(self): + return None + + +class MockTracer: + """Mock Tracer for unit tests.""" + + def __init__(self) -> None: + self.events: list[dict] = [] + + def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None: + self.events.append( + { + "type": event_type, + "data": data, + "step_id": step_id, + } + ) + + +def test_assert_state_predicates_use_snapshot_context() -> None: + """State-aware predicates should run against snapshot context.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime.begin_step(goal="Test") + + cues = VisualCues(is_primary=False, background_color_name=None, is_clickable=True) + elements = [ + Element( + id=1, + role="button", + text="Submit", + importance=10, + bbox=BBox(x=0, y=0, width=100, height=40), + visual_cues=cues, + disabled=False, + ), + Element( + id=2, + role="textbox", + text=None, + importance=5, + bbox=BBox(x=0, y=50, width=200, height=40), + visual_cues=cues, + value="hello", + input_type="text", + disabled=False, + ), + Element( + id=3, + role="button", + text="Disabled", + importance=4, + bbox=BBox(x=0, y=100, width=120, height=40), + visual_cues=cues, + disabled=True, + ), + ] + + runtime.last_snapshot = MagicMock(url="https://example.com", elements=elements) + + assert runtime.assert_(is_enabled("text~'Submit'"), label="enabled") is True + assert runtime.assert_(is_disabled("text~'Disabled'"), label="disabled") is True + assert runtime.assert_(value_equals("role=textbox", "hello"), label="value") is True + assert len(runtime._assertions_this_step) == 3 From 4ce42469557899dea406d0c24af50311d4262103 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 18 Jan 2026 10:04:30 -0800 Subject: [PATCH 2/7] p1 --- README.md | 15 +++ sentience/agent_runtime.py | 107 ++++++++++++++++++++++ sentience/failure_artifacts.py | 132 +++++++++++++++++++++++++++ tests/unit/test_failure_artifacts.py | 44 +++++++++ 4 files changed, 298 insertions(+) create mode 100644 sentience/failure_artifacts.py create mode 100644 tests/unit/test_failure_artifacts.py diff --git a/README.md b/README.md index 4a38d47..e077944 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,21 @@ async def main(): asyncio.run(main()) ``` +### Failure Artifact Buffer (Phase 1) + +Capture a short ring buffer of screenshots and persist them when a required assertion fails. + +```python +from sentience.failure_artifacts import FailureArtifactsOptions + +await runtime.enable_failure_artifacts( + FailureArtifactsOptions(buffer_seconds=15, capture_on_action=True, fps=0.0) +) + +# After each action, record it (best-effort). +await runtime.record_action("CLICK") +``` + **See examples:** [`examples/asserts/`](examples/asserts/) ## 🚀 Quick Start: Choose Your Abstraction Level diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index ca5ab7c..1608d36 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -70,6 +70,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Any +from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions from .models import Snapshot, SnapshotOptions from .verification import AssertContext, AssertOutcome, Predicate @@ -138,6 +139,10 @@ def __init__( # Snapshot state self.last_snapshot: Snapshot | None = None + # Failure artifacts (Phase 1) + self._artifact_buffer: FailureArtifactBuffer | None = None + self._artifact_timer_task: asyncio.Task | None = None + # Cached URL (updated on snapshot or explicit get_url call) self._cached_url: str | None = None @@ -250,6 +255,90 @@ async def snapshot(self, **kwargs: Any) -> Snapshot: self.last_snapshot = await backend_snapshot(self.backend, options=options) return self.last_snapshot + async def enable_failure_artifacts( + self, + options: FailureArtifactsOptions | None = None, + ) -> None: + """ + Enable failure artifact buffer (Phase 1). + """ + opts = options or FailureArtifactsOptions() + self._artifact_buffer = FailureArtifactBuffer( + run_id=self.tracer.run_id, + options=opts, + ) + if opts.fps > 0: + self._artifact_timer_task = asyncio.create_task(self._artifact_timer_loop()) + + def disable_failure_artifacts(self) -> None: + """ + Disable failure artifact buffer and stop background capture. + """ + if self._artifact_timer_task: + self._artifact_timer_task.cancel() + self._artifact_timer_task = None + + async def record_action( + self, + action: str, + *, + url: str | None = None, + ) -> None: + """ + Record an action in the artifact timeline and capture a frame if enabled. + """ + if not self._artifact_buffer: + return + self._artifact_buffer.record_step( + action=action, + step_id=self.step_id, + step_index=self.step_index, + url=url, + ) + if self._artifact_buffer.options.capture_on_action: + await self._capture_artifact_frame() + + async def _capture_artifact_frame(self) -> None: + if not self._artifact_buffer: + return + try: + image_bytes = await self.backend.screenshot_png() + except Exception: + return + self._artifact_buffer.add_frame(image_bytes, fmt="png") + + async def _artifact_timer_loop(self) -> None: + if not self._artifact_buffer: + return + interval = 1.0 / max(0.001, self._artifact_buffer.options.fps) + try: + while True: + await self._capture_artifact_frame() + await asyncio.sleep(interval) + except asyncio.CancelledError: + return + + def finalize_run(self, *, success: bool) -> None: + """ + Finalize artifact buffer at end of run. + """ + if not self._artifact_buffer: + return + if success: + if self._artifact_buffer.options.persist_mode == "always": + self._artifact_buffer.persist(reason="success", status="success") + self._artifact_buffer.cleanup() + else: + self._persist_failure_artifacts(reason="finalize_failure") + + def _persist_failure_artifacts(self, *, reason: str) -> None: + if not self._artifact_buffer: + return + self._artifact_buffer.persist(reason=reason, status="failure") + self._artifact_buffer.cleanup() + if self._artifact_buffer.options.persist_mode == "onFail": + self.disable_failure_artifacts() + def begin_step(self, goal: str, step_index: int | None = None) -> str: """ Begin a new step in the verification loop. @@ -309,6 +398,8 @@ def assert_( kind="assert", record_in_step=True, ) + if required and not outcome.passed: + self._persist_failure_artifacts(reason=f"assert_failed:{label}") return outcome.passed def check(self, predicate: Predicate, label: str, required: bool = False) -> AssertionHandle: @@ -619,6 +710,10 @@ async def eventually( "vision_fallback": True, }, ) + if self.required and not passed: + self.runtime._persist_failure_artifacts( + reason=f"assert_eventually_failed:{self.label}" + ) return passed except Exception as e: # If vision fallback fails, fall through to snapshot_exhausted. @@ -649,6 +744,10 @@ async def eventually( "exhausted": True, }, ) + if self.required: + self.runtime._persist_failure_artifacts( + reason=f"assert_eventually_failed:{self.label}" + ) return False if time.monotonic() >= deadline: @@ -666,6 +765,10 @@ async def eventually( "timeout": True, }, ) + if self.required: + self.runtime._persist_failure_artifacts( + reason=f"assert_eventually_timeout:{self.label}" + ) return False await asyncio.sleep(poll_s) @@ -705,6 +808,10 @@ async def eventually( record_in_step=True, extra={"eventually": True, "attempt": attempt, "final": True, "timeout": True}, ) + if self.required: + self.runtime._persist_failure_artifacts( + reason=f"assert_eventually_timeout:{self.label}" + ) return False await asyncio.sleep(poll_s) diff --git a/sentience/failure_artifacts.py b/sentience/failure_artifacts.py new file mode 100644 index 0000000..4d81960 --- /dev/null +++ b/sentience/failure_artifacts.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +import json +import shutil +import tempfile +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Callable, Literal + + +@dataclass +class FailureArtifactsOptions: + buffer_seconds: float = 15.0 + capture_on_action: bool = True + fps: float = 0.0 + persist_mode: Literal["onFail", "always"] = "onFail" + output_dir: str = ".sentience/artifacts" + + +@dataclass +class _FrameRecord: + ts: float + file_name: str + path: Path + + +class FailureArtifactBuffer: + """ + Ring buffer of screenshots with minimal persistence on failure. + """ + + def __init__( + self, + *, + run_id: str, + options: FailureArtifactsOptions, + time_fn: Callable[[], float] = time.time, + ) -> None: + self.run_id = run_id + self.options = options + self._time_fn = time_fn + self._temp_dir = Path(tempfile.mkdtemp(prefix="sentience-artifacts-")) + self._frames_dir = self._temp_dir / "frames" + self._frames_dir.mkdir(parents=True, exist_ok=True) + self._frames: list[_FrameRecord] = [] + self._steps: list[dict] = [] + self._persisted = False + + @property + def temp_dir(self) -> Path: + return self._temp_dir + + def record_step( + self, + *, + action: str, + step_id: str | None, + step_index: int | None, + url: str | None, + ) -> None: + self._steps.append( + { + "ts": self._time_fn(), + "action": action, + "step_id": step_id, + "step_index": step_index, + "url": url, + } + ) + + def add_frame(self, image_bytes: bytes, *, fmt: str = "png") -> None: + ts = self._time_fn() + file_name = f"frame_{int(ts * 1000)}.{fmt}" + path = self._frames_dir / file_name + path.write_bytes(image_bytes) + self._frames.append(_FrameRecord(ts=ts, file_name=file_name, path=path)) + self._prune() + + def frame_count(self) -> int: + return len(self._frames) + + def _prune(self) -> None: + cutoff = self._time_fn() - max(0.0, self.options.buffer_seconds) + keep: list[_FrameRecord] = [] + for frame in self._frames: + if frame.ts >= cutoff: + keep.append(frame) + else: + try: + frame.path.unlink(missing_ok=True) + except Exception: + pass + self._frames = keep + + def persist(self, *, reason: str | None, status: Literal["failure", "success"]) -> Path | None: + if self._persisted: + return None + + output_dir = Path(self.options.output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + ts = int(self._time_fn() * 1000) + run_dir = output_dir / f"{self.run_id}-{ts}" + frames_out = run_dir / "frames" + frames_out.mkdir(parents=True, exist_ok=True) + + for frame in self._frames: + shutil.copy2(frame.path, frames_out / frame.file_name) + + steps_path = run_dir / "steps.json" + steps_path.write_text(json.dumps(self._steps, indent=2)) + + manifest = { + "run_id": self.run_id, + "created_at_ms": ts, + "status": status, + "reason": reason, + "buffer_seconds": self.options.buffer_seconds, + "frame_count": len(self._frames), + "frames": [ + {"file": frame.file_name, "ts": frame.ts} for frame in self._frames + ], + } + manifest_path = run_dir / "manifest.json" + manifest_path.write_text(json.dumps(manifest, indent=2)) + + self._persisted = True + return run_dir + + def cleanup(self) -> None: + if self._temp_dir.exists(): + shutil.rmtree(self._temp_dir, ignore_errors=True) diff --git a/tests/unit/test_failure_artifacts.py b/tests/unit/test_failure_artifacts.py new file mode 100644 index 0000000..cce8d22 --- /dev/null +++ b/tests/unit/test_failure_artifacts.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import json + +from sentience.failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions + + +def test_buffer_prunes_by_time(tmp_path) -> None: + now = {"t": 0.0} + + def time_fn() -> float: + return now["t"] + + opts = FailureArtifactsOptions(buffer_seconds=1.0, output_dir=str(tmp_path)) + buf = FailureArtifactBuffer(run_id="run-1", options=opts, time_fn=time_fn) + + buf.add_frame(b"first") + assert buf.frame_count() == 1 + + now["t"] = 2.0 + buf.add_frame(b"second") + assert buf.frame_count() == 1 + + +def test_persist_writes_manifest_and_steps(tmp_path) -> None: + now = {"t": 10.0} + + def time_fn() -> float: + return now["t"] + + opts = FailureArtifactsOptions(output_dir=str(tmp_path)) + buf = FailureArtifactBuffer(run_id="run-2", options=opts, time_fn=time_fn) + + buf.record_step(action="CLICK", step_id="s1", step_index=1, url="https://example.com") + buf.add_frame(b"frame") + + run_dir = buf.persist(reason="assert_failed", status="failure") + assert run_dir is not None + manifest = json.loads((run_dir / "manifest.json").read_text()) + steps = json.loads((run_dir / "steps.json").read_text()) + + assert manifest["run_id"] == "run-2" + assert manifest["frame_count"] == 1 + assert len(steps) == 1 From 341c80c198e54a878e9eb178b92ad2d9963c44f9 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 18 Jan 2026 10:06:49 -0800 Subject: [PATCH 3/7] p2 --- sentience/agent_runtime.py | 27 ++++++++++++++-- sentience/failure_artifacts.py | 47 ++++++++++++++++++++++------ tests/unit/test_failure_artifacts.py | 17 +++++++++- 3 files changed, 79 insertions(+), 12 deletions(-) diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index 1608d36..668ed58 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -326,7 +326,13 @@ def finalize_run(self, *, success: bool) -> None: return if success: if self._artifact_buffer.options.persist_mode == "always": - self._artifact_buffer.persist(reason="success", status="success") + self._artifact_buffer.persist( + reason="success", + status="success", + snapshot=self.last_snapshot, + diagnostics=getattr(self.last_snapshot, "diagnostics", None), + metadata=self._artifact_metadata(), + ) self._artifact_buffer.cleanup() else: self._persist_failure_artifacts(reason="finalize_failure") @@ -334,11 +340,28 @@ def finalize_run(self, *, success: bool) -> None: def _persist_failure_artifacts(self, *, reason: str) -> None: if not self._artifact_buffer: return - self._artifact_buffer.persist(reason=reason, status="failure") + self._artifact_buffer.persist( + reason=reason, + status="failure", + snapshot=self.last_snapshot, + diagnostics=getattr(self.last_snapshot, "diagnostics", None), + metadata=self._artifact_metadata(), + ) self._artifact_buffer.cleanup() if self._artifact_buffer.options.persist_mode == "onFail": self.disable_failure_artifacts() + def _artifact_metadata(self) -> dict[str, Any]: + url = None + if self.last_snapshot is not None: + url = self.last_snapshot.url + elif self._cached_url: + url = self._cached_url + return { + "backend": self.backend.__class__.__name__, + "url": url, + } + def begin_step(self, goal: str, step_index: int | None = None) -> str: """ Begin a new step in the verification loop. diff --git a/sentience/failure_artifacts.py b/sentience/failure_artifacts.py index 4d81960..8a714c5 100644 --- a/sentience/failure_artifacts.py +++ b/sentience/failure_artifacts.py @@ -4,9 +4,10 @@ import shutil import tempfile import time +from collections.abc import Callable from dataclasses import dataclass from pathlib import Path -from typing import Callable, Literal +from typing import Any, Literal @dataclass @@ -93,7 +94,20 @@ def _prune(self) -> None: pass self._frames = keep - def persist(self, *, reason: str | None, status: Literal["failure", "success"]) -> Path | None: + def _write_json_atomic(self, path: Path, data: Any) -> None: + tmp_path = path.with_suffix(path.suffix + ".tmp") + tmp_path.write_text(json.dumps(data, indent=2)) + tmp_path.replace(path) + + def persist( + self, + *, + reason: str | None, + status: Literal["failure", "success"], + snapshot: Any | None = None, + diagnostics: Any | None = None, + metadata: dict[str, Any] | None = None, + ) -> Path | None: if self._persisted: return None @@ -107,8 +121,23 @@ def persist(self, *, reason: str | None, status: Literal["failure", "success"]) for frame in self._frames: shutil.copy2(frame.path, frames_out / frame.file_name) - steps_path = run_dir / "steps.json" - steps_path.write_text(json.dumps(self._steps, indent=2)) + self._write_json_atomic(run_dir / "steps.json", self._steps) + + snapshot_payload = None + if snapshot is not None: + if hasattr(snapshot, "model_dump"): + snapshot_payload = snapshot.model_dump() + else: + snapshot_payload = snapshot + self._write_json_atomic(run_dir / "snapshot.json", snapshot_payload) + + diagnostics_payload = None + if diagnostics is not None: + if hasattr(diagnostics, "model_dump"): + diagnostics_payload = diagnostics.model_dump() + else: + diagnostics_payload = diagnostics + self._write_json_atomic(run_dir / "diagnostics.json", diagnostics_payload) manifest = { "run_id": self.run_id, @@ -117,12 +146,12 @@ def persist(self, *, reason: str | None, status: Literal["failure", "success"]) "reason": reason, "buffer_seconds": self.options.buffer_seconds, "frame_count": len(self._frames), - "frames": [ - {"file": frame.file_name, "ts": frame.ts} for frame in self._frames - ], + "frames": [{"file": frame.file_name, "ts": frame.ts} for frame in self._frames], + "snapshot": "snapshot.json" if snapshot_payload is not None else None, + "diagnostics": "diagnostics.json" if diagnostics_payload is not None else None, + "metadata": metadata or {}, } - manifest_path = run_dir / "manifest.json" - manifest_path.write_text(json.dumps(manifest, indent=2)) + self._write_json_atomic(run_dir / "manifest.json", manifest) self._persisted = True return run_dir diff --git a/tests/unit/test_failure_artifacts.py b/tests/unit/test_failure_artifacts.py index cce8d22..cdc3d01 100644 --- a/tests/unit/test_failure_artifacts.py +++ b/tests/unit/test_failure_artifacts.py @@ -34,11 +34,26 @@ def time_fn() -> float: buf.record_step(action="CLICK", step_id="s1", step_index=1, url="https://example.com") buf.add_frame(b"frame") - run_dir = buf.persist(reason="assert_failed", status="failure") + snapshot = {"status": "success", "url": "https://example.com", "elements": []} + diagnostics = {"confidence": 0.9, "reasons": ["ok"], "metrics": {"quiet_ms": 42}} + run_dir = buf.persist( + reason="assert_failed", + status="failure", + snapshot=snapshot, + diagnostics=diagnostics, + metadata={"backend": "MockBackend", "url": "https://example.com"}, + ) assert run_dir is not None manifest = json.loads((run_dir / "manifest.json").read_text()) steps = json.loads((run_dir / "steps.json").read_text()) + snap_json = json.loads((run_dir / "snapshot.json").read_text()) + diag_json = json.loads((run_dir / "diagnostics.json").read_text()) assert manifest["run_id"] == "run-2" assert manifest["frame_count"] == 1 + assert manifest["snapshot"] == "snapshot.json" + assert manifest["diagnostics"] == "diagnostics.json" + assert manifest["metadata"]["backend"] == "MockBackend" assert len(steps) == 1 + assert snap_json["url"] == "https://example.com" + assert diag_json["confidence"] == 0.9 From 70b1572be832da77d6d5a427452f8793b58b0c7b Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 18 Jan 2026 10:08:19 -0800 Subject: [PATCH 4/7] fix tests --- tests/unit/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index dc65871..de85e7c 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -42,6 +42,7 @@ class _Dummy: async_api_mod.BrowserContext = _Dummy async_api_mod.Page = _Dummy async_api_mod.Playwright = _Dummy +async_api_mod.PlaywrightContextManager = _Dummy async def _async_playwright(): @@ -53,6 +54,7 @@ async def _async_playwright(): sync_api_mod.BrowserContext = _Dummy sync_api_mod.Page = _Dummy sync_api_mod.Playwright = _Dummy +sync_api_mod.PlaywrightContextManager = _Dummy def _sync_playwright(): From 7a76a564ddc8491eaef35b41fc927338f8d33392 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 18 Jan 2026 10:18:33 -0800 Subject: [PATCH 5/7] P3 --- README.md | 16 +++++ sentience/failure_artifacts.py | 96 +++++++++++++++++++++++++--- tests/unit/test_failure_artifacts.py | 37 ++++++++++- 3 files changed, 139 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index e077944..66e1a67 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,22 @@ await runtime.enable_failure_artifacts( await runtime.record_action("CLICK") ``` +### Redaction callback (Phase 3) + +Provide a user-defined callback to redact snapshots and decide whether to persist frames. The SDK does not implement image/video redaction. + +```python +from sentience.failure_artifacts import FailureArtifactsOptions, RedactionContext, RedactionResult + +def redact(ctx: RedactionContext) -> RedactionResult: + # Example: drop frames entirely, keep JSON only. + return RedactionResult(drop_frames=True) + +await runtime.enable_failure_artifacts( + FailureArtifactsOptions(on_before_persist=redact) +) +``` + **See examples:** [`examples/asserts/`](examples/asserts/) ## 🚀 Quick Start: Choose Your Abstraction Level diff --git a/sentience/failure_artifacts.py b/sentience/failure_artifacts.py index 8a714c5..fd92135 100644 --- a/sentience/failure_artifacts.py +++ b/sentience/failure_artifacts.py @@ -17,6 +17,27 @@ class FailureArtifactsOptions: fps: float = 0.0 persist_mode: Literal["onFail", "always"] = "onFail" output_dir: str = ".sentience/artifacts" + on_before_persist: Callable[[RedactionContext], RedactionResult] | None = None + redact_snapshot_values: bool = True + + +@dataclass +class RedactionContext: + run_id: str + reason: str | None + status: Literal["failure", "success"] + snapshot: Any | None + diagnostics: Any | None + frame_paths: list[str] + metadata: dict[str, Any] + + +@dataclass +class RedactionResult: + snapshot: Any | None = None + diagnostics: Any | None = None + frame_paths: list[str] | None = None + drop_frames: bool = False @dataclass @@ -99,6 +120,27 @@ def _write_json_atomic(self, path: Path, data: Any) -> None: tmp_path.write_text(json.dumps(data, indent=2)) tmp_path.replace(path) + def _redact_snapshot_defaults(self, payload: Any) -> Any: + if not isinstance(payload, dict): + return payload + elements = payload.get("elements") + if not isinstance(elements, list): + return payload + redacted = [] + for el in elements: + if not isinstance(el, dict): + redacted.append(el) + continue + input_type = (el.get("input_type") or "").lower() + if input_type in {"password", "email", "tel"} and "value" in el: + el = dict(el) + el["value"] = None + el["value_redacted"] = True + redacted.append(el) + payload = dict(payload) + payload["elements"] = redacted + return payload + def persist( self, *, @@ -118,18 +160,14 @@ def persist( frames_out = run_dir / "frames" frames_out.mkdir(parents=True, exist_ok=True) - for frame in self._frames: - shutil.copy2(frame.path, frames_out / frame.file_name) - - self._write_json_atomic(run_dir / "steps.json", self._steps) - snapshot_payload = None if snapshot is not None: if hasattr(snapshot, "model_dump"): snapshot_payload = snapshot.model_dump() else: snapshot_payload = snapshot - self._write_json_atomic(run_dir / "snapshot.json", snapshot_payload) + if self.options.redact_snapshot_values: + snapshot_payload = self._redact_snapshot_defaults(snapshot_payload) diagnostics_payload = None if diagnostics is not None: @@ -137,6 +175,44 @@ def persist( diagnostics_payload = diagnostics.model_dump() else: diagnostics_payload = diagnostics + + frame_paths = [str(frame.path) for frame in self._frames] + drop_frames = False + + if self.options.on_before_persist is not None: + try: + result = self.options.on_before_persist( + RedactionContext( + run_id=self.run_id, + reason=reason, + status=status, + snapshot=snapshot_payload, + diagnostics=diagnostics_payload, + frame_paths=frame_paths, + metadata=metadata or {}, + ) + ) + if result.snapshot is not None: + snapshot_payload = result.snapshot + if result.diagnostics is not None: + diagnostics_payload = result.diagnostics + if result.frame_paths is not None: + frame_paths = result.frame_paths + drop_frames = result.drop_frames + except Exception: + drop_frames = True + + if not drop_frames: + for frame_path in frame_paths: + src = Path(frame_path) + if not src.exists(): + continue + shutil.copy2(src, frames_out / src.name) + + self._write_json_atomic(run_dir / "steps.json", self._steps) + if snapshot_payload is not None: + self._write_json_atomic(run_dir / "snapshot.json", snapshot_payload) + if diagnostics_payload is not None: self._write_json_atomic(run_dir / "diagnostics.json", diagnostics_payload) manifest = { @@ -145,11 +221,15 @@ def persist( "status": status, "reason": reason, "buffer_seconds": self.options.buffer_seconds, - "frame_count": len(self._frames), - "frames": [{"file": frame.file_name, "ts": frame.ts} for frame in self._frames], + "frame_count": 0 if drop_frames else len(frame_paths), + "frames": ( + [] if drop_frames else [{"file": Path(p).name, "ts": None} for p in frame_paths] + ), "snapshot": "snapshot.json" if snapshot_payload is not None else None, "diagnostics": "diagnostics.json" if diagnostics_payload is not None else None, "metadata": metadata or {}, + "frames_redacted": not drop_frames and self.options.on_before_persist is not None, + "frames_dropped": drop_frames, } self._write_json_atomic(run_dir / "manifest.json", manifest) diff --git a/tests/unit/test_failure_artifacts.py b/tests/unit/test_failure_artifacts.py index cdc3d01..0122bba 100644 --- a/tests/unit/test_failure_artifacts.py +++ b/tests/unit/test_failure_artifacts.py @@ -2,7 +2,12 @@ import json -from sentience.failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions +from sentience.failure_artifacts import ( + FailureArtifactBuffer, + FailureArtifactsOptions, + RedactionContext, + RedactionResult, +) def test_buffer_prunes_by_time(tmp_path) -> None: @@ -34,7 +39,14 @@ def time_fn() -> float: buf.record_step(action="CLICK", step_id="s1", step_index=1, url="https://example.com") buf.add_frame(b"frame") - snapshot = {"status": "success", "url": "https://example.com", "elements": []} + snapshot = { + "status": "success", + "url": "https://example.com", + "elements": [ + {"id": 1, "input_type": "password", "value": "secret"}, + {"id": 2, "input_type": "email", "value": "user@example.com"}, + ], + } diagnostics = {"confidence": 0.9, "reasons": ["ok"], "metrics": {"quiet_ms": 42}} run_dir = buf.persist( reason="assert_failed", @@ -57,3 +69,24 @@ def time_fn() -> float: assert len(steps) == 1 assert snap_json["url"] == "https://example.com" assert diag_json["confidence"] == 0.9 + assert snap_json["elements"][0]["value"] is None + assert snap_json["elements"][0]["value_redacted"] is True + assert snap_json["elements"][1]["value"] is None + assert snap_json["elements"][1]["value_redacted"] is True + + +def test_redaction_callback_can_drop_frames(tmp_path) -> None: + opts = FailureArtifactsOptions(output_dir=str(tmp_path)) + + def redactor(ctx: RedactionContext) -> RedactionResult: + return RedactionResult(drop_frames=True) + + opts.on_before_persist = redactor + buf = FailureArtifactBuffer(run_id="run-3", options=opts) + buf.add_frame(b"frame") + + run_dir = buf.persist(reason="fail", status="failure", snapshot={"status": "success"}) + assert run_dir is not None + manifest = json.loads((run_dir / "manifest.json").read_text()) + assert manifest["frame_count"] == 0 + assert manifest["frames_dropped"] is True From 805754d881aa8f9f48b0ac1d3d8e9c908b493e5c Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 18 Jan 2026 10:20:00 -0800 Subject: [PATCH 6/7] fix tests --- tests/unit/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index de85e7c..35b1482 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -40,6 +40,7 @@ class _Dummy: # Minimal symbols imported by `sentience.browser` async_api_mod.BrowserContext = _Dummy +async_api_mod.Browser = _Dummy async_api_mod.Page = _Dummy async_api_mod.Playwright = _Dummy async_api_mod.PlaywrightContextManager = _Dummy @@ -52,6 +53,7 @@ async def _async_playwright(): async_api_mod.async_playwright = _async_playwright sync_api_mod.BrowserContext = _Dummy +sync_api_mod.Browser = _Dummy sync_api_mod.Page = _Dummy sync_api_mod.Playwright = _Dummy sync_api_mod.PlaywrightContextManager = _Dummy From 1a26e19ad38ab6243306fd60e88cca365ef250b4 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 18 Jan 2026 17:54:10 -0800 Subject: [PATCH 7/7] fix tests --- tests/unit/conftest.py | 84 +++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 35b1482..7047367 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -9,7 +9,8 @@ pure unit/contract tests without requiring Playwright. IMPORTANT: -- These stubs are only active during pytest runs (via conftest import order). +- These stubs are ONLY loaded when Playwright is NOT installed. +- When Playwright IS installed, real Playwright is used for all tests. - Integration/E2E tests that need real Playwright should install Playwright and will typically run in separate environments. """ @@ -20,52 +21,61 @@ import types -def _ensure_module(name: str) -> types.ModuleType: - if name in sys.modules: - return sys.modules[name] - mod = types.ModuleType(name) - sys.modules[name] = mod - return mod +def _ensure_playwright_stubs() -> None: + """ + Provide minimal `playwright.*` stubs so the SDK can be imported in environments + where Playwright isn't installed (e.g., constrained CI/sandbox). + This is only intended to support pure unit/contract tests that don't actually + launch browsers. + """ -# Create top-level playwright module and submodules -playwright_mod = _ensure_module("playwright") -async_api_mod = _ensure_module("playwright.async_api") -sync_api_mod = _ensure_module("playwright.sync_api") + def _ensure_module(name: str) -> types.ModuleType: + if name in sys.modules: + return sys.modules[name] + mod = types.ModuleType(name) + sys.modules[name] = mod + return mod + # Create top-level playwright module and submodules + playwright_mod = _ensure_module("playwright") + async_api_mod = _ensure_module("playwright.async_api") + sync_api_mod = _ensure_module("playwright.sync_api") -class _Dummy: - """Placeholder type used for Playwright classes in unit tests.""" + class _Dummy: + """Placeholder type used for Playwright classes in unit tests.""" + # Minimal symbols imported by `sentience.browser` + async_api_mod.BrowserContext = _Dummy + async_api_mod.Browser = _Dummy + async_api_mod.Page = _Dummy + async_api_mod.Playwright = _Dummy + async_api_mod.PlaywrightContextManager = _Dummy -# Minimal symbols imported by `sentience.browser` -async_api_mod.BrowserContext = _Dummy -async_api_mod.Browser = _Dummy -async_api_mod.Page = _Dummy -async_api_mod.Playwright = _Dummy -async_api_mod.PlaywrightContextManager = _Dummy + async def _async_playwright(): + raise RuntimeError("Playwright is not available in this unit-test environment.") + async_api_mod.async_playwright = _async_playwright -async def _async_playwright(): - raise RuntimeError("Playwright is not available in this unit-test environment.") + sync_api_mod.BrowserContext = _Dummy + sync_api_mod.Browser = _Dummy + sync_api_mod.Page = _Dummy + sync_api_mod.Playwright = _Dummy + sync_api_mod.PlaywrightContextManager = _Dummy + def _sync_playwright(): + raise RuntimeError("Playwright is not available in this unit-test environment.") -async_api_mod.async_playwright = _async_playwright + sync_api_mod.sync_playwright = _sync_playwright -sync_api_mod.BrowserContext = _Dummy -sync_api_mod.Browser = _Dummy -sync_api_mod.Page = _Dummy -sync_api_mod.Playwright = _Dummy -sync_api_mod.PlaywrightContextManager = _Dummy + # Expose submodules on the top-level module for completeness + playwright_mod.async_api = async_api_mod + playwright_mod.sync_api = sync_api_mod -def _sync_playwright(): - raise RuntimeError("Playwright is not available in this unit-test environment.") - - -sync_api_mod.sync_playwright = _sync_playwright - - -# Expose submodules on the top-level module for completeness -playwright_mod.async_api = async_api_mod -playwright_mod.sync_api = sync_api_mod +# Only load stubs if Playwright is NOT available +# This prevents overwriting real Playwright when it IS installed +try: + import playwright # noqa: F401 +except ImportError: + _ensure_playwright_stubs()