diff --git a/README.md b/README.md
index b8802a1..b6222eb 100644
--- a/README.md
+++ b/README.md
@@ -830,6 +830,40 @@ with browser:
+
+🔍 Agent Runtime Verification
+
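+`AgentRuntime` evaluates assertion predicates (such as `url_contains`, `exists`, and `all_of`) against the most recent snapshot and URL inside an agent loop, so browser state can be checked programmatically during execution and every result is emitted to the trace.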
+
+```python
+from sentience import (
+ AgentRuntime, SentienceBrowser,
+ url_contains, exists, all_of
+)
+from sentience.tracer_factory import create_tracer
+
+browser = SentienceBrowser()
+browser.start()
+tracer = create_tracer(run_id="my-run", upload_trace=False)
+runtime = AgentRuntime(browser, browser.page, tracer)
+
+# Navigate and take snapshot
+browser.page.goto("https://example.com")
+runtime.begin_step("Verify page")
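+# NOTE(review): AgentRuntime.snapshot() is declared `async def`; unless it is awaited
+# (from an async context) no snapshot is stored and element assertions report "no snapshot available".
+runtime.snapshot()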
+
+# Run assertions
+runtime.assert_(url_contains("example.com"), "on_correct_domain")
+runtime.assert_(exists("role=heading"), "has_heading")
+runtime.assert_done(exists("text~'Example'"), "task_complete")
+
+print(f"Task done: {runtime.is_task_done}")
+```
+
+**See example:** [`examples/agent_runtime_verification.py`](examples/agent_runtime_verification.py)
+
+
+
🧰 Snapshot Utilities
diff --git a/examples/agent_runtime_verification.py b/examples/agent_runtime_verification.py
new file mode 100644
index 0000000..dc93d26
--- /dev/null
+++ b/examples/agent_runtime_verification.py
@@ -0,0 +1,126 @@
+"""
+Example: Agent Runtime with Verification Loop
+
+Demonstrates how to use AgentRuntime for runtime verification in agent loops.
+The AgentRuntime provides assertion predicates to verify browser state during execution.
+
+Key features:
+- Predicate helpers: url_matches, url_contains, exists, not_exists, element_count
+- Combinators: all_of, any_of for complex conditions
+- Task completion: assert_done() for goal verification
+- Trace integration: Assertions emitted to trace for Studio timeline
+
+Requirements:
+- SENTIENCE_API_KEY (Pro or Enterprise tier)
+
+Usage:
+ python examples/agent_runtime_verification.py
+"""
+
+import os
+
+from sentience import (
+ AgentRuntime,
+ SentienceBrowser,
+ all_of,
+ exists,
+ not_exists,
+ url_contains,
+ url_matches,
+)
+from sentience.tracer_factory import create_tracer
+
+
+def main():
+    """
+    Run the AgentRuntime verification demo against https://example.com.
+
+    Reads SENTIENCE_API_KEY from the environment (returning early if it is
+    unset), creates a tracer and a browser, runs URL and element assertions
+    through AgentRuntime, prints a PASS/FAIL line per assertion plus a summary,
+    and closes the tracer and the browser on the way out.
+
+    Raises:
+        Exception: Any error raised while driving the browser or the runtime
+            is printed and then re-raised after cleanup.
+    """
+    # Get API key from environment
+    sentience_key = os.environ.get("SENTIENCE_API_KEY")
+
+    if not sentience_key:
+        print("Error: SENTIENCE_API_KEY not set")
+        return
+
+    print("Starting Agent Runtime Verification Demo\n")
+
+    # 1. Create tracer for verification event emission
+    run_id = "verification-demo"
+    tracer = create_tracer(api_key=sentience_key, run_id=run_id, upload_trace=False)
+    print(f"Run ID: {run_id}\n")
+
+    # 2. Create browser
+    browser = SentienceBrowser(api_key=sentience_key, headless=False)
+    browser.start()
+
+    try:
+        # 3. Create AgentRuntime with browser, page, and tracer
+        runtime = AgentRuntime(browser, browser.page, tracer)
+
+        # 4. Navigate to a page
+        print("Navigating to example.com...\n")
+        browser.page.goto("https://example.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        # 5. Begin a verification step
+        runtime.begin_step("Verify page loaded correctly")
+
+        # 6. Take a snapshot (required for element assertions)
+        # NOTE(review): AgentRuntime.snapshot() is declared `async def`, so this
+        # un-awaited call yields a coroutine object rather than a Snapshot and
+        # `snapshot.elements` below would fail. Confirm whether the example
+        # should be async or the runtime should offer a sync snapshot.
+        snapshot = runtime.snapshot()
+        print(f"Snapshot taken: {len(snapshot.elements)} elements found\n")
+
+        # 7. Run assertions against current state
+        print("Running assertions:\n")
+
+        # URL assertions
+        url_ok = runtime.assert_(url_contains("example.com"), "on_example_domain")
+        print(f" [{'PASS' if url_ok else 'FAIL'}] on_example_domain")
+
+        url_match = runtime.assert_(url_matches(r"https://.*example\.com"), "url_is_https")
+        print(f" [{'PASS' if url_match else 'FAIL'}] url_is_https")
+
+        # Element assertions
+        has_heading = runtime.assert_(exists("role=heading"), "has_heading")
+        print(f" [{'PASS' if has_heading else 'FAIL'}] has_heading")
+
+        no_error = runtime.assert_(not_exists("text~'Error'"), "no_error_message")
+        print(f" [{'PASS' if no_error else 'FAIL'}] no_error_message")
+
+        # Combined assertion with all_of
+        page_ready = runtime.assert_(
+            all_of(url_contains("example"), exists("role=link")),
+            "page_fully_ready",
+        )
+        print(f" [{'PASS' if page_ready else 'FAIL'}] page_fully_ready")
+
+        # 8. Check if task is done (required assertion)
+        task_complete = runtime.assert_done(
+            exists("text~'Example Domain'"),
+            "reached_example_page",
+        )
+        print(f"\n [{'DONE' if task_complete else 'NOT DONE'}] reached_example_page")
+
+        # 9. Get accumulated assertions for step_end event
+        assertions_data = runtime.get_assertions_for_step_end()
+        print(f"\nTotal assertions: {len(assertions_data['assertions'])}")
+        print(f"Task done: {assertions_data.get('task_done', False)}")
+
+        # 10. Check overall status
+        print("\nVerification Summary:")
+        print(f" All passed: {runtime.all_assertions_passed()}")
+        print(f" Required passed: {runtime.required_assertions_passed()}")
+        print(f" Task complete: {runtime.is_task_done}")
+
+    except Exception as e:
+        print(f"\nError during execution: {e}")
+        raise
+
+    finally:
+        # Close the tracer first, then the browser. The nested try/finally
+        # guarantees the browser is closed even if closing the tracer raises.
+        try:
+            print("\nClosing tracer...")
+            tracer.close(blocking=True)
+            print(f"Trace saved to: ~/.sentience/traces/{run_id}.jsonl")
+        finally:
+            browser.close()
+            print("Done!")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/sentience/__init__.py b/sentience/__init__.py
index d587e8c..47e2745 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -5,6 +5,7 @@
from .actions import click, click_rect, press, scroll_to, type_text
from .agent import SentienceAgent, SentienceAgentAsync
from .agent_config import AgentConfig
+from .agent_runtime import AgentRuntime
# Agent Layer (Phase 1 & 2)
from .base_agent import BaseAgent
@@ -70,6 +71,21 @@
# Formatting (v0.12.0+)
from .utils.formatting import format_snapshot_for_llm
+
+# Verification (agent assertion loop)
+from .verification import (
+ AssertContext,
+ AssertOutcome,
+ Predicate,
+ all_of,
+ any_of,
+ custom,
+ element_count,
+ exists,
+ not_exists,
+ url_contains,
+ url_matches,
+)
from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
from .wait import wait_for
@@ -160,4 +176,17 @@
# Enums
"SentienceMethod",
"AgentAction",
+ # Verification (agent assertion loop)
+ "AgentRuntime",
+ "AssertContext",
+ "AssertOutcome",
+ "Predicate",
+ "url_matches",
+ "url_contains",
+ "exists",
+ "not_exists",
+ "element_count",
+ "all_of",
+ "any_of",
+ "custom",
]
diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
new file mode 100644
index 0000000..83f37d6
--- /dev/null
+++ b/sentience/agent_runtime.py
@@ -0,0 +1,316 @@
+"""
+Agent runtime for verification loop support.
+
+This module provides a thin runtime wrapper that combines:
+1. Browser session management
+2. Snapshot/query helpers
+3. Tracer for event emission
+4. Assertion/verification methods
+
+The AgentRuntime is designed to be used in agent verification loops where
+you need to repeatedly take snapshots, execute actions, and verify results.
+
+Example usage:
+ from sentience import AsyncSentienceBrowser
+ from sentience.agent_runtime import AgentRuntime
+ from sentience.verification import url_matches, exists
+ from sentience.tracing import Tracer, JsonlTraceSink
+
+ async with AsyncSentienceBrowser() as browser:
+ page = await browser.new_page()
+ await page.goto("https://example.com")
+
+ sink = JsonlTraceSink("trace.jsonl")
+ tracer = Tracer(run_id="test-run", sink=sink)
+
+ runtime = AgentRuntime(browser=browser, page=page, tracer=tracer)
+
+ # Take snapshot and run assertions
+ await runtime.snapshot()
+ runtime.assert_(url_matches(r"example\\.com"), label="on_homepage")
+ runtime.assert_(exists("role=button"), label="has_buttons")
+
+ # Check if task is done
+ if runtime.assert_done(exists("text~'Success'"), label="task_complete"):
+ print("Task completed!")
+"""
+
+from __future__ import annotations
+
+import uuid
+from typing import TYPE_CHECKING, Any
+
+from .verification import AssertContext, AssertOutcome, Predicate
+
+if TYPE_CHECKING:
+ from playwright.async_api import Page
+
+ from .browser import AsyncSentienceBrowser
+ from .models import Snapshot
+ from .tracing import Tracer
+
+
+class AgentRuntime:
+ """
+ Runtime wrapper for agent verification loops.
+
+ Provides ergonomic methods for:
+ - snapshot(): Take page snapshot (coroutine; must be awaited)
+ - assert_(): Evaluate assertion predicates
+ - assert_done(): Assert task completion (required assertion)
+
+ The runtime manages assertion state per step and emits verification events
+ to the tracer for Studio timeline display. Task-done state is kept across
+ steps until reset_task_done() is called.
+
+ Attributes:
+ browser: AsyncSentienceBrowser instance
+ page: Playwright Page instance
+ tracer: Tracer for event emission
+ step_id: Current step identifier (None until begin_step() is called)
+ step_index: Step counter; starts at 0 and is incremented (or set
+ explicitly) by begin_step(), so the first auto-numbered step is 1
+ last_snapshot: Most recent snapshot (for assertion context)
+ """
+
+ def __init__(
+ self,
+ browser: AsyncSentienceBrowser,
+ page: Page,
+ tracer: Tracer,
+ ):
+ """
+ Initialize agent runtime.
+
+ Args:
+ browser: AsyncSentienceBrowser instance for taking snapshots
+ page: Playwright Page for browser interaction
+ tracer: Tracer for emitting verification events
+ """
+ self.browser = browser
+ self.page = page
+ self.tracer = tracer
+
+ # Step tracking
+ self.step_id: str | None = None
+ self.step_index: int = 0
+
+ # Snapshot state
+ self.last_snapshot: Snapshot | None = None
+
+ # Assertions accumulated during current step
+ self._assertions_this_step: list[dict[str, Any]] = []
+
+ # Task completion tracking
+ self._task_done: bool = False
+ self._task_done_label: str | None = None
+
+ def _ctx(self) -> AssertContext:
+ """
+ Build assertion context from current state.
+
+ The URL recorded in the last snapshot takes precedence; the live
+ page URL is only used when no snapshot has been taken yet.
+
+ Returns:
+ AssertContext with current snapshot and URL
+ """
+ url = None
+ if self.last_snapshot is not None:
+ url = self.last_snapshot.url
+ elif self.page:
+ url = self.page.url
+
+ return AssertContext(
+ snapshot=self.last_snapshot,
+ url=url,
+ step_id=self.step_id,
+ )
+
+ async def snapshot(self, **kwargs) -> Snapshot:
+ """
+ Take a snapshot of the current page state.
+
+ This is a coroutine: it must be awaited, otherwise last_snapshot is
+ never updated. The new snapshot becomes the context for assertions.
+
+ Args:
+ **kwargs: Passed through to browser.snapshot()
+
+ Returns:
+ Snapshot of current page state
+ """
+ self.last_snapshot = await self.browser.snapshot(self.page, **kwargs)
+ return self.last_snapshot
+
+ def begin_step(self, goal: str, step_index: int | None = None) -> str:
+ """
+ Begin a new step in the verification loop.
+
+ This:
+ - Generates a new step_id
+ - Clears assertions from previous step
+ - Increments step_index (or uses provided value)
+
+ It does not reset task-done state or last_snapshot.
+
+ Args:
+ goal: Description of what this step aims to achieve
+ (accepted but not currently stored or emitted)
+ step_index: Optional explicit step index (otherwise auto-increments)
+
+ Returns:
+ Generated step_id
+ """
+ # Clear previous step state
+ self._assertions_this_step = []
+
+ # Generate new step_id
+ self.step_id = str(uuid.uuid4())
+
+ # Update step index
+ if step_index is not None:
+ self.step_index = step_index
+ else:
+ self.step_index += 1
+
+ return self.step_id
+
+ def assert_(
+ self,
+ predicate: Predicate,
+ label: str,
+ required: bool = False,
+ ) -> bool:
+ """
+ Evaluate an assertion against current snapshot state.
+
+ The assertion result is:
+ 1. Accumulated for inclusion in step_end.data.verify.signals.assertions
+ 2. Emitted as a dedicated 'verification' event for Studio timeline
+
+ Exceptions raised by the predicate are not caught here.
+
+ Args:
+ predicate: Predicate function to evaluate
+ label: Human-readable label for this assertion
+ required: If True, this assertion gates step success (default: False)
+
+ Returns:
+ True if assertion passed, False otherwise
+ """
+ outcome = predicate(self._ctx())
+
+ record = {
+ "label": label,
+ "passed": outcome.passed,
+ "required": required,
+ "reason": outcome.reason,
+ "details": outcome.details,
+ }
+ self._assertions_this_step.append(record)
+
+ # Emit dedicated verification event (Option B from design doc)
+ # This makes assertions visible in Studio timeline
+ # The explicit "passed" key is re-supplied by **record with the same value.
+ self.tracer.emit(
+ "verification",
+ data={
+ "kind": "assert",
+ "passed": outcome.passed,
+ **record,
+ },
+ step_id=self.step_id,
+ )
+
+ return outcome.passed
+
+ def assert_done(
+ self,
+ predicate: Predicate,
+ label: str,
+ ) -> bool:
+ """
+ Assert task completion (required assertion).
+
+ This is a convenience wrapper for assert_() with required=True.
+ When the assertion passes, it marks the task as done and emits a
+ second 'verification' event with kind "task_done".
+
+ Use this for final verification that the agent's goal is complete.
+
+ Args:
+ predicate: Predicate function to evaluate
+ label: Human-readable label for this assertion
+
+ Returns:
+ True if task is complete (assertion passed), False otherwise
+ """
+ ok = self.assert_(predicate, label=label, required=True)
+
+ if ok:
+ self._task_done = True
+ self._task_done_label = label
+
+ # Emit task_done verification event
+ self.tracer.emit(
+ "verification",
+ data={
+ "kind": "task_done",
+ "passed": True,
+ "label": label,
+ },
+ step_id=self.step_id,
+ )
+
+ return ok
+
+ def get_assertions_for_step_end(self) -> dict[str, Any]:
+ """
+ Get assertions data for inclusion in step_end.data.verify.signals.
+
+ This is called when building the step_end event to include
+ assertion results in the trace. It does not clear the accumulated
+ assertions (see flush_assertions() for that).
+
+ Returns:
+ Dictionary with an 'assertions' list (shallow copy); the
+ 'task_done' and 'task_done_label' keys are present only once
+ the task has been marked done
+ """
+ result: dict[str, Any] = {
+ "assertions": self._assertions_this_step.copy(),
+ }
+
+ if self._task_done:
+ result["task_done"] = True
+ result["task_done_label"] = self._task_done_label
+
+ return result
+
+ def flush_assertions(self) -> list[dict[str, Any]]:
+ """
+ Get and clear assertions for current step.
+
+ Call this at step end to get accumulated assertions
+ for the step_end event, then clear for next step.
+
+ Returns:
+ List of assertion records from this step
+ """
+ assertions = self._assertions_this_step.copy()
+ self._assertions_this_step = []
+ return assertions
+
+ @property
+ def is_task_done(self) -> bool:
+ """Check if task has been marked as done via assert_done()."""
+ return self._task_done
+
+ def reset_task_done(self) -> None:
+ """Reset task_done state (for multi-task runs)."""
+ self._task_done = False
+ self._task_done_label = None
+
+ def all_assertions_passed(self) -> bool:
+ """
+ Check if all assertions in current step passed.
+
+ Returns:
+ True if all assertions passed (or no assertions made)
+ """
+ return all(a["passed"] for a in self._assertions_this_step)
+
+ def required_assertions_passed(self) -> bool:
+ """
+ Check if all required assertions in current step passed.
+
+ Returns:
+ True if all required assertions passed (or no required assertions)
+ """
+ required = [a for a in self._assertions_this_step if a.get("required")]
+ return all(a["passed"] for a in required)
diff --git a/sentience/schemas/trace_v1.json b/sentience/schemas/trace_v1.json
index b844d04..37c28cb 100644
--- a/sentience/schemas/trace_v1.json
+++ b/sentience/schemas/trace_v1.json
@@ -248,7 +248,24 @@
}
}
}
- }
+ },
+ "assertions": {
+ "type": "array",
+ "description": "Assertion results from agent verification loop",
+ "items": {
+ "type": "object",
+ "required": ["label", "passed"],
+ "properties": {
+ "label": {"type": "string", "description": "Human-readable assertion label"},
+ "passed": {"type": "boolean", "description": "Whether the assertion passed"},
+ "required": {"type": "boolean", "description": "If true, assertion gates step success"},
+ "reason": {"type": "string", "description": "Explanation (especially when failed)"},
+ "details": {"type": "object", "description": "Additional structured data for debugging"}
+ }
+ }
+ },
+ "task_done": {"type": "boolean", "description": "True if task completion assertion passed"},
+ "task_done_label": {"type": "string", "description": "Label of the task completion assertion"}
}
}
}
@@ -270,6 +287,15 @@
"properties": {
"step_id": {"type": "string"},
"passed": {"type": "boolean"},
+ "kind": {
+ "type": "string",
+ "enum": ["assert", "task_done"],
+ "description": "Type of verification event"
+ },
+ "label": {"type": "string", "description": "Human-readable label for the assertion"},
+ "required": {"type": "boolean", "description": "If true, assertion gates step success"},
+ "reason": {"type": "string", "description": "Explanation (especially when failed)"},
+ "details": {"type": "object", "description": "Additional structured data for debugging"},
"signals": {"type": "object"}
}
},
diff --git a/sentience/trace_event_builder.py b/sentience/trace_event_builder.py
index d2e5f9f..272c7e0 100644
--- a/sentience/trace_event_builder.py
+++ b/sentience/trace_event_builder.py
@@ -84,6 +84,7 @@ def build_step_end_event(
exec_data: dict[str, Any],
verify_data: dict[str, Any],
pre_elements: list[dict[str, Any]] | None = None,
+ assertions: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
"""
Build step_end trace event data.
@@ -100,6 +101,7 @@ def build_step_end_event(
exec_data: Action execution data
verify_data: Verification data
pre_elements: Optional list of elements from pre-snapshot (with diff_status)
+ assertions: Optional list of assertion results from AgentRuntime
Returns:
Dictionary with step_end event data
@@ -113,6 +115,23 @@ def build_step_end_event(
if pre_elements is not None:
pre_data["elements"] = pre_elements
+ # Build verify data with assertions if provided
+ final_verify_data = verify_data.copy()
+ if assertions:
+ # Ensure signals dict exists
+ if "signals" not in final_verify_data:
+ final_verify_data["signals"] = {}
+
+ # Add assertions to signals
+ final_verify_data["signals"]["assertions"] = assertions
+
+ # Check for task completion (assertions marked as required that passed)
+ for a in assertions:
+ if a.get("passed") and a.get("required"):
+ final_verify_data["signals"]["task_done"] = True
+ final_verify_data["signals"]["task_done_label"] = a.get("label")
+ break
+
return {
"v": 1,
"step_id": step_id,
@@ -125,5 +144,5 @@ def build_step_end_event(
"post": {
"url": post_url,
},
- "verify": verify_data,
+ "verify": final_verify_data,
}
diff --git a/sentience/verification.py b/sentience/verification.py
new file mode 100644
index 0000000..216f25e
--- /dev/null
+++ b/sentience/verification.py
@@ -0,0 +1,376 @@
+"""
+Verification primitives for agent assertion loops.
+
+This module provides assertion predicates and outcome types for runtime verification
+in agent loops. Assertions evaluate against the current browser state (snapshot/url)
+and record results into the trace.
+
+Key concepts:
+- AssertOutcome: Result of evaluating an assertion
+- AssertContext: Context provided to assertion predicates (snapshot, url, step_id)
+- Predicate: Callable that takes context and returns outcome
+
+Example usage:
+ from sentience.verification import url_matches, exists, AssertContext
+
+ # Create predicates
+ on_search_page = url_matches(r"/s\\?k=")
+ results_loaded = exists("text~'Results'")
+
+ # Evaluate against context
+ ctx = AssertContext(snapshot=snapshot, url="https://example.com/s?k=shoes")
+ outcome = on_search_page(ctx)
+ print(outcome.passed) # True
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, Callable
+
+if TYPE_CHECKING:
+ from .models import Snapshot
+
+
+@dataclass
+class AssertOutcome:
+ """
+ Result of evaluating an assertion predicate.
+
+ Attributes:
+ passed: Whether the assertion passed
+ reason: Human-readable explanation (especially useful when failed)
+ details: Additional structured data for debugging/display
+ """
+
+ # Only `passed` is required; `reason` and `details` default to empty values.
+ passed: bool
+ reason: str = ""
+ # default_factory gives every outcome its own dict rather than a shared default.
+ details: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class AssertContext:
+ """
+ Context provided to assertion predicates.
+
+ Provides access to current browser state without requiring
+ the predicate to know about browser internals.
+
+ Attributes:
+ snapshot: Current page snapshot (may be None if not taken)
+ url: Current page URL
+ step_id: Current step identifier (for trace correlation)
+ """
+
+ # Every field defaults to None, so a context can be built from only the
+ # pieces a given predicate reads (e.g. just `url` for URL predicates).
+ snapshot: Snapshot | None = None
+ url: str | None = None
+ step_id: str | None = None
+
+
+# Type alias for assertion predicates
+Predicate = Callable[[AssertContext], AssertOutcome]
+
+
+def url_matches(pattern: str) -> Predicate:
+    """
+    Build a predicate that passes when the context URL matches a regex.
+
+    The pattern is compiled once, when the predicate is created, and applied
+    with ``search`` semantics, so it may match anywhere in the URL. A missing
+    URL is treated as the empty string.
+
+    Args:
+        pattern: Regular expression to look for in the URL
+
+    Returns:
+        Predicate whose outcome details carry the pattern and the URL
+        (truncated to its first 200 characters)
+
+    Example:
+        >>> pred = url_matches(r"/search\\?q=")
+        >>> ctx = AssertContext(url="https://example.com/search?q=shoes")
+        >>> pred(ctx).passed
+        True
+    """
+    compiled = re.compile(pattern)
+
+    def _pred(ctx: AssertContext) -> AssertOutcome:
+        current_url = ctx.url if ctx.url else ""
+        if compiled.search(current_url) is None:
+            matched = False
+            why = f"url did not match pattern: {pattern}"
+        else:
+            matched = True
+            why = ""
+        return AssertOutcome(
+            passed=matched,
+            reason=why,
+            details={"pattern": pattern, "url": current_url[:200]},
+        )
+
+    return _pred
+
+
+def url_contains(substring: str) -> Predicate:
+    """
+    Build a predicate that passes when the context URL contains a substring.
+
+    A missing URL is treated as the empty string.
+
+    Args:
+        substring: Text that must appear somewhere in the URL
+
+    Returns:
+        Predicate whose outcome details carry the substring and the URL
+        (truncated to its first 200 characters)
+
+    Example:
+        >>> pred = url_contains("/cart")
+        >>> ctx = AssertContext(url="https://example.com/cart/checkout")
+        >>> pred(ctx).passed
+        True
+    """
+
+    def _pred(ctx: AssertContext) -> AssertOutcome:
+        current_url = ctx.url if ctx.url else ""
+        if substring in current_url:
+            found = True
+            why = ""
+        else:
+            found = False
+            why = f"url does not contain: {substring}"
+        return AssertOutcome(
+            passed=found,
+            reason=why,
+            details={"substring": substring, "url": current_url[:200]},
+        )
+
+    return _pred
+
+
+def exists(selector: str) -> Predicate:
+    """
+    Build a predicate that passes when at least one element matches.
+
+    Matching is delegated to the SDK query engine and runs against the
+    snapshot in the context; without a snapshot the predicate fails.
+
+    Args:
+        selector: Semantic selector string (e.g., "role=button text~'Sign in'")
+
+    Returns:
+        Predicate whose outcome details carry the selector and match count
+
+    Example:
+        >>> pred = exists("text~'Results'")
+        >>> # Passes if the snapshot has an element with "Results" in its text
+    """
+
+    def _pred(ctx: AssertContext) -> AssertOutcome:
+        if ctx.snapshot is None:
+            return AssertOutcome(
+                passed=False,
+                reason="no snapshot available",
+                details={"selector": selector},
+            )
+
+        # Import here to avoid circular imports
+        from .query import query
+
+        hit_count = len(query(ctx.snapshot, selector))
+        found = hit_count > 0
+        why = "" if found else f"no elements matched selector: {selector}"
+        return AssertOutcome(
+            passed=found,
+            reason=why,
+            details={"selector": selector, "matched": hit_count},
+        )
+
+    return _pred
+
+
+def not_exists(selector: str) -> Predicate:
+    """
+    Build a predicate that passes when no element matches the selector.
+
+    Handy for asserting that error messages, loading spinners, etc. are gone.
+    Without a snapshot in the context the predicate fails.
+
+    Args:
+        selector: Semantic selector string
+
+    Returns:
+        Predicate whose outcome details carry the selector and match count
+
+    Example:
+        >>> pred = not_exists("text~'Loading'")
+        >>> # Passes if no element contains "Loading" text
+    """
+
+    def _pred(ctx: AssertContext) -> AssertOutcome:
+        if ctx.snapshot is None:
+            return AssertOutcome(
+                passed=False,
+                reason="no snapshot available",
+                details={"selector": selector},
+            )
+
+        # Imported lazily, mirroring the other selector predicates
+        from .query import query
+
+        hit_count = len(query(ctx.snapshot, selector))
+        absent = hit_count == 0
+        why = "" if absent else f"found {hit_count} elements matching: {selector}"
+        return AssertOutcome(
+            passed=absent,
+            reason=why,
+            details={"selector": selector, "matched": hit_count},
+        )
+
+    return _pred
+
+
+def element_count(selector: str, *, min_count: int = 0, max_count: int | None = None) -> Predicate:
+    """
+    Build a predicate that checks how many elements match a selector.
+
+    Args:
+        selector: Semantic selector string
+        min_count: Smallest acceptable number of matches (inclusive)
+        max_count: Largest acceptable number of matches (inclusive);
+            None means there is no upper bound
+
+    Returns:
+        Predicate whose outcome details carry the selector, the match count
+        and both bounds
+
+    Example:
+        >>> pred = element_count("role=button", min_count=1, max_count=5)
+        >>> # Passes if 1-5 buttons are found
+    """
+
+    def _pred(ctx: AssertContext) -> AssertOutcome:
+        if ctx.snapshot is None:
+            return AssertOutcome(
+                passed=False,
+                reason="no snapshot available",
+                details={"selector": selector, "min_count": min_count, "max_count": max_count},
+            )
+
+        from .query import query
+
+        count = len(query(ctx.snapshot, selector))
+        within_bounds = count >= min_count and (max_count is None or count <= max_count)
+
+        if within_bounds:
+            why = ""
+        elif max_count is None:
+            why = f"expected at least {min_count} elements, found {count}"
+        else:
+            why = f"expected {min_count}-{max_count} elements, found {count}"
+
+        return AssertOutcome(
+            passed=within_bounds,
+            reason=why,
+            details={
+                "selector": selector,
+                "matched": count,
+                "min_count": min_count,
+                "max_count": max_count,
+            },
+        )
+
+    return _pred
+
+
+def all_of(*predicates: Predicate) -> Predicate:
+    """
+    Combine predicates with AND logic.
+
+    Every sub-predicate is evaluated (there is no short-circuit), so the
+    outcome lists the details of all of them and the reasons of each failure.
+
+    Args:
+        *predicates: Predicate functions that must all pass
+
+    Returns:
+        Combined predicate
+
+    Example:
+        >>> pred = all_of(url_contains("/cart"), exists("text~'Checkout'"))
+        >>> # Passes only if both conditions are true
+    """
+
+    def _pred(ctx: AssertContext) -> AssertOutcome:
+        outcomes = [p(ctx) for p in predicates]
+        failures = [o.reason for o in outcomes if not o.passed]
+        return AssertOutcome(
+            passed=not failures,
+            reason="; ".join(failures),
+            details={
+                "sub_predicates": [o.details for o in outcomes],
+                "failed_count": len(failures),
+            },
+        )
+
+    return _pred
+
+
+def any_of(*predicates: Predicate) -> Predicate:
+    """
+    Combine predicates with OR logic.
+
+    Sub-predicates are evaluated in order and evaluation stops at the first
+    one that passes; its position is reported as ``matched_at_index``.
+
+    Args:
+        *predicates: Predicate functions of which at least one must pass
+
+    Returns:
+        Combined predicate
+
+    Example:
+        >>> pred = any_of(exists("text~'Success'"), exists("text~'Complete'"))
+        >>> # Passes if either condition is true
+    """
+
+    def _pred(ctx: AssertContext) -> AssertOutcome:
+        seen_details = []
+        failure_reasons = []
+
+        for index, p in enumerate(predicates):
+            result = p(ctx)
+            seen_details.append(result.details)
+            if not result.passed:
+                failure_reasons.append(result.reason)
+                continue
+            return AssertOutcome(
+                passed=True,
+                reason="",
+                details={"sub_predicates": seen_details, "matched_at_index": index},
+            )
+
+        joined = "; ".join(failure_reasons)
+        return AssertOutcome(
+            passed=False,
+            reason=f"none of {len(predicates)} predicates passed: " + joined,
+            details={"sub_predicates": seen_details},
+        )
+
+    return _pred
+
+
+def custom(check_fn: Callable[[AssertContext], bool], label: str = "custom") -> Predicate:
+    """
+    Create a predicate from a custom function.
+
+    The value returned by ``check_fn`` is coerced with ``bool()``, so truthy or
+    falsy results (for example the ``None`` produced by ``ctx.snapshot and ...``
+    when no snapshot exists) still yield a real boolean ``passed`` value.
+    Exceptions raised by ``check_fn`` are converted into a failed outcome.
+
+    Args:
+        check_fn: Function that takes AssertContext and returns a truthy/falsy value
+        label: Label for debugging/display
+
+    Returns:
+        Predicate wrapping the custom function
+
+    Example:
+        >>> pred = custom(lambda ctx: ctx.snapshot and len(ctx.snapshot.elements) > 10, "has_many_elements")
+    """
+
+    def _pred(ctx: AssertContext) -> AssertOutcome:
+        try:
+            # Coerce so that `passed` is always a bool, as AssertOutcome declares.
+            ok = bool(check_fn(ctx))
+        except Exception as e:
+            # Deliberate catch-all: a broken user check is reported as a failure.
+            return AssertOutcome(
+                passed=False,
+                reason=f"custom check '{label}' raised exception: {e}",
+                details={"label": label, "error": str(e)},
+            )
+
+        return AssertOutcome(
+            passed=ok,
+            reason="" if ok else f"custom check '{label}' returned False",
+            details={"label": label},
+        )
+
+    return _pred
diff --git a/tests/test_verification.py b/tests/test_verification.py
new file mode 100644
index 0000000..1f01511
--- /dev/null
+++ b/tests/test_verification.py
@@ -0,0 +1,295 @@
+"""
+Tests for verification module - assertion predicates for agent loops.
+"""
+
+import pytest
+
+from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues
+from sentience.verification import (
+ AssertContext,
+ AssertOutcome,
+ all_of,
+ any_of,
+ custom,
+ element_count,
+ exists,
+ not_exists,
+ url_contains,
+ url_matches,
+)
+
+
+def make_element(
+ id: int,
+ role: str = "button",
+ text: str | None = None,
+ importance: int = 100,
+) -> Element:
+ """Helper to create test elements."""
+ return Element(
+ id=id,
+ role=role,
+ text=text,
+ importance=importance,
+ bbox=BBox(x=0, y=0, width=100, height=50),
+ visual_cues=VisualCues(is_primary=False, is_clickable=True, background_color_name=None),
+ )
+
+
+def make_snapshot(elements: list[Element], url: str = "https://example.com") -> Snapshot:
+ """Helper to create test snapshots."""
+ return Snapshot(
+ status="success",
+ url=url,
+ elements=elements,
+ viewport=Viewport(width=1920, height=1080),
+ )
+
+
+class TestUrlMatches:
+ """Tests for url_matches predicate."""
+
+ def test_matches_pattern(self):
+ pred = url_matches(r"/search\?q=")
+ ctx = AssertContext(url="https://example.com/search?q=shoes")
+ outcome = pred(ctx)
+ assert outcome.passed is True
+ assert outcome.reason == ""
+
+ def test_no_match(self):
+ pred = url_matches(r"/cart")
+ ctx = AssertContext(url="https://example.com/search?q=shoes")
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert "did not match" in outcome.reason
+
+ def test_none_url(self):
+ pred = url_matches(r"/search")
+ ctx = AssertContext(url=None)
+ outcome = pred(ctx)
+ assert outcome.passed is False
+
+ def test_details_include_pattern_and_url(self):
+ pred = url_matches(r"/test")
+ ctx = AssertContext(url="https://example.com/test")
+ outcome = pred(ctx)
+ assert outcome.details["pattern"] == r"/test"
+ assert "example.com" in outcome.details["url"]
+
+
+class TestUrlContains:
+ """Tests for url_contains predicate."""
+
+ def test_contains_substring(self):
+ pred = url_contains("/cart")
+ ctx = AssertContext(url="https://example.com/cart/checkout")
+ outcome = pred(ctx)
+ assert outcome.passed is True
+
+ def test_no_substring(self):
+ pred = url_contains("/orders")
+ ctx = AssertContext(url="https://example.com/cart")
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert "does not contain" in outcome.reason
+
+ def test_none_url(self):
+ pred = url_contains("/test")
+ ctx = AssertContext(url=None)
+ outcome = pred(ctx)
+ assert outcome.passed is False
+
+
+class TestExists:
+ """Tests for exists predicate."""
+
+ def test_element_exists(self):
+ elements = [make_element(1, role="button", text="Click me")]
+ snap = make_snapshot(elements)
+ pred = exists("role=button")
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is True
+ assert outcome.details["matched"] == 1
+
+ def test_element_not_found(self):
+ elements = [make_element(1, role="button", text="Click me")]
+ snap = make_snapshot(elements)
+ pred = exists("role=link")
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert "no elements matched" in outcome.reason
+
+ def test_text_selector(self):
+ elements = [make_element(1, role="button", text="Submit Form")]
+ snap = make_snapshot(elements)
+ pred = exists("text~'Submit'")
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is True
+
+ def test_no_snapshot(self):
+ pred = exists("role=button")
+ ctx = AssertContext(snapshot=None)
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert "no snapshot available" in outcome.reason
+
+
+class TestNotExists:
+ """Tests for not_exists predicate."""
+
+ def test_element_absent(self):
+ elements = [make_element(1, role="button")]
+ snap = make_snapshot(elements)
+ pred = not_exists("text~'Loading'")
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is True
+
+ def test_element_present(self):
+ elements = [make_element(1, role="button", text="Loading...")]
+ snap = make_snapshot(elements)
+ pred = not_exists("text~'Loading'")
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert "found 1 elements" in outcome.reason
+
+
+class TestElementCount:
+ """Tests for element_count predicate."""
+
+ def test_min_count_satisfied(self):
+ elements = [make_element(i, role="button") for i in range(3)]
+ snap = make_snapshot(elements)
+ pred = element_count("role=button", min_count=2)
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is True
+
+ def test_min_count_not_satisfied(self):
+ elements = [make_element(1, role="button")]
+ snap = make_snapshot(elements)
+ pred = element_count("role=button", min_count=5)
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert "expected at least 5" in outcome.reason
+
+ def test_max_count_satisfied(self):
+ elements = [make_element(i, role="button") for i in range(3)]
+ snap = make_snapshot(elements)
+ pred = element_count("role=button", min_count=1, max_count=5)
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is True
+
+ def test_max_count_exceeded(self):
+ elements = [make_element(i, role="button") for i in range(10)]
+ snap = make_snapshot(elements)
+ pred = element_count("role=button", min_count=1, max_count=5)
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert "expected 1-5" in outcome.reason
+
+
+class TestAllOf:
+ """Tests for all_of combinator."""
+
+ def test_all_pass(self):
+ elements = [make_element(1, role="button", text="Checkout")]
+ snap = make_snapshot(elements, url="https://example.com/cart")
+ pred = all_of(url_contains("/cart"), exists("role=button"))
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is True
+ assert outcome.details["failed_count"] == 0
+
+ def test_one_fails(self):
+ elements = [make_element(1, role="button")]
+ snap = make_snapshot(elements, url="https://example.com/home")
+ pred = all_of(url_contains("/cart"), exists("role=button"))
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert outcome.details["failed_count"] == 1
+
+ def test_all_fail(self):
+ elements = [make_element(1, role="link")]
+ snap = make_snapshot(elements, url="https://example.com/home")
+ pred = all_of(url_contains("/cart"), exists("role=button"))
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert outcome.details["failed_count"] == 2
+
+
+class TestAnyOf:
+ """Tests for any_of combinator."""
+
+ def test_first_passes(self):
+ elements = [make_element(1, role="button", text="Success")]
+ snap = make_snapshot(elements)
+ pred = any_of(exists("text~'Success'"), exists("text~'Complete'"))
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is True
+
+ def test_second_passes(self):
+ elements = [make_element(1, role="button", text="Complete")]
+ snap = make_snapshot(elements)
+ pred = any_of(exists("text~'Success'"), exists("text~'Complete'"))
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is True
+
+ def test_none_pass(self):
+ elements = [make_element(1, role="button", text="Error")]
+ snap = make_snapshot(elements)
+ pred = any_of(exists("text~'Success'"), exists("text~'Complete'"))
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert "none of 2 predicates passed" in outcome.reason
+
+
+class TestCustom:
+ """Tests for custom predicate."""
+
+ def test_custom_returns_true(self):
+ pred = custom(lambda ctx: ctx.url is not None, "has_url")
+ ctx = AssertContext(url="https://example.com")
+ outcome = pred(ctx)
+ assert outcome.passed is True
+
+ def test_custom_returns_false(self):
+ pred = custom(lambda ctx: ctx.url is None, "no_url")
+ ctx = AssertContext(url="https://example.com")
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert "returned False" in outcome.reason
+
+ def test_custom_with_snapshot(self):
+ elements = [make_element(i, role="button") for i in range(15)]
+ snap = make_snapshot(elements)
+ pred = custom(
+ lambda ctx: ctx.snapshot is not None and len(ctx.snapshot.elements) > 10,
+ "has_many_elements",
+ )
+ ctx = AssertContext(snapshot=snap, url=snap.url)
+ outcome = pred(ctx)
+ assert outcome.passed is True
+
+ def test_custom_exception(self):
+ def bad_check(ctx):
+ raise ValueError("Something went wrong")
+
+ pred = custom(bad_check, "bad_check")
+ ctx = AssertContext()
+ outcome = pred(ctx)
+ assert outcome.passed is False
+ assert "raised exception" in outcome.reason
+ assert "Something went wrong" in outcome.reason