diff --git a/README.md b/README.md
index b8802a1..b6222eb 100644
--- a/README.md
+++ b/README.md
@@ -830,6 +830,40 @@ with browser:
+
+<details>
+<summary>🔍 Agent Runtime Verification</summary>
+
+`AgentRuntime` provides assertion predicates for runtime verification in agent loops, enabling programmatic verification of browser state during execution.
+
+```python
+from sentience import (
+    AgentRuntime, SentienceBrowser,
+    url_contains, exists, all_of
+)
+from sentience.tracer_factory import create_tracer
+
+browser = SentienceBrowser()
+browser.start()
+tracer = create_tracer(run_id="my-run", upload_trace=False)
+runtime = AgentRuntime(browser, browser.page, tracer)
+
+# Navigate and take snapshot
+browser.page.goto("https://example.com")
+runtime.begin_step("Verify page")
+runtime.snapshot()
+
+# Run assertions
+runtime.assert_(url_contains("example.com"), "on_correct_domain")
+runtime.assert_(exists("role=heading"), "has_heading")
+runtime.assert_done(exists("text~'Example'"), "task_complete")
+
+print(f"Task done: {runtime.is_task_done}")
+```
+
+**See example:** [`examples/agent_runtime_verification.py`](examples/agent_runtime_verification.py)
+
+</details>
+
 
 <details>
 <summary>🧰 Snapshot Utilities</summary>

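For illustration only (not part of this patch): a minimal sketch of how the assertion helpers above could gate a simple retry loop, following the synchronous usage shown in the README snippet. The three-attempt budget and the `page.reload()` recovery action are assumptions made for the example.

```python
from sentience import AgentRuntime, SentienceBrowser, exists, url_contains
from sentience.tracer_factory import create_tracer

browser = SentienceBrowser()
browser.start()
tracer = create_tracer(run_id="retry-demo", upload_trace=False)
runtime = AgentRuntime(browser, browser.page, tracer)

browser.page.goto("https://example.com")

# Retry the verification step until the required assertions pass
# or the (illustrative) three-attempt budget is exhausted.
for attempt in range(3):
    runtime.begin_step(f"Verify page (attempt {attempt + 1})")
    runtime.snapshot()
    runtime.assert_(url_contains("example.com"), "on_correct_domain")
    runtime.assert_done(exists("text~'Example'"), "task_complete")
    if runtime.required_assertions_passed():
        break
    browser.page.reload()  # assumed recovery action between attempts

print(f"Task done: {runtime.is_task_done}")
browser.close()
```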
diff --git a/examples/agent_runtime_verification.py b/examples/agent_runtime_verification.py new file mode 100644 index 0000000..dc93d26 --- /dev/null +++ b/examples/agent_runtime_verification.py @@ -0,0 +1,126 @@ +""" +Example: Agent Runtime with Verification Loop + +Demonstrates how to use AgentRuntime for runtime verification in agent loops. +The AgentRuntime provides assertion predicates to verify browser state during execution. + +Key features: +- Predicate helpers: url_matches, url_contains, exists, not_exists, element_count +- Combinators: all_of, any_of for complex conditions +- Task completion: assert_done() for goal verification +- Trace integration: Assertions emitted to trace for Studio timeline + +Requirements: +- SENTIENCE_API_KEY (Pro or Enterprise tier) + +Usage: + python examples/agent_runtime_verification.py +""" + +import os + +from sentience import ( + AgentRuntime, + SentienceBrowser, + all_of, + exists, + not_exists, + url_contains, + url_matches, +) +from sentience.tracer_factory import create_tracer + + +def main(): + # Get API key from environment + sentience_key = os.environ.get("SENTIENCE_API_KEY") + + if not sentience_key: + print("Error: SENTIENCE_API_KEY not set") + return + + print("Starting Agent Runtime Verification Demo\n") + + # 1. Create tracer for verification event emission + run_id = "verification-demo" + tracer = create_tracer(api_key=sentience_key, run_id=run_id, upload_trace=False) + print(f"Run ID: {run_id}\n") + + # 2. Create browser + browser = SentienceBrowser(api_key=sentience_key, headless=False) + browser.start() + + try: + # 3. Create AgentRuntime with browser, page, and tracer + runtime = AgentRuntime(browser, browser.page, tracer) + + # 4. Navigate to a page + print("Navigating to example.com...\n") + browser.page.goto("https://example.com") + browser.page.wait_for_load_state("networkidle") + + # 5. Begin a verification step + runtime.begin_step("Verify page loaded correctly") + + # 6. Take a snapshot (required for element assertions) + snapshot = runtime.snapshot() + print(f"Snapshot taken: {len(snapshot.elements)} elements found\n") + + # 7. Run assertions against current state + print("Running assertions:\n") + + # URL assertions + url_ok = runtime.assert_(url_contains("example.com"), "on_example_domain") + print(f" [{'PASS' if url_ok else 'FAIL'}] on_example_domain") + + url_match = runtime.assert_(url_matches(r"https://.*example\.com"), "url_is_https") + print(f" [{'PASS' if url_match else 'FAIL'}] url_is_https") + + # Element assertions + has_heading = runtime.assert_(exists("role=heading"), "has_heading") + print(f" [{'PASS' if has_heading else 'FAIL'}] has_heading") + + no_error = runtime.assert_(not_exists("text~'Error'"), "no_error_message") + print(f" [{'PASS' if no_error else 'FAIL'}] no_error_message") + + # Combined assertion with all_of + page_ready = runtime.assert_( + all_of(url_contains("example"), exists("role=link")), + "page_fully_ready", + ) + print(f" [{'PASS' if page_ready else 'FAIL'}] page_fully_ready") + + # 8. Check if task is done (required assertion) + task_complete = runtime.assert_done( + exists("text~'Example Domain'"), + "reached_example_page", + ) + print(f"\n [{'DONE' if task_complete else 'NOT DONE'}] reached_example_page") + + # 9. Get accumulated assertions for step_end event + assertions_data = runtime.get_assertions_for_step_end() + print(f"\nTotal assertions: {len(assertions_data['assertions'])}") + print(f"Task done: {assertions_data.get('task_done', False)}") + + # 10. 
Check overall status + print("\nVerification Summary:") + print(f" All passed: {runtime.all_assertions_passed()}") + print(f" Required passed: {runtime.required_assertions_passed()}") + print(f" Task complete: {runtime.is_task_done}") + + except Exception as e: + print(f"\nError during execution: {e}") + raise + + finally: + # Close tracer and browser + print("\nClosing tracer...") + tracer.close(blocking=True) + print(f"Trace saved to: ~/.sentience/traces/{run_id}.jsonl") + + browser.close() + print("Done!") + + +if __name__ == "__main__": + main() diff --git a/sentience/__init__.py b/sentience/__init__.py index d587e8c..47e2745 100644 --- a/sentience/__init__.py +++ b/sentience/__init__.py @@ -5,6 +5,7 @@ from .actions import click, click_rect, press, scroll_to, type_text from .agent import SentienceAgent, SentienceAgentAsync from .agent_config import AgentConfig +from .agent_runtime import AgentRuntime # Agent Layer (Phase 1 & 2) from .base_agent import BaseAgent @@ -70,6 +71,21 @@ # Formatting (v0.12.0+) from .utils.formatting import format_snapshot_for_llm + +# Verification (agent assertion loop) +from .verification import ( + AssertContext, + AssertOutcome, + Predicate, + all_of, + any_of, + custom, + element_count, + exists, + not_exists, + url_contains, + url_matches, +) from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync from .wait import wait_for @@ -160,4 +176,17 @@ # Enums "SentienceMethod", "AgentAction", + # Verification (agent assertion loop) + "AgentRuntime", + "AssertContext", + "AssertOutcome", + "Predicate", + "url_matches", + "url_contains", + "exists", + "not_exists", + "element_count", + "all_of", + "any_of", + "custom", ] diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py new file mode 100644 index 0000000..83f37d6 --- /dev/null +++ b/sentience/agent_runtime.py @@ -0,0 +1,316 @@ +""" +Agent runtime for verification loop support. + +This module provides a thin runtime wrapper that combines: +1. Browser session management +2. Snapshot/query helpers +3. Tracer for event emission +4. Assertion/verification methods + +The AgentRuntime is designed to be used in agent verification loops where +you need to repeatedly take snapshots, execute actions, and verify results. + +Example usage: + from sentience import AsyncSentienceBrowser + from sentience.agent_runtime import AgentRuntime + from sentience.verification import url_matches, exists + from sentience.tracing import Tracer, JsonlTraceSink + + async with AsyncSentienceBrowser() as browser: + page = await browser.new_page() + await page.goto("https://example.com") + + sink = JsonlTraceSink("trace.jsonl") + tracer = Tracer(run_id="test-run", sink=sink) + + runtime = AgentRuntime(browser=browser, page=page, tracer=tracer) + + # Take snapshot and run assertions + await runtime.snapshot() + runtime.assert_(url_matches(r"example\\.com"), label="on_homepage") + runtime.assert_(exists("role=button"), label="has_buttons") + + # Check if task is done + if runtime.assert_done(exists("text~'Success'"), label="task_complete"): + print("Task completed!") +""" + +from __future__ import annotations + +import uuid +from typing import TYPE_CHECKING, Any + +from .verification import AssertContext, AssertOutcome, Predicate + +if TYPE_CHECKING: + from playwright.async_api import Page + + from .browser import AsyncSentienceBrowser + from .models import Snapshot + from .tracing import Tracer + + +class AgentRuntime: + """ + Runtime wrapper for agent verification loops. 
+ + Provides ergonomic methods for: + - snapshot(): Take page snapshot + - assert_(): Evaluate assertion predicates + - assert_done(): Assert task completion (required assertion) + + The runtime manages assertion state per step and emits verification events + to the tracer for Studio timeline display. + + Attributes: + browser: AsyncSentienceBrowser instance + page: Playwright Page instance + tracer: Tracer for event emission + step_id: Current step identifier + step_index: Current step index (0-based) + last_snapshot: Most recent snapshot (for assertion context) + """ + + def __init__( + self, + browser: AsyncSentienceBrowser, + page: Page, + tracer: Tracer, + ): + """ + Initialize agent runtime. + + Args: + browser: AsyncSentienceBrowser instance for taking snapshots + page: Playwright Page for browser interaction + tracer: Tracer for emitting verification events + """ + self.browser = browser + self.page = page + self.tracer = tracer + + # Step tracking + self.step_id: str | None = None + self.step_index: int = 0 + + # Snapshot state + self.last_snapshot: Snapshot | None = None + + # Assertions accumulated during current step + self._assertions_this_step: list[dict[str, Any]] = [] + + # Task completion tracking + self._task_done: bool = False + self._task_done_label: str | None = None + + def _ctx(self) -> AssertContext: + """ + Build assertion context from current state. + + Returns: + AssertContext with current snapshot and URL + """ + url = None + if self.last_snapshot is not None: + url = self.last_snapshot.url + elif self.page: + url = self.page.url + + return AssertContext( + snapshot=self.last_snapshot, + url=url, + step_id=self.step_id, + ) + + async def snapshot(self, **kwargs) -> Snapshot: + """ + Take a snapshot of the current page state. + + This updates last_snapshot which is used as context for assertions. + + Args: + **kwargs: Passed through to browser.snapshot() + + Returns: + Snapshot of current page state + """ + self.last_snapshot = await self.browser.snapshot(self.page, **kwargs) + return self.last_snapshot + + def begin_step(self, goal: str, step_index: int | None = None) -> str: + """ + Begin a new step in the verification loop. + + This: + - Generates a new step_id + - Clears assertions from previous step + - Increments step_index (or uses provided value) + + Args: + goal: Description of what this step aims to achieve + step_index: Optional explicit step index (otherwise auto-increments) + + Returns: + Generated step_id + """ + # Clear previous step state + self._assertions_this_step = [] + + # Generate new step_id + self.step_id = str(uuid.uuid4()) + + # Update step index + if step_index is not None: + self.step_index = step_index + else: + self.step_index += 1 + + return self.step_id + + def assert_( + self, + predicate: Predicate, + label: str, + required: bool = False, + ) -> bool: + """ + Evaluate an assertion against current snapshot state. + + The assertion result is: + 1. Accumulated for inclusion in step_end.data.verify.signals.assertions + 2. 
Emitted as a dedicated 'verification' event for Studio timeline + + Args: + predicate: Predicate function to evaluate + label: Human-readable label for this assertion + required: If True, this assertion gates step success (default: False) + + Returns: + True if assertion passed, False otherwise + """ + outcome = predicate(self._ctx()) + + record = { + "label": label, + "passed": outcome.passed, + "required": required, + "reason": outcome.reason, + "details": outcome.details, + } + self._assertions_this_step.append(record) + + # Emit dedicated verification event (Option B from design doc) + # This makes assertions visible in Studio timeline + self.tracer.emit( + "verification", + data={ + "kind": "assert", + "passed": outcome.passed, + **record, + }, + step_id=self.step_id, + ) + + return outcome.passed + + def assert_done( + self, + predicate: Predicate, + label: str, + ) -> bool: + """ + Assert task completion (required assertion). + + This is a convenience wrapper for assert_() with required=True. + When the assertion passes, it marks the task as done. + + Use this for final verification that the agent's goal is complete. + + Args: + predicate: Predicate function to evaluate + label: Human-readable label for this assertion + + Returns: + True if task is complete (assertion passed), False otherwise + """ + ok = self.assert_(predicate, label=label, required=True) + + if ok: + self._task_done = True + self._task_done_label = label + + # Emit task_done verification event + self.tracer.emit( + "verification", + data={ + "kind": "task_done", + "passed": True, + "label": label, + }, + step_id=self.step_id, + ) + + return ok + + def get_assertions_for_step_end(self) -> dict[str, Any]: + """ + Get assertions data for inclusion in step_end.data.verify.signals. + + This is called when building the step_end event to include + assertion results in the trace. + + Returns: + Dictionary with 'assertions', 'task_done', 'task_done_label' keys + """ + result: dict[str, Any] = { + "assertions": self._assertions_this_step.copy(), + } + + if self._task_done: + result["task_done"] = True + result["task_done_label"] = self._task_done_label + + return result + + def flush_assertions(self) -> list[dict[str, Any]]: + """ + Get and clear assertions for current step. + + Call this at step end to get accumulated assertions + for the step_end event, then clear for next step. + + Returns: + List of assertion records from this step + """ + assertions = self._assertions_this_step.copy() + self._assertions_this_step = [] + return assertions + + @property + def is_task_done(self) -> bool: + """Check if task has been marked as done via assert_done().""" + return self._task_done + + def reset_task_done(self) -> None: + """Reset task_done state (for multi-task runs).""" + self._task_done = False + self._task_done_label = None + + def all_assertions_passed(self) -> bool: + """ + Check if all assertions in current step passed. + + Returns: + True if all assertions passed (or no assertions made) + """ + return all(a["passed"] for a in self._assertions_this_step) + + def required_assertions_passed(self) -> bool: + """ + Check if all required assertions in current step passed. 
+ + Returns: + True if all required assertions passed (or no required assertions) + """ + required = [a for a in self._assertions_this_step if a.get("required")] + return all(a["passed"] for a in required) diff --git a/sentience/schemas/trace_v1.json b/sentience/schemas/trace_v1.json index b844d04..37c28cb 100644 --- a/sentience/schemas/trace_v1.json +++ b/sentience/schemas/trace_v1.json @@ -248,7 +248,24 @@ } } } - } + }, + "assertions": { + "type": "array", + "description": "Assertion results from agent verification loop", + "items": { + "type": "object", + "required": ["label", "passed"], + "properties": { + "label": {"type": "string", "description": "Human-readable assertion label"}, + "passed": {"type": "boolean", "description": "Whether the assertion passed"}, + "required": {"type": "boolean", "description": "If true, assertion gates step success"}, + "reason": {"type": "string", "description": "Explanation (especially when failed)"}, + "details": {"type": "object", "description": "Additional structured data for debugging"} + } + } + }, + "task_done": {"type": "boolean", "description": "True if task completion assertion passed"}, + "task_done_label": {"type": "string", "description": "Label of the task completion assertion"} } } } @@ -270,6 +287,15 @@ "properties": { "step_id": {"type": "string"}, "passed": {"type": "boolean"}, + "kind": { + "type": "string", + "enum": ["assert", "task_done"], + "description": "Type of verification event" + }, + "label": {"type": "string", "description": "Human-readable label for the assertion"}, + "required": {"type": "boolean", "description": "If true, assertion gates step success"}, + "reason": {"type": "string", "description": "Explanation (especially when failed)"}, + "details": {"type": "object", "description": "Additional structured data for debugging"}, "signals": {"type": "object"} } }, diff --git a/sentience/trace_event_builder.py b/sentience/trace_event_builder.py index d2e5f9f..272c7e0 100644 --- a/sentience/trace_event_builder.py +++ b/sentience/trace_event_builder.py @@ -84,6 +84,7 @@ def build_step_end_event( exec_data: dict[str, Any], verify_data: dict[str, Any], pre_elements: list[dict[str, Any]] | None = None, + assertions: list[dict[str, Any]] | None = None, ) -> dict[str, Any]: """ Build step_end trace event data. 
@@ -100,6 +101,7 @@ def build_step_end_event( exec_data: Action execution data verify_data: Verification data pre_elements: Optional list of elements from pre-snapshot (with diff_status) + assertions: Optional list of assertion results from AgentRuntime Returns: Dictionary with step_end event data @@ -113,6 +115,23 @@ def build_step_end_event( if pre_elements is not None: pre_data["elements"] = pre_elements + # Build verify data with assertions if provided + final_verify_data = verify_data.copy() + if assertions: + # Ensure signals dict exists + if "signals" not in final_verify_data: + final_verify_data["signals"] = {} + + # Add assertions to signals + final_verify_data["signals"]["assertions"] = assertions + + # Check for task completion (assertions marked as required that passed) + for a in assertions: + if a.get("passed") and a.get("required"): + final_verify_data["signals"]["task_done"] = True + final_verify_data["signals"]["task_done_label"] = a.get("label") + break + return { "v": 1, "step_id": step_id, @@ -125,5 +144,5 @@ def build_step_end_event( "post": { "url": post_url, }, - "verify": verify_data, + "verify": final_verify_data, } diff --git a/sentience/verification.py b/sentience/verification.py new file mode 100644 index 0000000..216f25e --- /dev/null +++ b/sentience/verification.py @@ -0,0 +1,376 @@ +""" +Verification primitives for agent assertion loops. + +This module provides assertion predicates and outcome types for runtime verification +in agent loops. Assertions evaluate against the current browser state (snapshot/url) +and record results into the trace. + +Key concepts: +- AssertOutcome: Result of evaluating an assertion +- AssertContext: Context provided to assertion predicates (snapshot, url, step_id) +- Predicate: Callable that takes context and returns outcome + +Example usage: + from sentience.verification import url_matches, exists, AssertContext + + # Create predicates + on_search_page = url_matches(r"/s\\?k=") + results_loaded = exists("text~'Results'") + + # Evaluate against context + ctx = AssertContext(snapshot=snapshot, url="https://example.com/s?k=shoes") + outcome = on_search_page(ctx) + print(outcome.passed) # True +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any, Callable + +if TYPE_CHECKING: + from .models import Snapshot + + +@dataclass +class AssertOutcome: + """ + Result of evaluating an assertion predicate. + + Attributes: + passed: Whether the assertion passed + reason: Human-readable explanation (especially useful when failed) + details: Additional structured data for debugging/display + """ + + passed: bool + reason: str = "" + details: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class AssertContext: + """ + Context provided to assertion predicates. + + Provides access to current browser state without requiring + the predicate to know about browser internals. + + Attributes: + snapshot: Current page snapshot (may be None if not taken) + url: Current page URL + step_id: Current step identifier (for trace correlation) + """ + + snapshot: Snapshot | None = None + url: str | None = None + step_id: str | None = None + + +# Type alias for assertion predicates +Predicate = Callable[[AssertContext], AssertOutcome] + + +def url_matches(pattern: str) -> Predicate: + """ + Create a predicate that checks if current URL matches a regex pattern. 
+ + Args: + pattern: Regular expression pattern to match against URL + + Returns: + Predicate function that evaluates URL matching + + Example: + >>> pred = url_matches(r"/search\\?q=") + >>> ctx = AssertContext(url="https://example.com/search?q=shoes") + >>> outcome = pred(ctx) + >>> outcome.passed + True + """ + rx = re.compile(pattern) + + def _pred(ctx: AssertContext) -> AssertOutcome: + url = ctx.url or "" + ok = rx.search(url) is not None + return AssertOutcome( + passed=ok, + reason="" if ok else f"url did not match pattern: {pattern}", + details={"pattern": pattern, "url": url[:200]}, + ) + + return _pred + + +def url_contains(substring: str) -> Predicate: + """ + Create a predicate that checks if current URL contains a substring. + + Args: + substring: String to search for in URL + + Returns: + Predicate function that evaluates URL containment + + Example: + >>> pred = url_contains("/cart") + >>> ctx = AssertContext(url="https://example.com/cart/checkout") + >>> outcome = pred(ctx) + >>> outcome.passed + True + """ + + def _pred(ctx: AssertContext) -> AssertOutcome: + url = ctx.url or "" + ok = substring in url + return AssertOutcome( + passed=ok, + reason="" if ok else f"url does not contain: {substring}", + details={"substring": substring, "url": url[:200]}, + ) + + return _pred + + +def exists(selector: str) -> Predicate: + """ + Create a predicate that checks if elements matching selector exist. + + Uses the SDK's query engine to find matching elements. + + Args: + selector: Semantic selector string (e.g., "role=button text~'Sign in'") + + Returns: + Predicate function that evaluates element existence + + Example: + >>> pred = exists("text~'Results'") + >>> # Will check if snapshot contains elements with "Results" in text + """ + + def _pred(ctx: AssertContext) -> AssertOutcome: + snap = ctx.snapshot + if snap is None: + return AssertOutcome( + passed=False, + reason="no snapshot available", + details={"selector": selector}, + ) + + # Import here to avoid circular imports + from .query import query + + matches = query(snap, selector) + ok = len(matches) > 0 + return AssertOutcome( + passed=ok, + reason="" if ok else f"no elements matched selector: {selector}", + details={"selector": selector, "matched": len(matches)}, + ) + + return _pred + + +def not_exists(selector: str) -> Predicate: + """ + Create a predicate that checks that NO elements match the selector. + + Useful for asserting that error messages, loading spinners, etc. are gone. + + Args: + selector: Semantic selector string + + Returns: + Predicate function that evaluates element non-existence + + Example: + >>> pred = not_exists("text~'Loading'") + >>> # Will pass if no elements contain "Loading" text + """ + + def _pred(ctx: AssertContext) -> AssertOutcome: + snap = ctx.snapshot + if snap is None: + return AssertOutcome( + passed=False, + reason="no snapshot available", + details={"selector": selector}, + ) + + from .query import query + + matches = query(snap, selector) + ok = len(matches) == 0 + return AssertOutcome( + passed=ok, + reason="" if ok else f"found {len(matches)} elements matching: {selector}", + details={"selector": selector, "matched": len(matches)}, + ) + + return _pred + + +def element_count(selector: str, *, min_count: int = 0, max_count: int | None = None) -> Predicate: + """ + Create a predicate that checks the number of matching elements. 
+ + Args: + selector: Semantic selector string + min_count: Minimum number of matches required (inclusive) + max_count: Maximum number of matches allowed (inclusive, None = no limit) + + Returns: + Predicate function that evaluates element count + + Example: + >>> pred = element_count("role=button", min_count=1, max_count=5) + >>> # Will pass if 1-5 buttons found + """ + + def _pred(ctx: AssertContext) -> AssertOutcome: + snap = ctx.snapshot + if snap is None: + return AssertOutcome( + passed=False, + reason="no snapshot available", + details={"selector": selector, "min_count": min_count, "max_count": max_count}, + ) + + from .query import query + + matches = query(snap, selector) + count = len(matches) + + ok = count >= min_count + if max_count is not None: + ok = ok and count <= max_count + + if ok: + reason = "" + else: + if max_count is not None: + reason = f"expected {min_count}-{max_count} elements, found {count}" + else: + reason = f"expected at least {min_count} elements, found {count}" + + return AssertOutcome( + passed=ok, + reason=reason, + details={ + "selector": selector, + "matched": count, + "min_count": min_count, + "max_count": max_count, + }, + ) + + return _pred + + +def all_of(*predicates: Predicate) -> Predicate: + """ + Create a predicate that passes only if ALL sub-predicates pass. + + Args: + *predicates: Predicate functions to combine with AND logic + + Returns: + Combined predicate + + Example: + >>> pred = all_of(url_contains("/cart"), exists("text~'Checkout'")) + >>> # Will pass only if both conditions are true + """ + + def _pred(ctx: AssertContext) -> AssertOutcome: + failed_reasons = [] + all_details: list[dict[str, Any]] = [] + + for p in predicates: + outcome = p(ctx) + all_details.append(outcome.details) + if not outcome.passed: + failed_reasons.append(outcome.reason) + + ok = len(failed_reasons) == 0 + return AssertOutcome( + passed=ok, + reason="; ".join(failed_reasons) if failed_reasons else "", + details={"sub_predicates": all_details, "failed_count": len(failed_reasons)}, + ) + + return _pred + + +def any_of(*predicates: Predicate) -> Predicate: + """ + Create a predicate that passes if ANY sub-predicate passes. + + Args: + *predicates: Predicate functions to combine with OR logic + + Returns: + Combined predicate + + Example: + >>> pred = any_of(exists("text~'Success'"), exists("text~'Complete'")) + >>> # Will pass if either condition is true + """ + + def _pred(ctx: AssertContext) -> AssertOutcome: + all_reasons = [] + all_details: list[dict[str, Any]] = [] + + for p in predicates: + outcome = p(ctx) + all_details.append(outcome.details) + if outcome.passed: + return AssertOutcome( + passed=True, + reason="", + details={"sub_predicates": all_details, "matched_at_index": len(all_details) - 1}, + ) + all_reasons.append(outcome.reason) + + return AssertOutcome( + passed=False, + reason=f"none of {len(predicates)} predicates passed: " + "; ".join(all_reasons), + details={"sub_predicates": all_details}, + ) + + return _pred + + +def custom(check_fn: Callable[[AssertContext], bool], label: str = "custom") -> Predicate: + """ + Create a predicate from a custom function. 
+ + Args: + check_fn: Function that takes AssertContext and returns bool + label: Label for debugging/display + + Returns: + Predicate wrapping the custom function + + Example: + >>> pred = custom(lambda ctx: ctx.snapshot and len(ctx.snapshot.elements) > 10, "has_many_elements") + """ + + def _pred(ctx: AssertContext) -> AssertOutcome: + try: + ok = check_fn(ctx) + return AssertOutcome( + passed=ok, + reason="" if ok else f"custom check '{label}' returned False", + details={"label": label}, + ) + except Exception as e: + return AssertOutcome( + passed=False, + reason=f"custom check '{label}' raised exception: {e}", + details={"label": label, "error": str(e)}, + ) + + return _pred diff --git a/tests/test_verification.py b/tests/test_verification.py new file mode 100644 index 0000000..1f01511 --- /dev/null +++ b/tests/test_verification.py @@ -0,0 +1,295 @@ +""" +Tests for verification module - assertion predicates for agent loops. +""" + +import pytest + +from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues +from sentience.verification import ( + AssertContext, + AssertOutcome, + all_of, + any_of, + custom, + element_count, + exists, + not_exists, + url_contains, + url_matches, +) + + +def make_element( + id: int, + role: str = "button", + text: str | None = None, + importance: int = 100, +) -> Element: + """Helper to create test elements.""" + return Element( + id=id, + role=role, + text=text, + importance=importance, + bbox=BBox(x=0, y=0, width=100, height=50), + visual_cues=VisualCues(is_primary=False, is_clickable=True, background_color_name=None), + ) + + +def make_snapshot(elements: list[Element], url: str = "https://example.com") -> Snapshot: + """Helper to create test snapshots.""" + return Snapshot( + status="success", + url=url, + elements=elements, + viewport=Viewport(width=1920, height=1080), + ) + + +class TestUrlMatches: + """Tests for url_matches predicate.""" + + def test_matches_pattern(self): + pred = url_matches(r"/search\?q=") + ctx = AssertContext(url="https://example.com/search?q=shoes") + outcome = pred(ctx) + assert outcome.passed is True + assert outcome.reason == "" + + def test_no_match(self): + pred = url_matches(r"/cart") + ctx = AssertContext(url="https://example.com/search?q=shoes") + outcome = pred(ctx) + assert outcome.passed is False + assert "did not match" in outcome.reason + + def test_none_url(self): + pred = url_matches(r"/search") + ctx = AssertContext(url=None) + outcome = pred(ctx) + assert outcome.passed is False + + def test_details_include_pattern_and_url(self): + pred = url_matches(r"/test") + ctx = AssertContext(url="https://example.com/test") + outcome = pred(ctx) + assert outcome.details["pattern"] == r"/test" + assert "example.com" in outcome.details["url"] + + +class TestUrlContains: + """Tests for url_contains predicate.""" + + def test_contains_substring(self): + pred = url_contains("/cart") + ctx = AssertContext(url="https://example.com/cart/checkout") + outcome = pred(ctx) + assert outcome.passed is True + + def test_no_substring(self): + pred = url_contains("/orders") + ctx = AssertContext(url="https://example.com/cart") + outcome = pred(ctx) + assert outcome.passed is False + assert "does not contain" in outcome.reason + + def test_none_url(self): + pred = url_contains("/test") + ctx = AssertContext(url=None) + outcome = pred(ctx) + assert outcome.passed is False + + +class TestExists: + """Tests for exists predicate.""" + + def test_element_exists(self): + elements = [make_element(1, role="button", text="Click 
me")] + snap = make_snapshot(elements) + pred = exists("role=button") + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is True + assert outcome.details["matched"] == 1 + + def test_element_not_found(self): + elements = [make_element(1, role="button", text="Click me")] + snap = make_snapshot(elements) + pred = exists("role=link") + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is False + assert "no elements matched" in outcome.reason + + def test_text_selector(self): + elements = [make_element(1, role="button", text="Submit Form")] + snap = make_snapshot(elements) + pred = exists("text~'Submit'") + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is True + + def test_no_snapshot(self): + pred = exists("role=button") + ctx = AssertContext(snapshot=None) + outcome = pred(ctx) + assert outcome.passed is False + assert "no snapshot available" in outcome.reason + + +class TestNotExists: + """Tests for not_exists predicate.""" + + def test_element_absent(self): + elements = [make_element(1, role="button")] + snap = make_snapshot(elements) + pred = not_exists("text~'Loading'") + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is True + + def test_element_present(self): + elements = [make_element(1, role="button", text="Loading...")] + snap = make_snapshot(elements) + pred = not_exists("text~'Loading'") + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is False + assert "found 1 elements" in outcome.reason + + +class TestElementCount: + """Tests for element_count predicate.""" + + def test_min_count_satisfied(self): + elements = [make_element(i, role="button") for i in range(3)] + snap = make_snapshot(elements) + pred = element_count("role=button", min_count=2) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is True + + def test_min_count_not_satisfied(self): + elements = [make_element(1, role="button")] + snap = make_snapshot(elements) + pred = element_count("role=button", min_count=5) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is False + assert "expected at least 5" in outcome.reason + + def test_max_count_satisfied(self): + elements = [make_element(i, role="button") for i in range(3)] + snap = make_snapshot(elements) + pred = element_count("role=button", min_count=1, max_count=5) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is True + + def test_max_count_exceeded(self): + elements = [make_element(i, role="button") for i in range(10)] + snap = make_snapshot(elements) + pred = element_count("role=button", min_count=1, max_count=5) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is False + assert "expected 1-5" in outcome.reason + + +class TestAllOf: + """Tests for all_of combinator.""" + + def test_all_pass(self): + elements = [make_element(1, role="button", text="Checkout")] + snap = make_snapshot(elements, url="https://example.com/cart") + pred = all_of(url_contains("/cart"), exists("role=button")) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is True + assert outcome.details["failed_count"] == 0 + + def test_one_fails(self): + elements = [make_element(1, role="button")] + snap = make_snapshot(elements, 
url="https://example.com/home") + pred = all_of(url_contains("/cart"), exists("role=button")) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is False + assert outcome.details["failed_count"] == 1 + + def test_all_fail(self): + elements = [make_element(1, role="link")] + snap = make_snapshot(elements, url="https://example.com/home") + pred = all_of(url_contains("/cart"), exists("role=button")) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is False + assert outcome.details["failed_count"] == 2 + + +class TestAnyOf: + """Tests for any_of combinator.""" + + def test_first_passes(self): + elements = [make_element(1, role="button", text="Success")] + snap = make_snapshot(elements) + pred = any_of(exists("text~'Success'"), exists("text~'Complete'")) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is True + + def test_second_passes(self): + elements = [make_element(1, role="button", text="Complete")] + snap = make_snapshot(elements) + pred = any_of(exists("text~'Success'"), exists("text~'Complete'")) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is True + + def test_none_pass(self): + elements = [make_element(1, role="button", text="Error")] + snap = make_snapshot(elements) + pred = any_of(exists("text~'Success'"), exists("text~'Complete'")) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is False + assert "none of 2 predicates passed" in outcome.reason + + +class TestCustom: + """Tests for custom predicate.""" + + def test_custom_returns_true(self): + pred = custom(lambda ctx: ctx.url is not None, "has_url") + ctx = AssertContext(url="https://example.com") + outcome = pred(ctx) + assert outcome.passed is True + + def test_custom_returns_false(self): + pred = custom(lambda ctx: ctx.url is None, "no_url") + ctx = AssertContext(url="https://example.com") + outcome = pred(ctx) + assert outcome.passed is False + assert "returned False" in outcome.reason + + def test_custom_with_snapshot(self): + elements = [make_element(i, role="button") for i in range(15)] + snap = make_snapshot(elements) + pred = custom( + lambda ctx: ctx.snapshot is not None and len(ctx.snapshot.elements) > 10, + "has_many_elements", + ) + ctx = AssertContext(snapshot=snap, url=snap.url) + outcome = pred(ctx) + assert outcome.passed is True + + def test_custom_exception(self): + def bad_check(ctx): + raise ValueError("Something went wrong") + + pred = custom(bad_check, "bad_check") + ctx = AssertContext() + outcome = pred(ctx) + assert outcome.passed is False + assert "raised exception" in outcome.reason + assert "Something went wrong" in outcome.reason