From 8020867b20b17e80370b1a716302c5cd2d31a9ff Mon Sep 17 00:00:00 2001
From: SentienceDev <dev@sentienceapi.com>
Date: Thu, 8 Jan 2026 17:20:16 -0800
Subject: [PATCH 1/3] Phase 2: full integration with backend protocol

---
 sentience/__init__.py                     |  18 +
 sentience/backends/__init__.py            |  21 +-
 sentience/backends/actions.py             | 341 ++++++++++++++++++
 sentience/backends/browser_use_adapter.py |   4 +-
 sentience/backends/cdp_backend.py         |  15 +-
 sentience/backends/playwright_backend.py  | 187 ++++++++++
 sentience/backends/snapshot.py            | 297 ++++++++++++++++
 tests/test_backends.py                    | 401 ++++++++++++++++++++--
 8 files changed, 1244 insertions(+), 40 deletions(-)
 create mode 100644 sentience/backends/actions.py
 create mode 100644 sentience/backends/playwright_backend.py
 create mode 100644 sentience/backends/snapshot.py

diff --git a/sentience/__init__.py b/sentience/__init__.py
index 1149bad..ecb4711 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -16,16 +16,25 @@
 from .agent_config import AgentConfig
 from .agent_runtime import AgentRuntime
 
+# Backend-agnostic actions (aliased to avoid conflict with existing actions)
 # Browser backends (for browser-use integration)
 from .backends import (
     BrowserBackendV0,
     BrowserUseAdapter,
     BrowserUseCDPTransport,
+    CachedSnapshot,
     CDPBackendV0,
     CDPTransport,
     LayoutMetrics,
+    PlaywrightBackend,
     ViewportInfo,
 )
+from .backends import click as backend_click
+from .backends import scroll as backend_scroll
+from .backends import scroll_to_element as backend_scroll_to_element
+from .backends import snapshot as backend_snapshot
+from .backends import type_text as backend_type_text
+from .backends import wait_for_stable as backend_wait_for_stable
 
 # Agent Layer (Phase 1 & 2)
 from .base_agent import BaseAgent
@@ -123,10 +132,19 @@
     "BrowserBackendV0",
     "CDPTransport",
     "CDPBackendV0",
+    "PlaywrightBackend",
     "BrowserUseAdapter",
     "BrowserUseCDPTransport",
     "ViewportInfo",
     "LayoutMetrics",
+    "backend_snapshot",
+    "CachedSnapshot",
+    # Backend-agnostic actions (prefixed to avoid conflicts)
+    "backend_click",
+    "backend_type_text",
+    "backend_scroll",
+    "backend_scroll_to_element",
+    "backend_wait_for_stable",
     # Core SDK
     "SentienceBrowser",
     "Snapshot",
diff --git a/sentience/backends/__init__.py b/sentience/backends/__init__.py
index 063685a..0c7d7f3 100644
--- a/sentience/backends/__init__.py
+++ b/sentience/backends/__init__.py
@@ -12,7 +12,7 @@
 For browser-use integration:
     from browser_use import BrowserSession, BrowserProfile
     from sentience import get_extension_dir
-    from sentience.backends import BrowserUseAdapter, CDPBackendV0
+    from sentience.backends import BrowserUseAdapter, snapshot, click, type_text
 
     # Setup browser-use with Sentience extension
     profile = BrowserProfile(args=[f"--load-extension={get_extension_dir()}"])
@@ -23,13 +23,18 @@
     adapter = BrowserUseAdapter(session)
     backend = await adapter.create_backend()
 
-    # Use backend for precise operations
-    await backend.mouse_click(100, 200)
+    # Take snapshot and interact
+    snap = await snapshot(backend)
+    element = find(snap, 'role=button[name="Submit"]')
+    await click(backend, element.bbox)
 """
 
+from .actions import click, scroll, scroll_to_element, type_text, wait_for_stable
 from .browser_use_adapter import BrowserUseAdapter, BrowserUseCDPTransport
 from .cdp_backend import CDPBackendV0, CDPTransport
+from .playwright_backend import PlaywrightBackend
 from .protocol_v0 import BrowserBackendV0, LayoutMetrics, ViewportInfo
+from .snapshot import CachedSnapshot, snapshot
 
 __all__ = [
     # Protocol
@@ -40,7 +45,17 @@
     # CDP Backend
     "CDPTransport",
     "CDPBackendV0",
+    # Playwright Backend
+    "PlaywrightBackend",
     # browser-use adapter
     "BrowserUseAdapter",
     "BrowserUseCDPTransport",
+    # Backend-agnostic functions
+    "snapshot",
+    "CachedSnapshot",
+    "click",
+    "type_text",
+    "scroll",
+    "scroll_to_element",
+    "wait_for_stable",
 ]
diff --git a/sentience/backends/actions.py b/sentience/backends/actions.py
new file mode 100644
index 0000000..c987d64
--- /dev/null
+++ b/sentience/backends/actions.py
@@ -0,0 +1,341 @@
+"""
+Backend-agnostic actions for browser-use integration.
+
+These actions work with any BrowserBackendV0 implementation,
+enabling Sentience grounding with browser-use or other frameworks.
+
+Usage with browser-use:
+    from sentience.backends import BrowserUseAdapter, snapshot
+    from sentience.backends.actions import click, type_text, scroll
+
+    adapter = BrowserUseAdapter(session)
+    backend = await adapter.create_backend()
+
+    # Take snapshot and click element
+    snap = await snapshot(backend)
+    element = find(snap, 'role=button[name="Submit"]')
+    await click(backend, element.bbox)
+"""
+
+import asyncio
+import time
+from typing import TYPE_CHECKING, Any, Literal
+
+from ..models import ActionResult, BBox, Snapshot
+
+if TYPE_CHECKING:
+    from .protocol_v0 import BrowserBackendV0
+
+
+async def click(
+    backend: "BrowserBackendV0",
+    target: BBox | dict[str, float] | tuple[float, float],
+    button: Literal["left", "right", "middle"] = "left",
+    click_count: int = 1,
+    move_first: bool = True,
+) -> ActionResult:
+    """
+    Click at the point resolved from ``target`` using the backend.
+
+    Args:
+        backend: BrowserBackendV0 implementation
+        target: BBox or width/height dict (clicks center), dict with x/y, or (x, y) tuple
+        button: Mouse button to click
+        click_count: Number of clicks (1=single, 2=double)
+        move_first: If True, move the mouse to the point and pause 20 ms before clicking
+
+    Returns:
+        ActionResult; backend exceptions are reported as success=False, not raised
+
+    Example:
+        # Click at coordinates
+        await click(backend, (100, 200))
+
+        # Click element bbox center
+        await click(backend, element.bbox)
+
+        # Double-click
+        await click(backend, element.bbox, click_count=2)
+    """
+    start_time = time.time()
+
+    # Resolve coordinates (outside the try, so an invalid target type raises ValueError)
+    x, y = _resolve_coordinates(target)
+
+    try:
+        # Optional mouse move for hover effects
+        if move_first:
+            await backend.mouse_move(x, y)
+            await asyncio.sleep(0.02)  # Brief pause for hover
+
+        # Perform click
+        await backend.mouse_click(x, y, button=button, click_count=click_count)
+
+        duration_ms = int((time.time() - start_time) * 1000)
+        return ActionResult(
+            success=True,
+            duration_ms=duration_ms,
+            outcome="dom_updated",
+        )
+    except Exception as e:
+        duration_ms = int((time.time() - start_time) * 1000)
+        return ActionResult(
+            success=False,
+            duration_ms=duration_ms,
+            outcome="error",
+            error={"code": "click_failed", "reason": str(e)},
+        )
+
+
+async def type_text(
+    backend: "BrowserBackendV0",
+    text: str,
+    target: BBox | dict[str, float] | tuple[float, float] | None = None,
+    clear_first: bool = False,
+) -> ActionResult:
+    """
+    Type text, optionally clicking a target first.
+
+    Args:
+        backend: BrowserBackendV0 implementation
+        text: Text to type
+        target: Optional click target before typing (BBox, dict, or tuple)
+        clear_first: If True, select all existing content before typing (no explicit delete)
+
+    Returns:
+        ActionResult; exceptions (including target resolution) are reported as success=False
+
+    Example:
+        # Type into focused element
+        await type_text(backend, "Hello World")
+
+        # Click input then type
+        await type_text(backend, "search query", target=search_box.bbox)
+
+        # Clear and type
+        await type_text(backend, "new value", target=input.bbox, clear_first=True)
+    """
+    start_time = time.time()
+
+    try:
+        # Click target if provided
+        if target is not None:
+            x, y = _resolve_coordinates(target)
+            await backend.mouse_click(x, y)
+            await asyncio.sleep(0.05)  # Wait for focus
+
+        # Clear existing content if requested
+        if clear_first:
+            # Only selects all via execCommand; presumably the typed text replaces the selection
+            await backend.eval("document.execCommand('selectAll')")
+            await asyncio.sleep(0.02)
+
+        # Type the text
+        await backend.type_text(text)
+
+        duration_ms = int((time.time() - start_time) * 1000)
+        return ActionResult(
+            success=True,
+            duration_ms=duration_ms,
+            outcome="dom_updated",
+        )
+    except Exception as e:
+        duration_ms = int((time.time() - start_time) * 1000)
+        return ActionResult(
+            success=False,
+            duration_ms=duration_ms,
+            outcome="error",
+            error={"code": "type_failed", "reason": str(e)},
+        )
+
+
+async def scroll(
+    backend: "BrowserBackendV0",
+    delta_y: float = 300,
+    target: BBox | dict[str, float] | tuple[float, float] | None = None,
+) -> ActionResult:
+    """
+    Scroll by sending a wheel event through the backend.
+
+    Args:
+        backend: BrowserBackendV0 implementation
+        delta_y: Scroll amount (positive=down, negative=up)
+        target: Optional wheel position; if None, x and y are passed to backend.wheel as None
+
+    Returns:
+        ActionResult; exceptions (including target resolution) are reported as success=False
+
+    Example:
+        # Scroll down 300px
+        await scroll(backend, 300)
+
+        # Scroll up 500px
+        await scroll(backend, -500)
+
+        # Scroll at specific position
+        await scroll(backend, 200, target=(500, 300))
+    """
+    start_time = time.time()
+
+    try:
+        x: float | None = None
+        y: float | None = None
+
+        if target is not None:
+            x, y = _resolve_coordinates(target)
+
+        await backend.wheel(delta_y=delta_y, x=x, y=y)
+
+        # Fixed 100 ms pause to let the scroll settle
+        await asyncio.sleep(0.1)
+
+        duration_ms = int((time.time() - start_time) * 1000)
+        return ActionResult(
+            success=True,
+            duration_ms=duration_ms,
+            outcome="dom_updated",
+        )
+    except Exception as e:
+        duration_ms = int((time.time() - start_time) * 1000)
+        return ActionResult(
+            success=False,
+            duration_ms=duration_ms,
+            outcome="error",
+            error={"code": "scroll_failed", "reason": str(e)},
+        )
+
+
+async def scroll_to_element(
+    backend: "BrowserBackendV0",
+    element_id: int,
+    behavior: Literal["smooth", "instant", "auto"] = "instant",
+    block: Literal["start", "center", "end", "nearest"] = "center",
+) -> ActionResult:
+    """
+    Scroll element into view using JavaScript scrollIntoView.
+
+    Args:
+        backend: BrowserBackendV0 implementation
+        element_id: Element ID from snapshot, used as the key into window.sentience_registry
+        behavior: scrollIntoView behavior; "smooth" waits 300 ms afterwards, otherwise 50 ms
+        block: Vertical alignment passed to scrollIntoView
+
+    Returns:
+        ActionResult; success=False if the element is missing or an exception occurs
+    """
+    start_time = time.time()
+
+    try:
+        scrolled = await backend.eval(f"""
+            (() => {{
+                const el = window.sentience_registry && window.sentience_registry[{element_id}];
+                if (el && el.scrollIntoView) {{
+                    el.scrollIntoView({{
+                        behavior: '{behavior}',
+                        block: '{block}',
+                        inline: 'nearest'
+                    }});
+                    return true;
+                }}
+                return false;
+            }})()
+        """)
+
+        # Wait for scroll animation (the wait happens even when nothing was scrolled)
+        wait_time = 0.3 if behavior == "smooth" else 0.05
+        await asyncio.sleep(wait_time)
+
+        duration_ms = int((time.time() - start_time) * 1000)
+
+        if scrolled:
+            return ActionResult(
+                success=True,
+                duration_ms=duration_ms,
+                outcome="dom_updated",
+            )
+        else:
+            return ActionResult(
+                success=False,
+                duration_ms=duration_ms,
+                outcome="error",
+                error={"code": "scroll_failed", "reason": "Element not found in registry"},
+            )
+    except Exception as e:
+        duration_ms = int((time.time() - start_time) * 1000)
+        return ActionResult(
+            success=False,
+            duration_ms=duration_ms,
+            outcome="error",
+            error={"code": "scroll_failed", "reason": str(e)},
+        )
+
+
+async def wait_for_stable(
+    backend: "BrowserBackendV0",
+    state: Literal["interactive", "complete"] = "complete",
+    timeout_ms: int = 10000,
+) -> ActionResult:
+    """
+    Wait until the backend reports the requested document.readyState.
+
+    Args:
+        backend: BrowserBackendV0 implementation
+        state: Target document.readyState
+        timeout_ms: Maximum wait time in milliseconds, forwarded to backend.wait_ready_state
+
+    Returns:
+        ActionResult; error code is "timeout" on TimeoutError and "wait_failed" otherwise
+    """
+    start_time = time.time()
+
+    try:
+        await backend.wait_ready_state(state=state, timeout_ms=timeout_ms)
+
+        duration_ms = int((time.time() - start_time) * 1000)
+        return ActionResult(
+            success=True,
+            duration_ms=duration_ms,
+            outcome="dom_updated",
+        )
+    except TimeoutError as e:
+        duration_ms = int((time.time() - start_time) * 1000)
+        return ActionResult(
+            success=False,
+            duration_ms=duration_ms,
+            outcome="error",
+            error={"code": "timeout", "reason": str(e)},
+        )
+    except Exception as e:
+        duration_ms = int((time.time() - start_time) * 1000)
+        return ActionResult(
+            success=False,
+            duration_ms=duration_ms,
+            outcome="error",
+            error={"code": "wait_failed", "reason": str(e)},
+        )
+
+
+def _resolve_coordinates(
+    target: BBox | dict[str, float] | tuple[float, float],
+) -> tuple[float, float]:
+    """
+    Resolve target to (x, y) coordinates.
+
+    - BBox: Returns center point
+    - dict: Returns x, y keys (or center if width/height present); missing x/y count as 0
+    - tuple: Returns as-is, unvalidated (any other type raises ValueError)
+    """
+    if isinstance(target, BBox):
+        return (target.x + target.width / 2, target.y + target.height / 2)
+    elif isinstance(target, tuple):
+        return target
+    elif isinstance(target, dict):
+        # If has width/height, treat the dict as a box and compute its center
+        if "width" in target and "height" in target:
+            x = target.get("x", 0) + target["width"] / 2
+            y = target.get("y", 0) + target["height"] / 2
+            return (x, y)
+        # Otherwise use x/y directly
+        return (target.get("x", 0), target.get("y", 0))
+    else:
+        raise ValueError(f"Invalid target type: {type(target)}")
diff --git a/sentience/backends/browser_use_adapter.py b/sentience/backends/browser_use_adapter.py
index b8b9762..c932cd3 100644
--- a/sentience/backends/browser_use_adapter.py
+++ b/sentience/backends/browser_use_adapter.py
@@ -158,9 +158,7 @@ def page(self) -> Any:
         if hasattr(self._session, "get_current_page"):
             # This is async, but we need sync access for property
             # Caller should use get_page_async() instead
-            raise RuntimeError(
-                "Use await adapter.get_page_async() to get the page"
-            )
+            raise RuntimeError("Use await adapter.get_page_async() to get the page")
         raise RuntimeError("Could not find page in browser-use session")
 
     async def get_page_async(self) -> Any:
diff --git a/sentience/backends/cdp_backend.py b/sentience/backends/cdp_backend.py
index 0768c94..1061e1a 100644
--- a/sentience/backends/cdp_backend.py
+++ b/sentience/backends/cdp_backend.py
@@ -188,9 +188,7 @@ async def call(
         if not object_id:
             # Fallback: evaluate the function directly
             if args:
-                args_json = ", ".join(
-                    repr(a) if isinstance(a, str) else str(a) for a in args
-                )
+                args_json = ", ".join(repr(a) if isinstance(a, str) else str(a) for a in args)
                 expression = f"({function_declaration})({args_json})"
             else:
                 expression = f"({function_declaration})()"
@@ -234,8 +232,12 @@ async def get_layout_metrics(self) -> LayoutMetrics:
         return LayoutMetrics(
             viewport_x=visual_viewport.get("pageX", 0),
             viewport_y=visual_viewport.get("pageY", 0),
-            viewport_width=visual_viewport.get("clientWidth", layout_viewport.get("clientWidth", 0)),
-            viewport_height=visual_viewport.get("clientHeight", layout_viewport.get("clientHeight", 0)),
+            viewport_width=visual_viewport.get(
+                "clientWidth", layout_viewport.get("clientWidth", 0)
+            ),
+            viewport_height=visual_viewport.get(
+                "clientHeight", layout_viewport.get("clientHeight", 0)
+            ),
             content_width=content_size.get("width", 0),
             content_height=content_size.get("height", 0),
             device_scale_factor=visual_viewport.get("scale", 1.0),
@@ -375,8 +377,7 @@ async def wait_ready_state(
             elapsed = time.monotonic() - start
             if elapsed >= timeout_sec:
                 raise TimeoutError(
-                    f"Timed out waiting for document.readyState='{state}' "
-                    f"after {timeout_ms}ms"
+                    f"Timed out waiting for document.readyState='{state}' " f"after {timeout_ms}ms"
                 )
 
             current_state = await self.eval("document.readyState")
diff --git a/sentience/backends/playwright_backend.py b/sentience/backends/playwright_backend.py
new file mode 100644
index 0000000..f5ea8df
--- /dev/null
+++ b/sentience/backends/playwright_backend.py
@@ -0,0 +1,187 @@
+"""
+Playwright backend implementation for BrowserBackendV0 protocol.
+
+This wraps existing SentienceBrowser/AsyncSentienceBrowser to provide
+a unified interface, enabling code that works with both browser-use
+(CDPBackendV0) and native Playwright (PlaywrightBackend).
+
+Usage:
+    from sentience import SentienceBrowserAsync
+    from sentience.backends import PlaywrightBackend, click, snapshot
+
+    browser = SentienceBrowserAsync()
+    await browser.start()
+    await browser.goto("https://example.com")
+
+    # Create backend from existing browser
+    backend = PlaywrightBackend(browser.page)
+
+    # Use backend-agnostic functions
+    snap = await snapshot(backend)
+    await click(backend, element.bbox)
+"""
+
+import asyncio
+import base64
+import time
+from typing import TYPE_CHECKING, Any, Literal
+
+from .protocol_v0 import BrowserBackendV0, LayoutMetrics, ViewportInfo
+
+if TYPE_CHECKING:
+    from playwright.async_api import Page as AsyncPage
+
+
+class PlaywrightBackend:
+    """
+    Playwright-based implementation of BrowserBackendV0.
+
+    Wraps a Playwright async Page to provide the standard backend interface.
+    This enables using backend-agnostic actions with existing SentienceBrowser code.
+    """
+
+    def __init__(self, page: "AsyncPage") -> None:
+        """
+        Initialize Playwright backend.
+
+        Args:
+            page: Playwright async Page object
+        """
+        self._page = page
+        self._cached_viewport: ViewportInfo | None = None  # filled by refresh_page_info()
+
+    @property
+    def page(self) -> "AsyncPage":
+        """Access the underlying Playwright page."""
+        return self._page
+
+    async def refresh_page_info(self) -> ViewportInfo:
+        """Query viewport size, scroll offsets and content size via JS and cache the result."""
+        result = await self._page.evaluate("""
+            (() => ({
+                width: window.innerWidth,
+                height: window.innerHeight,
+                scroll_x: window.scrollX,
+                scroll_y: window.scrollY,
+                content_width: document.documentElement.scrollWidth,
+                content_height: document.documentElement.scrollHeight
+            }))()
+        """)
+
+        self._cached_viewport = ViewportInfo(
+            width=result.get("width", 0),
+            height=result.get("height", 0),
+            scroll_x=result.get("scroll_x", 0),
+            scroll_y=result.get("scroll_y", 0),
+            content_width=result.get("content_width"),
+            content_height=result.get("content_height"),
+        )
+        return self._cached_viewport
+
+    async def eval(self, expression: str) -> Any:
+        """Evaluate JavaScript expression in page context."""
+        return await self._page.evaluate(expression)
+
+    async def call(
+        self,
+        function_declaration: str,
+        args: list[Any] | None = None,
+    ) -> Any:
+        """Call a JS function; page.evaluate takes ONE arg, so the list is spread in-page."""
+        if args:
+            return await self._page.evaluate(f"(a) => ({function_declaration})(...a)", args)
+        return await self._page.evaluate(f"({function_declaration})()")
+
+    async def get_layout_metrics(self) -> LayoutMetrics:
+        """Get page layout metrics."""
+        # Playwright doesn't expose CDP directly in the same way,
+        # so we approximate using JavaScript
+        result = await self._page.evaluate("""
+            (() => ({
+                viewport_x: window.scrollX,
+                viewport_y: window.scrollY,
+                viewport_width: window.innerWidth,
+                viewport_height: window.innerHeight,
+                content_width: document.documentElement.scrollWidth,
+                content_height: document.documentElement.scrollHeight,
+                device_scale_factor: window.devicePixelRatio || 1
+            }))()
+        """)
+
+        return LayoutMetrics(
+            viewport_x=result.get("viewport_x", 0),
+            viewport_y=result.get("viewport_y", 0),
+            viewport_width=result.get("viewport_width", 0),
+            viewport_height=result.get("viewport_height", 0),
+            content_width=result.get("content_width", 0),
+            content_height=result.get("content_height", 0),
+            device_scale_factor=result.get("device_scale_factor", 1.0),
+        )
+
+    async def screenshot_png(self) -> bytes:
+        """Capture viewport screenshot as PNG bytes."""
+        return await self._page.screenshot(type="png")
+
+    async def mouse_move(self, x: float, y: float) -> None:
+        """Move mouse to viewport coordinates."""
+        await self._page.mouse.move(x, y)
+
+    async def mouse_click(
+        self,
+        x: float,
+        y: float,
+        button: Literal["left", "right", "middle"] = "left",
+        click_count: int = 1,
+    ) -> None:
+        """Click at viewport coordinates."""
+        await self._page.mouse.click(x, y, button=button, click_count=click_count)
+
+    async def wheel(
+        self,
+        delta_y: float,
+        x: float | None = None,
+        y: float | None = None,
+    ) -> None:
+        """Scroll by delta_y with the mouse wheel at (x, y), defaulting to viewport center."""
+        # Get viewport center if coordinates not provided
+        if x is None or y is None:
+            if self._cached_viewport is None:
+                await self.refresh_page_info()
+            assert self._cached_viewport is not None
+            x = x if x is not None else self._cached_viewport.width / 2
+            y = y if y is not None else self._cached_viewport.height / 2
+        await self._page.mouse.move(x, y)  # wheel events fire at the pointer position
+        await self._page.mouse.wheel(0, delta_y)
+
+    async def type_text(self, text: str) -> None:
+        """Type text using keyboard input."""
+        await self._page.keyboard.type(text)
+
+    async def wait_ready_state(
+        self,
+        state: Literal["interactive", "complete"] = "interactive",
+        timeout_ms: int = 15000,
+    ) -> None:
+        """Poll document.readyState every 100 ms; raise TimeoutError after timeout_ms."""
+        acceptable_states = {"complete"} if state == "complete" else {"interactive", "complete"}
+
+        start = time.monotonic()
+        timeout_sec = timeout_ms / 1000.0
+
+        while True:
+            elapsed = time.monotonic() - start
+            if elapsed >= timeout_sec:
+                raise TimeoutError(
+                    f"Timed out waiting for document.readyState='{state}' "
+                    f"after {timeout_ms}ms"
+                )
+
+            current_state = await self._page.evaluate("document.readyState")
+            if current_state in acceptable_states:
+                return
+
+            await asyncio.sleep(0.1)
+
+
+# Verify protocol compliance at import time
+assert isinstance(PlaywrightBackend.__new__(PlaywrightBackend), BrowserBackendV0)
diff --git a/sentience/backends/snapshot.py b/sentience/backends/snapshot.py
new file mode 100644
index 0000000..6f11dd9
--- /dev/null
+++ b/sentience/backends/snapshot.py
@@ -0,0 +1,297 @@
+"""
+Backend-agnostic snapshot for browser-use integration.
+
+Takes Sentience snapshots using BrowserBackendV0 protocol,
+enabling element grounding with browser-use or other frameworks.
+
+Usage with browser-use:
+    from sentience.backends import BrowserUseAdapter, snapshot, CachedSnapshot
+
+    adapter = BrowserUseAdapter(session)
+    backend = await adapter.create_backend()
+
+    # Take snapshot
+    snap = await snapshot(backend)
+    print(f"Found {len(snap.elements)} elements")
+
+    # With caching (reuse if fresh)
+    cache = CachedSnapshot(backend, max_age_ms=2000)
+    snap1 = await cache.get()  # Fresh snapshot
+    snap2 = await cache.get()  # Returns cached if < 2s old
+    cache.invalidate()  # Force refresh on next get()
+"""
+
+import time
+from typing import TYPE_CHECKING, Any
+
+from ..models import Snapshot, SnapshotOptions
+
+if TYPE_CHECKING:
+    from .protocol_v0 import BrowserBackendV0
+
+
+class CachedSnapshot:
+    """
+    Snapshot cache with age-based staleness detection.
+
+    Caches snapshots and returns cached version if still fresh.
+    Useful for reducing redundant snapshot calls in action loops.
+
+    Usage:
+        cache = CachedSnapshot(backend, max_age_ms=2000)
+
+        # First call takes fresh snapshot
+        snap1 = await cache.get()
+
+        # Second call returns cached if < 2s old
+        snap2 = await cache.get()
+
+        # Invalidate after actions that change DOM
+        await click(backend, element.bbox)
+        cache.invalidate()
+
+        # Next get() will take fresh snapshot
+        snap3 = await cache.get()
+    """
+
+    def __init__(
+        self,
+        backend: "BrowserBackendV0",
+        max_age_ms: int = 2000,
+        options: SnapshotOptions | None = None,
+    ) -> None:
+        """
+        Initialize cached snapshot.
+
+        Args:
+            backend: BrowserBackendV0 implementation
+            max_age_ms: Maximum cache age in milliseconds (default: 2000)
+            options: Default snapshot options
+        """
+        self._backend = backend
+        self._max_age_ms = max_age_ms
+        self._options = options
+        self._cached: Snapshot | None = None
+        self._cached_at: float = 0  # time.time() of the last refresh, in seconds
+        self._cached_url: str | None = None  # recorded on refresh; not read by _is_stale()
+
+    async def get(
+        self,
+        options: SnapshotOptions | None = None,
+        force_refresh: bool = False,
+    ) -> Snapshot:
+        """
+        Get snapshot, using cache if fresh.
+
+        Args:
+            options: Override default options (only used when a refresh actually happens)
+            force_refresh: If True, always take fresh snapshot
+
+        Returns:
+            Snapshot (cached or fresh)
+        """
+        # Check if we need to refresh
+        if force_refresh or self._is_stale():
+            self._cached = await snapshot(
+                self._backend,
+                options or self._options,
+            )
+            self._cached_at = time.time()
+            self._cached_url = self._cached.url
+
+        assert self._cached is not None
+        return self._cached
+
+    def invalidate(self) -> None:
+        """
+        Invalidate cache, forcing refresh on next get().
+
+        Call this after actions that modify the DOM.
+        """
+        self._cached = None
+        self._cached_at = 0
+        self._cached_url = None
+
+    def _is_stale(self) -> bool:
+        """Return True if nothing is cached or the cache is older than max_age_ms."""
+        if self._cached is None:
+            return True
+
+        # Check age (wall-clock based, so system clock changes affect it)
+        age_ms = (time.time() - self._cached_at) * 1000
+        if age_ms > self._max_age_ms:
+            return True
+
+        return False
+
+    @property
+    def is_cached(self) -> bool:
+        """Check if a cached snapshot exists (regardless of its age)."""
+        return self._cached is not None
+
+    @property
+    def age_ms(self) -> float:
+        """Get age of cached snapshot in milliseconds (infinity if nothing is cached)."""
+        if self._cached is None:
+            return float("inf")
+        return (time.time() - self._cached_at) * 1000
+
+
+async def snapshot(
+    backend: "BrowserBackendV0",
+    options: SnapshotOptions | None = None,
+) -> Snapshot:
+    """
+    Take a Sentience snapshot using the backend protocol.
+
+    This function calls window.sentience.snapshot() via the backend's eval(),
+    enabling snapshot collection with any BrowserBackendV0 implementation.
+
+    Requires:
+        - Sentience extension loaded in browser (via --load-extension)
+        - Extension injected window.sentience API
+
+    Args:
+        backend: BrowserBackendV0 implementation (CDPBackendV0, PlaywrightBackend, etc.)
+        options: Snapshot options (limit, filter, screenshot, etc.)
+
+    Returns:
+        Snapshot with elements, viewport, and optional screenshot
+
+    Example:
+        from sentience.backends import BrowserUseAdapter
+        from sentience.backends.snapshot import snapshot
+
+        adapter = BrowserUseAdapter(session)
+        backend = await adapter.create_backend()
+
+        # Basic snapshot
+        snap = await snapshot(backend)
+
+        # With options
+        snap = await snapshot(backend, SnapshotOptions(
+            limit=100,
+            screenshot=True
+        ))
+    """
+    if options is None:
+        options = SnapshotOptions()
+
+    # Wait for extension injection (raises RuntimeError if not ready within 5 s)
+    await _wait_for_extension(backend, timeout_ms=5000)
+
+    # Build options dict for extension API
+    ext_options = _build_extension_options(options)
+
+    # Call extension's snapshot function with the options inlined as a JSON literal
+    result = await backend.eval(f"""
+        (() => {{
+            const options = {_json_serialize(ext_options)};
+            return window.sentience.snapshot(options);
+        }})()
+    """)
+
+    if result is None:
+        raise RuntimeError(
+            "window.sentience.snapshot() returned null. "
+            "Is the Sentience extension loaded and injected?"
+        )
+
+    # Show overlay if requested
+    if options.show_overlay:
+        raw_elements = result.get("raw_elements", [])
+        if raw_elements:
+            await backend.eval(f"""
+                (() => {{
+                    if (window.sentience && window.sentience.showOverlay) {{
+                        window.sentience.showOverlay({_json_serialize(raw_elements)}, null);
+                    }}
+                }})()
+            """)
+
+    # Build and return Snapshot from the raw result dict
+    return Snapshot(**result)
+
+
+async def _wait_for_extension(
+    backend: "BrowserBackendV0",
+    timeout_ms: int = 5000,
+) -> None:
+    """
+    Poll every 100 ms until window.sentience.snapshot is a function.
+
+    Args:
+        backend: BrowserBackendV0 implementation
+        timeout_ms: Maximum wait time in milliseconds
+
+    Raises:
+        RuntimeError: If extension not injected within timeout
+    """
+    import asyncio
+
+    start = time.monotonic()
+    timeout_sec = timeout_ms / 1000.0
+
+    while True:
+        elapsed = time.monotonic() - start
+        if elapsed >= timeout_sec:
+            # Timed out: gather best-effort diagnostics for the error message
+            try:
+                diag = await backend.eval("""
+                    (() => ({
+                        sentience_defined: typeof window.sentience !== 'undefined',
+                        sentience_snapshot: typeof window.sentience?.snapshot === 'function',
+                        url: window.location.href
+                    }))()
+                """)
+            except Exception:
+                diag = {"error": "Could not gather diagnostics"}
+
+            raise RuntimeError(
+                f"Sentience extension failed to inject window.sentience API "
+                f"within {timeout_ms}ms. Diagnostics: {diag}"
+            )
+
+        # Check if extension is ready
+        try:
+            ready = await backend.eval(
+                "typeof window.sentience !== 'undefined' && "
+                "typeof window.sentience.snapshot === 'function'"
+            )
+            if ready:
+                return
+        except Exception:
+            pass  # eval errors are ignored (presumably transient, e.g. mid-navigation); keep polling
+
+        await asyncio.sleep(0.1)
+
+
+def _build_extension_options(options: SnapshotOptions) -> dict[str, Any]:
+    """Build the options dict for the extension call, omitting values left at their defaults."""
+    ext_options: dict[str, Any] = {}
+
+    # Screenshot config (objects exposing model_dump() are converted to plain dicts)
+    if options.screenshot is not False:
+        if hasattr(options.screenshot, "model_dump"):
+            ext_options["screenshot"] = options.screenshot.model_dump()
+        else:
+            ext_options["screenshot"] = options.screenshot
+
+    # Limit (only sent when it differs from 50, presumably the extension's default)
+    if options.limit != 50:
+        ext_options["limit"] = options.limit
+
+    # Filter (same model_dump() handling as screenshot)
+    if options.filter is not None:
+        if hasattr(options.filter, "model_dump"):
+            ext_options["filter"] = options.filter.model_dump()
+        else:
+            ext_options["filter"] = options.filter
+
+    return ext_options
+
+
+def _json_serialize(obj: Any) -> str:
+    """Serialize obj with json.dumps so it can be interpolated into a JS expression."""
+    import json
+    return json.dumps(obj)
diff --git a/tests/test_backends.py b/tests/test_backends.py
index 9c82363..a1c7d90 100644
--- a/tests/test_backends.py
+++ b/tests/test_backends.py
@@ -6,6 +6,7 @@
 """
 
 import asyncio
+import time
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock
 
@@ -15,11 +16,18 @@
     BrowserBackendV0,
     BrowserUseAdapter,
     BrowserUseCDPTransport,
+    CachedSnapshot,
     CDPBackendV0,
     CDPTransport,
     LayoutMetrics,
+    PlaywrightBackend,
     ViewportInfo,
+    click,
+    scroll,
+    type_text,
+    wait_for_stable,
 )
+from sentience.models import ActionResult, BBox
 
 
 class MockCDPTransport:
@@ -150,9 +158,7 @@ async def test_refresh_page_info(
         assert info.scroll_y == 100
 
     @pytest.mark.asyncio
-    async def test_eval(
-        self, backend: CDPBackendV0, transport: MockCDPTransport
-    ) -> None:
+    async def test_eval(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None:
         """Test eval executes JavaScript and returns value."""
         transport.set_response(
             "Runtime.evaluate",
@@ -167,9 +173,7 @@ async def test_eval(
         assert transport.calls[0][1]["expression"] == "1 + 1"
 
     @pytest.mark.asyncio
-    async def test_eval_exception(
-        self, backend: CDPBackendV0, transport: MockCDPTransport
-    ) -> None:
+    async def test_eval_exception(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None:
         """Test eval raises on JavaScript exception."""
         transport.set_response(
             "Runtime.evaluate",
@@ -211,9 +215,7 @@ async def test_get_layout_metrics(
         assert metrics.content_height == 5000
 
     @pytest.mark.asyncio
-    async def test_screenshot_png(
-        self, backend: CDPBackendV0, transport: MockCDPTransport
-    ) -> None:
+    async def test_screenshot_png(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None:
         """Test screenshot_png returns PNG bytes."""
         import base64
 
@@ -230,9 +232,7 @@ async def test_screenshot_png(
         assert result.startswith(b"\x89PNG")
 
     @pytest.mark.asyncio
-    async def test_mouse_move(
-        self, backend: CDPBackendV0, transport: MockCDPTransport
-    ) -> None:
+    async def test_mouse_move(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None:
         """Test mouse_move dispatches mouseMoved event."""
         await backend.mouse_move(100, 200)
 
@@ -244,9 +244,7 @@ async def test_mouse_move(
         assert params["y"] == 200
 
     @pytest.mark.asyncio
-    async def test_mouse_click(
-        self, backend: CDPBackendV0, transport: MockCDPTransport
-    ) -> None:
+    async def test_mouse_click(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None:
         """Test mouse_click dispatches press and release events."""
         await backend.mouse_click(100, 200)
 
@@ -276,9 +274,7 @@ async def test_mouse_click_right_button(
         assert params["button"] == "right"
 
     @pytest.mark.asyncio
-    async def test_wheel(
-        self, backend: CDPBackendV0, transport: MockCDPTransport
-    ) -> None:
+    async def test_wheel(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None:
         """Test wheel dispatches mouseWheel event."""
         # First set up viewport info for default coordinates
         transport.set_response(
@@ -304,9 +300,7 @@ async def test_wheel(
         assert params["y"] == 300
 
     @pytest.mark.asyncio
-    async def test_type_text(
-        self, backend: CDPBackendV0, transport: MockCDPTransport
-    ) -> None:
+    async def test_type_text(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None:
         """Test type_text dispatches key events for each character."""
         await backend.type_text("Hi")
 
@@ -434,9 +428,7 @@ async def test_create_backend(self) -> None:
 
         # Create mock browser session
         mock_session = MagicMock()
-        mock_session.get_or_create_cdp_session = AsyncMock(
-            return_value=mock_cdp_session
-        )
+        mock_session.get_or_create_cdp_session = AsyncMock(return_value=mock_cdp_session)
 
         adapter = BrowserUseAdapter(mock_session)
         backend = await adapter.create_backend()
@@ -452,9 +444,7 @@ async def test_create_backend_caches_result(self) -> None:
         mock_cdp_session.session_id = "session-123"
 
         mock_session = MagicMock()
-        mock_session.get_or_create_cdp_session = AsyncMock(
-            return_value=mock_cdp_session
-        )
+        mock_session.get_or_create_cdp_session = AsyncMock(return_value=mock_cdp_session)
 
         adapter = BrowserUseAdapter(mock_session)
 
@@ -486,3 +476,360 @@ async def test_get_page_async(self) -> None:
         page = await adapter.get_page_async()
 
         assert page is mock_page
+
+
+class TestBackendAgnosticActions:
+    """Tests for backend-agnostic action functions."""
+
+    @pytest.fixture
+    def transport(self) -> MockCDPTransport:
+        """Create mock transport."""
+        return MockCDPTransport()
+
+    @pytest.fixture
+    def backend(self, transport: MockCDPTransport) -> CDPBackendV0:
+        """Create backend with mock transport."""
+        return CDPBackendV0(transport)
+
+    @pytest.mark.asyncio
+    async def test_click_with_tuple(
+        self, backend: CDPBackendV0, transport: MockCDPTransport
+    ) -> None:
+        """Test click with (x, y) tuple."""
+        result = await click(backend, (100, 200))
+
+        assert isinstance(result, ActionResult)
+        assert result.success is True
+
+        # Should have mouse move + mouse click (press + release)
+        mouse_events = [c for c in transport.calls if c[0] == "Input.dispatchMouseEvent"]
+        assert len(mouse_events) == 3  # move, press, release
+
+    @pytest.mark.asyncio
+    async def test_click_with_bbox(
+        self, backend: CDPBackendV0, transport: MockCDPTransport
+    ) -> None:
+        """Test click with BBox (clicks center)."""
+        bbox = BBox(x=100, y=200, width=50, height=30)
+        result = await click(backend, bbox)
+
+        assert result.success is True
+
+        # Find the click event
+        press_events = [
+            c
+            for c in transport.calls
+            if c[0] == "Input.dispatchMouseEvent" and c[1]["type"] == "mousePressed"
+        ]
+        assert len(press_events) == 1
+        # Should click at center: (100 + 25, 200 + 15) = (125, 215)
+        assert press_events[0][1]["x"] == 125
+        assert press_events[0][1]["y"] == 215
+
+    @pytest.mark.asyncio
+    async def test_click_with_dict(
+        self, backend: CDPBackendV0, transport: MockCDPTransport
+    ) -> None:
+        """Test click with dict containing x, y."""
+        result = await click(backend, {"x": 150, "y": 250})
+
+        assert result.success is True
+
+    @pytest.mark.asyncio
+    async def test_click_double(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None:
+        """Test double-click."""
+        result = await click(backend, (100, 200), click_count=2)
+
+        assert result.success is True
+
+        # Check clickCount parameter
+        press_events = [
+            c
+            for c in transport.calls
+            if c[0] == "Input.dispatchMouseEvent" and c[1]["type"] == "mousePressed"
+        ]
+        assert press_events[0][1]["clickCount"] == 2
+
+    @pytest.mark.asyncio
+    async def test_type_text_simple(
+        self, backend: CDPBackendV0, transport: MockCDPTransport
+    ) -> None:
+        """Test typing text."""
+        result = await type_text(backend, "Hi")
+
+        assert isinstance(result, ActionResult)
+        assert result.success is True
+
+        # Check key events were dispatched
+        key_events = [c for c in transport.calls if c[0] == "Input.dispatchKeyEvent"]
+        assert len(key_events) == 6  # 2 chars * 3 events each
+
+    @pytest.mark.asyncio
+    async def test_type_text_with_target(
+        self, backend: CDPBackendV0, transport: MockCDPTransport
+    ) -> None:
+        """Test typing text with click target."""
+        result = await type_text(backend, "test", target=(100, 200))
+
+        assert result.success is True
+
+        # Should have click + key events
+        mouse_events = [c for c in transport.calls if c[0] == "Input.dispatchMouseEvent"]
+        key_events = [c for c in transport.calls if c[0] == "Input.dispatchKeyEvent"]
+        assert len(mouse_events) >= 2  # At least press + release
+        assert len(key_events) == 12  # 4 chars * 3 events
+
+    @pytest.mark.asyncio
+    async def test_scroll_down(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None:
+        """Test scrolling down."""
+        # Set up viewport for default coordinates
+        transport.set_response(
+            "Runtime.evaluate",
+            {
+                "result": {
+                    "type": "object",
+                    "value": {"width": 1920, "height": 1080},
+                }
+            },
+        )
+
+        result = await scroll(backend, delta_y=300)
+
+        assert result.success is True
+
+        wheel_events = [
+            c
+            for c in transport.calls
+            if c[0] == "Input.dispatchMouseEvent" and c[1].get("type") == "mouseWheel"
+        ]
+        assert len(wheel_events) == 1
+        assert wheel_events[0][1]["deltaY"] == 300
+
+    @pytest.mark.asyncio
+    async def test_scroll_at_position(
+        self, backend: CDPBackendV0, transport: MockCDPTransport
+    ) -> None:
+        """Test scrolling at specific position."""
+        result = await scroll(backend, delta_y=200, target=(500, 300))
+
+        assert result.success is True
+
+        wheel_events = [
+            c
+            for c in transport.calls
+            if c[0] == "Input.dispatchMouseEvent" and c[1].get("type") == "mouseWheel"
+        ]
+        assert wheel_events[0][1]["x"] == 500
+        assert wheel_events[0][1]["y"] == 300
+
+    @pytest.mark.asyncio
+    async def test_wait_for_stable_success(
+        self, backend: CDPBackendV0, transport: MockCDPTransport
+    ) -> None:
+        """Test wait_for_stable with immediate success."""
+        transport.set_response(
+            "Runtime.evaluate",
+            {"result": {"type": "string", "value": "complete"}},
+        )
+
+        result = await wait_for_stable(backend, state="complete", timeout_ms=1000)
+
+        assert result.success is True
+
+    @pytest.mark.asyncio
+    async def test_wait_for_stable_timeout(
+        self, backend: CDPBackendV0, transport: MockCDPTransport
+    ) -> None:
+        """Test wait_for_stable timeout."""
+        transport.set_response(
+            "Runtime.evaluate",
+            {"result": {"type": "string", "value": "loading"}},
+        )
+
+        result = await wait_for_stable(backend, state="complete", timeout_ms=200)
+
+        assert result.success is False
+        assert result.error["code"] == "timeout"
+
+
+class TestPlaywrightBackend:
+    """Tests for PlaywrightBackend wrapper."""
+
+    def test_implements_protocol(self) -> None:
+        """Verify PlaywrightBackend implements BrowserBackendV0."""
+        mock_page = MagicMock()
+        backend = PlaywrightBackend(mock_page)
+        assert isinstance(backend, BrowserBackendV0)
+
+    def test_page_property(self) -> None:
+        """Test page property returns underlying page."""
+        mock_page = MagicMock()
+        backend = PlaywrightBackend(mock_page)
+        assert backend.page is mock_page
+
+    @pytest.mark.asyncio
+    async def test_refresh_page_info(self) -> None:
+        """Test refresh_page_info calls page.evaluate."""
+        mock_page = AsyncMock()
+        mock_page.evaluate = AsyncMock(
+            return_value={
+                "width": 1920,
+                "height": 1080,
+                "scroll_x": 0,
+                "scroll_y": 100,
+                "content_width": 1920,
+                "content_height": 5000,
+            }
+        )
+
+        backend = PlaywrightBackend(mock_page)
+        info = await backend.refresh_page_info()
+
+        assert isinstance(info, ViewportInfo)
+        assert info.width == 1920
+        assert info.scroll_y == 100
+
+    @pytest.mark.asyncio
+    async def test_eval(self) -> None:
+        """Test eval calls page.evaluate."""
+        mock_page = AsyncMock()
+        mock_page.evaluate = AsyncMock(return_value=42)
+
+        backend = PlaywrightBackend(mock_page)
+        result = await backend.eval("1 + 1")
+
+        assert result == 42
+
+    @pytest.mark.asyncio
+    async def test_mouse_click(self) -> None:
+        """Test mouse_click calls page.mouse.click."""
+        mock_mouse = AsyncMock()
+        mock_page = MagicMock()
+        mock_page.mouse = mock_mouse
+
+        backend = PlaywrightBackend(mock_page)
+        await backend.mouse_click(100, 200, button="left", click_count=1)
+
+        mock_mouse.click.assert_called_once_with(100, 200, button="left", click_count=1)
+
+    @pytest.mark.asyncio
+    async def test_type_text(self) -> None:
+        """Test type_text calls page.keyboard.type."""
+        mock_keyboard = AsyncMock()
+        mock_page = MagicMock()
+        mock_page.keyboard = mock_keyboard
+
+        backend = PlaywrightBackend(mock_page)
+        await backend.type_text("Hello")
+
+        mock_keyboard.type.assert_called_once_with("Hello")
+
+    @pytest.mark.asyncio
+    async def test_screenshot_png(self) -> None:
+        """Test screenshot_png calls page.screenshot."""
+        mock_page = AsyncMock()
+        mock_page.screenshot = AsyncMock(return_value=b"\x89PNG\r\n\x1a\n")
+
+        backend = PlaywrightBackend(mock_page)
+        result = await backend.screenshot_png()
+
+        assert result.startswith(b"\x89PNG")
+        mock_page.screenshot.assert_called_once_with(type="png")
+
+
+class TestCachedSnapshot:
+    """Tests for CachedSnapshot caching behavior."""
+
+    @pytest.fixture
+    def mock_backend(self) -> MagicMock:
+        """Create mock backend."""
+        backend = MagicMock()
+        backend.eval = AsyncMock()
+        return backend
+
+    def test_initial_state(self, mock_backend: MagicMock) -> None:
+        """Test initial cache state."""
+        cache = CachedSnapshot(mock_backend, max_age_ms=2000)
+
+        assert cache.is_cached is False
+        assert cache.age_ms == float("inf")
+
+    def test_invalidate(self, mock_backend: MagicMock) -> None:
+        """Test cache invalidation."""
+        cache = CachedSnapshot(mock_backend)
+        cache._cached = MagicMock()  # Simulate cached snapshot
+        cache._cached_at = time.time()
+
+        assert cache.is_cached is True
+
+        cache.invalidate()
+
+        assert cache.is_cached is False
+        assert cache.age_ms == float("inf")
+
+    def test_staleness_by_age(self, mock_backend: MagicMock) -> None:
+        """Test cache staleness detection."""
+        cache = CachedSnapshot(mock_backend, max_age_ms=100)
+
+        # Simulate old cache
+        cache._cached = MagicMock()
+        cache._cached_at = time.time() - 0.2  # 200ms ago
+
+        assert cache._is_stale() is True
+
+    def test_fresh_cache(self, mock_backend: MagicMock) -> None:
+        """Test fresh cache detection."""
+        cache = CachedSnapshot(mock_backend, max_age_ms=2000)
+
+        # Simulate fresh cache
+        cache._cached = MagicMock()
+        cache._cached_at = time.time()
+
+        assert cache._is_stale() is False
+
+
+class TestCoordinateResolution:
+    """Test coordinate resolution in actions."""
+
+    @pytest.mark.asyncio
+    async def test_bbox_center_calculation(self) -> None:
+        """Test BBox center calculation."""
+        from sentience.backends.actions import _resolve_coordinates
+
+        bbox = BBox(x=100, y=200, width=50, height=30)
+        x, y = _resolve_coordinates(bbox)
+
+        assert x == 125  # 100 + 50/2
+        assert y == 215  # 200 + 30/2
+
+    @pytest.mark.asyncio
+    async def test_dict_with_dimensions(self) -> None:
+        """Test dict with width/height computes center."""
+        from sentience.backends.actions import _resolve_coordinates
+
+        target = {"x": 100, "y": 200, "width": 50, "height": 30}
+        x, y = _resolve_coordinates(target)
+
+        assert x == 125
+        assert y == 215
+
+    @pytest.mark.asyncio
+    async def test_dict_without_dimensions(self) -> None:
+        """Test dict without width/height uses x/y directly."""
+        from sentience.backends.actions import _resolve_coordinates
+
+        target = {"x": 150, "y": 250}
+        x, y = _resolve_coordinates(target)
+
+        assert x == 150
+        assert y == 250
+
+    @pytest.mark.asyncio
+    async def test_tuple_passthrough(self) -> None:
+        """Test tuple passes through unchanged."""
+        from sentience.backends.actions import _resolve_coordinates
+
+        x, y = _resolve_coordinates((300, 400))
+
+        assert x == 300
+        assert y == 400

From 6c1405c3b1016fef2d30a7e9985b217d9df12f76 Mon Sep 17 00:00:00 2001
From: SentienceDev <dev@sentienceapi.com>
Date: Thu, 8 Jan 2026 17:35:20 -0800
Subject: [PATCH 2/3] Phase 3: polish

---
 examples/browser_use_integration.py      | 210 ++++++++++++++++++++++
 sentience/backends/__init__.py           |  85 ++++++++-
 sentience/backends/actions.py            |   6 +-
 sentience/backends/exceptions.py         | 211 +++++++++++++++++++++++
 sentience/backends/playwright_backend.py |  15 +-
 sentience/backends/snapshot.py           |  43 +++--
 tests/test_backends.py                   | 123 +++++++++++++
 7 files changed, 662 insertions(+), 31 deletions(-)
 create mode 100644 examples/browser_use_integration.py
 create mode 100644 sentience/backends/exceptions.py

diff --git a/examples/browser_use_integration.py b/examples/browser_use_integration.py
new file mode 100644
index 0000000..9167c5f
--- /dev/null
+++ b/examples/browser_use_integration.py
@@ -0,0 +1,210 @@
+"""
+Example: Using Sentience with browser-use for element grounding.
+
+This example demonstrates how to integrate Sentience's semantic element
+detection with browser-use, enabling accurate click/type/scroll operations
+using Sentience's snapshot-based grounding instead of coordinate estimation.
+
+Requirements:
+    pip install browser-use sentienceapi
+
+Usage:
+    python examples/browser_use_integration.py
+"""
+
+import asyncio
+
+# browser-use imports (install via: pip install browser-use)
+# from browser_use import BrowserSession, BrowserProfile
+
+# Sentience imports
+from sentience import (
+    find,
+    get_extension_dir,
+    query,
+)
+from sentience.backends import (
+    BrowserUseAdapter,
+    CachedSnapshot,
+    ExtensionNotLoadedError,
+    click,
+    scroll,
+    snapshot,
+    type_text,
+)
+
+
+async def main() -> None:
+    """
+    Demo: Search on Google using Sentience grounding with browser-use.
+
+    This example shows the full workflow:
+    1. Launch browser-use with Sentience extension loaded
+    2. Create a Sentience backend adapter
+    3. Take snapshots and interact with elements using semantic queries
+    """
+
+    # =========================================================================
+    # STEP 1: Setup browser-use with Sentience extension
+    # =========================================================================
+    #
+    # The Sentience extension must be loaded for element grounding to work.
+    # Use get_extension_dir() to get the path to the bundled extension.
+    #
+    # Uncomment the following when running with browser-use installed:
+
+    # extension_path = get_extension_dir()
+    # print(f"Loading Sentience extension from: {extension_path}")
+    #
+    # profile = BrowserProfile(
+    #     args=[
+    #         f"--load-extension={extension_path}",
+    #         "--disable-extensions-except=" + extension_path,
+    #     ],
+    # )
+    # session = BrowserSession(browser_profile=profile)
+    # await session.start()
+
+    # =========================================================================
+    # STEP 2: Create Sentience backend adapter
+    # =========================================================================
+    #
+    # The adapter bridges browser-use's CDP client to Sentience's backend protocol.
+    #
+    # adapter = BrowserUseAdapter(session)
+    # backend = await adapter.create_backend()
+
+    # =========================================================================
+    # STEP 3: Navigate and take snapshots
+    # =========================================================================
+    #
+    # await session.navigate("https://www.google.com")
+    #
+    # # Take a snapshot - this uses the Sentience extension's element detection
+    # try:
+    #     snap = await snapshot(backend)
+    #     print(f"Found {len(snap.elements)} elements")
+    # except ExtensionNotLoadedError as e:
+    #     print(f"Extension not loaded: {e}")
+    #     print("Make sure the browser was launched with --load-extension flag")
+    #     return
+
+    # =========================================================================
+    # STEP 4: Find and interact with elements using semantic queries
+    # =========================================================================
+    #
+    # Sentience provides powerful element selectors:
+    # - Role-based: 'role=textbox', 'role=button'
+    # - Name-based: 'role=button[name="Submit"]'
+    # - Text-based: 'text=Search'
+    #
+    # # Find the search input
+    # search_input = find(snap, 'role=textbox[name*="Search"]')
+    # if search_input:
+    #     # Click on the search input (uses center of bounding box)
+    #     await click(backend, search_input.bbox)
+    #
+    #     # Type search query
+    #     await type_text(backend, "Sentience AI browser automation")
+    #     print("Typed search query")
+
+    # =========================================================================
+    # STEP 5: Using cached snapshots for efficiency
+    # =========================================================================
+    #
+    # Taking snapshots has overhead. Use CachedSnapshot to reuse recent snapshots:
+    #
+    # cache = CachedSnapshot(backend, max_age_ms=2000)
+    #
+    # # First call takes fresh snapshot
+    # snap1 = await cache.get()
+    #
+    # # Second call returns cached version if less than 2 seconds old
+    # snap2 = await cache.get()
+    #
+    # # After actions that modify DOM, invalidate the cache
+    # await click(backend, some_element.bbox)
+    # cache.invalidate()  # Next get() will take fresh snapshot
+
+    # =========================================================================
+    # STEP 6: Scrolling the page and scrollable containers
+    # =========================================================================
+    #
+    # # Scroll down by 500 pixels
+    # await scroll(backend, delta_y=500)
+    #
+    # # Scroll at a specific position (useful for scrollable containers)
+    # await scroll(backend, delta_y=300, target=(400, 500))
+
+    # =========================================================================
+    # STEP 7: Advanced element queries
+    # =========================================================================
+    #
+    # # Find all buttons
+    # buttons = query(snap, 'role=button')
+    # print(f"Found {len(buttons)} buttons")
+    #
+    # # Find by partial name match
+    # links = query(snap, 'role=link[name*="Learn"]')
+    #
+    # # Find by exact name
+    # submit_btn = find(snap, 'role=button[name="Submit"]')
+
+    # =========================================================================
+    # STEP 8: Error handling
+    # =========================================================================
+    #
+    # Sentience provides specific exceptions for common errors:
+    #
+    # from sentience.backends import (
+    #     ExtensionNotLoadedError,  # Extension not loaded in browser
+    #     SnapshotError,            # Snapshot failed
+    #     ActionError,              # Click/type/scroll failed
+    # )
+    #
+    # try:
+    #     snap = await snapshot(backend)
+    # except ExtensionNotLoadedError as e:
+    #     # The error message includes fix suggestions
+    #     print(f"Fix: {e}")
+
+    # =========================================================================
+    # CLEANUP
+    # =========================================================================
+    #
+    # await session.stop()
+
+    print("=" * 60)
+    print("browser-use + Sentience Integration Example")
+    print("=" * 60)
+    print()
+    print("This example demonstrates the integration pattern.")
+    print("To run with a real browser, uncomment the code sections above")
+    print("and install browser-use: pip install browser-use")
+    print()
+    print("Key imports:")
+    print("  from sentience import get_extension_dir, find, query")
+    print("  from sentience.backends import (")
+    print("      BrowserUseAdapter, snapshot, click, type_text, scroll")
+    print("  )")
+    print()
+    print("Extension path:", get_extension_dir())
+
+
+async def full_example() -> None:
+    """
+    Print the steps needed to run this example against a real browser.
+
+    Placeholder only: it prints instructions and runs no browser-use code.
+    """
+    # Import browser-use (uncomment when installed)
+    # from browser_use import BrowserSession, BrowserProfile
+
+    print("To run the full example:")
+    print("1. Install browser-use: pip install browser-use")
+    print("2. Uncomment the imports in this function")
+    print("3. Run: python examples/browser_use_integration.py")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/sentience/backends/__init__.py b/sentience/backends/__init__.py
index 0c7d7f3..97601c6 100644
--- a/sentience/backends/__init__.py
+++ b/sentience/backends/__init__.py
@@ -5,13 +5,28 @@
 Sentience actions (click, type, scroll) to work with different browser
 automation frameworks.
 
-Supported backends:
-- PlaywrightBackend: Default backend using Playwright (existing SentienceBrowser)
-- CDPBackendV0: CDP-based backend for browser-use integration
+Supported Backends
+------------------
+
+**PlaywrightBackend**
+    Wraps Playwright Page objects. Use this when integrating with existing
+    SentienceBrowser or Playwright-based code.
+
+**CDPBackendV0**
+    Low-level CDP (Chrome DevTools Protocol) backend. Use this when you have
+    direct access to a CDP client and session.
+
+**BrowserUseAdapter**
+    High-level adapter for browser-use framework. Automatically creates a
+    CDPBackendV0 from a BrowserSession.
+
+Quick Start with browser-use
+----------------------------
+
+.. code-block:: python
 
-For browser-use integration:
     from browser_use import BrowserSession, BrowserProfile
-    from sentience import get_extension_dir
+    from sentience import get_extension_dir, find
     from sentience.backends import BrowserUseAdapter, snapshot, click, type_text
 
     # Setup browser-use with Sentience extension
@@ -23,15 +38,63 @@
     adapter = BrowserUseAdapter(session)
     backend = await adapter.create_backend()
 
-    # Take snapshot and interact
+    # Take snapshot and interact with elements
     snap = await snapshot(backend)
-    element = find(snap, 'role=button[name="Submit"]')
+    search_box = find(snap, 'role=textbox[name*="Search"]')
+    await click(backend, search_box.bbox)
+    await type_text(backend, "Sentience AI")
+
+Snapshot Caching
+----------------
+
+Use CachedSnapshot to reduce redundant snapshot calls in action loops:
+
+.. code-block:: python
+
+    from sentience.backends import CachedSnapshot
+
+    cache = CachedSnapshot(backend, max_age_ms=2000)
+
+    snap1 = await cache.get()  # Takes fresh snapshot
+    snap2 = await cache.get()  # Returns cached if < 2s old
+
     await click(backend, element.bbox)
+    cache.invalidate()  # Force refresh on next get()
+
+Error Handling
+--------------
+
+The module provides specific exceptions for common failure modes:
+
+- ``ExtensionNotLoadedError``: Sentience extension is not loaded in the browser
+- ``SnapshotError``: window.sentience.snapshot() failed
+- ``ActionError``: Click/type/scroll operation failed
+
+All exceptions inherit from ``SentienceBackendError`` and include helpful
+fix suggestions in their error messages.
+
+.. code-block:: python
+
+    from sentience.backends import ExtensionNotLoadedError, snapshot
+
+    try:
+        snap = await snapshot(backend)
+    except ExtensionNotLoadedError as e:
+        print(f"Fix suggestion: {e}")
 """
 
 from .actions import click, scroll, scroll_to_element, type_text, wait_for_stable
 from .browser_use_adapter import BrowserUseAdapter, BrowserUseCDPTransport
 from .cdp_backend import CDPBackendV0, CDPTransport
+from .exceptions import (
+    ActionError,
+    BackendEvalError,
+    ExtensionDiagnostics,
+    ExtensionInjectionError,
+    ExtensionNotLoadedError,
+    SentienceBackendError,
+    SnapshotError,
+)
 from .playwright_backend import PlaywrightBackend
 from .protocol_v0 import BrowserBackendV0, LayoutMetrics, ViewportInfo
 from .snapshot import CachedSnapshot, snapshot
@@ -58,4 +121,12 @@
     "scroll",
     "scroll_to_element",
     "wait_for_stable",
+    # Exceptions
+    "SentienceBackendError",
+    "ExtensionNotLoadedError",
+    "ExtensionInjectionError",
+    "ExtensionDiagnostics",
+    "BackendEvalError",
+    "SnapshotError",
+    "ActionError",
 ]
diff --git a/sentience/backends/actions.py b/sentience/backends/actions.py
index c987d64..67ec479 100644
--- a/sentience/backends/actions.py
+++ b/sentience/backends/actions.py
@@ -226,7 +226,8 @@ async def scroll_to_element(
     start_time = time.time()
 
     try:
-        scrolled = await backend.eval(f"""
+        scrolled = await backend.eval(
+            f"""
             (() => {{
                 const el = window.sentience_registry && window.sentience_registry[{element_id}];
                 if (el && el.scrollIntoView) {{
@@ -239,7 +240,8 @@ async def scroll_to_element(
                 }}
                 return false;
             }})()
-        """)
+        """
+        )
 
         # Wait for scroll animation
         wait_time = 0.3 if behavior == "smooth" else 0.05
diff --git a/sentience/backends/exceptions.py b/sentience/backends/exceptions.py
new file mode 100644
index 0000000..a1d176c
--- /dev/null
+++ b/sentience/backends/exceptions.py
@@ -0,0 +1,211 @@
+"""
+Custom exceptions for Sentience backends.
+
+These exceptions provide clear, actionable error messages when things go wrong
+during browser-use integration or backend operations.
+"""
+
+from dataclasses import dataclass
+from typing import Any
+
+
+class SentienceBackendError(Exception):
+    """Common base class; catch it to handle any Sentience backend error."""
+
+    pass
+
+
+@dataclass
+class ExtensionDiagnostics:
+    """Page-side facts (window.sentience presence, URL) gathered when extension loading fails."""
+
+    sentience_defined: bool = False  # window.sentience exists on the page
+    sentience_snapshot: bool = False  # window.sentience.snapshot is a function
+    url: str = ""  # window.location.href at collection time
+    error: str | None = None  # set when the diagnostics themselves could not be gathered
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "ExtensionDiagnostics":
+        """Build from a browser-eval dict; missing keys get defaults, extra keys are ignored."""
+        return cls(
+            sentience_defined=data.get("sentience_defined", False),
+            sentience_snapshot=data.get("sentience_snapshot", False),
+            url=data.get("url", ""),
+            error=data.get("error"),
+        )
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the four fields as a plain dict keyed by field name."""
+        return {
+            "sentience_defined": self.sentience_defined,
+            "sentience_snapshot": self.sentience_snapshot,
+            "url": self.url,
+            "error": self.error,
+        }
+
+
+class ExtensionNotLoadedError(SentienceBackendError):
+    """
+    Raised when the Sentience extension is not loaded in the browser.
+
+    This typically means:
+    1. Browser was launched without --load-extension flag
+    2. Extension path is incorrect
+    3. Extension failed to initialize
+
+    Example fix for browser-use:
+        from sentience import get_extension_dir
+        from browser_use import BrowserSession, BrowserProfile
+
+        profile = BrowserProfile(
+            args=[f"--load-extension={get_extension_dir()}"],
+        )
+        session = BrowserSession(browser_profile=profile)
+    """
+
+    def __init__(
+        self,
+        message: str,
+        timeout_ms: int | None = None,
+        diagnostics: ExtensionDiagnostics | None = None,
+    ) -> None:
+        self.timeout_ms = timeout_ms  # wait budget that elapsed; None when not timeout-related
+        self.diagnostics = diagnostics  # structured form of what from_timeout puts in the message
+        super().__init__(message)
+
+    @classmethod
+    def from_timeout(
+        cls,
+        timeout_ms: int,
+        diagnostics: ExtensionDiagnostics | None = None,
+    ) -> "ExtensionNotLoadedError":
+        """Build a timeout error whose message embeds diagnostics and a browser-use fix snippet."""
+        diag_info = ""
+        if diagnostics:
+            if diagnostics.error:
+                diag_info = f"\n  Error: {diagnostics.error}"
+            else:
+                diag_info = (
+                    f"\n  window.sentience defined: {diagnostics.sentience_defined}"
+                    f"\n  window.sentience.snapshot available: {diagnostics.sentience_snapshot}"
+                    f"\n  Page URL: {diagnostics.url}"
+                )
+
+        message = (
+            f"Sentience extension not loaded after {timeout_ms}ms.{diag_info}\n\n"
+            "To fix this, ensure the extension is loaded when launching the browser:\n\n"
+            "  from sentience import get_extension_dir\n"
+            "  from browser_use import BrowserSession, BrowserProfile\n\n"
+            "  profile = BrowserProfile(\n"
+            f'      args=[f"--load-extension={{get_extension_dir()}}"],\n'  # {{ }} emit literal braces
+            "  )\n"
+            "  session = BrowserSession(browser_profile=profile)\n"
+        )
+        return cls(message, timeout_ms=timeout_ms, diagnostics=diagnostics)
+
+
+class ExtensionInjectionError(SentienceBackendError):
+    """
+    Raised when window.sentience API is not available on the page.
+
+    This can happen when:
+    1. Page loaded before extension could inject
+    2. Page has Content Security Policy blocking extension
+    3. Extension crashed or was disabled
+
+    Call snapshot() with a longer timeout or wait for page load.
+    """
+
+    def __init__(
+        self,
+        message: str,
+        url: str | None = None,
+    ) -> None:
+        self.url = url  # page the error refers to, if known
+        super().__init__(message)
+
+    @classmethod
+    def from_page(cls, url: str) -> "ExtensionInjectionError":
+        """Build the error for ``url``. NOTE(review): confirm SnapshotOptions accepts timeout_ms."""
+        message = (
+            f"window.sentience API not available on page: {url}\n\n"
+            "Possible causes:\n"
+            "  1. Page loaded before extension could inject (try increasing timeout)\n"
+            "  2. Page has Content Security Policy blocking the extension\n"
+            "  3. Extension was disabled or crashed\n\n"
+            "Try:\n"
+            "  snap = await snapshot(backend, options=SnapshotOptions(timeout_ms=10000))"
+        )
+        return cls(message, url=url)
+
+
+class BackendEvalError(SentienceBackendError):
+    """
+    Raised when JavaScript evaluation fails in the browser.
+
+    Carries the failing expression and the wrapped CDP or Playwright error as attributes.
+    """
+
+    def __init__(
+        self,
+        message: str,
+        expression: str | None = None,
+        original_error: Exception | None = None,
+    ) -> None:
+        self.expression = expression  # presumably the JS source that was evaluated
+        self.original_error = original_error  # underlying exception being wrapped, if any
+        super().__init__(message)
+
+
+class SnapshotError(SentienceBackendError):
+    """
+    Raised when taking a snapshot fails.
+
+    This can happen when:
+    1. Extension returned null or invalid data
+    2. Page is in an invalid state
+    3. Extension threw an error
+    """
+
+    def __init__(
+        self,
+        message: str,
+        url: str | None = None,
+        raw_result: Any = None,
+    ) -> None:
+        self.url = url  # page the snapshot was attempted on, if known
+        self.raw_result = raw_result  # whatever the extension returned (None for a null result)
+        super().__init__(message)
+
+    @classmethod
+    def from_null_result(cls, url: str | None = None) -> "SnapshotError":
+        """Build the error for a null snapshot result; a non-empty URL is appended to the message."""
+        message = (
+            "window.sentience.snapshot() returned null.\n\n"
+            "Possible causes:\n"
+            "  1. Extension is not properly initialized\n"
+            "  2. Page DOM is in an invalid state\n"
+            "  3. Extension encountered an internal error\n\n"
+            "Try refreshing the page and taking a new snapshot."
+        )
+        if url:
+            message = f"{message}\n  Page URL: {url}"
+        return cls(message, url=url, raw_result=None)
+
+
+class ActionError(SentienceBackendError):
+    """
+    Raised when a browser action (click, type, scroll) fails; str() is "<action> failed: <message>".
+    """
+
+    def __init__(
+        self,
+        action: str,
+        message: str,
+        coordinates: tuple[float, float] | None = None,
+        original_error: Exception | None = None,
+    ) -> None:
+        self.action = action
+        self.coordinates = coordinates
+        self.original_error = original_error
+        super().__init__(f"{action} failed: {message}")
diff --git a/sentience/backends/playwright_backend.py b/sentience/backends/playwright_backend.py
index f5ea8df..719561a 100644
--- a/sentience/backends/playwright_backend.py
+++ b/sentience/backends/playwright_backend.py
@@ -57,7 +57,8 @@ def page(self) -> "AsyncPage":
 
     async def refresh_page_info(self) -> ViewportInfo:
         """Cache viewport + scroll offsets; cheap & safe to call often."""
-        result = await self._page.evaluate("""
+        result = await self._page.evaluate(
+            """
             (() => ({
                 width: window.innerWidth,
                 height: window.innerHeight,
@@ -66,7 +67,8 @@ async def refresh_page_info(self) -> ViewportInfo:
                 content_width: document.documentElement.scrollWidth,
                 content_height: document.documentElement.scrollHeight
             }))()
-        """)
+        """
+        )
 
         self._cached_viewport = ViewportInfo(
             width=result.get("width", 0),
@@ -96,7 +98,8 @@ async def get_layout_metrics(self) -> LayoutMetrics:
         """Get page layout metrics."""
         # Playwright doesn't expose CDP directly in the same way,
         # so we approximate using JavaScript
-        result = await self._page.evaluate("""
+        result = await self._page.evaluate(
+            """
             (() => ({
                 viewport_x: window.scrollX,
                 viewport_y: window.scrollY,
@@ -106,7 +109,8 @@ async def get_layout_metrics(self) -> LayoutMetrics:
                 content_height: document.documentElement.scrollHeight,
                 device_scale_factor: window.devicePixelRatio || 1
             }))()
-        """)
+        """
+        )
 
         return LayoutMetrics(
             viewport_x=result.get("viewport_x", 0),
@@ -172,8 +176,7 @@ async def wait_ready_state(
             elapsed = time.monotonic() - start
             if elapsed >= timeout_sec:
                 raise TimeoutError(
-                    f"Timed out waiting for document.readyState='{state}' "
-                    f"after {timeout_ms}ms"
+                    f"Timed out waiting for document.readyState='{state}' " f"after {timeout_ms}ms"
                 )
 
             current_state = await self._page.evaluate("document.readyState")
diff --git a/sentience/backends/snapshot.py b/sentience/backends/snapshot.py
index 6f11dd9..ffe647b 100644
--- a/sentience/backends/snapshot.py
+++ b/sentience/backends/snapshot.py
@@ -25,6 +25,7 @@
 from typing import TYPE_CHECKING, Any
 
 from ..models import Snapshot, SnapshotOptions
+from .exceptions import ExtensionDiagnostics, ExtensionNotLoadedError, SnapshotError
 
 if TYPE_CHECKING:
     from .protocol_v0 import BrowserBackendV0
@@ -184,30 +185,36 @@ async def snapshot(
     ext_options = _build_extension_options(options)
 
     # Call extension's snapshot function
-    result = await backend.eval(f"""
+    result = await backend.eval(
+        f"""
         (() => {{
             const options = {_json_serialize(ext_options)};
             return window.sentience.snapshot(options);
         }})()
-    """)
+    """
+    )
 
     if result is None:
-        raise RuntimeError(
-            "window.sentience.snapshot() returned null. "
-            "Is the Sentience extension loaded and injected?"
-        )
+        # Try to get URL for better error message
+        try:
+            url = await backend.eval("window.location.href")
+        except Exception:
+            url = None
+        raise SnapshotError.from_null_result(url=url)
 
     # Show overlay if requested
     if options.show_overlay:
         raw_elements = result.get("raw_elements", [])
         if raw_elements:
-            await backend.eval(f"""
+            await backend.eval(
+                f"""
                 (() => {{
                     if (window.sentience && window.sentience.showOverlay) {{
                         window.sentience.showOverlay({_json_serialize(raw_elements)}, null);
                     }}
                 }})()
-            """)
+            """
+            )
 
     # Build and return Snapshot
     return Snapshot(**result)
@@ -237,19 +244,22 @@ async def _wait_for_extension(
         if elapsed >= timeout_sec:
             # Gather diagnostics
             try:
-                diag = await backend.eval("""
+                diag_dict = await backend.eval(
+                    """
                     (() => ({
                         sentience_defined: typeof window.sentience !== 'undefined',
                         sentience_snapshot: typeof window.sentience?.snapshot === 'function',
                         url: window.location.href
                     }))()
-                """)
-            except Exception:
-                diag = {"error": "Could not gather diagnostics"}
-
-            raise RuntimeError(
-                f"Sentience extension failed to inject window.sentience API "
-                f"within {timeout_ms}ms. Diagnostics: {diag}"
+                """
+                )
+                diagnostics = ExtensionDiagnostics.from_dict(diag_dict)
+            except Exception as e:
+                diagnostics = ExtensionDiagnostics(error=f"Could not gather diagnostics: {e}")
+
+            raise ExtensionNotLoadedError.from_timeout(
+                timeout_ms=timeout_ms,
+                diagnostics=diagnostics,
             )
 
         # Check if extension is ready
@@ -294,4 +304,5 @@ def _build_extension_options(options: SnapshotOptions) -> dict[str, Any]:
 def _json_serialize(obj: Any) -> str:
     """Serialize object to JSON string for embedding in JS."""
     import json
+
     return json.dumps(obj)
diff --git a/tests/test_backends.py b/tests/test_backends.py
index a1c7d90..00e4325 100644
--- a/tests/test_backends.py
+++ b/tests/test_backends.py
@@ -833,3 +833,126 @@ async def test_tuple_passthrough(self) -> None:
 
         assert x == 300
         assert y == 400
+
+
+class TestBackendExceptions:
+    """Unit tests for the exception types in sentience.backends.exceptions."""
+
+    def test_extension_diagnostics_from_dict(self) -> None:
+        """from_dict copies the provided keys and leaves a missing 'error' as None."""
+        from sentience.backends.exceptions import ExtensionDiagnostics
+
+        data = {
+            "sentience_defined": True,
+            "sentience_snapshot": False,
+            "url": "https://example.com",
+        }
+        diag = ExtensionDiagnostics.from_dict(data)
+
+        assert diag.sentience_defined is True
+        assert diag.sentience_snapshot is False
+        assert diag.url == "https://example.com"
+        assert diag.error is None
+
+    def test_extension_diagnostics_to_dict(self) -> None:
+        """to_dict exposes the boolean flags and URL under their field names."""
+        from sentience.backends.exceptions import ExtensionDiagnostics
+
+        diag = ExtensionDiagnostics(
+            sentience_defined=True,
+            sentience_snapshot=True,
+            url="https://test.com",
+            error=None,
+        )
+        result = diag.to_dict()
+
+        assert result["sentience_defined"] is True
+        assert result["sentience_snapshot"] is True
+        assert result["url"] == "https://test.com"
+
+    def test_extension_not_loaded_error_from_timeout(self) -> None:
+        """from_timeout keeps timeout/diagnostics and builds a message with a fix hint."""
+        from sentience.backends.exceptions import ExtensionDiagnostics, ExtensionNotLoadedError
+
+        diag = ExtensionDiagnostics(
+            sentience_defined=False,
+            sentience_snapshot=False,
+            url="https://example.com",
+        )
+        error = ExtensionNotLoadedError.from_timeout(timeout_ms=5000, diagnostics=diag)
+
+        assert error.timeout_ms == 5000
+        assert error.diagnostics is diag
+        assert "5000ms" in str(error)
+        assert "window.sentience defined: False" in str(error)
+        assert "get_extension_dir" in str(error)  # Contains fix suggestion
+
+    def test_extension_not_loaded_error_with_eval_error(self) -> None:
+        """A diagnostics.error value is surfaced in the from_timeout message."""
+        from sentience.backends.exceptions import ExtensionDiagnostics, ExtensionNotLoadedError
+
+        diag = ExtensionDiagnostics(error="Could not evaluate JavaScript")
+        error = ExtensionNotLoadedError.from_timeout(timeout_ms=3000, diagnostics=diag)
+
+        assert "Could not evaluate JavaScript" in str(error)
+
+    def test_snapshot_error_from_null_result(self) -> None:
+        """from_null_result stores the URL and mentions it in the message."""
+        from sentience.backends.exceptions import SnapshotError
+
+        error = SnapshotError.from_null_result(url="https://example.com/page")
+
+        assert error.url == "https://example.com/page"
+        assert "returned null" in str(error)
+        assert "example.com/page" in str(error)
+
+    def test_snapshot_error_from_null_result_no_url(self) -> None:
+        """from_null_result works without a URL; the url attribute stays None."""
+        from sentience.backends.exceptions import SnapshotError
+
+        error = SnapshotError.from_null_result(url=None)
+
+        assert error.url is None
+        assert "returned null" in str(error)
+
+    def test_action_error_message_format(self) -> None:
+        """ActionError keeps action/coordinates and prefixes the message with '<action> failed'."""
+        from sentience.backends.exceptions import ActionError
+
+        error = ActionError(
+            action="click",
+            message="Element not found",
+            coordinates=(100, 200),
+        )
+
+        assert error.action == "click"
+        assert error.coordinates == (100, 200)
+        assert "click failed" in str(error)
+        assert "Element not found" in str(error)
+
+    def test_sentience_backend_error_inheritance(self) -> None:
+        """Every concrete backend exception subclasses SentienceBackendError."""
+        from sentience.backends.exceptions import (
+            ActionError,
+            BackendEvalError,
+            ExtensionInjectionError,
+            ExtensionNotLoadedError,
+            SentienceBackendError,
+            SnapshotError,
+        )
+
+        assert issubclass(ExtensionNotLoadedError, SentienceBackendError)
+        assert issubclass(ExtensionInjectionError, SentienceBackendError)
+        assert issubclass(BackendEvalError, SentienceBackendError)
+        assert issubclass(SnapshotError, SentienceBackendError)
+        assert issubclass(ActionError, SentienceBackendError)
+
+    def test_extension_injection_error_from_page(self) -> None:
+        """from_page stores the URL and lists Content Security Policy among the causes."""
+        from sentience.backends.exceptions import ExtensionInjectionError
+
+        error = ExtensionInjectionError.from_page("https://secure-site.com")
+
+        assert error.url == "https://secure-site.com"
+        assert "secure-site.com" in str(error)
+        assert "Content Security Policy" in str(error)

From c598d490f605ebb1718264d79b998c4568c62957 Mon Sep 17 00:00:00 2001
From: SentienceDev <dev@sentienceapi.com>
Date: Thu, 8 Jan 2026 21:35:32 -0800
Subject: [PATCH 3/3] backend and regular snapshot consistent

---
 examples/browser_use_integration.py |  12 +-
 sentience/__init__.py               |   2 +-
 sentience/backends/snapshot.py      | 212 ++++++++++++++++++++++------
 sentience/extension/background.js   |   2 +-
 sentience/snapshot.py               | 165 ++++++++++++++++------
 5 files changed, 291 insertions(+), 102 deletions(-)

diff --git a/examples/browser_use_integration.py b/examples/browser_use_integration.py
index 9167c5f..d24468f 100644
--- a/examples/browser_use_integration.py
+++ b/examples/browser_use_integration.py
@@ -14,15 +14,8 @@
 
 import asyncio
 
-# browser-use imports (install via: pip install browser-use)
-# from browser_use import BrowserSession, BrowserProfile
-
 # Sentience imports
-from sentience import (
-    find,
-    get_extension_dir,
-    query,
-)
+from sentience import find, get_extension_dir, query
 from sentience.backends import (
     BrowserUseAdapter,
     CachedSnapshot,
@@ -33,6 +26,9 @@
     type_text,
 )
 
+# browser-use imports (install via: pip install browser-use)
+# from browser_use import BrowserSession, BrowserProfile
+
 
 async def main() -> None:
     """
diff --git a/sentience/__init__.py b/sentience/__init__.py
index ecb4711..91ebe36 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -118,7 +118,7 @@
 from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
 from .wait import wait_for
 
-__version__ = "0.92.3"
+__version__ = "0.93.0"
 
 __all__ = [
     # Extension helpers (for browser-use integration)
diff --git a/sentience/backends/snapshot.py b/sentience/backends/snapshot.py
index ffe647b..2a1ff7d 100644
--- a/sentience/backends/snapshot.py
+++ b/sentience/backends/snapshot.py
@@ -25,6 +25,11 @@
 from typing import TYPE_CHECKING, Any
 
 from ..models import Snapshot, SnapshotOptions
+from ..snapshot import (
+    _build_snapshot_payload,
+    _merge_api_result_with_local,
+    _post_snapshot_to_gateway_async,
+)
 from .exceptions import ExtensionDiagnostics, ExtensionNotLoadedError, SnapshotError
 
 if TYPE_CHECKING:
@@ -145,8 +150,9 @@ async def snapshot(
     """
     Take a Sentience snapshot using the backend protocol.
 
-    This function calls window.sentience.snapshot() via the backend's eval(),
-    enabling snapshot collection with any BrowserBackendV0 implementation.
+    This function respects the `use_api` option and can call either:
+    - Server-side API (Pro/Enterprise tier) when `use_api=True` and API key is provided
+    - Local extension (Free tier) when `use_api=False` or no API key
 
     Requires:
         - Sentience extension loaded in browser (via --load-extension)
@@ -154,70 +160,50 @@ async def snapshot(
 
     Args:
         backend: BrowserBackendV0 implementation (CDPBackendV0, PlaywrightBackend, etc.)
-        options: Snapshot options (limit, filter, screenshot, etc.)
+        options: Snapshot options (limit, filter, screenshot, use_api, sentience_api_key, etc.)
 
     Returns:
         Snapshot with elements, viewport, and optional screenshot
 
     Example:
         from sentience.backends import BrowserUseAdapter
-        from sentience.backends.snapshot import snapshot_from_backend
+        from sentience.backends.snapshot import snapshot
+        from sentience.models import SnapshotOptions
 
         adapter = BrowserUseAdapter(session)
         backend = await adapter.create_backend()
 
-        # Basic snapshot
-        snap = await snapshot_from_backend(backend)
+        # Basic snapshot (uses local extension)
+        snap = await snapshot(backend)
 
-        # With options
-        snap = await snapshot_from_backend(backend, SnapshotOptions(
+        # With server-side API (Pro/Enterprise tier)
+        snap = await snapshot(backend, SnapshotOptions(
+            use_api=True,
+            sentience_api_key="sk_pro_xxxxx",
             limit=100,
             screenshot=True
         ))
+
+        # Force local extension (Free tier)
+        snap = await snapshot(backend, SnapshotOptions(
+            use_api=False
+        ))
     """
     if options is None:
         options = SnapshotOptions()
 
-    # Wait for extension injection
-    await _wait_for_extension(backend, timeout_ms=5000)
-
-    # Build options dict for extension API
-    ext_options = _build_extension_options(options)
-
-    # Call extension's snapshot function
-    result = await backend.eval(
-        f"""
-        (() => {{
-            const options = {_json_serialize(ext_options)};
-            return window.sentience.snapshot(options);
-        }})()
-    """
+    # Determine if we should use server-side API
+    # Same logic as main snapshot() function in sentience/snapshot.py
+    should_use_api = (
+        options.use_api if options.use_api is not None else (options.sentience_api_key is not None)
     )
 
-    if result is None:
-        # Try to get URL for better error message
-        try:
-            url = await backend.eval("window.location.href")
-        except Exception:
-            url = None
-        raise SnapshotError.from_null_result(url=url)
-
-    # Show overlay if requested
-    if options.show_overlay:
-        raw_elements = result.get("raw_elements", [])
-        if raw_elements:
-            await backend.eval(
-                f"""
-                (() => {{
-                    if (window.sentience && window.sentience.showOverlay) {{
-                        window.sentience.showOverlay({_json_serialize(raw_elements)}, null);
-                    }}
-                }})()
-            """
-            )
-
-    # Build and return Snapshot
-    return Snapshot(**result)
+    if should_use_api and options.sentience_api_key:
+        # Use server-side API (Pro/Enterprise tier)
+        return await _snapshot_via_api(backend, options)
+    else:
+        # Use local extension (Free tier)
+        return await _snapshot_via_extension(backend, options)
 
 
 async def _wait_for_extension(
@@ -235,12 +221,23 @@ async def _wait_for_extension(
         RuntimeError: If extension not injected within timeout
     """
     import asyncio
+    import logging
+
+    logger = logging.getLogger("sentience.backends.snapshot")
 
     start = time.monotonic()
     timeout_sec = timeout_ms / 1000.0
+    poll_count = 0
+
+    logger.debug(f"Waiting for extension injection (timeout={timeout_ms}ms)...")
 
     while True:
         elapsed = time.monotonic() - start
+        poll_count += 1
+
+        if poll_count % 10 == 0:  # Log every 10 polls (~1 second)
+            logger.debug(f"Extension poll #{poll_count}, elapsed={elapsed*1000:.0f}ms")
+
         if elapsed >= timeout_sec:
             # Gather diagnostics
             try:
@@ -249,11 +246,14 @@ async def _wait_for_extension(
                     (() => ({
                         sentience_defined: typeof window.sentience !== 'undefined',
                         sentience_snapshot: typeof window.sentience?.snapshot === 'function',
-                        url: window.location.href
+                        url: window.location.href,
+                        extension_id: document.documentElement.dataset.sentienceExtensionId || null,
+                        has_content_script: !!document.documentElement.dataset.sentienceExtensionId
                     }))()
                 """
                 )
                 diagnostics = ExtensionDiagnostics.from_dict(diag_dict)
+                logger.debug(f"Extension diagnostics: {diag_dict}")
             except Exception as e:
                 diagnostics = ExtensionDiagnostics(error=f"Could not gather diagnostics: {e}")
 
@@ -276,6 +276,124 @@ async def _wait_for_extension(
         await asyncio.sleep(0.1)
 
 
+async def _snapshot_via_extension(
+    backend: "BrowserBackendV0",
+    options: SnapshotOptions,
+) -> Snapshot:
+    """Take a snapshot entirely through the local extension (Free tier)."""
+    # Block until the extension is ready, or raise on timeout (fixed 5000 ms budget)
+    await _wait_for_extension(backend, timeout_ms=5000)
+
+    # Translate SnapshotOptions into the options dict passed to the extension
+    ext_options = _build_extension_options(options)
+
+    # Run window.sentience.snapshot(options) in the page, with options inlined as JSON
+    result = await backend.eval(
+        f"""
+        (() => {{
+            const options = {_json_serialize(ext_options)};
+            return window.sentience.snapshot(options);
+        }})()
+    """
+    )
+
+    if result is None:
+        # Best-effort URL lookup purely to enrich the error; a failing eval must not mask it
+        try:
+            url = await backend.eval("window.location.href")
+        except Exception:
+            url = None
+        raise SnapshotError.from_null_result(url=url)
+
+    # Optionally hand the raw elements to the extension's showOverlay, if it exists
+    if options.show_overlay:
+        raw_elements = result.get("raw_elements", [])
+        if raw_elements:
+            await backend.eval(
+                f"""
+                (() => {{
+                    if (window.sentience && window.sentience.showOverlay) {{
+                        window.sentience.showOverlay({_json_serialize(raw_elements)}, null);
+                    }}
+                }})()
+            """
+            )
+
+    # Unpack the extension's result dict into the Snapshot model
+    return Snapshot(**result)
+
+
+async def _snapshot_via_api(
+    backend: "BrowserBackendV0",
+    options: SnapshotOptions,
+) -> Snapshot:
+    """Snapshot via server-side API (Pro/Enterprise tier) from locally collected raw data."""
+    # Hard-coded gateway base URL; same value as _post_snapshot_to_gateway_sync's default
+    api_url = "https://api.sentienceapi.com"
+
+    # Wait for extension injection (needed even for API mode to collect raw data)
+    await _wait_for_extension(backend, timeout_ms=5000)
+
+    # Step 1: Get raw data from local extension (always happens locally)
+    raw_options: dict[str, Any] = {}
+    if options.screenshot is not False:
+        raw_options["screenshot"] = options.screenshot
+
+    # Only the screenshot option goes to the extension; limit/filter travel in the API payload
+    raw_result = await backend.eval(
+        f"""
+        (() => {{
+            const options = {_json_serialize(raw_options)};
+            return window.sentience.snapshot(options);
+        }})()
+    """
+    )
+
+    if raw_result is None:
+        try:
+            url = await backend.eval("window.location.href")
+        except Exception:
+            url = None
+        raise SnapshotError.from_null_result(url=url)
+
+    # Step 2: Send to server for smart ranking/filtering
+    payload = _build_snapshot_payload(raw_result, options)
+
+    try:
+        api_result = await _post_snapshot_to_gateway_async(
+            payload, options.sentience_api_key, api_url
+        )
+
+        # Merge API result with local data (screenshot, etc.)
+        snapshot_data = _merge_api_result_with_local(api_result, raw_result)
+
+        # Show visual overlay if requested (use API-ranked elements)
+        if options.show_overlay:
+            elements = api_result.get("elements", [])
+            if elements:
+                await backend.eval(
+                    f"""
+                    (() => {{
+                        if (window.sentience && window.sentience.showOverlay) {{
+                            window.sentience.showOverlay({_json_serialize(elements)}, null);
+                        }}
+                    }})()
+                """
+                )
+
+        return Snapshot(**snapshot_data)
+    except (RuntimeError, ValueError):
+        # Let RuntimeError/ValueError propagate unchanged instead of re-wrapping them
+        raise
+    except Exception as e:
+        # No automatic fallback to the local extension happens here: the failure is
+        # wrapped in RuntimeError and the message tells the caller how to opt out
+        raise RuntimeError(
+            f"Server-side snapshot API failed: {e}. "
+            "Try using use_api=False to use local extension instead."
+        ) from e
+
+
 def _build_extension_options(options: SnapshotOptions) -> dict[str, Any]:
     """Build options dict for extension API call."""
     ext_options: dict[str, Any] = {}
diff --git a/sentience/extension/background.js b/sentience/extension/background.js
index aff49b0..02c0408 100644
--- a/sentience/extension/background.js
+++ b/sentience/extension/background.js
@@ -1,4 +1,4 @@
-import init, { analyze_page_with_options, analyze_page, prune_for_api } from "../pkg/sentience_core.js";
+import init, { analyze_page_with_options, analyze_page, prune_for_api } from "./pkg/sentience_core.js";
 
 let wasmReady = !1, wasmInitPromise = null;
 
diff --git a/sentience/snapshot.py b/sentience/snapshot.py
index ec17d5a..3366141 100644
--- a/sentience/snapshot.py
+++ b/sentience/snapshot.py
@@ -19,6 +19,122 @@
 MAX_PAYLOAD_BYTES = 10 * 1024 * 1024
 
 
+def _build_snapshot_payload(
+    raw_result: dict[str, Any],
+    options: SnapshotOptions,
+) -> dict[str, Any]:
+    """
+    Assemble the request body for the gateway snapshot API.
+
+    Page data is read from ``raw_result``; goal, limit and filter come from ``options``.
+    """
+    payload: dict[str, Any] = {
+        "raw_elements": raw_result.get("raw_elements", []),
+        "url": raw_result.get("url", ""),
+        "viewport": raw_result.get("viewport"),
+        "goal": options.goal,
+    }
+    # A missing filter is sent as an explicit None rather than being omitted.
+    element_filter = options.filter.model_dump() if options.filter else None
+    payload["options"] = {"limit": options.limit, "filter": element_filter}
+    return payload
+
+
+def _validate_payload_size(payload_json: str) -> None:
+    """
+    Raise ValueError if the UTF-8 encoded payload is larger than ``MAX_PAYLOAD_BYTES``.
+    """
+    payload_size = len(payload_json.encode("utf-8"))
+    if payload_size <= MAX_PAYLOAD_BYTES:
+        return
+    message = (
+        f"Payload size ({payload_size / 1024 / 1024:.2f}MB) exceeds server limit "
+        f"({MAX_PAYLOAD_BYTES / 1024 / 1024:.0f}MB). "
+        f"Try reducing the number of elements on the page or filtering elements."
+    )
+    raise ValueError(message)
+
+
+def _post_snapshot_to_gateway_sync(
+    payload: dict[str, Any],
+    api_key: str,
+    api_url: str = "https://api.sentienceapi.com",
+) -> dict[str, Any]:
+    """
+    Serialize ``payload`` and POST it to ``<api_url>/v1/snapshot`` using ``requests``.
+
+    Returns the decoded JSON body. Raises ValueError when the serialized payload is over
+    the size limit; ``requests`` errors (including ``raise_for_status``) propagate.
+    """
+    payload_json = json.dumps(payload)
+    _validate_payload_size(payload_json)
+
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    # Tolerate a trailing slash on a configured base URL (avoids "//v1/snapshot").
+    endpoint = f"{api_url.rstrip('/')}/v1/snapshot"
+
+    response = requests.post(
+        endpoint,
+        data=payload_json,  # send the already-validated string as-is
+        headers=headers,
+        timeout=30,
+    )
+    response.raise_for_status()
+    return response.json()
+
+
+async def _post_snapshot_to_gateway_async(
+    payload: dict[str, Any],
+    api_key: str,
+    api_url: str = "https://api.sentienceapi.com",
+) -> dict[str, Any]:
+    """
+    Serialize ``payload`` and POST it to ``<api_url>/v1/snapshot`` using ``httpx``.
+
+    Returns the decoded JSON body. Raises ValueError when the serialized payload is over
+    the size limit; ``httpx`` errors (including ``raise_for_status``) propagate.
+    """
+    # Imported lazily so httpx is only required when the async API path is used
+    import httpx
+
+    payload_json = json.dumps(payload)
+    _validate_payload_size(payload_json)
+
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+    # Tolerate a trailing slash on a configured base URL (avoids "//v1/snapshot").
+    endpoint = f"{api_url.rstrip('/')}/v1/snapshot"
+
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        response = await client.post(
+            endpoint,
+            content=payload_json,
+            headers=headers,
+        )
+        response.raise_for_status()
+        return response.json()
+
+
+def _merge_api_result_with_local(
+    api_result: dict[str, Any],
+    raw_result: dict[str, Any],
+) -> dict[str, Any]:
+    """Combine the gateway response with local capture data into Snapshot kwargs."""
+    # API values win for url/viewport; local values are used only when the key is absent.
+    fallback_url = raw_result.get("url", "")
+    fallback_viewport = raw_result.get("viewport")
+
+    return {
+        "status": api_result.get("status", "success"),
+        "timestamp": api_result.get("timestamp"),
+        "url": api_result.get("url", fallback_url),
+        "viewport": api_result.get("viewport", fallback_viewport),
+        "elements": api_result.get("elements", []),
+        "screenshot": raw_result.get("screenshot"),  # always the locally captured image
+        "screenshot_format": raw_result.get("screenshot_format"),
+        "error": api_result.get("error"),
+    }
+
+
 def _save_trace_to_file(raw_elements: list[dict[str, Any]], trace_path: str | None = None) -> None:
     """
     Save raw_elements to a JSON file for benchmarking/training
@@ -181,54 +297,13 @@ def _snapshot_via_api(
     # Step 2: Send to server for smart ranking/filtering
     # Use raw_elements (raw data) instead of elements (processed data)
     # Server validates API key and applies proprietary ranking logic
-    payload = {
-        "raw_elements": raw_result.get("raw_elements", []),  # Raw data needed for server processing
-        "url": raw_result.get("url", ""),
-        "viewport": raw_result.get("viewport"),
-        "goal": options.goal,  # Optional goal/task description
-        "options": {
-            "limit": options.limit,
-            "filter": options.filter.model_dump() if options.filter else None,
-        },
-    }
-
-    # Check payload size before sending (server has 10MB limit)
-    payload_json = json.dumps(payload)
-    payload_size = len(payload_json.encode("utf-8"))
-    if payload_size > MAX_PAYLOAD_BYTES:
-        raise ValueError(
-            f"Payload size ({payload_size / 1024 / 1024:.2f}MB) exceeds server limit "
-            f"({MAX_PAYLOAD_BYTES / 1024 / 1024:.0f}MB). "
-            f"Try reducing the number of elements on the page or filtering elements."
-        )
-
-    headers = {
-        "Authorization": f"Bearer {api_key}",
-        "Content-Type": "application/json",
-    }
+    payload = _build_snapshot_payload(raw_result, options)
 
     try:
-        response = requests.post(
-            f"{api_url}/v1/snapshot",
-            data=payload_json,  # Reuse already-serialized JSON
-            headers=headers,
-            timeout=30,
-        )
-        response.raise_for_status()
-
-        api_result = response.json()
+        api_result = _post_snapshot_to_gateway_sync(payload, api_key, api_url)
 
         # Merge API result with local data (screenshot, etc.)
-        snapshot_data = {
-            "status": api_result.get("status", "success"),
-            "timestamp": api_result.get("timestamp"),
-            "url": api_result.get("url", raw_result.get("url", "")),
-            "viewport": api_result.get("viewport", raw_result.get("viewport")),
-            "elements": api_result.get("elements", []),
-            "screenshot": raw_result.get("screenshot"),  # Keep local screenshot
-            "screenshot_format": raw_result.get("screenshot_format"),
-            "error": api_result.get("error"),
-        }
+        snapshot_data = _merge_api_result_with_local(api_result, raw_result)
 
         # Show visual overlay if requested (use API-ranked elements)
         if options.show_overlay:
@@ -247,7 +322,7 @@ def _snapshot_via_api(
 
         return Snapshot(**snapshot_data)
     except requests.exceptions.RequestException as e:
-        raise RuntimeError(f"API request failed: {e}")
+        raise RuntimeError(f"API request failed: {e}") from e
 
 
 # ========== Async Snapshot Functions ==========