From efd65fe2b28bafce9cac586b3503109a85e0fda3 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 19:07:20 -0800
Subject: [PATCH 01/12] Tabs + JS evaluator + Lifecycle hooks + cli

---
 README.md                                 |  18 +++
 sentience/__init__.py                     |   1 +
 sentience/agent.py                        | 137 +++++++++++++++++-
 sentience/agent_runtime.py                | 129 ++++++++++++++---
 sentience/backends/playwright_backend.py  | 100 ++++++++++++++
 sentience/cli.py                          | 160 +++++++++++++++++++++-
 sentience/extension/background.js         |   6 +-
 sentience/extension/content.js            |  18 +--
 sentience/extension/injected_api.js       |  82 +++++------
 sentience/extension/pkg/sentience_core.js |  14 +-
 sentience/llm_provider.py                 |   4 +-
 sentience/models.py                       |  77 +++++++++++
 sentience/runtime_agent.py                |  48 ++++++-
 tests/test_agent_runtime.py               |  63 ++++++++-
 tests/unit/test_runtime_agent.py          |  47 ++++++-
 15 files changed, 815 insertions(+), 89 deletions(-)
diff --git a/README.md b/README.md
index 4fa9219..d78bb04 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,24 @@ pip install transformers torch  # For local LLMs
 pip install -e .
 ```
 
+## 🧭 Manual driver CLI
+
+Use the interactive CLI to open a page, inspect clickables, and drive actions:
+
+```bash
+sentience driver --url https://example.com
+```
+
+Commands:
+- `open <url>`
+- `state [limit]`
+- `click <element_id>`
+- `type <element_id> <text>`
+- `press <key>`
+- `screenshot [path]`
+- `help`
+- `close`
+
 ## Jest for AI Web Agent
 
 ### Semantic snapshots and assertions that let agents act, verify, and know when they're done.
diff --git a/sentience/__init__.py b/sentience/__init__.py
index 4425bf0..5e4ecc0 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -86,6 +86,7 @@
     Snapshot,
     SnapshotFilter,
     SnapshotOptions,
+    StepHookContext,
     StorageState,
     TextContext,
     TextMatch,
diff --git a/sentience/agent.py b/sentience/agent.py
index 2f7f4ac..fce9c14 100644
--- a/sentience/agent.py
+++ b/sentience/agent.py
@@ -5,7 +5,10 @@
 
 import asyncio
 import hashlib
+import inspect
+import logging
 import time
+from collections.abc import Callable
 from typing import TYPE_CHECKING, Any, Optional, Union
 
 from .action_executor import ActionExecutor
@@ -23,6 +26,7 @@
     ScreenshotConfig,
     Snapshot,
     SnapshotOptions,
+    StepHookContext,
     TokenStats,
 )
 from .protocols import AsyncBrowserProtocol, BrowserProtocol
@@ -65,6 +69,51 @@ def _safe_tracer_call(
             print(f"⚠️  Tracer error (non-fatal): {tracer_error}")
 
 
+def _safe_hook_call_sync(
+    hook: Callable[[StepHookContext], Any] | None,
+    ctx: StepHookContext,
+    verbose: bool,
+) -> None:
+    if not hook:
+        return
+    try:
+        result = hook(ctx)
+        if inspect.isawaitable(result):
+            try:
+                loop = asyncio.get_running_loop()
+            except RuntimeError:
+                asyncio.run(result)
+            else:
+                loop.create_task(result)
+    except Exception as hook_error:
+        if verbose:
+            print(f"⚠️  Hook error (non-fatal): {hook_error}")
+        else:
+            logging.getLogger(__name__).warning(
+                "Hook error (non-fatal): %s", hook_error
+            )
+
+
+async def _safe_hook_call_async(
+    hook: Callable[[StepHookContext], Any] | None,
+    ctx: StepHookContext,
+    verbose: bool,
+) -> None:
+    if not hook:
+        return
+    try:
+        result = hook(ctx)
+        if inspect.isawaitable(result):
+            await result
+    except Exception as hook_error:
+        if verbose:
+            print(f"⚠️  Hook error (non-fatal): {hook_error}")
+        else:
+            logging.getLogger(__name__).warning(
+                "Hook error (non-fatal): %s", hook_error
+            )
+
+
 class SentienceAgent(BaseAgent):
     """
     High-level agent that combines Sentience SDK with any LLM provider.
@@ -181,6 +230,8 @@ def act(  # noqa: C901
         goal: str,
         max_retries: int = 2,
         snapshot_options: SnapshotOptions | None = None,
+        on_step_start: Callable[[StepHookContext], Any] | None = None,
+        on_step_end: Callable[[StepHookContext], Any] | None = None,
     ) -> AgentActionResult:
         """
         Execute a high-level goal using observe → think → act loop
@@ -224,6 +275,18 @@ def act(  # noqa: C901
                 pre_url=pre_url,
             )
 
+        _safe_hook_call_sync(
+            on_step_start,
+            StepHookContext(
+                step_id=step_id,
+                step_index=self._step_count,
+                goal=goal,
+                attempt=0,
+                url=pre_url,
+            ),
+            self.verbose,
+        )
+
         # Track data collected during step execution for step_end emission on failure
         _step_snap_with_diff: Snapshot | None = None
         _step_pre_url: str | None = None
@@ -396,8 +459,8 @@ def act(  # noqa: C901
                 _step_duration_ms = duration_ms
 
                 # Emit action execution trace event if tracer is enabled
+                post_url = self.browser.page.url if self.browser.page else None
                 if self.tracer:
-                    post_url = self.browser.page.url if self.browser.page else None
 
                     # Include element data for live overlay visualization
                     elements_data = [
@@ -454,7 +517,6 @@ def act(  # noqa: C901
                 if self.tracer:
                     # Get pre_url from step_start (stored in tracer or use current)
                     pre_url = snap.url
-                    post_url = self.browser.page.url if self.browser.page else None
 
                     # Compute snapshot digest (simplified - use URL + timestamp)
                     snapshot_digest = f"sha256:{self._compute_hash(f'{pre_url}{snap.timestamp}')}"
@@ -561,6 +623,20 @@ def act(  # noqa: C901
                         step_id=step_id,
                     )
 
+                _safe_hook_call_sync(
+                    on_step_end,
+                    StepHookContext(
+                        step_id=step_id,
+                        step_index=self._step_count,
+                        goal=goal,
+                        attempt=attempt,
+                        url=post_url,
+                        success=result.success,
+                        outcome=result.outcome,
+                        error=result.error,
+                    ),
+                    self.verbose,
+                )
                 return result
 
             except Exception as e:
@@ -660,6 +736,20 @@ def act(  # noqa: C901
                             "duration_ms": 0,
                         }
                     )
+                    _safe_hook_call_sync(
+                        on_step_end,
+                        StepHookContext(
+                            step_id=step_id,
+                            step_index=self._step_count,
+                            goal=goal,
+                            attempt=attempt,
+                            url=_step_pre_url,
+                            success=False,
+                            outcome="exception",
+                            error=str(e),
+                        ),
+                        self.verbose,
+                    )
                     raise RuntimeError(f"Failed after {max_retries} retries: {e}")
 
     def _track_tokens(self, goal: str, llm_response: LLMResponse):
@@ -833,6 +923,8 @@ async def act(  # noqa: C901
         goal: str,
         max_retries: int = 2,
         snapshot_options: SnapshotOptions | None = None,
+        on_step_start: Callable[[StepHookContext], Any] | None = None,
+        on_step_end: Callable[[StepHookContext], Any] | None = None,
     ) -> AgentActionResult:
         """
         Execute a high-level goal using observe → think → act loop (async)
@@ -873,6 +965,18 @@ async def act(  # noqa: C901
                 pre_url=pre_url,
             )
 
+        await _safe_hook_call_async(
+            on_step_start,
+            StepHookContext(
+                step_id=step_id,
+                step_index=self._step_count,
+                goal=goal,
+                attempt=0,
+                url=pre_url,
+            ),
+            self.verbose,
+        )
+
         # Track data collected during step execution for step_end emission on failure
         _step_snap_with_diff: Snapshot | None = None
         _step_pre_url: str | None = None
@@ -1209,6 +1313,21 @@ async def act(  # noqa: C901
                         step_id=step_id,
                     )
 
+                post_url = self.browser.page.url if self.browser.page else None
+                await _safe_hook_call_async(
+                    on_step_end,
+                    StepHookContext(
+                        step_id=step_id,
+                        step_index=self._step_count,
+                        goal=goal,
+                        attempt=attempt,
+                        url=post_url,
+                        success=result.success,
+                        outcome=result.outcome,
+                        error=result.error,
+                    ),
+                    self.verbose,
+                )
                 return result
 
             except Exception as e:
@@ -1308,6 +1427,20 @@ async def act(  # noqa: C901
                             "duration_ms": 0,
                         }
                     )
+                    await _safe_hook_call_async(
+                        on_step_end,
+                        StepHookContext(
+                            step_id=step_id,
+                            step_index=self._step_count,
+                            goal=goal,
+                            attempt=attempt,
+                            url=_step_pre_url,
+                            success=False,
+                            outcome="exception",
+                            error=str(e),
+                        ),
+                        self.verbose,
+                    )
                     raise RuntimeError(f"Failed after {max_retries} retries: {e}")
 
     def _track_tokens(self, goal: str, llm_response: LLMResponse):
diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
index 2852699..e7a1ad4 100644
--- a/sentience/agent_runtime.py
+++ b/sentience/agent_runtime.py
@@ -72,7 +72,15 @@
 
 from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
 from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions
-from .models import Snapshot, SnapshotOptions
+from .models import (
+    EvaluateJsRequest,
+    EvaluateJsResult,
+    Snapshot,
+    SnapshotOptions,
+    TabInfo,
+    TabListResult,
+    TabOperationResult,
+)
 from .trace_event_builder import TraceEventBuilder
 from .verification import AssertContext, AssertOutcome, Predicate
 
@@ -286,6 +294,97 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
             await self._handle_captcha_if_needed(self.last_snapshot, source="gateway")
         return self.last_snapshot
 
+    async def evaluate_js(self, request: EvaluateJsRequest) -> EvaluateJsResult:
+        """
+        Evaluate JavaScript expression in the active backend.
+
+        Args:
+            request: EvaluateJsRequest with code and output limits.
+
+        Returns:
+            EvaluateJsResult with normalized text output.
+        """
+        try:
+            value = await self.backend.eval(request.code)
+        except Exception as exc:  # pragma: no cover - backend-specific errors
+            return EvaluateJsResult(ok=False, error=str(exc))
+
+        text = self._stringify_eval_value(value)
+        truncated = False
+        if request.truncate and len(text) > request.max_output_chars:
+            text = text[: request.max_output_chars] + "..."
+            truncated = True
+
+        return EvaluateJsResult(
+            ok=True,
+            value=value,
+            text=text,
+            truncated=truncated,
+        )
+
+    async def list_tabs(self) -> TabListResult:
+        backend = self._get_tab_backend()
+        if backend is None:
+            return TabListResult(ok=False, error="unsupported_capability")
+        try:
+            tabs = await backend.list_tabs()
+        except Exception as exc:  # pragma: no cover - backend specific
+            return TabListResult(ok=False, error=str(exc))
+        return TabListResult(ok=True, tabs=tabs)
+
+    async def open_tab(self, url: str) -> TabOperationResult:
+        backend = self._get_tab_backend()
+        if backend is None:
+            return TabOperationResult(ok=False, error="unsupported_capability")
+        try:
+            tab = await backend.open_tab(url)
+        except Exception as exc:  # pragma: no cover - backend specific
+            return TabOperationResult(ok=False, error=str(exc))
+        return TabOperationResult(ok=True, tab=tab)
+
+    async def switch_tab(self, tab_id: str) -> TabOperationResult:
+        backend = self._get_tab_backend()
+        if backend is None:
+            return TabOperationResult(ok=False, error="unsupported_capability")
+        try:
+            tab = await backend.switch_tab(tab_id)
+        except Exception as exc:  # pragma: no cover - backend specific
+            return TabOperationResult(ok=False, error=str(exc))
+        return TabOperationResult(ok=True, tab=tab)
+
+    async def close_tab(self, tab_id: str) -> TabOperationResult:
+        backend = self._get_tab_backend()
+        if backend is None:
+            return TabOperationResult(ok=False, error="unsupported_capability")
+        try:
+            tab = await backend.close_tab(tab_id)
+        except Exception as exc:  # pragma: no cover - backend specific
+            return TabOperationResult(ok=False, error=str(exc))
+        return TabOperationResult(ok=True, tab=tab)
+
+    def _get_tab_backend(self):
+        backend = getattr(self, "backend", None)
+        if backend is None:
+            return None
+        if not all(
+            hasattr(backend, attr) for attr in ("list_tabs", "open_tab", "switch_tab", "close_tab")
+        ):
+            return None
+        return backend
+
+    @staticmethod
+    def _stringify_eval_value(value: Any) -> str:
+        if value is None:
+            return "null"
+        if isinstance(value, (dict, list)):
+            try:
+                import json
+
+                return json.dumps(value, ensure_ascii=False)
+            except Exception:
+                return str(value)
+        return str(value)
+
     def set_captcha_options(self, options: CaptchaOptions) -> None:
         """
         Configure CAPTCHA handling (disabled by default unless set).
@@ -450,10 +549,7 @@ def _compute_snapshot_digest(self, snap: Snapshot | None) -> str | None:
         if snap is None:
             return None
         try:
-            return (
-                "sha256:"
-                + hashlib.sha256(f"{snap.url}{snap.timestamp}".encode("utf-8")).hexdigest()
-            )
+            return "sha256:" + hashlib.sha256(f"{snap.url}{snap.timestamp}".encode()).hexdigest()
         except Exception:
             return None
 
@@ -477,10 +573,7 @@ async def emit_step_end(
         goal = self._step_goal or ""
         pre_snap = self._step_pre_snapshot or self.last_snapshot
         pre_url = (
-            self._step_pre_url
-            or (pre_snap.url if pre_snap else None)
-            or self._cached_url
-            or ""
+            self._step_pre_url or (pre_snap.url if pre_snap else None) or self._cached_url or ""
         )
 
         if post_url is None:
@@ -488,8 +581,8 @@ async def emit_step_end(
                 post_url = await self.get_url()
             except Exception:
                 post_url = (
-                    (self.last_snapshot.url if self.last_snapshot else None) or self._cached_url
-                )
+                    self.last_snapshot.url if self.last_snapshot else None
+                ) or self._cached_url
         post_url = post_url or pre_url
 
         pre_digest = self._compute_snapshot_digest(pre_snap)
@@ -505,13 +598,15 @@ async def emit_step_end(
             signals["error"] = error
 
         passed = (
-            bool(verify_passed)
-            if verify_passed is not None
-            else self.required_assertions_passed()
+            bool(verify_passed) if verify_passed is not None else self.required_assertions_passed()
         )
 
-        exec_success = bool(success) if success is not None else bool(
-            self._last_action_success if self._last_action_success is not None else passed
+        exec_success = (
+            bool(success)
+            if success is not None
+            else bool(
+                self._last_action_success if self._last_action_success is not None else passed
+            )
         )
 
         exec_data: dict[str, Any] = {
@@ -716,7 +811,9 @@ def assert_done(
             True if task is complete (assertion passed), False otherwise
         """
         # Convenience wrapper for assert_ with required=True
+        # pylint: disable=deprecated-method
         ok = self.assert_(predicate, label=label, required=True)
+        # pylint: enable=deprecated-method
         if ok:
             self._task_done = True
             self._task_done_label = label
diff --git a/sentience/backends/playwright_backend.py b/sentience/backends/playwright_backend.py
index cfbc808..47b9960 100644
--- a/sentience/backends/playwright_backend.py
+++ b/sentience/backends/playwright_backend.py
@@ -29,6 +29,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal
 
+from ..models import TabInfo
 from .protocol import BrowserBackend, LayoutMetrics, ViewportInfo
 
 if TYPE_CHECKING:
@@ -53,6 +54,7 @@ def __init__(self, page: "AsyncPage") -> None:
         self._page = page
         self._cached_viewport: ViewportInfo | None = None
         self._downloads: list[dict[str, Any]] = []
+        self._tab_registry: dict[str, "AsyncPage"] = {}
 
         # Best-effort download tracking (does not change behavior unless a download occurs).
         # pylint: disable=broad-exception-caught
@@ -108,6 +110,104 @@ def page(self) -> "AsyncPage":
         """Access the underlying Playwright page."""
         return self._page
 
+    async def list_tabs(self) -> list[TabInfo]:
+        self._prune_tabs()
+        context = self._page.context
+        tabs: list[TabInfo] = []
+        for page in context.pages:
+            tab_id = self._ensure_tab_id(page)
+            title = None
+            try:
+                title = await page.title()
+            except Exception:  # pylint: disable=broad-exception-caught
+                title = None
+            tabs.append(
+                TabInfo(
+                    tab_id=tab_id,
+                    url=getattr(page, "url", None),
+                    title=title,
+                    is_active=page == self._page,
+                )
+            )
+        return tabs
+
+    async def open_tab(self, url: str) -> TabInfo:
+        self._prune_tabs()
+        context = self._page.context
+        page = await context.new_page()
+        await page.goto(url)
+        self._page = page
+        tab_id = self._ensure_tab_id(page)
+        title = None
+        try:
+            title = await page.title()
+        except Exception:  # pylint: disable=broad-exception-caught
+            title = None
+        return TabInfo(tab_id=tab_id, url=getattr(page, "url", None), title=title, is_active=True)
+
+    async def switch_tab(self, tab_id: str) -> TabInfo:
+        self._prune_tabs()
+        page = self._tab_registry.get(tab_id)
+        if page is None:
+            raise ValueError(f"unknown tab_id: {tab_id}")
+        self._page = page
+        try:
+            await page.bring_to_front()
+        except Exception:  # pylint: disable=broad-exception-caught
+            pass
+        title = None
+        try:
+            title = await page.title()
+        except Exception:  # pylint: disable=broad-exception-caught
+            title = None
+        return TabInfo(tab_id=tab_id, url=getattr(page, "url", None), title=title, is_active=True)
+
+    async def close_tab(self, tab_id: str) -> TabInfo:
+        self._prune_tabs()
+        page = self._tab_registry.get(tab_id)
+        if page is None:
+            raise ValueError(f"unknown tab_id: {tab_id}")
+        info = TabInfo(
+            tab_id=tab_id,
+            url=getattr(page, "url", None),
+            title=None,
+            is_active=page == self._page,
+        )
+        try:
+            info.title = await page.title()
+        except Exception:  # pylint: disable=broad-exception-caught
+            info.title = None
+        await page.close()
+        self._tab_registry.pop(tab_id, None)
+        if self._page == page:
+            context = page.context
+            pages = context.pages
+            if pages:
+                self._page = pages[0]
+        return info
+
+    def _ensure_tab_id(self, page: "AsyncPage") -> str:
+        self._prune_tabs()
+        for tab_id, entry in self._tab_registry.items():
+            if entry == page:
+                return tab_id
+        tab_id = f"tab-{id(page)}"
+        self._tab_registry[tab_id] = page
+        return tab_id
+
+    def _prune_tabs(self) -> None:
+        dead: list[str] = []
+        for tab_id, page in self._tab_registry.items():
+            is_closed = getattr(page, "is_closed", None)
+            try:
+                closed = is_closed() if callable(is_closed) else bool(is_closed)
+            except Exception:  # pragma: no cover - defensive
+                closed = False
+            if closed:
+                dead.append(tab_id)
+        for tab_id in dead:
+            self._tab_registry.pop(tab_id, None)
+
     async def refresh_page_info(self) -> ViewportInfo:
         """Cache viewport + scroll offsets; cheap & safe to call often."""
         result = await self._page.evaluate(
diff --git a/sentience/cli.py b/sentience/cli.py
index c5f669c..823c5be 100644
--- a/sentience/cli.py
+++ b/sentience/cli.py
@@ -3,16 +3,25 @@
 """
 
 import argparse
+import base64
+import shlex
 import sys
+import time
+from pathlib import Path
 
+from .actions import click, press, type_text
 from .browser import SentienceBrowser
 from .generator import ScriptGenerator
 from .inspector import inspect
+from .models import SnapshotOptions
 from .recorder import Trace, record
+from .screenshot import screenshot
+from .snapshot import snapshot
 
 
 def cmd_inspect(args):
     """Start inspector mode"""
+    _ = args
     browser = SentienceBrowser(headless=False)
     try:
         browser.start()
@@ -21,8 +30,6 @@ def cmd_inspect(args):
 
         with inspect(browser):
             # Keep running until interrupted
-            import time
-
             try:
                 while True:
                     time.sleep(1)
@@ -52,8 +59,6 @@ def cmd_record(args):
                 rec.add_mask_pattern(pattern)
 
             # Keep running until interrupted
-            import time
-
             try:
                 while True:
                     time.sleep(1)
@@ -87,6 +92,144 @@ def cmd_gen(args):
     print(f"✅ Generated {args.lang.upper()} script: {output}")
 
 
+def _print_driver_help():
+    print(
+        "\nCommands:\n"
+        "  open <url>                 Navigate to URL\n"
+        "  state [limit]              List clickable elements (optional limit)\n"
+        "  click <element_id>         Click element by id\n"
+        "  type <element_id> <text>   Type text into element\n"
+        "  press <key>                Press a key (e.g., Enter)\n"
+        "  screenshot [path]          Save screenshot (png/jpg)\n"
+        "  close                      Close browser and exit\n"
+        "  help                       Show this help\n"
+    )
+
+
+def cmd_driver(args):
+    """Manual driver CLI for open/state/click/type/screenshot/close."""
+    browser = SentienceBrowser(headless=args.headless)
+    try:
+        browser.start()
+        if args.url:
+            browser.page.goto(args.url)
+            browser.page.wait_for_load_state("networkidle")
+
+        print("✅ Manual driver started. Type 'help' for commands.")
+
+        while True:
+            try:
+                raw = input("sentience> ").strip()
+            except (EOFError, KeyboardInterrupt):
+                print("\n👋 Exiting manual driver.")
+                break
+
+            if not raw:
+                continue
+
+            try:
+                parts = shlex.split(raw)
+            except ValueError as exc:
+                print(f"❌ Parse error: {exc}")
+                continue
+
+            cmd = parts[0].lower()
+            cmd_args = parts[1:]
+
+            if cmd in {"help", "?"}:
+                _print_driver_help()
+                continue
+
+            if cmd == "open":
+                if not cmd_args:
+                    print("❌ Usage: open <url>")
+                    continue
+                url = cmd_args[0]
+                browser.page.goto(url)
+                browser.page.wait_for_load_state("networkidle")
+                print(f"✅ Opened {url}")
+                continue
+
+            if cmd == "state":
+                limit = args.limit
+                if cmd_args:
+                    try:
+                        limit = int(cmd_args[0])
+                    except ValueError:
+                        print("❌ Usage: state [limit]")
+                        continue
+                snap = snapshot(browser, SnapshotOptions(limit=limit))
+                clickables = [
+                    el
+                    for el in snap.elements
+                    if getattr(getattr(el, "visual_cues", None), "is_clickable", False)
+                ]
+                print(f"URL: {snap.url}")
+                print(f"Clickable elements: {len(clickables)}")
+                for el in clickables:
+                    text = (el.text or "").replace("\n", " ").strip()
+                    if len(text) > 60:
+                        text = text[:57] + "..."
+                    print(f"- id={el.id} role={el.role} text='{text}'")
+                continue
+
+            if cmd == "click":
+                if len(cmd_args) != 1:
+                    print("❌ Usage: click <element_id>")
+                    continue
+                try:
+                    element_id = int(cmd_args[0])
+                except ValueError:
+                    print("❌ element_id must be an integer")
+                    continue
+                click(browser, element_id)
+                print(f"✅ Clicked element {element_id}")
+                continue
+
+            if cmd == "type":
+                if len(cmd_args) < 2:
+                    print("❌ Usage: type <element_id> <text>")
+                    continue
+                try:
+                    element_id = int(cmd_args[0])
+                except ValueError:
+                    print("❌ element_id must be an integer")
+                    continue
+                text = " ".join(cmd_args[1:])
+                type_text(browser, element_id, text)
+                print(f"✅ Typed into element {element_id}")
+                continue
+
+            if cmd == "press":
+                if len(cmd_args) != 1:
+                    print('❌ Usage: press <key> (e.g., "Enter")')
+                    continue
+                press(browser, cmd_args[0])
+                print(f"✅ Pressed {cmd_args[0]}")
+                continue
+
+            if cmd == "screenshot":
+                path = cmd_args[0] if cmd_args else None
+                if path is None:
+                    path = f"screenshot-{int(time.time())}.png"
+                out_path = Path(path)
+                ext = out_path.suffix.lower()
+                fmt = "jpeg" if ext in {".jpg", ".jpeg"} else "png"
+                data_url = screenshot(browser, format=fmt)
+                _, b64 = data_url.split(",", 1)
+                out_path.write_bytes(base64.b64decode(b64))
+                print(f"✅ Saved screenshot to {out_path}")
+                continue
+
+            if cmd in {"close", "exit", "quit"}:
+                print("👋 Closing browser.")
+                break
+
+            print(f"❌ Unknown command: {cmd}. Type 'help' for options.")
+    finally:
+        browser.close()
+
+
 def main():
     """Main CLI entry point"""
     parser = argparse.ArgumentParser(description="Sentience SDK CLI")
@@ -117,6 +260,15 @@ def main():
     gen_parser.add_argument("--output", "-o", help="Output script file")
     gen_parser.set_defaults(func=cmd_gen)
 
+    # Manual driver command
+    driver_parser = subparsers.add_parser("driver", help="Manual driver CLI")
+    driver_parser.add_argument("--url", help="Start URL")
+    driver_parser.add_argument("--limit", type=int, default=50, help="Snapshot limit for state")
+    driver_parser.add_argument(
+        "--headless", action="store_true", help="Run browser in headless mode"
+    )
+    driver_parser.set_defaults(func=cmd_driver)
+
     args = parser.parse_args()
 
     if not args.command:
diff --git a/sentience/extension/background.js b/sentience/extension/background.js
index b5192d9..02c0408 100644
--- a/sentience/extension/background.js
+++ b/sentience/extension/background.js
@@ -28,14 +28,14 @@ async function handleSnapshotProcessing(rawData, options = {}) {
     const startTime = performance.now();
     try {
         if (!Array.isArray(rawData)) throw new Error("rawData must be an array");
-        if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), 
+        if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(),
         !wasmReady) throw new Error("WASM module not initialized");
         let analyzedElements, prunedRawData;
         try {
             const wasmPromise = new Promise((resolve, reject) => {
                 try {
                     let result;
-                    result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData), 
+                    result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData),
                     resolve(result);
                 } catch (e) {
                     reject(e);
@@ -101,4 +101,4 @@ initWASM().catch(err => {}), chrome.runtime.onMessage.addListener((request, send
     event.preventDefault();
 }), self.addEventListener("unhandledrejection", event => {
     event.preventDefault();
-});
\ No newline at end of file
+});
diff --git a/sentience/extension/content.js b/sentience/extension/content.js
index b65cfb5..97923a2 100644
--- a/sentience/extension/content.js
+++ b/sentience/extension/content.js
@@ -82,7 +82,7 @@
                 if (!elements || !Array.isArray(elements)) return;
                 removeOverlay();
                 const host = document.createElement("div");
-                host.id = OVERLAY_HOST_ID, host.style.cssText = "\n        position: fixed !important;\n        top: 0 !important;\n        left: 0 !important;\n        width: 100vw !important;\n        height: 100vh !important;\n        pointer-events: none !important;\n        z-index: 2147483647 !important;\n        margin: 0 !important;\n        padding: 0 !important;\n    ", 
+                host.id = OVERLAY_HOST_ID, host.style.cssText = "\n        position: fixed !important;\n        top: 0 !important;\n        left: 0 !important;\n        width: 100vw !important;\n        height: 100vh !important;\n        pointer-events: none !important;\n        z-index: 2147483647 !important;\n        margin: 0 !important;\n        padding: 0 !important;\n    ",
                 document.body.appendChild(host);
                 const shadow = host.attachShadow({
                     mode: "closed"
@@ -94,15 +94,15 @@
                     let color;
                     color = isTarget ? "#FF0000" : isPrimary ? "#0066FF" : "#00FF00";
                     const importanceRatio = maxImportance > 0 ? importance / maxImportance : .5, borderOpacity = isTarget ? 1 : isPrimary ? .9 : Math.max(.4, .5 + .5 * importanceRatio), fillOpacity = .2 * borderOpacity, borderWidth = isTarget ? 2 : isPrimary ? 1.5 : Math.max(.5, Math.round(2 * importanceRatio)), hexOpacity = Math.round(255 * fillOpacity).toString(16).padStart(2, "0"), box = document.createElement("div");
-                    if (box.style.cssText = `\n            position: absolute;\n            left: ${bbox.x}px;\n            top: ${bbox.y}px;\n            width: ${bbox.width}px;\n            height: ${bbox.height}px;\n            border: ${borderWidth}px solid ${color};\n            background-color: ${color}${hexOpacity};\n            box-sizing: border-box;\n            opacity: ${borderOpacity};\n            pointer-events: none;\n        `, 
+                    if (box.style.cssText = `\n            position: absolute;\n            left: ${bbox.x}px;\n            top: ${bbox.y}px;\n            width: ${bbox.width}px;\n            height: ${bbox.height}px;\n            border: ${borderWidth}px solid ${color};\n            background-color: ${color}${hexOpacity};\n            box-sizing: border-box;\n            opacity: ${borderOpacity};\n            pointer-events: none;\n        `,
                     importance > 0 || isPrimary) {
                         const badge = document.createElement("span");
-                        badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n                position: absolute;\n                top: -18px;\n                left: 0;\n                background: ${color};\n                color: white;\n                font-size: 11px;\n                font-weight: bold;\n                padding: 2px 6px;\n                font-family: Arial, sans-serif;\n                border-radius: 3px;\n                opacity: 0.95;\n                white-space: nowrap;\n                pointer-events: none;\n            `, 
+                        badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n                position: absolute;\n                top: -18px;\n                left: 0;\n                background: ${color};\n                color: white;\n                font-size: 11px;\n                font-weight: bold;\n                padding: 2px 6px;\n                font-family: Arial, sans-serif;\n                border-radius: 3px;\n                opacity: 0.95;\n                white-space: nowrap;\n                pointer-events: none;\n            `,
                         box.appendChild(badge);
                     }
                     if (isTarget) {
                         const targetIndicator = document.createElement("span");
-                        targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n                position: absolute;\n                top: -18px;\n                right: 0;\n                font-size: 16px;\n                pointer-events: none;\n            ", 
+                        targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n                position: absolute;\n                top: -18px;\n                right: 0;\n                font-size: 16px;\n                pointer-events: none;\n            ",
                         box.appendChild(targetIndicator);
                     }
                     shadow.appendChild(box);
@@ -122,7 +122,7 @@
                 if (!grids || !Array.isArray(grids)) return;
                 removeOverlay();
                 const host = document.createElement("div");
-                host.id = OVERLAY_HOST_ID, host.style.cssText = "\n        position: fixed !important;\n        top: 0 !important;\n        left: 0 !important;\n        width: 100vw !important;\n        height: 100vh !important;\n        pointer-events: none !important;\n        z-index: 2147483647 !important;\n        margin: 0 !important;\n        padding: 0 !important;\n    ", 
+                host.id = OVERLAY_HOST_ID, host.style.cssText = "\n        position: fixed !important;\n        top: 0 !important;\n        left: 0 !important;\n        width: 100vw !important;\n        height: 100vh !important;\n        pointer-events: none !important;\n        z-index: 2147483647 !important;\n        margin: 0 !important;\n        padding: 0 !important;\n    ",
                 document.body.appendChild(host);
                 const shadow = host.attachShadow({
                     mode: "closed"
@@ -138,10 +138,10 @@
                     let labelText = grid.label ? `Grid ${grid.grid_id}: ${grid.label}` : `Grid ${grid.grid_id}`;
                     grid.is_dominant && (labelText = `⭐ ${labelText} (dominant)`);
                     const badge = document.createElement("span");
-                    if (badge.textContent = labelText, badge.style.cssText = `\n                position: absolute;\n                top: -18px;\n                left: 0;\n                background: ${color};\n                color: white;\n                font-size: 11px;\n                font-weight: bold;\n                padding: 2px 6px;\n                font-family: Arial, sans-serif;\n                border-radius: 3px;\n                opacity: 0.95;\n                white-space: nowrap;\n                pointer-events: none;\n            `, 
+                    if (badge.textContent = labelText, badge.style.cssText = `\n                position: absolute;\n                top: -18px;\n                left: 0;\n                background: ${color};\n                color: white;\n                font-size: 11px;\n                font-weight: bold;\n                padding: 2px 6px;\n                font-family: Arial, sans-serif;\n                border-radius: 3px;\n                opacity: 0.95;\n                white-space: nowrap;\n                pointer-events: none;\n            `,
                     box.appendChild(badge), isTarget) {
                         const targetIndicator = document.createElement("span");
-                        targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n                position: absolute;\n                top: -18px;\n                right: 0;\n                font-size: 16px;\n                pointer-events: none;\n            ", 
+                        targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n                position: absolute;\n                top: -18px;\n                right: 0;\n                font-size: 16px;\n                pointer-events: none;\n            ",
                         box.appendChild(targetIndicator);
                     }
                     shadow.appendChild(box);
@@ -155,7 +155,7 @@
     let overlayTimeout = null;
     function removeOverlay() {
         const existing = document.getElementById(OVERLAY_HOST_ID);
-        existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), 
+        existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout),
         overlayTimeout = null);
     }
-}();
\ No newline at end of file
+}();
diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js
index 12ad84b..73dda41 100644
--- a/sentience/extension/injected_api.js
+++ b/sentience/extension/injected_api.js
@@ -103,9 +103,9 @@
             const iframes = document.querySelectorAll("iframe");
             for (const iframe of iframes) {
                 const src = iframe.getAttribute("src") || "", title = iframe.getAttribute("title") || "";
-                if (src) for (const [provider, hints] of Object.entries(CAPTCHA_IFRAME_HINTS)) matchHints(src, hints) && (hasIframeHit = !0, 
+                if (src) for (const [provider, hints] of Object.entries(CAPTCHA_IFRAME_HINTS)) matchHints(src, hints) && (hasIframeHit = !0,
                 providerSignals[provider] += 1, addEvidence(evidence.iframe_src_hits, truncateText(src, 120)));
-                if (title && matchHints(title, [ "captcha", "recaptcha" ]) && (hasContainerHit = !0, 
+                if (title && matchHints(title, [ "captcha", "recaptcha" ]) && (hasContainerHit = !0,
                 addEvidence(evidence.selector_hits, 'iframe[title*="captcha"]')), evidence.iframe_src_hits.length >= 5) break;
             }
         } catch (e) {}
@@ -114,14 +114,14 @@
             for (const script of scripts) {
                 const src = script.getAttribute("src") || "";
                 if (src) {
-                    for (const [provider, hints] of Object.entries(CAPTCHA_SCRIPT_HINTS)) matchHints(src, hints) && (hasScriptHit = !0, 
+                    for (const [provider, hints] of Object.entries(CAPTCHA_SCRIPT_HINTS)) matchHints(src, hints) && (hasScriptHit = !0,
                     providerSignals[provider] += 1, addEvidence(evidence.selector_hits, `script[src*="${hints[0]}"]`));
                     if (evidence.selector_hits.length >= 5) break;
                 }
             }
         } catch (e) {}
         for (const {selector: selector, provider: provider} of CAPTCHA_CONTAINER_SELECTORS) try {
-            document.querySelector(selector) && (hasContainerHit = !0, addEvidence(evidence.selector_hits, selector), 
+            document.querySelector(selector) && (hasContainerHit = !0, addEvidence(evidence.selector_hits, selector),
             "unknown" !== provider && (providerSignals[provider] += 1));
         } catch (e) {}
         const textSnippet = function() {
@@ -139,7 +139,7 @@
             } catch (e) {}
             try {
                 let bodyText = document.body?.innerText || "";
-                return !bodyText && document.body?.textContent && (bodyText = document.body.textContent), 
+                return !bodyText && document.body?.textContent && (bodyText = document.body.textContent),
                 truncateText(bodyText.replace(/\s+/g, " ").trim(), 2e3);
             } catch (e) {
                 return "";
@@ -147,21 +147,21 @@
         }();
         if (textSnippet) {
             const lowerText = textSnippet.toLowerCase();
-            for (const keyword of CAPTCHA_TEXT_KEYWORDS) lowerText.includes(keyword) && (hasKeywordHit = !0, 
+            for (const keyword of CAPTCHA_TEXT_KEYWORDS) lowerText.includes(keyword) && (hasKeywordHit = !0,
             addEvidence(evidence.text_hits, keyword));
         }
         try {
             const lowerUrl = (window.location?.href || "").toLowerCase();
-            for (const hint of CAPTCHA_URL_HINTS) lowerUrl.includes(hint) && (hasUrlHit = !0, 
+            for (const hint of CAPTCHA_URL_HINTS) lowerUrl.includes(hint) && (hasUrlHit = !0,
             addEvidence(evidence.url_hits, hint));
         } catch (e) {}
         let confidence = 0;
-        hasIframeHit && (confidence += .7), hasContainerHit && (confidence += .5), hasScriptHit && (confidence += .5), 
-        hasKeywordHit && (confidence += .3), hasUrlHit && (confidence += .2), confidence = Math.min(1, confidence), 
+        hasIframeHit && (confidence += .7), hasContainerHit && (confidence += .5), hasScriptHit && (confidence += .5),
+        hasKeywordHit && (confidence += .3), hasUrlHit && (confidence += .2), confidence = Math.min(1, confidence),
         hasIframeHit && (confidence = Math.max(confidence, .8)), !hasKeywordHit || hasIframeHit || hasContainerHit || hasScriptHit || hasUrlHit || (confidence = Math.min(confidence, .4));
         const detected = confidence >= .7;
         let providerHint = null;
-        return providerSignals.recaptcha > 0 ? providerHint = "recaptcha" : providerSignals.hcaptcha > 0 ? providerHint = "hcaptcha" : providerSignals.turnstile > 0 ? providerHint = "turnstile" : providerSignals.arkose > 0 ? providerHint = "arkose" : providerSignals.awswaf > 0 ? providerHint = "awswaf" : detected && (providerHint = "unknown"), 
+        return providerSignals.recaptcha > 0 ? providerHint = "recaptcha" : providerSignals.hcaptcha > 0 ? providerHint = "hcaptcha" : providerSignals.turnstile > 0 ? providerHint = "turnstile" : providerSignals.arkose > 0 ? providerHint = "arkose" : providerSignals.awswaf > 0 ? providerHint = "awswaf" : detected && (providerHint = "unknown"),
         {
             detected: detected,
             provider_hint: providerHint,
@@ -271,7 +271,7 @@
                     if (labelEl) {
                         let text = "";
                         try {
-                            if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), 
+                            if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()),
                             !text && labelEl.getAttribute) {
                                 const ariaLabel = labelEl.getAttribute("aria-label");
                                 ariaLabel && (text = ariaLabel.trim());
@@ -466,7 +466,7 @@
                         });
                         const checkStable = () => {
                             const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime;
-                            timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(), 
+                            timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(),
                             resolve()) : setTimeout(checkStable, 50);
                         };
                         checkStable();
@@ -492,7 +492,7 @@
                                 });
                                 const checkQuiet = () => {
                                     const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime;
-                                    timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), 
+                                    timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(),
                                     resolve()) : setTimeout(checkQuiet, 50);
                                 };
                                 checkQuiet();
@@ -607,7 +607,7 @@
                 }(el);
                 let safeValue = null, valueRedacted = null;
                 try {
-                    if (void 0 !== el.value || el.getAttribute && null !== el.getAttribute("value")) if (isPasswordInput) safeValue = null, 
+                    if (void 0 !== el.value || el.getAttribute && null !== el.getAttribute("value")) if (isPasswordInput) safeValue = null,
                     valueRedacted = "true"; else {
                         const rawValue = void 0 !== el.value ? String(el.value) : String(el.getAttribute("value"));
                         safeValue = rawValue.length > 200 ? rawValue.substring(0, 200) : rawValue, valueRedacted = "false";
@@ -734,8 +734,8 @@
                             const requestId = `iframe-${idx}-${Date.now()}`, timeout = setTimeout(() => {
                                 resolve(null);
                             }, 5e3), listener = event => {
-                                "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), 
-                                window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot, 
+                                "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout),
+                                window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot,
                                 resolve({
                                     iframe: iframe,
                                     data: event.data.snapshot,
@@ -751,7 +751,7 @@
                                         ...options,
                                         collectIframes: !0
                                     }
-                                }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), 
+                                }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener),
                                 resolve(null));
                             } catch (error) {
                                 clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null);
@@ -836,7 +836,7 @@
                         }, 25e3), listener = e => {
                             if ("SENTIENCE_SNAPSHOT_RESULT" === e.data.type && e.data.requestId === requestId) {
                                 if (resolved) return;
-                                resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), 
+                                resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener),
                                 e.data.error ? reject(new Error(e.data.error)) : resolve({
                                     elements: e.data.elements,
                                     raw_elements: e.data.raw_elements,
@@ -853,7 +853,7 @@
                                 options: options
                             }, "*");
                         } catch (error) {
-                            resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), 
+                            resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener),
                             reject(new Error(`Failed to send snapshot request: ${error.message}`)));
                         }
                     });
@@ -874,7 +874,7 @@
             options.screenshot && (screenshot = await function(options) {
                 return new Promise(resolve => {
                     const requestId = Math.random().toString(36).substring(7), listener = e => {
-                        "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), 
+                        "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener),
                         resolve(e.data.screenshot));
                     };
                     window.addEventListener("message", listener), window.postMessage({
@@ -893,7 +893,7 @@
                 const lastMutationTs = window.__sentience_lastMutationTs, now = performance.now(), quietMs = "number" == typeof lastMutationTs && Number.isFinite(lastMutationTs) ? Math.max(0, now - lastMutationTs) : null, nodeCount = document.querySelectorAll("*").length;
                 let requiresVision = !1, requiresVisionReason = null;
                 const canvasCount = document.getElementsByTagName("canvas").length;
-                canvasCount > 0 && (requiresVision = !0, requiresVisionReason = `canvas:${canvasCount}`), 
+                canvasCount > 0 && (requiresVision = !0, requiresVisionReason = `canvas:${canvasCount}`),
                 diagnostics = {
                     metrics: {
                         ready_state: document.readyState || null,
@@ -939,15 +939,15 @@
                 }
                 if (node.nodeType !== Node.ELEMENT_NODE) return;
                 const tag = node.tagName.toLowerCase();
-                if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), 
-                "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), 
-                "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), 
-                "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), 
+                if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "),
+                "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"),
+                "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"),
+                "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk),
                 "a" === tag) {
                     const href = node.getAttribute("href");
                     markdown += href ? `](${href})` : "]", insideLink = !1;
                 }
-                "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), 
+                "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"),
                 insideLink || "h1" !== tag && "h2" !== tag && "h3" !== tag && "p" !== tag && "div" !== tag || (markdown += "\n");
             }(tempDiv), markdown.replace(/\n{3,}/g, "\n\n").trim();
         }(document.body) : function(root) {
@@ -960,7 +960,7 @@
                         const style = window.getComputedStyle(node);
                         if ("none" === style.display || "hidden" === style.visibility) return;
                         const isBlock = "block" === style.display || "flex" === style.display || "P" === node.tagName || "DIV" === node.tagName;
-                        isBlock && (text += " "), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), 
+                        isBlock && (text += " "), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk),
                         isBlock && (text += "\n");
                     }
                 } else text += node.textContent;
@@ -1059,25 +1059,25 @@
     }
     function startRecording(options = {}) {
         const {highlightColor: highlightColor = "#ff0000", successColor: successColor = "#00ff00", autoDisableTimeout: autoDisableTimeout = 18e5, keyboardShortcut: keyboardShortcut = "Ctrl+Shift+I"} = options;
-        if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), 
+        if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"),
         () => {};
         window.sentience_registry_map = new Map, window.sentience_registry.forEach((el, idx) => {
             el && window.sentience_registry_map.set(el, idx);
         });
         let highlightBox = document.getElementById("sentience-highlight-box");
-        highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", 
-        highlightBox.style.cssText = `\n            position: fixed;\n            pointer-events: none;\n            z-index: 2147483647;\n            border: 2px solid ${highlightColor};\n            background: rgba(255, 0, 0, 0.1);\n            display: none;\n            transition: all 0.1s ease;\n            box-sizing: border-box;\n        `, 
+        highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box",
+        highlightBox.style.cssText = `\n            position: fixed;\n            pointer-events: none;\n            z-index: 2147483647;\n            border: 2px solid ${highlightColor};\n            background: rgba(255, 0, 0, 0.1);\n            display: none;\n            transition: all 0.1s ease;\n            box-sizing: border-box;\n        `,
         document.body.appendChild(highlightBox));
         let recordingIndicator = document.getElementById("sentience-recording-indicator");
-        recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", 
-        recordingIndicator.style.cssText = `\n            position: fixed;\n            top: 0;\n            left: 0;\n            right: 0;\n            height: 3px;\n            background: ${highlightColor};\n            z-index: 2147483646;\n            pointer-events: none;\n        `, 
+        recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator",
+        recordingIndicator.style.cssText = `\n            position: fixed;\n            top: 0;\n            left: 0;\n            right: 0;\n            height: 3px;\n            background: ${highlightColor};\n            z-index: 2147483646;\n            pointer-events: none;\n        `,
         document.body.appendChild(recordingIndicator)), recordingIndicator.style.display = "block";
         const mouseOverHandler = e => {
             const el = e.target;
             if (!el || el === highlightBox || el === recordingIndicator) return;
             const rect = el.getBoundingClientRect();
-            highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", 
-            highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", 
+            highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px",
+            highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px",
             highlightBox.style.height = rect.height + "px";
         }, clickHandler = e => {
             e.preventDefault(), e.stopPropagation();
@@ -1154,7 +1154,7 @@
                 debug_snapshot: rawData
             }, jsonString = JSON.stringify(snippet, null, 2);
             navigator.clipboard.writeText(jsonString).then(() => {
-                highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", 
+                highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)",
                 setTimeout(() => {
                     highlightBox.style.border = `2px solid ${highlightColor}`, highlightBox.style.background = "rgba(255, 0, 0, 0.1)";
                 }, 500);
@@ -1164,15 +1164,15 @@
         };
         let timeoutId = null;
         const stopRecording = () => {
-            document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), 
-            document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), 
-            timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), 
+            document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0),
+            document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId),
+            timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"),
             window.sentience_registry_map && window.sentience_registry_map.clear(), window.sentience_stopRecording === stopRecording && delete window.sentience_stopRecording;
         }, keyboardHandler = e => {
-            (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), 
+            (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(),
             stopRecording());
         };
-        return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), 
+        return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0),
         document.addEventListener("keydown", keyboardHandler, !0), autoDisableTimeout > 0 && (timeoutId = setTimeout(() => {
             stopRecording();
         }, autoDisableTimeout)), window.sentience_stopRecording = stopRecording, stopRecording;
@@ -1241,4 +1241,4 @@
             }
         }), window.sentience_iframe_handler_setup = !0));
     })();
-}();
\ No newline at end of file
+}();
diff --git a/sentience/extension/pkg/sentience_core.js b/sentience/extension/pkg/sentience_core.js
index bb9cae0..c50ad61 100644
--- a/sentience/extension/pkg/sentience_core.js
+++ b/sentience/extension/pkg/sentience_core.js
@@ -25,7 +25,7 @@ function __wbg_get_imports() {
         },
         __wbg___wbindgen_bigint_get_as_i64_8fcf4ce7f1ca72a2: function(arg0, arg1) {
             const v = getObject(arg1), ret = "bigint" == typeof v ? v : void 0;
-            getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? BigInt(0) : ret, !0), 
+            getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? BigInt(0) : ret, !0),
             getDataViewMemory0().setInt32(arg0 + 0, !isLikeNone(ret), !0);
         },
         __wbg___wbindgen_boolean_get_bbbb1c18aa2f5e25: function(arg0) {
@@ -224,7 +224,7 @@ function getArrayU8FromWasm0(ptr, len) {
 let cachedDataViewMemory0 = null;
 
 function getDataViewMemory0() {
-    return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), 
+    return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)),
     cachedDataViewMemory0;
 }
 
@@ -235,7 +235,7 @@ function getStringFromWasm0(ptr, len) {
 let cachedUint8ArrayMemory0 = null;
 
 function getUint8ArrayMemory0() {
-    return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), 
+    return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)),
     cachedUint8ArrayMemory0;
 }
 
@@ -264,7 +264,7 @@ function isLikeNone(x) {
 function passStringToWasm0(arg, malloc, realloc) {
     if (void 0 === realloc) {
         const buf = cachedTextEncoder.encode(arg), ptr = malloc(buf.length, 1) >>> 0;
-        return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, 
+        return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length,
         ptr;
     }
     let len = arg.length, ptr = malloc(len, 1) >>> 0;
@@ -319,7 +319,7 @@ const cachedTextEncoder = new TextEncoder;
 let wasmModule, wasm, WASM_VECTOR_LEN = 0;
 
 function __wbg_finalize_init(instance, module) {
-    return wasm = instance.exports, wasmModule = module, cachedDataViewMemory0 = null, 
+    return wasm = instance.exports, wasmModule = module, cachedDataViewMemory0 = null,
     cachedUint8ArrayMemory0 = null, wasm;
 }
 
@@ -360,7 +360,7 @@ function initSync(module) {
 
 async function __wbg_init(module_or_path) {
     if (void 0 !== wasm) return wasm;
-    void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), 
+    void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path),
     void 0 === module_or_path && (module_or_path = new URL("sentience_core_bg.wasm", import.meta.url));
     const imports = __wbg_get_imports();
     ("string" == typeof module_or_path || "function" == typeof Request && module_or_path instanceof Request || "function" == typeof URL && module_or_path instanceof URL) && (module_or_path = fetch(module_or_path));
@@ -368,4 +368,4 @@ async function __wbg_init(module_or_path) {
     return __wbg_finalize_init(instance, module);
 }
 
-export { initSync, __wbg_init as default };
\ No newline at end of file
+export { initSync, __wbg_init as default };
diff --git a/sentience/llm_provider.py b/sentience/llm_provider.py
index 4874e47..f119c0e 100644
--- a/sentience/llm_provider.py
+++ b/sentience/llm_provider.py
@@ -335,9 +335,7 @@ def __init__(
         model: str = "meta-llama/Meta-Llama-3-8B-Instruct",
         base_url: str = "https://api.deepinfra.com/v1/openai",
     ):
-        api_key = get_api_key_from_env(
-            ["DEEPINFRA_TOKEN", "DEEPINFRA_API_KEY"], api_key
-        )
+        api_key = get_api_key_from_env(["DEEPINFRA_TOKEN", "DEEPINFRA_API_KEY"], api_key)
         super().__init__(api_key=api_key, model=model, base_url=base_url)
 
 
diff --git a/sentience/models.py b/sentience/models.py
index 2daef70..99c7208 100644
--- a/sentience/models.py
+++ b/sentience/models.py
@@ -630,6 +630,83 @@ class ActionResult(BaseModel):
     cursor: dict[str, Any] | None = None
 
 
+class TabInfo(BaseModel):
+    """Metadata about an open browser tab/page."""
+
+    tab_id: str
+    url: str | None = None
+    title: str | None = None
+    is_active: bool = False
+
+
+class TabListResult(BaseModel):
+    """Result of listing tabs."""
+
+    ok: bool
+    tabs: list[TabInfo] = Field(default_factory=list)
+    error: str | None = None
+
+
+class TabOperationResult(BaseModel):
+    """Result of tab operations (open/switch/close)."""
+
+    ok: bool
+    tab: TabInfo | None = None
+    error: str | None = None
+
+
+class StepHookContext(BaseModel):
+    """Context passed to lifecycle hooks."""
+
+    step_id: str
+    step_index: int
+    goal: str
+    attempt: int = 0
+    url: str | None = None
+    success: bool | None = None
+    outcome: str | None = None
+    error: str | None = None
+
+
+class EvaluateJsRequest(BaseModel):
+    """Request for evaluate_js helper."""
+
+    code: str = Field(
+        ...,
+        min_length=1,
+        max_length=8000,
+        description="JavaScript source code to evaluate in the page context.",
+    )
+    max_output_chars: int = Field(
+        4000,
+        ge=1,
+        le=20000,
+        description="Maximum number of characters to return in the text field.",
+    )
+    truncate: bool = Field(
+        True,
+        description="Whether to truncate text output when it exceeds max_output_chars.",
+    )
+
+
+class EvaluateJsResult(BaseModel):
+    """Result of evaluate_js helper."""
+
+    ok: bool = Field(..., description="Whether evaluation succeeded.")
+    value: Any | None = Field(
+        None, description="Raw value returned by the page evaluation."
+    )
+    text: str | None = Field(
+        None, description="Best-effort string representation of the value."
+    )
+    truncated: bool = Field(
+        False, description="True if text output was truncated."
+    )
+    error: str | None = Field(
+        None, description="Error string when ok=False."
+    )
+
+
 class WaitResult(BaseModel):
     """Result of wait_for operation"""
 
diff --git a/sentience/runtime_agent.py b/sentience/runtime_agent.py
index 3c107e2..b6c4193 100644
--- a/sentience/runtime_agent.py
+++ b/sentience/runtime_agent.py
@@ -10,7 +10,9 @@
 from __future__ import annotations
 
 import base64
+import inspect
 import re
+from collections.abc import Callable
 from dataclasses import dataclass, field
 from typing import Any, Literal
 
@@ -18,7 +20,7 @@
 from .backends import actions as backend_actions
 from .llm_interaction_handler import LLMInteractionHandler
 from .llm_provider import LLMProvider
-from .models import BBox, Snapshot
+from .models import BBox, Snapshot, StepHookContext
 from .verification import AssertContext, AssertOutcome, Predicate
 
 
@@ -84,15 +86,29 @@ async def run_step(
         *,
         task_goal: str,
         step: RuntimeStep,
+        on_step_start: Callable[[StepHookContext], Any] | None = None,
+        on_step_end: Callable[[StepHookContext], Any] | None = None,
     ) -> bool:
-        self.runtime.begin_step(step.goal)
+        step_id = self.runtime.begin_step(step.goal)
+        await self._run_hook(
+            on_step_start,
+            StepHookContext(
+                step_id=step_id,
+                step_index=self.runtime.step_index,
+                goal=step.goal,
+                url=getattr(self.runtime.last_snapshot, "url", None),
+            ),
+        )
         emitted = False
         ok = False
+        error_msg: str | None = None
+        outcome: str | None = None
         try:
             snap = await self._snapshot_with_ramp(step=step)
 
             if await self._should_short_circuit_to_vision(step=step, snap=snap):
                 ok = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
+                outcome = "ok" if ok else "verification_failed"
                 return ok
 
             # 1) Structured executor attempt.
@@ -100,15 +116,20 @@ async def run_step(
             await self._execute_action(action=action, snap=snap)
             ok = await self._apply_verifications(step=step)
             if ok:
+                outcome = "ok"
                 return True
 
             # 2) Optional vision executor fallback (bounded).
             if step.vision_executor_enabled and step.max_vision_executor_attempts > 0:
                 ok = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
+                outcome = "ok" if ok else "verification_failed"
                 return ok
 
+            outcome = "verification_failed"
             return False
         except Exception as exc:
+            error_msg = str(exc)
+            outcome = "exception"
             try:
                 await self.runtime.emit_step_end(
                     success=False,
@@ -130,6 +151,29 @@ async def run_step(
                     )
                 except Exception:
                     pass
+            await self._run_hook(
+                on_step_end,
+                StepHookContext(
+                    step_id=step_id,
+                    step_index=self.runtime.step_index,
+                    goal=step.goal,
+                    url=getattr(self.runtime.last_snapshot, "url", None),
+                    success=ok,
+                    outcome=outcome,
+                    error=error_msg,
+                ),
+            )
+
+    async def _run_hook(
+        self,
+        hook: Callable[[StepHookContext], Any] | None,
+        ctx: StepHookContext,
+    ) -> None:
+        if hook is None:
+            return
+        result = hook(ctx)
+        if inspect.isawaitable(result):
+            await result
 
     async def _snapshot_with_ramp(self, *, step: RuntimeStep) -> Snapshot:
         limit = step.snapshot_limit_base
diff --git a/tests/test_agent_runtime.py b/tests/test_agent_runtime.py
index c7f1b06..130ce5c 100644
--- a/tests/test_agent_runtime.py
+++ b/tests/test_agent_runtime.py
@@ -10,7 +10,7 @@
 import pytest
 
 from sentience.agent_runtime import AgentRuntime
-from sentience.models import SnapshotOptions
+from sentience.models import EvaluateJsRequest, SnapshotOptions, TabInfo
 from sentience.verification import AssertContext, AssertOutcome
 
 
@@ -54,6 +54,21 @@ async def type_text(self, text: str) -> None:
     async def wait_ready_state(self, state="interactive", timeout_ms=15000) -> None:
         pass
 
+    async def list_tabs(self):
+        return [
+            TabInfo(tab_id="tab-1", url="https://example.com", is_active=True),
+            TabInfo(tab_id="tab-2", url="https://example.com/2", is_active=False),
+        ]
+
+    async def open_tab(self, url: str):
+        return TabInfo(tab_id="tab-new", url=url, is_active=True)
+
+    async def switch_tab(self, tab_id: str):
+        return TabInfo(tab_id=tab_id, url="https://example.com/2", is_active=True)
+
+    async def close_tab(self, tab_id: str):
+        return TabInfo(tab_id=tab_id, url="https://example.com/2", is_active=False)
+
 
 class MockTracer:
     """Mock Tracer for testing."""
@@ -130,6 +145,52 @@ def test_init_with_api_key_and_options(self) -> None:
         assert runtime._snapshot_options.sentience_api_key == "sk_pro_key"
         assert runtime._snapshot_options.use_api is True
 
+    @pytest.mark.asyncio
+    async def test_evaluate_js_success(self) -> None:
+        backend = MockBackend()
+        tracer = MockTracer()
+        backend.eval_results["1 + 1"] = 2
+        runtime = AgentRuntime(backend=backend, tracer=tracer)
+
+        result = await runtime.evaluate_js(EvaluateJsRequest(code="1 + 1"))
+
+        assert result.ok is True
+        assert result.value == 2
+        assert result.text == "2"
+
+    @pytest.mark.asyncio
+    async def test_evaluate_js_truncate(self) -> None:
+        backend = MockBackend()
+        tracer = MockTracer()
+        backend.eval_results["long"] = "x" * 50
+        runtime = AgentRuntime(backend=backend, tracer=tracer)
+
+        result = await runtime.evaluate_js(EvaluateJsRequest(code="long", max_output_chars=10))
+
+        assert result.ok is True
+        assert result.truncated is True
+        assert result.text == "x" * 10 + "..."
+
+    @pytest.mark.asyncio
+    async def test_tab_operations(self) -> None:
+        backend = MockBackend()
+        tracer = MockTracer()
+        runtime = AgentRuntime(backend=backend, tracer=tracer)
+
+        tabs = await runtime.list_tabs()
+        assert tabs.ok is True
+        assert len(tabs.tabs) == 2
+
+        opened = await runtime.open_tab("https://example.com/new")
+        assert opened.ok is True
+        assert opened.tab is not None
+
+        switched = await runtime.switch_tab("tab-2")
+        assert switched.ok is True
+
+        closed = await runtime.close_tab("tab-2")
+        assert closed.ok is True
+
 
 class TestAgentRuntimeGetUrl:
     """Tests for get_url method."""
diff --git a/tests/unit/test_runtime_agent.py b/tests/unit/test_runtime_agent.py
index f97ab03..2565319 100644
--- a/tests/unit/test_runtime_agent.py
+++ b/tests/unit/test_runtime_agent.py
@@ -6,7 +6,15 @@
 
 from sentience.agent_runtime import AgentRuntime
 from sentience.llm_provider import LLMProvider, LLMResponse
-from sentience.models import BBox, Element, Snapshot, SnapshotDiagnostics, Viewport, VisualCues
+from sentience.models import (
+    BBox,
+    Element,
+    Snapshot,
+    SnapshotDiagnostics,
+    StepHookContext,
+    Viewport,
+    VisualCues,
+)
 from sentience.runtime_agent import RuntimeAgent, RuntimeStep, StepVerification
 from sentience.verification import AssertContext, AssertOutcome
 
@@ -233,6 +241,43 @@ def pred(ctx: AssertContext) -> AssertOutcome:
     assert len(vision.calls) == 1
 
 
+@pytest.mark.asyncio
+async def test_runtime_agent_hooks_called() -> None:
+    backend = MockBackend()
+    tracer = MockTracer()
+    runtime = AgentRuntime(backend=backend, tracer=tracer)
+    executor = ProviderStub(responses=["CLICK(1)"])
+
+    agent = RuntimeAgent(runtime=runtime, executor=executor)
+    step = RuntimeStep(goal="click first", verifications=[], max_snapshot_attempts=1)
+
+    started: list[StepHookContext] = []
+    ended: list[StepHookContext] = []
+
+    async def on_start(ctx: StepHookContext):
+        started.append(ctx)
+
+    async def on_end(ctx: StepHookContext):
+        ended.append(ctx)
+
+    snapshot = make_snapshot(url="https://example.com/start", elements=[make_clickable_element(1)])
+
+    async def fake_snapshot(**_kwargs):
+        runtime.last_snapshot = snapshot
+        return snapshot
+
+    runtime.snapshot = AsyncMock(side_effect=fake_snapshot)  # type: ignore[method-assign]
+
+    await agent.run_step(task_goal="task", step=step, on_step_start=on_start, on_step_end=on_end)
+
+    assert len(started) == 1
+    assert len(ended) == 1
+    assert started[0].goal == "click first"
+    assert ended[0].success is True
+    assert ended[0].outcome == "ok"
+    assert ended[0].error is None
+
+
 @pytest.mark.asyncio
 async def test_snapshot_limit_ramp_increases_limit_on_low_confidence() -> None:
     backend = MockBackend()

From 2191e5a11744a93519ea803b6784e72af09c21a9 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 20:07:39 -0800
Subject: [PATCH 02/12] Phase 2: ToolRegistry + Filesystem + extraction

---
 docs/CODE_REVIEW_PHASE2_BU_PARITY.md | 270 +++++++++++++++++++++++++++
 sentience/__init__.py                |   3 +-
 sentience/agent.py                   |   8 +-
 sentience/agent_runtime.py           |  31 +++
 sentience/llm_provider.py            |   7 +
 sentience/models.py                  |  25 +--
 sentience/read.py                    | 115 ++++++++++--
 sentience/tools/__init__.py          |  18 ++
 sentience/tools/context.py           |  58 ++++++
 sentience/tools/defaults.py          | 230 +++++++++++++++++++++++
 sentience/tools/filesystem.py        | 151 +++++++++++++++
 sentience/tools/registry.py          | 143 ++++++++++++++
 tests/unit/test_extract.py           |  74 ++++++++
 tests/unit/test_filesystem_tools.py  |  58 ++++++
 tests/unit/test_tool_registry.py     | 224 ++++++++++++++++++++++
 15 files changed, 1380 insertions(+), 35 deletions(-)
 create mode 100644 docs/CODE_REVIEW_PHASE2_BU_PARITY.md
 create mode 100644 sentience/tools/__init__.py
 create mode 100644 sentience/tools/context.py
 create mode 100644 sentience/tools/defaults.py
 create mode 100644 sentience/tools/filesystem.py
 create mode 100644 sentience/tools/registry.py
 create mode 100644 tests/unit/test_extract.py
 create mode 100644 tests/unit/test_filesystem_tools.py
 create mode 100644 tests/unit/test_tool_registry.py

diff --git a/docs/CODE_REVIEW_PHASE2_BU_PARITY.md b/docs/CODE_REVIEW_PHASE2_BU_PARITY.md
new file mode 100644
index 0000000..32db024
--- /dev/null
+++ b/docs/CODE_REVIEW_PHASE2_BU_PARITY.md
@@ -0,0 +1,270 @@
+# Code Review: Browser-Use Parity Phase 2 Implementation
+
+**Date:** 2026-01-23
+**Reviewer:** Claude
+**Branch:** parity_win2
+**Files Reviewed:** 4 modified + 7 new files
+**Scope:** Phase 2 deliverables from BU_GAP_DELIVERABLES.md
+
+---
+
+## Summary
+
+This PR implements all Phase 2 deliverables for Browser-Use parity:
+
+| Deliverable | Status | Notes |
+|-------------|--------|-------|
+| ToolRegistry + typed schemas | ✅ Complete | Pydantic-based, LLM-ready specs |
+| ToolContext/capability routing | ✅ Complete | `ctx.require()` pattern |
+| Tool execution tracing | ✅ Complete | `tool_call` events emitted |
+| Default tools registered | ✅ Complete | 7 core tools |
+| Filesystem tools (sandboxed) | ✅ Complete | read/write/append/replace |
+| Structured extraction | ✅ Complete | `extract(query, schema)` |
+
+Overall the implementation is well-designed and aligns with the design docs. A few issues need attention.
+
+---
+
+## Design Doc Alignment
+
+### CAPABILITY_ROUTING_DESIGN.md Compliance
+
+| Requirement | Implementation | Status |
+|-------------|----------------|--------|
+| `BackendCapabilities` model | [context.py:14-19](sentience/tools/context.py#L14-L19) | ✅ |
+| `runtime.capabilities()` | [agent_runtime.py:378-389](sentience/agent_runtime.py#L378-L389) | ✅ |
+| `runtime.can(name)` | [agent_runtime.py:391-393](sentience/agent_runtime.py#L391-L393) | ✅ |
+| Structured "unsupported" errors | [context.py:43-45](sentience/tools/context.py#L43-L45) | ⚠️ Uses ValueError, not JSON |
+
+### JS_EVALUATE_TOOL_DESIGN.md Compliance
+
+The `evaluate_js` capability is flagged in `BackendCapabilities`, and the existing `runtime.evaluate_js()` from Phase 1 satisfies this requirement. The tool registry does **not** currently expose `evaluate_js` as a registered tool, which may be intentional.
+
+---
+
+## Critical Issues
+
+### 1. `BackendCapabilities.downloads` and `filesystem_tools` missing
+
+**File:** [context.py:14-19](sentience/tools/context.py#L14-L19)
+
+The design doc specifies:
+```python
+class BackendCapabilities(BaseModel):
+    tabs: bool = False
+    evaluate_js: bool = True
+    downloads: bool = True
+    filesystem_tools: bool = False
+```
+
+But the implementation only has:
+```python
+class BackendCapabilities(BaseModel):
+    tabs: bool = False
+    evaluate_js: bool = False
+    keyboard: bool = False
+```
+
+**Impact:** No way to check if filesystem tools are available before using them.
+
+**Fix:** Add missing capability flags per design doc.
+
+---
+
+### 2. `ToolContext.require()` raises ValueError, not structured error
+
+**File:** [context.py:43-45](sentience/tools/context.py#L43-L45)
+
+```python
+def require(self, name: str) -> None:
+    if not self.can(name):
+        raise ValueError(f"unsupported_capability: {name}")
+```
+
+The design doc (CAPABILITY_ROUTING_DESIGN.md) specifies structured errors:
+```json
+{ "error": "unsupported_capability", "detail": "tabs not supported by backend" }
+```
+
+**Impact:** LLM tool calls cannot distinguish unsupported capabilities from validation errors.
+
+**Fix:** Create a custom `UnsupportedCapabilityError` exception or return a structured result.
+
+---
+
+## Medium Issues
+
+### 3. Missing `evaluate_js` tool in default registry
+
+**File:** [defaults.py](sentience/tools/defaults.py)
+
+The registry registers 7 tools: `snapshot_state`, `click`, `type`, `scroll`, `scroll_to_element`, `click_rect`, `press`. However, `evaluate_js` is not registered despite being a key Phase 1/2 feature per the gap analysis.
+
+**Suggestion:** Add an `evaluate_js` tool that wraps `runtime.evaluate_js()`.
+
+---
+
+### 4. `extract_async` does not use async LLM call
+
+**File:** [read.py:237-277](sentience/read.py#L237-L277)
+
+```python
+async def extract_async(...) -> ExtractResult:
+    # ...
+    response = llm.generate(system, user)  # Sync call in async function
+```
+
+The `LLMProvider.generate()` is synchronous, so `extract_async` doesn't provide true async benefits.
+
+**Suggestion:** Consider adding `generate_async()` to `LLMProvider` or document this limitation.
+
+---
+
+### 5. `FileSandbox` path validation may have edge cases
+
+**File:** [filesystem.py:18-22](sentience/tools/filesystem.py#L18-L22)
+
+```python
+def _resolve(self, path: str) -> Path:
+    candidate = (self.base_dir / path).resolve()
+    if not str(candidate).startswith(str(self.base_dir)):
+        raise ValueError("path escapes sandbox root")
+    return candidate
+```
+
+The string prefix check could fail on edge cases like:
+- `base_dir = /tmp/sandbox` and path resolves to `/tmp/sandbox2/file` (passes check incorrectly)
+
+**Fix:** Use `candidate.is_relative_to(self.base_dir)` (Python 3.9+) instead of string prefix.
+
+---
+
+### 6. Duplicate capability logic in `AgentRuntime` and `ToolContext`
+
+**Files:** [agent_runtime.py:378-393](sentience/agent_runtime.py#L378-L393), [context.py:37-41](sentience/tools/context.py#L37-L41)
+
+Both classes implement `capabilities()` and `can()`. `ToolContext` delegates to `runtime`, which is correct, but the capability detection logic in `AgentRuntime.capabilities()` is complex:
+
+```python
+has_keyboard = hasattr(backend, "type_text") or bool(
+    getattr(getattr(backend, "_page", None), "keyboard", None)
+)
+```
+
+**Suggestion:** Consider moving capability detection to the backend protocol or making it more explicit.
+
+---
+
+## Minor Issues
+
+### 7. `read.py` inconsistent return types fixed
+
+The diff shows fixing inconsistent `dict` vs `ReadResult` returns:
+```python
+-return {
+-    "status": "success",
+-    ...
+-}
++return ReadResult(
++    status="success",
++    ...
++)
+```
+
+This is a good fix.
+
+---
+
+### 8. Missing docstrings in new modules
+
+**Files:** `context.py`, `defaults.py`, `filesystem.py`
+
+The `ToolContext` and `FileSandbox` classes lack detailed docstrings explaining their purpose and usage patterns.
+
+---
+
+### 9. `defaults.py` has unused `Snapshot` import warning potential
+
+**File:** [defaults.py:7](sentience/tools/defaults.py#L7)
+
+```python
+from ..models import ActionResult, BBox, Snapshot
+```
+
+`Snapshot` is used as the output model for `snapshot_state`, so this is correct. No issue.
+
+---
+
+## Test Coverage
+
+The tests are comprehensive and well-structured:
+
+| Test File | Coverage |
+|-----------|----------|
+| test_tool_registry.py | ✅ Registration, validation, LLM spec, execution, tracing, capability checks |
+| test_filesystem_tools.py | ✅ Sandbox traversal prevention, CRUD operations |
+| test_extract.py | ✅ Schema validation success/failure |
+
+### Missing Test Cases
+
+1. **`extract_async` test** - Only sync `extract` is tested
+2. **Edge case for sandbox path validation** - Test `/tmp/sandbox` vs `/tmp/sandbox2` scenario
+3. **`capabilities()` detection logic** - No test for `AgentRuntime.capabilities()`
+
+---
+
+## Architecture Assessment
+
+The implementation follows a clean layered architecture:
+
+```
+┌─────────────────────────────────────┐
+│           ToolRegistry              │  ← LLM-facing tool specs
+├─────────────────────────────────────┤
+│           ToolContext               │  ← Capability routing
+├─────────────────────────────────────┤
+│          AgentRuntime               │  ← Backend abstraction
+├─────────────────────────────────────┤
+│      PlaywrightBackend / CDP        │  ← Browser execution
+└─────────────────────────────────────┘
+```
+
+This aligns with the design goal: *"unified SDK surface through AgentRuntime regardless of backend"*.
+
+---
+
+## Verdict
+
+**Approve with required changes:**
+
+1. **Must fix:** Add missing capability flags (`downloads`, `filesystem_tools`) per design doc
+2. **Must fix:** Fix `FileSandbox._resolve()` to use `is_relative_to()` instead of string prefix
+3. **Should fix:** Add `evaluate_js` tool to default registry
+
+The Phase 2 deliverables are functionally complete. The tool registry design is solid and the filesystem sandbox provides good security boundaries.
+
+---
+
+## Appendix: Files Changed
+
+### Modified Files
+
+| File | Changes |
+|------|---------|
+| sentience/__init__.py | Export new tools module symbols |
+| sentience/agent_runtime.py | Add `tool_registry`, `capabilities()`, `can()` |
+| sentience/models.py | Add `ExtractResult` model |
+| sentience/read.py | Add `extract()` / `extract_async()`, fix return types |
+
+### New Files
+
+| File | Purpose |
+|------|---------|
+| sentience/tools/__init__.py | Package exports |
+| sentience/tools/registry.py | `ToolRegistry` and `ToolSpec` classes |
+| sentience/tools/context.py | `ToolContext` and `BackendCapabilities` |
+| sentience/tools/defaults.py | Default browser tool registrations |
+| sentience/tools/filesystem.py | Sandboxed filesystem tools |
+| tests/unit/test_tool_registry.py | Registry + execution tests |
+| tests/unit/test_filesystem_tools.py | Sandbox + CRUD tests |
+| tests/unit/test_extract.py | Extraction tests |
diff --git a/sentience/__init__.py b/sentience/__init__.py
index 5e4ecc0..a8932a1 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -102,13 +102,14 @@
 from .ordinal import OrdinalIntent, boost_ordinal_elements, detect_ordinal_intent, select_by_ordinal
 from .overlay import clear_overlay, show_overlay
 from .query import find, query
-from .read import read
+from .read import extract, extract_async, read
 from .recorder import Recorder, Trace, TraceStep, record
 from .runtime_agent import RuntimeAgent, RuntimeStep, StepVerification
 from .screenshot import screenshot
 from .sentience_methods import AgentAction, SentienceMethod
 from .snapshot import snapshot
 from .text_search import find_text_rect
+from .tools import BackendCapabilities, ToolContext, ToolRegistry, ToolSpec, register_default_tools
 from .tracer_factory import SENTIENCE_API_URL, create_tracer
 from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
 
diff --git a/sentience/agent.py b/sentience/agent.py
index fce9c14..7d42348 100644
--- a/sentience/agent.py
+++ b/sentience/agent.py
@@ -89,9 +89,7 @@ def _safe_hook_call_sync(
         if verbose:
             print(f"⚠️  Hook error (non-fatal): {hook_error}")
         else:
-            logging.getLogger(__name__).warning(
-                "Hook error (non-fatal): %s", hook_error
-            )
+            logging.getLogger(__name__).warning("Hook error (non-fatal): %s", hook_error)
 
 
 async def _safe_hook_call_async(
@@ -109,9 +107,7 @@ async def _safe_hook_call_async(
         if verbose:
             print(f"⚠️  Hook error (non-fatal): {hook_error}")
         else:
-            logging.getLogger(__name__).warning(
-                "Hook error (non-fatal): %s", hook_error
-            )
+            logging.getLogger(__name__).warning("Hook error (non-fatal): %s", hook_error)
 
 
 class SentienceAgent(BaseAgent):
diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
index e7a1ad4..bebe547 100644
--- a/sentience/agent_runtime.py
+++ b/sentience/agent_runtime.py
@@ -81,6 +81,7 @@
     TabListResult,
     TabOperationResult,
 )
+from .tools import BackendCapabilities, ToolRegistry
 from .trace_event_builder import TraceEventBuilder
 from .verification import AssertContext, AssertOutcome, Predicate
 
@@ -118,6 +119,7 @@ def __init__(
         tracer: Tracer,
         snapshot_options: SnapshotOptions | None = None,
         sentience_api_key: str | None = None,
+        tool_registry: ToolRegistry | None = None,
     ):
         """
         Initialize agent runtime with any BrowserBackend-compatible browser.
@@ -130,9 +132,11 @@ def __init__(
             tracer: Tracer for emitting verification events
             snapshot_options: Default options for snapshots
             sentience_api_key: API key for Pro/Enterprise tier (enables Gateway refinement)
+            tool_registry: Optional ToolRegistry for LLM-callable tools
         """
         self.backend = backend
         self.tracer = tracer
+        self.tool_registry = tool_registry
 
         # Build default snapshot options with API key if provided
         default_opts = snapshot_options or SnapshotOptions()
@@ -372,6 +376,33 @@ def _get_tab_backend(self):
             return None
         return backend
 
+    def capabilities(self) -> BackendCapabilities:
+        backend = getattr(self, "backend", None)
+        if backend is None:
+            return BackendCapabilities()
+        has_eval = hasattr(backend, "eval")
+        has_keyboard = hasattr(backend, "type_text") or bool(
+            getattr(getattr(backend, "_page", None), "keyboard", None)
+        )
+        has_downloads = bool(getattr(backend, "downloads", None))
+        has_files = False
+        if self.tool_registry is not None:
+            try:
+                has_files = self.tool_registry.get("read_file") is not None
+            except Exception:
+                has_files = False
+        return BackendCapabilities(
+            tabs=self._get_tab_backend() is not None,
+            evaluate_js=bool(has_eval),
+            downloads=has_downloads,
+            filesystem_tools=has_files,
+            keyboard=bool(has_keyboard or has_eval),
+        )
+
+    def can(self, capability: str) -> bool:
+        caps = self.capabilities()
+        return bool(getattr(caps, capability, False))
+
     @staticmethod
     def _stringify_eval_value(value: Any) -> str:
         if value is None:
diff --git a/sentience/llm_provider.py b/sentience/llm_provider.py
index f119c0e..e220e75 100644
--- a/sentience/llm_provider.py
+++ b/sentience/llm_provider.py
@@ -3,6 +3,7 @@
 Enables "Bring Your Own Brain" (BYOB) pattern - plug in any LLM provider
 """
 
+import asyncio
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Any
@@ -59,6 +60,12 @@ def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMRespons
         """
         pass
 
+    async def generate_async(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
+        """
+        Async wrapper around generate() for providers without native async support.
+        """
+        return await asyncio.to_thread(self.generate, system_prompt, user_prompt, **kwargs)
+
     @abstractmethod
     def supports_json_mode(self) -> bool:
         """
diff --git a/sentience/models.py b/sentience/models.py
index 99c7208..ebc18f8 100644
--- a/sentience/models.py
+++ b/sentience/models.py
@@ -693,18 +693,10 @@ class EvaluateJsResult(BaseModel):
     """Result of evaluate_js helper."""
 
     ok: bool = Field(..., description="Whether evaluation succeeded.")
-    value: Any | None = Field(
-        None, description="Raw value returned by the page evaluation."
-    )
-    text: str | None = Field(
-        None, description="Best-effort string representation of the value."
-    )
-    truncated: bool = Field(
-        False, description="True if text output was truncated."
-    )
-    error: str | None = Field(
-        None, description="Error string when ok=False."
-    )
+    value: Any | None = Field(None, description="Raw value returned by the page evaluation.")
+    text: str | None = Field(None, description="Best-effort string representation of the value.")
+    truncated: bool = Field(False, description="True if text output was truncated.")
+    error: str | None = Field(None, description="Error string when ok=False.")
 
 
 class WaitResult(BaseModel):
@@ -1065,6 +1057,15 @@ class ReadResult(BaseModel):
     error: str | None = None
 
 
+class ExtractResult(BaseModel):
+    """Result of extract() or extract_async() operation"""
+
+    ok: bool
+    data: Any | None = None
+    raw: str | None = None
+    error: str | None = None
+
+
 class TraceStats(BaseModel):
     """Execution statistics for trace completion"""
 
diff --git a/sentience/read.py b/sentience/read.py
index 6d95534..29d5eaa 100644
--- a/sentience/read.py
+++ b/sentience/read.py
@@ -2,10 +2,15 @@
 Read page content - supports raw HTML, text, and markdown formats
 """
 
-from typing import Literal
+import json
+import re
+from typing import Any, Literal
+
+from pydantic import BaseModel, ValidationError
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
-from .models import ReadResult
+from .llm_provider import LLMProvider
+from .models import ExtractResult, ReadResult
 
 
 def read(
@@ -66,13 +71,13 @@ def read(
                 from markdownify import MarkdownifyError, markdownify
 
                 markdown_content = markdownify(html_content, heading_style="ATX", wrap=True)
-                return {
-                    "status": "success",
-                    "url": raw_html_result["url"],
-                    "format": "markdown",
-                    "content": markdown_content,
-                    "length": len(markdown_content),
-                }
+                return ReadResult(
+                    status="success",
+                    url=raw_html_result["url"],
+                    format="markdown",
+                    content=markdown_content,
+                    length=len(markdown_content),
+                )
             except ImportError:
                 print(
                     "Warning: 'markdownify' not installed. Install with 'pip install markdownify' for enhanced markdown. Falling back to extension's markdown."
@@ -156,13 +161,13 @@ async def read_async(
                 from markdownify import MarkdownifyError, markdownify
 
                 markdown_content = markdownify(html_content, heading_style="ATX", wrap=True)
-                return {
-                    "status": "success",
-                    "url": raw_html_result["url"],
-                    "format": "markdown",
-                    "content": markdown_content,
-                    "length": len(markdown_content),
-                }
+                return ReadResult(
+                    status="success",
+                    url=raw_html_result["url"],
+                    format="markdown",
+                    content=markdown_content,
+                    length=len(markdown_content),
+                )
             except ImportError:
                 print(
                     "Warning: 'markdownify' not installed. Install with 'pip install markdownify' for enhanced markdown. Falling back to extension's markdown."
@@ -186,3 +191,81 @@ async def read_async(
 
     # Convert dict result to ReadResult model
     return ReadResult(**result)
+
+
+def _extract_json_payload(text: str) -> dict[str, Any]:
+    fenced = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE)
+    if fenced:
+        return json.loads(fenced.group(1))
+    inline = re.search(r"(\{.*\})", text, re.DOTALL)
+    if inline:
+        return json.loads(inline.group(1))
+    return json.loads(text)
+
+
+def extract(
+    browser: SentienceBrowser,
+    llm: LLMProvider,
+    query: str,
+    schema: type[BaseModel] | None = None,
+    max_chars: int = 12000,
+) -> ExtractResult:
+    """
+    Extract structured data from the current page using read() markdown + LLM.
+    """
+    result = read(browser, output_format="markdown", enhance_markdown=True)
+    if result.status != "success":
+        return ExtractResult(ok=False, error=result.error)
+
+    content = result.content[:max_chars]
+    schema_desc = ""
+    if schema is not None:
+        schema_desc = json.dumps(schema.model_json_schema(), ensure_ascii=False)
+    system = "You extract structured data from markdown content. " "Return only JSON. No prose."
+    user = f"QUERY:\n{query}\n\nSCHEMA:\n{schema_desc}\n\nCONTENT:\n{content}"
+    response = await llm.generate_async(system, user)
+    raw = response.content.strip()
+
+    if schema is None:
+        return ExtractResult(ok=True, data={"text": raw}, raw=raw)
+
+    try:
+        payload = _extract_json_payload(raw)
+        validated = schema.model_validate(payload)
+        return ExtractResult(ok=True, data=validated, raw=raw)
+    except (json.JSONDecodeError, ValidationError) as exc:
+        return ExtractResult(ok=False, error=str(exc), raw=raw)
+
+
+async def extract_async(
+    browser: AsyncSentienceBrowser,
+    llm: LLMProvider,
+    query: str,
+    schema: type[BaseModel] | None = None,
+    max_chars: int = 12000,
+) -> ExtractResult:
+    """
+    Async version of extract().
+    """
+    result = await read_async(browser, output_format="markdown", enhance_markdown=True)
+    if result.status != "success":
+        return ExtractResult(ok=False, error=result.error)
+
+    content = result.content[:max_chars]
+    schema_desc = ""
+    if schema is not None:
+        schema_desc = json.dumps(schema.model_json_schema(), ensure_ascii=False)
+    system = "You extract structured data from markdown content. " "Return only JSON. No prose."
+    user = f"QUERY:\n{query}\n\nSCHEMA:\n{schema_desc}\n\nCONTENT:\n{content}"
+    response = llm.generate(system, user)
+    raw = response.content.strip()
+
+    if schema is None:
+        return ExtractResult(ok=True, data={"text": raw}, raw=raw)
+
+    try:
+        payload = _extract_json_payload(raw)
+        validated = schema.model_validate(payload)
+        return ExtractResult(ok=True, data=validated, raw=raw)
+    except (json.JSONDecodeError, ValidationError) as exc:
+        return ExtractResult(ok=False, error=str(exc), raw=raw)
diff --git a/sentience/tools/__init__.py b/sentience/tools/__init__.py
new file mode 100644
index 0000000..4b9ec1f
--- /dev/null
+++ b/sentience/tools/__init__.py
@@ -0,0 +1,18 @@
+"""
+Tool registry for LLM-callable tools.
+"""
+
+from .context import BackendCapabilities, ToolContext
+from .defaults import register_default_tools
+from .filesystem import FileSandbox, register_filesystem_tools
+from .registry import ToolRegistry, ToolSpec
+
+__all__ = [
+    "BackendCapabilities",
+    "FileSandbox",
+    "ToolContext",
+    "ToolRegistry",
+    "ToolSpec",
+    "register_default_tools",
+    "register_filesystem_tools",
+]
diff --git a/sentience/tools/context.py b/sentience/tools/context.py
new file mode 100644
index 0000000..d4eff5c
--- /dev/null
+++ b/sentience/tools/context.py
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from pydantic import BaseModel
+
+if TYPE_CHECKING:
+    from ..agent_runtime import AgentRuntime
+
+from .filesystem import FileSandbox
+
+
+class BackendCapabilities(BaseModel):
+    """Best-effort backend capability flags."""
+
+    tabs: bool = False
+    evaluate_js: bool = False
+    downloads: bool = False
+    filesystem_tools: bool = False
+    keyboard: bool = False
+
+
+class UnsupportedCapabilityError(RuntimeError):
+    """Structured error for unsupported capabilities."""
+
+    def __init__(self, capability: str, detail: str | None = None) -> None:
+        msg = detail or f"{capability} not supported by backend"
+        super().__init__(msg)
+        self.error = "unsupported_capability"
+        self.detail = msg
+        self.capability = capability
+
+
+class ToolContext:
+    """Context passed to tool handlers."""
+
+    def __init__(
+        self,
+        runtime: "AgentRuntime",
+        files: FileSandbox | None = None,
+        base_dir: Path | None = None,
+    ) -> None:
+        self.runtime = runtime
+        if files is None:
+            root = base_dir or (Path.cwd() / ".sentience" / "files")
+            files = FileSandbox(root)
+        self.files = files
+
+    def capabilities(self) -> BackendCapabilities:
+        return self.runtime.capabilities()
+
+    def can(self, name: str) -> bool:
+        return self.runtime.can(name)
+
+    def require(self, name: str) -> None:
+        if not self.can(name):
+            raise UnsupportedCapabilityError(name)
diff --git a/sentience/tools/defaults.py b/sentience/tools/defaults.py
new file mode 100644
index 0000000..e1253a2
--- /dev/null
+++ b/sentience/tools/defaults.py
@@ -0,0 +1,230 @@
+from __future__ import annotations
+
+from pydantic import BaseModel, Field
+
+from ..agent_runtime import AgentRuntime
+from ..backends import actions as backend_actions
+from ..models import ActionResult, BBox, EvaluateJsRequest, Snapshot
+from .context import ToolContext
+from .registry import ToolRegistry
+
+
+class SnapshotToolInput(BaseModel):
+    limit: int = Field(50, ge=1, le=500, description="Max elements to return.")
+
+
+class ClickToolInput(BaseModel):
+    element_id: int = Field(..., ge=1, description="Sentience element id from snapshot.")
+
+
+class TypeToolInput(BaseModel):
+    element_id: int = Field(..., ge=1, description="Sentience element id from snapshot.")
+    text: str = Field(..., min_length=1, description="Text to type into the element.")
+    clear_first: bool = Field(False, description="Clear existing content before typing.")
+
+
+class ScrollToolInput(BaseModel):
+    delta_y: float = Field(..., description="Scroll amount (positive = down, negative = up).")
+    x: float | None = Field(None, description="Optional scroll x coordinate.")
+    y: float | None = Field(None, description="Optional scroll y coordinate.")
+
+
+class ScrollToElementToolInput(BaseModel):
+    element_id: int = Field(..., ge=1, description="Sentience element id from snapshot.")
+    behavior: str = Field("instant", description="Scroll behavior.")
+    block: str = Field("center", description="Vertical alignment.")
+
+
+class ClickRectToolInput(BaseModel):
+    x: float = Field(..., description="Rect x coordinate.")
+    y: float = Field(..., description="Rect y coordinate.")
+    width: float = Field(..., ge=0, description="Rect width.")
+    height: float = Field(..., ge=0, description="Rect height.")
+
+
+class PressToolInput(BaseModel):
+    key: str = Field(..., min_length=1, description="Key to press (e.g., Enter).")
+
+
+class EvaluateJsToolInput(BaseModel):
+    code: str = Field(..., min_length=1, max_length=8000, description="JavaScript to execute.")
+    max_output_chars: int = Field(4000, ge=1, le=20000, description="Output cap.")
+    truncate: bool = Field(True, description="Truncate output when too long.")
+
+
+def register_default_tools(
+    registry: ToolRegistry, runtime: ToolContext | AgentRuntime | None = None
+) -> ToolRegistry:
+    """Register default browser tools on a registry."""
+
+    def _get_runtime(ctx: ToolContext | None):
+        if ctx is not None:
+            return ctx.runtime
+        if runtime is not None:
+            if isinstance(runtime, ToolContext):
+                return runtime.runtime
+            return runtime
+        raise RuntimeError("ToolContext with runtime is required")
+
+    @registry.tool(
+        name="snapshot_state",
+        input_model=SnapshotToolInput,
+        output_model=Snapshot,
+        description="Capture a snapshot of the current page state.",
+    )
+    async def snapshot_state(ctx, params: SnapshotToolInput) -> Snapshot:
+        runtime_ref = _get_runtime(ctx)
+        snap = await runtime_ref.snapshot(limit=params.limit, goal="tool_snapshot_state")
+        if snap is None:
+            raise RuntimeError("snapshot() returned None")
+        return snap
+
+    @registry.tool(
+        name="click",
+        input_model=ClickToolInput,
+        output_model=ActionResult,
+        description="Click an element by id from the latest snapshot.",
+    )
+    async def click_tool(ctx, params: ClickToolInput) -> ActionResult:
+        runtime_ref = _get_runtime(ctx)
+        snap = runtime_ref.last_snapshot or await runtime_ref.snapshot(goal="tool_click")
+        if snap is None:
+            raise RuntimeError("snapshot() returned None")
+        el = next((e for e in snap.elements if e.id == params.element_id), None)
+        if el is None:
+            raise ValueError(f"element_id not found: {params.element_id}")
+        return await backend_actions.click(runtime_ref.backend, el.bbox)
+
+    @registry.tool(
+        name="type",
+        input_model=TypeToolInput,
+        output_model=ActionResult,
+        description="Type text into an element by id from the latest snapshot.",
+    )
+    async def type_tool(ctx, params: TypeToolInput) -> ActionResult:
+        runtime_ref = _get_runtime(ctx)
+        snap = runtime_ref.last_snapshot or await runtime_ref.snapshot(goal="tool_type")
+        if snap is None:
+            raise RuntimeError("snapshot() returned None")
+        el = next((e for e in snap.elements if e.id == params.element_id), None)
+        if el is None:
+            raise ValueError(f"element_id not found: {params.element_id}")
+        return await backend_actions.type_text(
+            runtime_ref.backend, params.text, target=el.bbox, clear_first=params.clear_first
+        )
+
+    @registry.tool(
+        name="scroll",
+        input_model=ScrollToolInput,
+        output_model=ActionResult,
+        description="Scroll the page by a delta amount.",
+    )
+    async def scroll_tool(ctx, params: ScrollToolInput) -> ActionResult:
+        runtime_ref = _get_runtime(ctx)
+        target = None
+        if params.x is not None and params.y is not None:
+            target = (params.x, params.y)
+        return await backend_actions.scroll(runtime_ref.backend, params.delta_y, target=target)
+
+    @registry.tool(
+        name="scroll_to_element",
+        input_model=ScrollToElementToolInput,
+        output_model=ActionResult,
+        description="Scroll a specific element into view by element id.",
+    )
+    async def scroll_to_element_tool(ctx, params: ScrollToElementToolInput) -> ActionResult:
+        if ctx is not None:
+            ctx.require("evaluate_js")
+        runtime_ref = _get_runtime(ctx)
+        return await backend_actions.scroll_to_element(
+            runtime_ref.backend,
+            params.element_id,
+            behavior=params.behavior,
+            block=params.block,
+        )
+
+    @registry.tool(
+        name="click_rect",
+        input_model=ClickRectToolInput,
+        output_model=ActionResult,
+        description="Click the center of a rectangle.",
+    )
+    async def click_rect_tool(ctx, params: ClickRectToolInput) -> ActionResult:
+        runtime_ref = _get_runtime(ctx)
+        bbox = BBox(
+            x=params.x,
+            y=params.y,
+            width=params.width,
+            height=params.height,
+        )
+        return await backend_actions.click(runtime_ref.backend, bbox)
+
+    @registry.tool(
+        name="press",
+        input_model=PressToolInput,
+        output_model=ActionResult,
+        description="Press a keyboard key on the active element.",
+    )
+    async def press_tool(ctx, params: PressToolInput) -> ActionResult:
+        if ctx is not None:
+            ctx.require("keyboard")
+        runtime_ref = _get_runtime(ctx)
+        page = getattr(runtime_ref.backend, "_page", None) or getattr(
+            runtime_ref.backend, "page", None
+        )
+        if page is not None and getattr(page, "keyboard", None) is not None:
+            await page.keyboard.press(params.key)
+            return ActionResult(success=True, duration_ms=0, outcome="dom_updated")
+        try:
+            await runtime_ref.backend.eval(
+                f"""
+                (() => {{
+                    const el = document.activeElement;
+                    if (!el) return false;
+                    const key = {params.key!r};
+                    el.dispatchEvent(new KeyboardEvent('keydown', {{ key }}));
+                    el.dispatchEvent(new KeyboardEvent('keyup', {{ key }}));
+                    return true;
+                }})()
+                """
+            )
+            return ActionResult(success=True, duration_ms=0, outcome="dom_updated")
+        except Exception as exc:
+            return ActionResult(
+                success=False,
+                duration_ms=0,
+                outcome="error",
+                error={"code": "press_failed", "reason": str(exc)},
+            )
+
+    @registry.tool(
+        name="evaluate_js",
+        input_model=EvaluateJsToolInput,
+        output_model=ActionResult,
+        description="Evaluate JavaScript in the page context (returns text output).",
+    )
+    async def evaluate_js_tool(ctx, params: EvaluateJsToolInput) -> ActionResult:
+        if ctx is not None:
+            ctx.require("evaluate_js")
+        runtime_ref = _get_runtime(ctx)
+        result = await runtime_ref.evaluate_js(
+            EvaluateJsRequest(
+                code=params.code,
+                max_output_chars=params.max_output_chars,
+                truncate=params.truncate,
+            )
+        )
+        if not result.ok:
+            return ActionResult(
+                success=False,
+                duration_ms=0,
+                outcome="error",
+                error={"code": "evaluate_js_failed", "reason": result.error or "error"},
+            )
+        return ActionResult(
+            success=True,
+            duration_ms=0,
+            outcome="dom_updated",
+        )
+
+    return registry
diff --git a/sentience/tools/filesystem.py b/sentience/tools/filesystem.py
new file mode 100644
index 0000000..5595499
--- /dev/null
+++ b/sentience/tools/filesystem.py
@@ -0,0 +1,151 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from pydantic import BaseModel, Field
+
+from .context import ToolContext
+from .registry import ToolRegistry
+
+
+class FileSandbox:
+    """Sandboxed file access rooted at a base directory."""
+
+    def __init__(self, base_dir: Path) -> None:
+        self.base_dir = base_dir.resolve()
+        self.base_dir.mkdir(parents=True, exist_ok=True)
+
+    def _resolve(self, path: str) -> Path:
+        candidate = (self.base_dir / path).resolve()
+        if not candidate.is_relative_to(self.base_dir):
+            raise ValueError("path escapes sandbox root")
+        return candidate
+
+    def read_text(self, path: str) -> str:
+        return self._resolve(path).read_text(encoding="utf-8")
+
+    def write_text(self, path: str, content: str, *, overwrite: bool = True) -> int:
+        target = self._resolve(path)
+        if target.exists() and not overwrite:
+            raise ValueError("file exists and overwrite is False")
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_text(content, encoding="utf-8")
+        return len(content.encode("utf-8"))
+
+    def append_text(self, path: str, content: str) -> int:
+        target = self._resolve(path)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        with target.open("a", encoding="utf-8") as handle:
+            handle.write(content)
+        return len(content.encode("utf-8"))
+
+    def replace_text(self, path: str, old: str, new: str) -> int:
+        target = self._resolve(path)
+        data = target.read_text(encoding="utf-8")
+        replaced = data.count(old)
+        target.write_text(data.replace(old, new), encoding="utf-8")
+        return replaced
+
+
+class ReadFileInput(BaseModel):
+    path: str = Field(..., min_length=1, description="Path relative to sandbox root.")
+
+
+class ReadFileOutput(BaseModel):
+    content: str = Field(..., description="File contents.")
+
+
+class WriteFileInput(BaseModel):
+    path: str = Field(..., min_length=1, description="Path relative to sandbox root.")
+    content: str = Field(..., description="Content to write.")
+    overwrite: bool = Field(True, description="Whether to overwrite if file exists.")
+
+
+class WriteFileOutput(BaseModel):
+    path: str
+    bytes_written: int
+
+
+class AppendFileInput(BaseModel):
+    path: str = Field(..., min_length=1, description="Path relative to sandbox root.")
+    content: str = Field(..., description="Content to append.")
+
+
+class AppendFileOutput(BaseModel):
+    path: str
+    bytes_written: int
+
+
+class ReplaceFileInput(BaseModel):
+    path: str = Field(..., min_length=1, description="Path relative to sandbox root.")
+    old: str = Field(..., description="Text to replace.")
+    new: str = Field(..., description="Replacement text.")
+
+
+class ReplaceFileOutput(BaseModel):
+    path: str
+    replaced: int
+
+
+def register_filesystem_tools(
+    registry: ToolRegistry, sandbox: FileSandbox | None = None
+) -> ToolRegistry:
+    """Register sandboxed filesystem tools."""
+
+    def _get_files(ctx: ToolContext | None) -> FileSandbox:
+        if ctx is not None:
+            return ctx.files
+        if sandbox is not None:
+            return sandbox
+        raise RuntimeError("FileSandbox is required for filesystem tools")
+
+    @registry.tool(
+        name="read_file",
+        input_model=ReadFileInput,
+        output_model=ReadFileOutput,
+        description="Read a file from the sandbox.",
+    )
+    async def read_file(ctx: ToolContext | None, params: ReadFileInput) -> ReadFileOutput:
+        files = _get_files(ctx)
+        return ReadFileOutput(content=files.read_text(params.path))
+
+    @registry.tool(
+        name="write_file",
+        input_model=WriteFileInput,
+        output_model=WriteFileOutput,
+        description="Write a file to the sandbox.",
+    )
+    async def write_file(
+        ctx: ToolContext | None, params: WriteFileInput
+    ) -> WriteFileOutput:
+        files = _get_files(ctx)
+        written = files.write_text(params.path, params.content, overwrite=params.overwrite)
+        return WriteFileOutput(path=params.path, bytes_written=written)
+
+    @registry.tool(
+        name="append_file",
+        input_model=AppendFileInput,
+        output_model=AppendFileOutput,
+        description="Append text to a file in the sandbox.",
+    )
+    async def append_file(
+        ctx: ToolContext | None, params: AppendFileInput
+    ) -> AppendFileOutput:
+        files = _get_files(ctx)
+        written = files.append_text(params.path, params.content)
+        return AppendFileOutput(path=params.path, bytes_written=written)
+
+    @registry.tool(
+        name="replace_file",
+        input_model=ReplaceFileInput,
+        output_model=ReplaceFileOutput,
+        description="Replace text in a file in the sandbox.",
+    )
+    async def replace_file(
+        ctx: ToolContext | None, params: ReplaceFileInput
+    ) -> ReplaceFileOutput:
+        files = _get_files(ctx)
+        replaced = files.replace_text(params.path, params.old, params.new)
+        return ReplaceFileOutput(path=params.path, replaced=replaced)
+
+    return registry
diff --git a/sentience/tools/registry.py b/sentience/tools/registry.py
new file mode 100644
index 0000000..40977a2
--- /dev/null
+++ b/sentience/tools/registry.py
@@ -0,0 +1,143 @@
+from __future__ import annotations
+
+from collections.abc import Callable
+import inspect
+import time
+from typing import Any
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class ToolSpec(BaseModel):
+    """Definition of a tool with typed input/output schemas."""
+
+    name: str = Field(..., min_length=1, description="Unique tool name.")
+    description: str | None = Field(None, description="Human-readable tool description.")
+    input_model: type[BaseModel]
+    output_model: type[BaseModel]
+    handler: Callable[..., Any] | None = None
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    def llm_spec(self) -> dict[str, Any]:
+        """Return a normalized tool spec for LLM prompts."""
+        return {
+            "name": self.name,
+            "description": self.description or "",
+            "parameters": self.input_model.model_json_schema(),
+        }
+
+    def validate_input(self, payload: Any) -> BaseModel:
+        """Validate tool input payload."""
+        return self.input_model.model_validate(payload)
+
+    def validate_output(self, payload: Any) -> BaseModel:
+        """Validate tool output payload."""
+        return self.output_model.model_validate(payload)
+
+
+class ToolRegistry:
+    """Registry for tool specs and validation."""
+
+    def __init__(self) -> None:
+        self._tools: dict[str, ToolSpec] = {}
+
+    def register(self, spec: ToolSpec) -> "ToolRegistry":
+        if spec.name in self._tools:
+            raise ValueError(f"tool already registered: {spec.name}")
+        self._tools[spec.name] = spec
+        return self
+
+    def tool(
+        self,
+        *,
+        name: str,
+        input_model: type[BaseModel],
+        output_model: type[BaseModel],
+        description: str | None = None,
+    ):
+        """Decorator to register a tool handler."""
+
+        def decorator(func: Callable[..., Any]):
+            spec = ToolSpec(
+                name=name,
+                description=description,
+                input_model=input_model,
+                output_model=output_model,
+                handler=func,
+            )
+            self.register(spec)
+            return func
+
+        return decorator
+
+    def get(self, name: str) -> ToolSpec | None:
+        return self._tools.get(name)
+
+    def list(self) -> list[ToolSpec]:
+        return list(self._tools.values())
+
+    def llm_tools(self) -> list[dict[str, Any]]:
+        return [spec.llm_spec() for spec in self.list()]
+
+    def validate_input(self, name: str, payload: Any) -> BaseModel:
+        spec = self._tools.get(name)
+        if spec is None:
+            raise KeyError(f"tool not found: {name}")
+        return spec.validate_input(payload)
+
+    def validate_output(self, name: str, payload: Any) -> BaseModel:
+        spec = self._tools.get(name)
+        if spec is None:
+            raise KeyError(f"tool not found: {name}")
+        return spec.validate_output(payload)
+
+    def validate_call(self, name: str, payload: Any) -> tuple[BaseModel, ToolSpec]:
+        """Validate input and return (validated, spec)."""
+        validated = self.validate_input(name, payload)
+        return validated, self._tools[name]
+
+    async def execute(self, name: str, payload: Any, ctx: Any | None = None) -> BaseModel:
+        """Validate inputs, execute handler, validate output."""
+        start = time.time()
+        validated, spec = self.validate_call(name, payload)
+        if spec.handler is None:
+            raise ValueError(f"tool has no handler: {name}")
+        tracer = None
+        step_id = None
+        if ctx is not None:
+            runtime = getattr(ctx, "runtime", None)
+            tracer = getattr(runtime, "tracer", None)
+            step_id = getattr(runtime, "step_id", None)
+        try:
+            result = spec.handler(ctx, validated)
+            if inspect.isawaitable(result):
+                result = await result
+            validated_output = spec.validate_output(result)
+            if tracer:
+                tracer.emit(
+                    "tool_call",
+                    data={
+                        "tool_name": name,
+                        "inputs": validated.model_dump(),
+                        "outputs": validated_output.model_dump(),
+                        "success": True,
+                        "duration_ms": int((time.time() - start) * 1000),
+                    },
+                    step_id=step_id,
+                )
+            return validated_output
+        except Exception as exc:
+            if tracer:
+                tracer.emit(
+                    "tool_call",
+                    data={
+                        "tool_name": name,
+                        "inputs": validated.model_dump(),
+                        "success": False,
+                        "error": str(exc),
+                        "duration_ms": int((time.time() - start) * 1000),
+                    },
+                    step_id=step_id,
+                )
+            raise
diff --git a/tests/unit/test_extract.py b/tests/unit/test_extract.py
new file mode 100644
index 0000000..e46ab55
--- /dev/null
+++ b/tests/unit/test_extract.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+import pytest
+from pydantic import BaseModel
+
+from sentience.llm_provider import LLMProvider, LLMResponse
+from sentience.read import extract
+
+
+class LLMStub(LLMProvider):
+    def __init__(self, response: str):
+        super().__init__("stub")
+        self._response = response
+
+    def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
+        _ = system_prompt, user_prompt, kwargs
+        return LLMResponse(content=self._response, model_name="stub")
+
+    def supports_json_mode(self) -> bool:
+        return True
+
+    @property
+    def model_name(self) -> str:
+        return "stub"
+
+
+class BrowserStub:
+    page = True
+
+    def __init__(self, content: str):
+        self._content = content
+
+    def evaluate(self, _script: str, _opts: dict):
+        return {
+            "status": "success",
+            "url": "https://example.com",
+            "format": "markdown",
+            "content": self._content,
+            "length": len(self._content),
+        }
+
+
+class ItemSchema(BaseModel):
+    name: str
+    price: str
+
+
+def test_extract_schema_success() -> None:
+    browser = BrowserStub("Product: Widget")
+    llm = LLMStub('{"name":"Widget","price":"$10"}')
+    result = extract(browser, llm, query="Extract item", schema=ItemSchema)
+    assert result.ok is True
+    assert result.data is not None
+    assert result.data.name == "Widget"
+
+
+def test_extract_schema_invalid_json() -> None:
+    browser = BrowserStub("Product: Widget")
+    llm = LLMStub("not json")
+    result = extract(browser, llm, query="Extract item", schema=ItemSchema)
+    assert result.ok is False
+    assert result.error is not None
+
+
+@pytest.mark.asyncio
+async def test_extract_async_schema_success() -> None:
+    browser = BrowserStub("Product: Widget")
+    llm = LLMStub('{"name":"Widget","price":"$10"}')
+    from sentience.read import extract_async
+
+    result = await extract_async(browser, llm, query="Extract item", schema=ItemSchema)
+    assert result.ok is True
+    assert result.data is not None
+    assert result.data.name == "Widget"
diff --git a/tests/unit/test_filesystem_tools.py b/tests/unit/test_filesystem_tools.py
new file mode 100644
index 0000000..452a9a8
--- /dev/null
+++ b/tests/unit/test_filesystem_tools.py
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from sentience.tools import FileSandbox, ToolContext, ToolRegistry, register_filesystem_tools
+
+
+class RuntimeStub:
+    def __init__(self) -> None:
+        self.tracer = None
+        self.step_id = None
+
+    def capabilities(self):
+        return None
+
+    def can(self, _name: str) -> bool:
+        return True
+
+
+def test_file_sandbox_prevents_traversal(tmp_path: Path) -> None:
+    sandbox = FileSandbox(tmp_path)
+    with pytest.raises(ValueError):
+        sandbox.read_text("../secret.txt")
+
+
+def test_file_sandbox_prefix_edge_case(tmp_path: Path) -> None:
+    base = tmp_path / "sandbox"
+    sibling = tmp_path / "sandbox2"
+    base.mkdir()
+    sibling.mkdir()
+    sandbox = FileSandbox(base)
+    with pytest.raises(ValueError):
+        sandbox.read_text("../sandbox2/file.txt")
+
+
+@pytest.mark.asyncio
+async def test_filesystem_tools_read_write_append_replace(tmp_path: Path) -> None:
+    registry = ToolRegistry()
+    sandbox = FileSandbox(tmp_path)
+    ctx = ToolContext(RuntimeStub(), files=sandbox)
+    register_filesystem_tools(registry, sandbox)
+
+    await registry.execute("write_file", {"path": "note.txt", "content": "hello"}, ctx=ctx)
+    result = await registry.execute("read_file", {"path": "note.txt"}, ctx=ctx)
+    assert result.content == "hello"
+
+    await registry.execute("append_file", {"path": "note.txt", "content": " world"}, ctx=ctx)
+    result = await registry.execute("read_file", {"path": "note.txt"}, ctx=ctx)
+    assert result.content == "hello world"
+
+    replaced = await registry.execute(
+        "replace_file", {"path": "note.txt", "old": "world", "new": "there"}, ctx=ctx
+    )
+    assert replaced.replaced == 1
+    result = await registry.execute("read_file", {"path": "note.txt"}, ctx=ctx)
+    assert result.content == "hello there"
diff --git a/tests/unit/test_tool_registry.py b/tests/unit/test_tool_registry.py
new file mode 100644
index 0000000..bcdadae
--- /dev/null
+++ b/tests/unit/test_tool_registry.py
@@ -0,0 +1,224 @@
+from __future__ import annotations
+
+import pytest
+from pydantic import BaseModel, ValidationError
+
+from sentience.tools import ToolContext, ToolRegistry, ToolSpec, register_default_tools
+
+
+class EchoInput(BaseModel):
+    message: str
+
+
+class EchoOutput(BaseModel):
+    echoed: str
+
+
+def test_register_and_list_tools() -> None:
+    registry = ToolRegistry()
+    spec = ToolSpec(
+        name="echo",
+        description="Echo a message",
+        input_model=EchoInput,
+        output_model=EchoOutput,
+    )
+    registry.register(spec)
+    assert registry.get("echo") is spec
+    assert len(registry.list()) == 1
+
+
+def test_validate_input_and_output() -> None:
+    registry = ToolRegistry()
+    registry.register(
+        ToolSpec(
+            name="echo",
+            description="Echo a message",
+            input_model=EchoInput,
+            output_model=EchoOutput,
+        )
+    )
+
+    validated = registry.validate_input("echo", {"message": "hi"})
+    assert isinstance(validated, EchoInput)
+    assert validated.message == "hi"
+
+    out = registry.validate_output("echo", {"echoed": "hi"})
+    assert isinstance(out, EchoOutput)
+    assert out.echoed == "hi"
+
+    with pytest.raises(ValidationError):
+        registry.validate_input("echo", {"message": 123})
+
+
+def test_llm_spec_generation() -> None:
+    registry = ToolRegistry()
+
+    @registry.tool(
+        name="echo",
+        input_model=EchoInput,
+        output_model=EchoOutput,
+        description="Echo a message",
+    )
+    def _echo(_ctx, params: EchoInput) -> EchoOutput:
+        return EchoOutput(echoed=params.message)
+
+    tools = registry.llm_tools()
+    assert tools[0]["name"] == "echo"
+    assert tools[0]["description"] == "Echo a message"
+    assert "parameters" in tools[0]
+
+
+def test_register_default_tools_adds_core_tools() -> None:
+    registry = ToolRegistry()
+
+    class RuntimeStub:
+        async def snapshot(self, **_kwargs):
+            return None
+
+        last_snapshot = None
+        backend = None
+
+    ctx = ToolContext(RuntimeStub())
+    register_default_tools(registry, ctx)
+    names = {spec.name for spec in registry.list()}
+    assert {
+        "snapshot_state",
+        "click",
+        "type",
+        "scroll",
+        "scroll_to_element",
+        "click_rect",
+        "press",
+        "evaluate_js",
+    } <= names
+
+
+@pytest.mark.asyncio
+async def test_registry_execute_validates_and_runs() -> None:
+    registry = ToolRegistry()
+
+    class TracerStub:
+        def __init__(self) -> None:
+            self.events: list[dict] = []
+
+        def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None:
+            self.events.append({"type": event_type, "data": data, "step_id": step_id})
+
+    class RuntimeStub:
+        def __init__(self) -> None:
+            self.tracer = TracerStub()
+            self.step_id = "step-1"
+
+        def capabilities(self):
+            return None
+
+        def can(self, _name: str) -> bool:
+            return True
+
+    runtime = RuntimeStub()
+    ctx = ToolContext(runtime)
+
+    @registry.tool(
+        name="echo",
+        input_model=EchoInput,
+        output_model=EchoOutput,
+        description="Echo",
+    )
+    async def _echo(_ctx, params: EchoInput) -> EchoOutput:
+        return EchoOutput(echoed=params.message)
+
+    result = await registry.execute("echo", {"message": "hi"}, ctx=ctx)
+    assert isinstance(result, EchoOutput)
+    assert result.echoed == "hi"
+    assert runtime.tracer.events[0]["type"] == "tool_call"
+    assert runtime.tracer.events[0]["data"]["success"] is True
+
+
+def test_tool_context_require_raises_on_missing_capability() -> None:
+    class RuntimeStub:
+        def capabilities(self):
+            return None
+
+        def can(self, _name: str) -> bool:
+            return False
+
+    ctx = ToolContext(RuntimeStub())
+    with pytest.raises(ValueError, match="unsupported_capability"):
+        ctx.require("tabs")
+
+
+@pytest.mark.asyncio
+async def test_tool_call_emits_error_on_failure() -> None:
+    registry = ToolRegistry()
+
+    class TracerStub:
+        def __init__(self) -> None:
+            self.events: list[dict] = []
+
+        def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None:
+            self.events.append({"type": event_type, "data": data, "step_id": step_id})
+
+    class RuntimeStub:
+        def __init__(self) -> None:
+            self.tracer = TracerStub()
+            self.step_id = "step-1"
+
+        def capabilities(self):
+            return None
+
+        def can(self, _name: str) -> bool:
+            return True
+
+    runtime = RuntimeStub()
+    ctx = ToolContext(runtime)
+
+    @registry.tool(
+        name="boom",
+        input_model=EchoInput,
+        output_model=EchoOutput,
+        description="Boom",
+    )
+    async def _boom(_ctx, _params: EchoInput) -> EchoOutput:
+        raise RuntimeError("bad")
+
+    with pytest.raises(RuntimeError, match="bad"):
+        await registry.execute("boom", {"message": "x"}, ctx=ctx)
+
+    assert runtime.tracer.events[0]["type"] == "tool_call"
+    assert runtime.tracer.events[0]["data"]["success"] is False
+    assert "error" in runtime.tracer.events[0]["data"]
+
+
+@pytest.mark.asyncio
+async def test_default_tools_capability_checks() -> None:
+    registry = ToolRegistry()
+
+    class RuntimeStub:
+        def __init__(self) -> None:
+            self.tracer = None
+            self.step_id = None
+
+        def capabilities(self):
+            return None
+
+        def can(self, name: str) -> bool:
+            return name != "keyboard" and name != "evaluate_js"
+
+        async def snapshot(self, **_kwargs):
+            return None
+
+        last_snapshot = None
+        backend = None
+
+    ctx = ToolContext(RuntimeStub())
+    register_default_tools(registry, ctx)
+
+    with pytest.raises(ValueError, match="unsupported_capability"):
+        await registry.execute("press", {"key": "Enter"}, ctx=ctx)
+
+    with pytest.raises(ValueError, match="unsupported_capability"):
+        await registry.execute(
+            "scroll_to_element",
+            {"element_id": 1, "behavior": "instant", "block": "center"},
+            ctx=ctx,
+        )

From 73c5737778af3233143835d14b809d83ea81b4ae Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 20:09:40 -0800
Subject: [PATCH 03/12] fix test errors

---
 sentience/agent.py                | 4 ++--
 sentience/conversational_agent.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sentience/agent.py b/sentience/agent.py
index fce9c14..6df184f 100644
--- a/sentience/agent.py
+++ b/sentience/agent.py
@@ -261,9 +261,9 @@ def act(  # noqa: C901
         self._step_count += 1
         step_id = f"step-{self._step_count}"
 
+        pre_url = self.browser.page.url if self.browser.page else None
         # Emit step_start trace event if tracer is enabled
         if self.tracer:
-            pre_url = self.browser.page.url if self.browser.page else None
             _safe_tracer_call(
                 self.tracer,
                 "emit_step_start",
@@ -951,9 +951,9 @@ async def act(  # noqa: C901
         self._step_count += 1
         step_id = f"step-{self._step_count}"
 
+        pre_url = self.browser.page.url if self.browser.page else None
         # Emit step_start trace event if tracer is enabled
         if self.tracer:
-            pre_url = self.browser.page.url if self.browser.page else None
             _safe_tracer_call(
                 self.tracer,
                 "emit_step_start",
diff --git a/sentience/conversational_agent.py b/sentience/conversational_agent.py
index f9f2fc8..1ac39f1 100644
--- a/sentience/conversational_agent.py
+++ b/sentience/conversational_agent.py
@@ -311,7 +311,7 @@ def _execute_step(self, step: dict[str, Any]) -> StepExecutionResult:
         except Exception as e:
             if self.verbose:
                 print(f"❌ Step failed: {e}")
-            return StepExecutionResult(success=False, action=action, error=str(e))
+            return StepExecutionResult(success=False, action=action, data={}, error=str(e))
 
     def _extract_information(self, snap: Snapshot, info_type: str) -> ExtractionResult:
         """

From 1a37c2746f038f19ecf253f79c503c279565f319 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 20:32:06 -0800
Subject: [PATCH 04/12] Phase 3: Domain policies + search tool + send_key
 helper

---
 docs/CODE_REVIEW_PHASE2_BU_PARITY.md | 270 ---------------------------
 sentience/__init__.py                |   4 +
 sentience/actions.py                 | 212 +++++++++++++++++++++
 sentience/browser.py                 |  89 ++++++++-
 sentience/tools/context.py           |   2 +-
 sentience/tools/filesystem.py        |  12 +-
 sentience/tools/registry.py          |   4 +-
 tests/test_actions.py                | 107 +++++++++++
 tests/unit/test_domain_policies.py   |  51 +++++
 9 files changed, 467 insertions(+), 284 deletions(-)
 delete mode 100644 docs/CODE_REVIEW_PHASE2_BU_PARITY.md
 create mode 100644 tests/unit/test_domain_policies.py

diff --git a/docs/CODE_REVIEW_PHASE2_BU_PARITY.md b/docs/CODE_REVIEW_PHASE2_BU_PARITY.md
deleted file mode 100644
index 32db024..0000000
--- a/docs/CODE_REVIEW_PHASE2_BU_PARITY.md
+++ /dev/null
@@ -1,270 +0,0 @@
-# Code Review: Browser-Use Parity Phase 2 Implementation
-
-**Date:** 2026-01-23
-**Reviewer:** Claude
-**Branch:** parity_win2
-**Files Reviewed:** 4 modified + 7 new files
-**Scope:** Phase 2 deliverables from BU_GAP_DELIVERABLES.md
-
----
-
-## Summary
-
-This PR implements all Phase 2 deliverables for Browser-Use parity:
-
-| Deliverable | Status | Notes |
-|-------------|--------|-------|
-| ToolRegistry + typed schemas | ✅ Complete | Pydantic-based, LLM-ready specs |
-| ToolContext/capability routing | ✅ Complete | `ctx.require()` pattern |
-| Tool execution tracing | ✅ Complete | `tool_call` events emitted |
-| Default tools registered | ✅ Complete | 7 core tools |
-| Filesystem tools (sandboxed) | ✅ Complete | read/write/append/replace |
-| Structured extraction | ✅ Complete | `extract(query, schema)` |
-
-Overall the implementation is well-designed and aligns with the design docs. A few issues need attention.
-
----
-
-## Design Doc Alignment
-
-### CAPABILITY_ROUTING_DESIGN.md Compliance
-
-| Requirement | Implementation | Status |
-|-------------|----------------|--------|
-| `BackendCapabilities` model | [context.py:14-19](sentience/tools/context.py#L14-L19) | ✅ |
-| `runtime.capabilities()` | [agent_runtime.py:378-389](sentience/agent_runtime.py#L378-L389) | ✅ |
-| `runtime.can(name)` | [agent_runtime.py:391-393](sentience/agent_runtime.py#L391-L393) | ✅ |
-| Structured "unsupported" errors | [context.py:43-45](sentience/tools/context.py#L43-L45) | ⚠️ Uses ValueError, not JSON |
-
-### JS_EVALUATE_TOOL_DESIGN.md Compliance
-
-The `evaluate_js` capability is flagged in `BackendCapabilities`, and the existing `runtime.evaluate_js()` from Phase 1 satisfies this requirement. The tool registry does **not** currently expose `evaluate_js` as a registered tool, which may be intentional.
-
----
-
-## Critical Issues
-
-### 1. `BackendCapabilities.downloads` and `filesystem_tools` missing
-
-**File:** [context.py:14-19](sentience/tools/context.py#L14-L19)
-
-The design doc specifies:
-```python
-class BackendCapabilities(BaseModel):
-    tabs: bool = False
-    evaluate_js: bool = True
-    downloads: bool = True
-    filesystem_tools: bool = False
-```
-
-But the implementation only has:
-```python
-class BackendCapabilities(BaseModel):
-    tabs: bool = False
-    evaluate_js: bool = False
-    keyboard: bool = False
-```
-
-**Impact:** No way to check if filesystem tools are available before using them.
-
-**Fix:** Add missing capability flags per design doc.
-
----
-
-### 2. `ToolContext.require()` raises ValueError, not structured error
-
-**File:** [context.py:43-45](sentience/tools/context.py#L43-L45)
-
-```python
-def require(self, name: str) -> None:
-    if not self.can(name):
-        raise ValueError(f"unsupported_capability: {name}")
-```
-
-The design doc (CAPABILITY_ROUTING_DESIGN.md) specifies structured errors:
-```json
-{ "error": "unsupported_capability", "detail": "tabs not supported by backend" }
-```
-
-**Impact:** LLM tool calls cannot distinguish unsupported capabilities from validation errors.
-
-**Fix:** Create a custom `UnsupportedCapabilityError` exception or return a structured result.
-
----
-
-## Medium Issues
-
-### 3. Missing `evaluate_js` tool in default registry
-
-**File:** [defaults.py](sentience/tools/defaults.py)
-
-The registry registers 7 tools: `snapshot_state`, `click`, `type`, `scroll`, `scroll_to_element`, `click_rect`, `press`. However, `evaluate_js` is not registered despite being a key Phase 1/2 feature per the gap analysis.
-
-**Suggestion:** Add an `evaluate_js` tool that wraps `runtime.evaluate_js()`.
-
----
-
-### 4. `extract_async` does not use async LLM call
-
-**File:** [read.py:237-277](sentience/read.py#L237-L277)
-
-```python
-async def extract_async(...) -> ExtractResult:
-    # ...
-    response = llm.generate(system, user)  # Sync call in async function
-```
-
-The `LLMProvider.generate()` is synchronous, so `extract_async` doesn't provide true async benefits.
-
-**Suggestion:** Consider adding `generate_async()` to `LLMProvider` or document this limitation.
-
----
-
-### 5. `FileSandbox` path validation may have edge cases
-
-**File:** [filesystem.py:18-22](sentience/tools/filesystem.py#L18-L22)
-
-```python
-def _resolve(self, path: str) -> Path:
-    candidate = (self.base_dir / path).resolve()
-    if not str(candidate).startswith(str(self.base_dir)):
-        raise ValueError("path escapes sandbox root")
-    return candidate
-```
-
-The string prefix check could fail on edge cases like:
-- `base_dir = /tmp/sandbox` and path resolves to `/tmp/sandbox2/file` (passes check incorrectly)
-
-**Fix:** Use `candidate.is_relative_to(self.base_dir)` (Python 3.9+) instead of string prefix.
-
----
-
-### 6. Duplicate capability logic in `AgentRuntime` and `ToolContext`
-
-**Files:** [agent_runtime.py:378-393](sentience/agent_runtime.py#L378-L393), [context.py:37-41](sentience/tools/context.py#L37-L41)
-
-Both classes implement `capabilities()` and `can()`. `ToolContext` delegates to `runtime`, which is correct, but the capability detection logic in `AgentRuntime.capabilities()` is complex:
-
-```python
-has_keyboard = hasattr(backend, "type_text") or bool(
-    getattr(getattr(backend, "_page", None), "keyboard", None)
-)
-```
-
-**Suggestion:** Consider moving capability detection to the backend protocol or making it more explicit.
-
----
-
-## Minor Issues
-
-### 7. `read.py` inconsistent return types fixed
-
-The diff shows fixing inconsistent `dict` vs `ReadResult` returns:
-```python
--return {
--    "status": "success",
--    ...
--}
-+return ReadResult(
-+    status="success",
-+    ...
-+)
-```
-
-This is a good fix.
-
----
-
-### 8. Missing docstrings in new modules
-
-**Files:** `context.py`, `defaults.py`, `filesystem.py`
-
-The `ToolContext` and `FileSandbox` classes lack detailed docstrings explaining their purpose and usage patterns.
-
----
-
-### 9. `defaults.py` has unused `Snapshot` import warning potential
-
-**File:** [defaults.py:7](sentience/tools/defaults.py#L7)
-
-```python
-from ..models import ActionResult, BBox, Snapshot
-```
-
-`Snapshot` is used as the output model for `snapshot_state`, so this is correct. No issue.
-
----
-
-## Test Coverage
-
-The tests are comprehensive and well-structured:
-
-| Test File | Coverage |
-|-----------|----------|
-| test_tool_registry.py | ✅ Registration, validation, LLM spec, execution, tracing, capability checks |
-| test_filesystem_tools.py | ✅ Sandbox traversal prevention, CRUD operations |
-| test_extract.py | ✅ Schema validation success/failure |
-
-### Missing Test Cases
-
-1. **`extract_async` test** - Only sync `extract` is tested
-2. **Edge case for sandbox path validation** - Test `/tmp/sandbox` vs `/tmp/sandbox2` scenario
-3. **`capabilities()` detection logic** - No test for `AgentRuntime.capabilities()`
-
----
-
-## Architecture Assessment
-
-The implementation follows a clean layered architecture:
-
-```
-┌─────────────────────────────────────┐
-│           ToolRegistry              │  ← LLM-facing tool specs
-├─────────────────────────────────────┤
-│           ToolContext               │  ← Capability routing
-├─────────────────────────────────────┤
-│          AgentRuntime               │  ← Backend abstraction
-├─────────────────────────────────────┤
-│      PlaywrightBackend / CDP        │  ← Browser execution
-└─────────────────────────────────────┘
-```
-
-This aligns with the design goal: *"unified SDK surface through AgentRuntime regardless of backend"*.
-
----
-
-## Verdict
-
-**Approve with required changes:**
-
-1. **Must fix:** Add missing capability flags (`downloads`, `filesystem_tools`) per design doc
-2. **Must fix:** Fix `FileSandbox._resolve()` to use `is_relative_to()` instead of string prefix
-3. **Should fix:** Add `evaluate_js` tool to default registry
-
-The Phase 2 deliverables are functionally complete. The tool registry design is solid and the filesystem sandbox provides good security boundaries.
-
----
-
-## Appendix: Files Changed
-
-### Modified Files
-
-| File | Changes |
-|------|---------|
-| sentience/__init__.py | Export new tools module symbols |
-| sentience/agent_runtime.py | Add `tool_registry`, `capabilities()`, `can()` |
-| sentience/models.py | Add `ExtractResult` model |
-| sentience/read.py | Add `extract()` / `extract_async()`, fix return types |
-
-### New Files
-
-| File | Purpose |
-|------|---------|
-| sentience/tools/__init__.py | Package exports |
-| sentience/tools/registry.py | `ToolRegistry` and `ToolSpec` classes |
-| sentience/tools/context.py | `ToolContext` and `BackendCapabilities` |
-| sentience/tools/defaults.py | Default browser tool registrations |
-| sentience/tools/filesystem.py | Sandboxed filesystem tools |
-| tests/unit/test_tool_registry.py | Registry + execution tests |
-| tests/unit/test_filesystem_tools.py | Sandbox + CRUD tests |
-| tests/unit/test_extract.py | Extraction tests |
diff --git a/sentience/__init__.py b/sentience/__init__.py
index a8932a1..bc35bf2 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -19,7 +19,11 @@
     click_rect,
     press,
     scroll_to,
+    search,
+    search_async,
     select_option,
+    send_keys,
+    send_keys_async,
     submit,
     type_text,
     uncheck,
diff --git a/sentience/actions.py b/sentience/actions.py
index 0be5f48..7e55154 100644
--- a/sentience/actions.py
+++ b/sentience/actions.py
@@ -5,6 +5,7 @@
 import asyncio
 import time
 from pathlib import Path
+from urllib.parse import quote_plus
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
 from .browser_evaluator import BrowserEvaluator
@@ -709,6 +710,138 @@ def press(browser: SentienceBrowser, key: str, take_snapshot: bool = False) -> A
     )
 
 
+def _normalize_key_token(token: str) -> str:
+    lookup = {
+        "CMD": "Meta",
+        "COMMAND": "Meta",
+        "CTRL": "Control",
+        "CONTROL": "Control",
+        "ALT": "Alt",
+        "OPTION": "Alt",
+        "SHIFT": "Shift",
+        "ESC": "Escape",
+        "ESCAPE": "Escape",
+        "ENTER": "Enter",
+        "RETURN": "Enter",
+        "TAB": "Tab",
+        "SPACE": "Space",
+    }
+    upper = token.strip().upper()
+    return lookup.get(upper, token.strip())
+
+
+def _parse_key_sequence(sequence: str) -> list[str]:
+    parts = []
+    for raw in sequence.replace(",", " ").split():
+        raw = raw.strip()
+        if not raw:
+            continue
+        if raw.startswith("{") and raw.endswith("}"):
+            raw = raw[1:-1]
+        if "+" in raw:
+            combo = "+".join(_normalize_key_token(tok) for tok in raw.split("+") if tok)
+            parts.append(combo)
+        else:
+            parts.append(_normalize_key_token(raw))
+    return parts
+
+
+def send_keys(
+    browser: SentienceBrowser,
+    sequence: str,
+    take_snapshot: bool = False,
+    delay_ms: int = 50,
+) -> ActionResult:
+    """
+    Send a sequence of key presses (e.g., "CMD+H", "CTRL+SHIFT+P").
+
+    Supports sequences separated by commas/spaces, and brace-wrapped tokens
+    like "{ENTER}" or "{CTRL+L}".
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call browser.start() first.")
+
+    start_time = time.time()
+    url_before = browser.page.url
+
+    keys = _parse_key_sequence(sequence)
+    if not keys:
+        raise ValueError("send_keys sequence is empty")
+    for key in keys:
+        browser.page.keyboard.press(key)
+        if delay_ms > 0:
+            browser.page.wait_for_timeout(delay_ms)
+
+    duration_ms = int((time.time() - start_time) * 1000)
+    url_after = browser.page.url
+    url_changed = url_before != url_after
+    outcome = "navigated" if url_changed else "dom_updated"
+
+    snapshot_after: Snapshot | None = None
+    if take_snapshot:
+        snapshot_after = snapshot(browser)
+
+    return ActionResult(
+        success=True,
+        duration_ms=duration_ms,
+        outcome=outcome,
+        url_changed=url_changed,
+        snapshot_after=snapshot_after,
+    )
+
+
+def _build_search_url(query: str, engine: str) -> str:
+    q = quote_plus(query)
+    key = engine.strip().lower()
+    if key in {"duckduckgo", "ddg"}:
+        return f"https://duckduckgo.com/?q={q}"
+    if key in {"google.com", "google"}:
+        return f"https://www.google.com/search?q={q}"
+    if key in {"google"}:
+        return f"https://www.google.com/search?q={q}"
+    if key in {"bing"}:
+        return f"https://www.bing.com/search?q={q}"
+    raise ValueError(f"unsupported search engine: {engine}")
+
+
+def search(
+    browser: SentienceBrowser,
+    query: str,
+    engine: str = "duckduckgo",
+    take_snapshot: bool = False,
+) -> ActionResult:
+    """
+    Navigate to a search results page for the given query.
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call browser.start() first.")
+    if not query.strip():
+        raise ValueError("search query is empty")
+
+    start_time = time.time()
+    url_before = browser.page.url
+    url = _build_search_url(query, engine)
+    browser.goto(url)
+    browser.page.wait_for_load_state("networkidle")
+
+    duration_ms = int((time.time() - start_time) * 1000)
+    url_after = browser.page.url
+    url_changed = url_before != url_after
+    outcome = "navigated" if url_changed else "dom_updated"
+
+    snapshot_after: Snapshot | None = None
+    if take_snapshot:
+        snapshot_after = snapshot(browser)
+
+    return ActionResult(
+        success=True,
+        duration_ms=duration_ms,
+        outcome=outcome,
+        url_changed=url_changed,
+        snapshot_after=snapshot_after,
+    )
+
+
 def scroll_to(
     browser: SentienceBrowser,
     element_id: int,
@@ -1698,6 +1831,85 @@ async def press_async(
     )
 
 
+async def send_keys_async(
+    browser: AsyncSentienceBrowser,
+    sequence: str,
+    take_snapshot: bool = False,
+    delay_ms: int = 50,
+) -> ActionResult:
+    """
+    Async version of send_keys().
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call await browser.start() first.")
+
+    start_time = time.time()
+    url_before = browser.page.url
+
+    keys = _parse_key_sequence(sequence)
+    if not keys:
+        raise ValueError("send_keys sequence is empty")
+    for key in keys:
+        await browser.page.keyboard.press(key)
+        if delay_ms > 0:
+            await browser.page.wait_for_timeout(delay_ms)
+
+    duration_ms = int((time.time() - start_time) * 1000)
+    url_after = browser.page.url
+    url_changed = url_before != url_after
+    outcome = "navigated" if url_changed else "dom_updated"
+
+    snapshot_after: Snapshot | None = None
+    if take_snapshot:
+        snapshot_after = await snapshot_async(browser)
+
+    return ActionResult(
+        success=True,
+        duration_ms=duration_ms,
+        outcome=outcome,
+        url_changed=url_changed,
+        snapshot_after=snapshot_after,
+    )
+
+
+async def search_async(
+    browser: AsyncSentienceBrowser,
+    query: str,
+    engine: str = "duckduckgo",
+    take_snapshot: bool = False,
+) -> ActionResult:
+    """
+    Async version of search().
+    """
+    if not browser.page:
+        raise RuntimeError("Browser not started. Call await browser.start() first.")
+    if not query.strip():
+        raise ValueError("search query is empty")
+
+    start_time = time.time()
+    url_before = browser.page.url
+    url = _build_search_url(query, engine)
+    await browser.goto(url)
+    await browser.page.wait_for_load_state("networkidle")
+
+    duration_ms = int((time.time() - start_time) * 1000)
+    url_after = browser.page.url
+    url_changed = url_before != url_after
+    outcome = "navigated" if url_changed else "dom_updated"
+
+    snapshot_after: Snapshot | None = None
+    if take_snapshot:
+        snapshot_after = await snapshot_async(browser)
+
+    return ActionResult(
+        success=True,
+        duration_ms=duration_ms,
+        outcome=outcome,
+        url_changed=url_changed,
+        snapshot_after=snapshot_after,
+    )
+
+
 async def scroll_to_async(
     browser: AsyncSentienceBrowser,
     element_id: int,
diff --git a/sentience/browser.py b/sentience/browser.py
index 7e40cb1..2191963 100644
--- a/sentience/browser.py
+++ b/sentience/browser.py
@@ -34,6 +34,51 @@
     STEALTH_AVAILABLE = False
 
 
+def _normalize_domain(domain: str) -> str:
+    raw = domain.strip()
+    if "://" in raw:
+        host = urlparse(raw).hostname or ""
+    else:
+        host = raw.split("/", 1)[0]
+    host = host.split(":", 1)[0]
+    return host.strip().lower().lstrip(".")
+
+
+def _domain_matches(host: str, pattern: str) -> bool:
+    host_norm = _normalize_domain(host)
+    pat = _normalize_domain(pattern)
+    if pat.startswith("*."):
+        pat = pat[2:]
+    return host_norm == pat or host_norm.endswith(f".{pat}")
+
+
+def _extract_host(url: str) -> str | None:
+    raw = url.strip()
+    if "://" not in raw:
+        raw = f"https://{raw}"
+    parsed = urlparse(raw)
+    return parsed.hostname
+
+
+def _is_domain_allowed(
+    host: str | None, allowed: list[str] | None, prohibited: list[str] | None
+) -> bool:
+    """
+    Return True if host is allowed based on allow/deny lists.
+
+    Deny list takes precedence. Empty allow list means allow all.
+    """
+    if not host:
+        return False
+    if prohibited:
+        for pattern in prohibited:
+            if _domain_matches(host, pattern):
+                return False
+    if allowed:
+        return any(_domain_matches(host, pattern) for pattern in allowed)
+    return True
+
+
 class SentienceBrowser:
     """Main browser session with Sentience extension loaded"""
 
@@ -49,6 +94,9 @@ def __init__(
         record_video_size: dict[str, int] | None = None,
         viewport: Viewport | dict[str, int] | None = None,
         device_scale_factor: float | None = None,
+        allowed_domains: list[str] | None = None,
+        prohibited_domains: list[str] | None = None,
+        keep_alive: bool = False,
     ):
         """
         Initialize Sentience browser
@@ -113,6 +161,11 @@ def __init__(
         self.record_video_dir = record_video_dir
         self.record_video_size = record_video_size or {"width": 1280, "height": 800}
 
+        # Domain policies + keep-alive
+        self.allowed_domains = allowed_domains or []
+        self.prohibited_domains = prohibited_domains or []
+        self.keep_alive = keep_alive
+
         # Viewport configuration - convert dict to Viewport if needed
         if viewport is None:
             self.viewport = Viewport(width=1280, height=800)
@@ -299,9 +352,16 @@ def start(self) -> None:
         time.sleep(0.5)
 
     def goto(self, url: str) -> None:
-        """Navigate to a URL and ensure extension is ready"""
+        """Navigate to a URL and ensure extension is ready.
+
+        This enforces domain allow/deny policies. Direct page.goto() calls
+        bypass policy checks.
+        """
         if not self.page:
             raise RuntimeError("Browser not started. Call start() first.")
+        host = _extract_host(url)
+        if not _is_domain_allowed(host, self.allowed_domains, self.prohibited_domains):
+            raise ValueError(f"domain not allowed: {host}")
 
         self.page.goto(url, wait_until="domcontentloaded")
 
@@ -474,11 +534,16 @@ def close(self, output_path: str | Path | None = None) -> str | None:
             Path to video file if recording was enabled, None otherwise
             Note: Video files are saved automatically by Playwright when context closes.
             If multiple pages exist, returns the path to the first page's video.
+            If keep_alive is True, returns None and skips shutdown.
         """
         # CRITICAL: Don't access page.video.path() BEFORE closing context
         # This can poke the video subsystem at an awkward time and cause crashes on macOS
         # Instead, we'll locate the video file after context closes
 
+        if self.keep_alive:
+            logger.info("Keep-alive enabled; skipping browser shutdown.")
+            return None
+
         # Close context (this triggers video file finalization)
         if self.context:
             self.context.close()
@@ -644,6 +709,9 @@ def __init__(
         viewport: Viewport | dict[str, int] | None = None,
         device_scale_factor: float | None = None,
         executable_path: str | None = None,
+        allowed_domains: list[str] | None = None,
+        prohibited_domains: list[str] | None = None,
+        keep_alive: bool = False,
     ):
         """
         Initialize Async Sentience browser
@@ -698,6 +766,11 @@ def __init__(
         self.record_video_dir = record_video_dir
         self.record_video_size = record_video_size or {"width": 1280, "height": 800}
 
+        # Domain policies + keep-alive
+        self.allowed_domains = allowed_domains or []
+        self.prohibited_domains = prohibited_domains or []
+        self.keep_alive = keep_alive
+
         # Viewport configuration - convert dict to Viewport if needed
         if viewport is None:
             self.viewport = Viewport(width=1280, height=800)
@@ -880,9 +953,16 @@ async def start(self) -> None:
         await asyncio.sleep(0.5)
 
     async def goto(self, url: str) -> None:
-        """Navigate to a URL and ensure extension is ready (async)"""
+        """Navigate to a URL and ensure extension is ready (async).
+
+        This enforces domain allow/deny policies. Direct page.goto() calls
+        bypass policy checks.
+        """
         if not self.page:
             raise RuntimeError("Browser not started. Call await start() first.")
+        host = _extract_host(url)
+        if not _is_domain_allowed(host, self.allowed_domains, self.prohibited_domains):
+            raise ValueError(f"domain not allowed: {host}")
 
         await self.page.goto(url, wait_until="domcontentloaded")
 
@@ -1031,7 +1111,12 @@ async def close(self, output_path: str | Path | None = None) -> tuple[str | None
 
         Note: Video path is resolved AFTER context close to avoid touching video
         subsystem during teardown, which can cause crashes on macOS.
+        If keep_alive is True, returns (None, True) and skips shutdown.
         """
+        if self.keep_alive:
+            logger.info("Keep-alive enabled; skipping browser shutdown.")
+            return None, True
+
         # CRITICAL: Don't access page.video.path() BEFORE closing context
         # This can poke the video subsystem at an awkward time and cause crashes
         # Instead, we'll locate the video file after context closes
diff --git a/sentience/tools/context.py b/sentience/tools/context.py
index d4eff5c..321f753 100644
--- a/sentience/tools/context.py
+++ b/sentience/tools/context.py
@@ -37,7 +37,7 @@ class ToolContext:
 
     def __init__(
         self,
-        runtime: "AgentRuntime",
+        runtime: AgentRuntime,
         files: FileSandbox | None = None,
         base_dir: Path | None = None,
     ) -> None:
diff --git a/sentience/tools/filesystem.py b/sentience/tools/filesystem.py
index 5595499..4783fdb 100644
--- a/sentience/tools/filesystem.py
+++ b/sentience/tools/filesystem.py
@@ -115,9 +115,7 @@ async def read_file(ctx: ToolContext | None, params: ReadFileInput) -> ReadFileO
         output_model=WriteFileOutput,
         description="Write a file to the sandbox.",
     )
-    async def write_file(
-        ctx: ToolContext | None, params: WriteFileInput
-    ) -> WriteFileOutput:
+    async def write_file(ctx: ToolContext | None, params: WriteFileInput) -> WriteFileOutput:
         files = _get_files(ctx)
         written = files.write_text(params.path, params.content, overwrite=params.overwrite)
         return WriteFileOutput(path=params.path, bytes_written=written)
@@ -128,9 +126,7 @@ async def write_file(
         output_model=AppendFileOutput,
         description="Append text to a file in the sandbox.",
     )
-    async def append_file(
-        ctx: ToolContext | None, params: AppendFileInput
-    ) -> AppendFileOutput:
+    async def append_file(ctx: ToolContext | None, params: AppendFileInput) -> AppendFileOutput:
         files = _get_files(ctx)
         written = files.append_text(params.path, params.content)
         return AppendFileOutput(path=params.path, bytes_written=written)
@@ -141,9 +137,7 @@ async def append_file(
         output_model=ReplaceFileOutput,
         description="Replace text in a file in the sandbox.",
     )
-    async def replace_file(
-        ctx: ToolContext | None, params: ReplaceFileInput
-    ) -> ReplaceFileOutput:
+    async def replace_file(ctx: ToolContext | None, params: ReplaceFileInput) -> ReplaceFileOutput:
         files = _get_files(ctx)
         replaced = files.replace_text(params.path, params.old, params.new)
         return ReplaceFileOutput(path=params.path, replaced=replaced)
diff --git a/sentience/tools/registry.py b/sentience/tools/registry.py
index 40977a2..2d162b2 100644
--- a/sentience/tools/registry.py
+++ b/sentience/tools/registry.py
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
-from collections.abc import Callable
 import inspect
 import time
+from collections.abc import Callable
 from typing import Any
 
 from pydantic import BaseModel, ConfigDict, Field
@@ -42,7 +42,7 @@ class ToolRegistry:
     def __init__(self) -> None:
         self._tools: dict[str, ToolSpec] = {}
 
-    def register(self, spec: ToolSpec) -> "ToolRegistry":
+    def register(self, spec: ToolSpec) -> ToolRegistry:
         if spec.name in self._tools:
             raise ValueError(f"tool already registered: {spec.name}")
         self._tools[spec.name] = spec
diff --git a/tests/test_actions.py b/tests/test_actions.py
index d18f02a..847d8c2 100644
--- a/tests/test_actions.py
+++ b/tests/test_actions.py
@@ -2,7 +2,10 @@
 Tests for actions (click, type, press, click_rect)
 """
 
+import pytest
+
 from sentience import (
+    AsyncSentienceBrowser,
     SentienceBrowser,
     back,
     check,
@@ -12,7 +15,11 @@
     find,
     press,
     scroll_to,
+    search,
+    search_async,
     select_option,
+    send_keys,
+    send_keys_async,
     snapshot,
     submit,
     type_text,
@@ -65,6 +72,106 @@ def test_press():
         assert result.duration_ms > 0
 
 
+def test_send_keys():
+    """Test send_keys helper"""
+    with SentienceBrowser() as browser:
+        browser.page.goto("https://example.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        result = send_keys(browser, "CTRL+L")
+        assert result.success is True
+        assert result.duration_ms > 0
+
+
+def test_send_keys_empty_sequence() -> None:
+    with SentienceBrowser() as browser:
+        browser.page.goto("https://example.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        with pytest.raises(ValueError, match="empty"):
+            send_keys(browser, "")
+
+
+def test_send_keys_braced_sequence() -> None:
+    with SentienceBrowser() as browser:
+        browser.page.goto("https://example.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        result = send_keys(browser, "{CTRL+L}")
+        assert result.success is True
+
+
+def test_send_keys_multi_sequence_and_alias() -> None:
+    with SentienceBrowser() as browser:
+        browser.page.goto("https://example.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        result = send_keys(browser, "Tab Tab Enter")
+        assert result.success is True
+        result = send_keys(browser, "CMD+C")
+        assert result.success is True
+
+
+def test_search_builds_url() -> None:
+    with SentienceBrowser() as browser:
+        browser.page.goto("https://example.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        result = search(browser, "sentience sdk", engine="duckduckgo")
+        assert result.success is True
+        assert result.duration_ms > 0
+
+        result = search(browser, "sentience sdk", engine="google")
+        assert result.success is True
+
+        result = search(browser, "sentience sdk", engine="bing")
+        assert result.success is True
+
+        result = search(browser, "sentience sdk", engine="google.com")
+        assert result.success is True
+
+
+def test_search_empty_query() -> None:
+    with SentienceBrowser() as browser:
+        browser.page.goto("https://example.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        with pytest.raises(ValueError, match="empty"):
+            search(browser, "")
+
+
+def test_search_disallowed_domain() -> None:
+    with SentienceBrowser(allowed_domains=["example.com"]) as browser:
+        browser.page.goto("https://example.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        with pytest.raises(ValueError, match="domain not allowed"):
+            search(browser, "sentience sdk", engine="duckduckgo")
+
+
+@pytest.mark.asyncio
+async def test_search_async() -> None:
+    async with AsyncSentienceBrowser() as browser:
+        await browser.page.goto("https://example.com")
+        await browser.page.wait_for_load_state("networkidle")
+
+        result = await search_async(
+            browser, "sentience sdk", engine="duckduckgo", take_snapshot=True
+        )
+        assert result.success is True
+        assert result.snapshot_after is not None
+
+
+@pytest.mark.asyncio
+async def test_send_keys_async() -> None:
+    async with AsyncSentienceBrowser() as browser:
+        await browser.page.goto("https://example.com")
+        await browser.page.wait_for_load_state("networkidle")
+
+        result = await send_keys_async(browser, "CTRL+L")
+        assert result.success is True
+
+
 def test_click_rect():
     """Test click_rect with rect dict"""
     with SentienceBrowser() as browser:
diff --git a/tests/unit/test_domain_policies.py b/tests/unit/test_domain_policies.py
new file mode 100644
index 0000000..3116230
--- /dev/null
+++ b/tests/unit/test_domain_policies.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from sentience.browser import _domain_matches, _extract_host, _is_domain_allowed
+
+
+def test_domain_matches_suffix() -> None:
+    assert _domain_matches("sub.example.com", "example.com") is True
+    assert _domain_matches("example.com", "example.com") is True
+    assert _domain_matches("example.com", "*.example.com") is True
+    assert _domain_matches("other.com", "example.com") is False
+    assert _domain_matches("example.com", "https://example.com") is True
+    assert _domain_matches("localhost", "http://localhost:3000") is True
+
+
+def test_domain_allowlist_denylist() -> None:
+    assert _is_domain_allowed("a.example.com", ["example.com"], []) is True
+    assert _is_domain_allowed("a.example.com", ["example.com"], ["bad.com"]) is True
+    assert _is_domain_allowed("bad.example.com", [], ["example.com"]) is False
+    assert _is_domain_allowed("x.com", ["example.com"], []) is False
+    assert _is_domain_allowed("example.com", ["https://example.com"], []) is True
+
+
+def test_extract_host_handles_ports() -> None:
+    assert _extract_host("http://localhost:3000") == "localhost"
+    assert _extract_host("localhost:3000") == "localhost"
+
+
+def test_keep_alive_skips_close() -> None:
+    from sentience.browser import SentienceBrowser
+
+    class Dummy:
+        def __init__(self) -> None:
+            self.closed = False
+
+        def close(self):
+            self.closed = True
+
+        def stop(self):
+            self.closed = True
+
+    browser = SentienceBrowser()
+    browser.keep_alive = True
+    dummy_context = Dummy()
+    dummy_playwright = Dummy()
+    browser.context = dummy_context
+    browser.playwright = dummy_playwright
+    browser._extension_path = None
+
+    browser.close()
+    assert dummy_context.closed is False
+    assert dummy_playwright.closed is False

From 2b647f0ef0ed5f16e486a722d3f2f83aa6115769 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 20:53:15 -0800
Subject: [PATCH 05/12] include snapshot options in search

---
 sentience/actions.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/sentience/actions.py b/sentience/actions.py
index 7e55154..90f9774 100644
--- a/sentience/actions.py
+++ b/sentience/actions.py
@@ -10,7 +10,7 @@
 from .browser import AsyncSentienceBrowser, SentienceBrowser
 from .browser_evaluator import BrowserEvaluator
 from .cursor_policy import CursorPolicy, build_human_cursor_path
-from .models import ActionResult, BBox, Snapshot
+from .models import ActionResult, BBox, Snapshot, SnapshotOptions
 from .sentience_methods import SentienceMethod
 from .snapshot import snapshot, snapshot_async
 
@@ -809,9 +809,17 @@ def search(
     query: str,
     engine: str = "duckduckgo",
     take_snapshot: bool = False,
+    snapshot_options: SnapshotOptions | None = None,
 ) -> ActionResult:
     """
     Navigate to a search results page for the given query.
+
+    Args:
+        browser: SentienceBrowser instance
+        query: Search query string
+        engine: Search engine name (duckduckgo, google, google.com, bing)
+        take_snapshot: Whether to take snapshot after navigation
+        snapshot_options: Snapshot options passed to snapshot() when take_snapshot is True.
     """
     if not browser.page:
         raise RuntimeError("Browser not started. Call browser.start() first.")
@@ -831,7 +839,7 @@ def search(
 
     snapshot_after: Snapshot | None = None
     if take_snapshot:
-        snapshot_after = snapshot(browser)
+        snapshot_after = snapshot(browser, snapshot_options)
 
     return ActionResult(
         success=True,
@@ -1877,9 +1885,17 @@ async def search_async(
     query: str,
     engine: str = "duckduckgo",
     take_snapshot: bool = False,
+    snapshot_options: SnapshotOptions | None = None,
 ) -> ActionResult:
     """
     Async version of search().
+
+    Args:
+        browser: AsyncSentienceBrowser instance
+        query: Search query string
+        engine: Search engine name (duckduckgo, google, google.com, bing)
+        take_snapshot: Whether to take snapshot after navigation
+        snapshot_options: Snapshot options passed to snapshot_async() when take_snapshot is True.
     """
     if not browser.page:
         raise RuntimeError("Browser not started. Call await browser.start() first.")
@@ -1899,7 +1915,7 @@ async def search_async(
 
     snapshot_after: Snapshot | None = None
     if take_snapshot:
-        snapshot_after = await snapshot_async(browser)
+        snapshot_after = await snapshot_async(browser, snapshot_options)
 
     return ActionResult(
         success=True,

From bb9be16b976981b6bfb8fcd1b42c04b790e74bc3 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 21:39:22 -0800
Subject: [PATCH 06/12] fix circular import

---
 sentience/tools/context.py    |  7 ++++---
 sentience/tools/filesystem.py | 14 ++++++++------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/sentience/tools/context.py b/sentience/tools/context.py
index 321f753..dfcd562 100644
--- a/sentience/tools/context.py
+++ b/sentience/tools/context.py
@@ -7,8 +7,7 @@
 
 if TYPE_CHECKING:
     from ..agent_runtime import AgentRuntime
-
-from .filesystem import FileSandbox
+    from .filesystem import FileSandbox
 
 
 class BackendCapabilities(BaseModel):
@@ -38,12 +37,14 @@ class ToolContext:
     def __init__(
         self,
         runtime: AgentRuntime,
-        files: FileSandbox | None = None,
+        files: "FileSandbox" | None = None,
         base_dir: Path | None = None,
     ) -> None:
         self.runtime = runtime
         if files is None:
             root = base_dir or (Path.cwd() / ".sentience" / "files")
+            from .filesystem import FileSandbox
+
             files = FileSandbox(root)
         self.files = files
 
diff --git a/sentience/tools/filesystem.py b/sentience/tools/filesystem.py
index 4783fdb..3d468d4 100644
--- a/sentience/tools/filesystem.py
+++ b/sentience/tools/filesystem.py
@@ -3,8 +3,10 @@
 from pathlib import Path
 
 from pydantic import BaseModel, Field
+from typing import TYPE_CHECKING
 
-from .context import ToolContext
+if TYPE_CHECKING:
+    from .context import ToolContext
 from .registry import ToolRegistry
 
 
@@ -92,7 +94,7 @@ def register_filesystem_tools(
 ) -> ToolRegistry:
     """Register sandboxed filesystem tools."""
 
-    def _get_files(ctx: ToolContext | None) -> FileSandbox:
+    def _get_files(ctx: "ToolContext" | None) -> FileSandbox:
         if ctx is not None:
             return ctx.files
         if sandbox is not None:
@@ -105,7 +107,7 @@ def _get_files(ctx: ToolContext | None) -> FileSandbox:
         output_model=ReadFileOutput,
         description="Read a file from the sandbox.",
     )
-    async def read_file(ctx: ToolContext | None, params: ReadFileInput) -> ReadFileOutput:
+    async def read_file(ctx: "ToolContext" | None, params: ReadFileInput) -> ReadFileOutput:
         files = _get_files(ctx)
         return ReadFileOutput(content=files.read_text(params.path))
 
@@ -115,7 +117,7 @@ async def read_file(ctx: ToolContext | None, params: ReadFileInput) -> ReadFileO
         output_model=WriteFileOutput,
         description="Write a file to the sandbox.",
     )
-    async def write_file(ctx: ToolContext | None, params: WriteFileInput) -> WriteFileOutput:
+    async def write_file(ctx: "ToolContext" | None, params: WriteFileInput) -> WriteFileOutput:
         files = _get_files(ctx)
         written = files.write_text(params.path, params.content, overwrite=params.overwrite)
         return WriteFileOutput(path=params.path, bytes_written=written)
@@ -126,7 +128,7 @@ async def write_file(ctx: ToolContext | None, params: WriteFileInput) -> WriteFi
         output_model=AppendFileOutput,
         description="Append text to a file in the sandbox.",
     )
-    async def append_file(ctx: ToolContext | None, params: AppendFileInput) -> AppendFileOutput:
+    async def append_file(ctx: "ToolContext" | None, params: AppendFileInput) -> AppendFileOutput:
         files = _get_files(ctx)
         written = files.append_text(params.path, params.content)
         return AppendFileOutput(path=params.path, bytes_written=written)
@@ -137,7 +139,7 @@ async def append_file(ctx: ToolContext | None, params: AppendFileInput) -> Appen
         output_model=ReplaceFileOutput,
         description="Replace text in a file in the sandbox.",
     )
-    async def replace_file(ctx: ToolContext | None, params: ReplaceFileInput) -> ReplaceFileOutput:
+    async def replace_file(ctx: "ToolContext" | None, params: ReplaceFileInput) -> ReplaceFileOutput:
         files = _get_files(ctx)
         replaced = files.replace_text(params.path, params.old, params.new)
         return ReplaceFileOutput(path=params.path, replaced=replaced)

From 63883525430eacff3f0333e816a62cb62ada876c Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 21:41:36 -0800
Subject: [PATCH 07/12] fix circular import

---
 sentience/tools/defaults.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sentience/tools/defaults.py b/sentience/tools/defaults.py
index e1253a2..5b319dd 100644
--- a/sentience/tools/defaults.py
+++ b/sentience/tools/defaults.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
 from pydantic import BaseModel, Field
+from typing import TYPE_CHECKING
 
-from ..agent_runtime import AgentRuntime
+if TYPE_CHECKING:
+    from ..agent_runtime import AgentRuntime
 from ..backends import actions as backend_actions
 from ..models import ActionResult, BBox, EvaluateJsRequest, Snapshot
 from .context import ToolContext
@@ -53,7 +55,7 @@ class EvaluateJsToolInput(BaseModel):
 
 
 def register_default_tools(
-    registry: ToolRegistry, runtime: ToolContext | AgentRuntime | None = None
+    registry: ToolRegistry, runtime: ToolContext | "AgentRuntime" | None = None
 ) -> ToolRegistry:
     """Register default browser tools on a registry."""
 

From 4d5975dc3c1461222ec3695155d371260d206ce1 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 21:44:15 -0800
Subject: [PATCH 08/12] fix async mismatch

---
 sentience/read.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sentience/read.py b/sentience/read.py
index 29d5eaa..8245f03 100644
--- a/sentience/read.py
+++ b/sentience/read.py
@@ -223,7 +223,7 @@ def extract(
         schema_desc = json.dumps(schema.model_json_schema(), ensure_ascii=False)
     system = "You extract structured data from markdown content. " "Return only JSON. No prose."
     user = f"QUERY:\n{query}\n\nSCHEMA:\n{schema_desc}\n\nCONTENT:\n{content}"
-    response = await llm.generate_async(system, user)
+    response = llm.generate(system, user)
     raw = response.content.strip()
 
     if schema is None:
@@ -257,7 +257,7 @@ async def extract_async(
         schema_desc = json.dumps(schema.model_json_schema(), ensure_ascii=False)
     system = "You extract structured data from markdown content. " "Return only JSON. No prose."
     user = f"QUERY:\n{query}\n\nSCHEMA:\n{schema_desc}\n\nCONTENT:\n{content}"
-    response = llm.generate(system, user)
+    response = await llm.generate_async(system, user)
     raw = response.content.strip()
 
     if schema is None:

From 3141f86288e67853a41744a36f2b9b2ffe31318a Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 21:47:08 -0800
Subject: [PATCH 09/12] fix tests

---
 sentience/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sentience/__init__.py b/sentience/__init__.py
index bc35bf2..f2e4b58 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -55,7 +55,7 @@
 
 # Agent Layer (Phase 1 & 2)
 from .base_agent import BaseAgent
-from .browser import SentienceBrowser
+from .browser import AsyncSentienceBrowser, SentienceBrowser
 from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
 from .captcha_strategies import ExternalSolver, HumanHandoffSolver, VisionSolver
 
@@ -192,6 +192,7 @@
     "backend_wait_for_stable",
     # Core SDK
     "SentienceBrowser",
+    "AsyncSentienceBrowser",
     "Snapshot",
     "Element",
     "BBox",

From 26ea3b7f2582996714aec8896a4674a3f46acca7 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 21:58:14 -0800
Subject: [PATCH 10/12] fix tests

---
 tests/unit/test_extract.py       | 30 ++++++++++++++++++++++++++----
 tests/unit/test_tool_registry.py | 14 ++++++++++----
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/tests/unit/test_extract.py b/tests/unit/test_extract.py
index e46ab55..42ad1ed 100644
--- a/tests/unit/test_extract.py
+++ b/tests/unit/test_extract.py
@@ -24,9 +24,7 @@ def model_name(self) -> str:
         return "stub"
 
 
-class BrowserStub:
-    page = True
-
+class PageStub:
     def __init__(self, content: str):
         self._content = content
 
@@ -40,6 +38,30 @@ def evaluate(self, _script: str, _opts: dict):
         }
 
 
+class AsyncPageStub:
+    def __init__(self, content: str):
+        self._content = content
+
+    async def evaluate(self, _script: str, _opts: dict):
+        return {
+            "status": "success",
+            "url": "https://example.com",
+            "format": "markdown",
+            "content": self._content,
+            "length": len(self._content),
+        }
+
+
+class BrowserStub:
+    def __init__(self, content: str):
+        self.page = PageStub(content)
+
+
+class AsyncBrowserStub:
+    def __init__(self, content: str):
+        self.page = AsyncPageStub(content)
+
+
 class ItemSchema(BaseModel):
     name: str
     price: str
@@ -64,7 +86,7 @@ def test_extract_schema_invalid_json() -> None:
 
 @pytest.mark.asyncio
 async def test_extract_async_schema_success() -> None:
-    browser = BrowserStub("Product: Widget")
+    browser = AsyncBrowserStub("Product: Widget")
     llm = LLMStub('{"name":"Widget","price":"$10"}')
     from sentience.read import extract_async
 
diff --git a/tests/unit/test_tool_registry.py b/tests/unit/test_tool_registry.py
index bcdadae..bcb9929 100644
--- a/tests/unit/test_tool_registry.py
+++ b/tests/unit/test_tool_registry.py
@@ -3,7 +3,13 @@
 import pytest
 from pydantic import BaseModel, ValidationError
 
-from sentience.tools import ToolContext, ToolRegistry, ToolSpec, register_default_tools
+from sentience.tools import (
+    ToolContext,
+    ToolRegistry,
+    ToolSpec,
+    UnsupportedCapabilityError,
+    register_default_tools,
+)
 
 
 class EchoInput(BaseModel):
@@ -143,7 +149,7 @@ def can(self, _name: str) -> bool:
             return False
 
     ctx = ToolContext(RuntimeStub())
-    with pytest.raises(ValueError, match="unsupported_capability"):
+    with pytest.raises(UnsupportedCapabilityError, match="unsupported_capability"):
         ctx.require("tabs")
 
 
@@ -213,10 +219,10 @@ async def snapshot(self, **_kwargs):
     ctx = ToolContext(RuntimeStub())
     register_default_tools(registry, ctx)
 
-    with pytest.raises(ValueError, match="unsupported_capability"):
+    with pytest.raises(UnsupportedCapabilityError, match="unsupported_capability"):
         await registry.execute("press", {"key": "Enter"}, ctx=ctx)
 
-    with pytest.raises(ValueError, match="unsupported_capability"):
+    with pytest.raises(UnsupportedCapabilityError, match="unsupported_capability"):
         await registry.execute(
             "scroll_to_element",
             {"element_id": 1, "behavior": "instant", "block": "center"},

From 4f88606799e4f05ba8afdc11e5be567ede008c16 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 22:06:36 -0800
Subject: [PATCH 11/12] fix tests

---
 sentience/tools/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sentience/tools/__init__.py b/sentience/tools/__init__.py
index 4b9ec1f..cf4de8f 100644
--- a/sentience/tools/__init__.py
+++ b/sentience/tools/__init__.py
@@ -2,7 +2,7 @@
 Tool registry for LLM-callable tools.
 """
 
-from .context import BackendCapabilities, ToolContext
+from .context import BackendCapabilities, ToolContext, UnsupportedCapabilityError
 from .defaults import register_default_tools
 from .filesystem import FileSandbox, register_filesystem_tools
 from .registry import ToolRegistry, ToolSpec
@@ -13,6 +13,7 @@
     "ToolContext",
     "ToolRegistry",
     "ToolSpec",
+    "UnsupportedCapabilityError",
     "register_default_tools",
     "register_filesystem_tools",
 ]

From c0d3f64accabb0aed36d7e69e739c33ac7cdf694 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Fri, 23 Jan 2026 22:35:12 -0800
Subject: [PATCH 12/12] fix tests

---
 tests/unit/test_tool_registry.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/unit/test_tool_registry.py b/tests/unit/test_tool_registry.py
index bcb9929..8210a1c 100644
--- a/tests/unit/test_tool_registry.py
+++ b/tests/unit/test_tool_registry.py
@@ -149,8 +149,9 @@ def can(self, _name: str) -> bool:
             return False
 
     ctx = ToolContext(RuntimeStub())
-    with pytest.raises(UnsupportedCapabilityError, match="unsupported_capability"):
+    with pytest.raises(UnsupportedCapabilityError) as excinfo:
         ctx.require("tabs")
+    assert excinfo.value.error == "unsupported_capability"
 
 
 @pytest.mark.asyncio
@@ -219,12 +220,14 @@ async def snapshot(self, **_kwargs):
     ctx = ToolContext(RuntimeStub())
     register_default_tools(registry, ctx)
 
-    with pytest.raises(UnsupportedCapabilityError, match="unsupported_capability"):
+    with pytest.raises(UnsupportedCapabilityError) as excinfo:
         await registry.execute("press", {"key": "Enter"}, ctx=ctx)
+    assert excinfo.value.error == "unsupported_capability"
 
-    with pytest.raises(UnsupportedCapabilityError, match="unsupported_capability"):
+    with pytest.raises(UnsupportedCapabilityError) as excinfo:
         await registry.execute(
             "scroll_to_element",
             {"element_id": 1, "behavior": "instant", "block": "center"},
             ctx=ctx,
         )
+    assert excinfo.value.error == "unsupported_capability"