diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 419f2fd..3ccdbdf 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -32,6 +32,24 @@ jobs:
     - name: Install dependencies
       run: |
         pip install -e ".[dev]"
+        pip install pre-commit mypy types-requests
+
+    - name: Lint with pre-commit
+      continue-on-error: true
+      run: |
+        pre-commit run --all-files
+
+    - name: Type check with mypy
+      continue-on-error: true
+      run: |
+        mypy sentience --ignore-missing-imports --no-strict-optional
+
+    - name: Check code style
+      continue-on-error: true
+      run: |
+        black --check sentience tests --line-length=100
+        isort --check-only --profile black sentience tests
+        flake8 sentience tests --max-line-length=100 --extend-ignore=E203,W503,E501 --max-complexity=15
 
     - name: Build extension (if needed)
       if: runner.os != 'Windows'
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7649ba7..7a4f356 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -50,20 +50,19 @@ repos:
           - '--max-complexity=15'
         exclude: ^(venv/|\.venv/|build/|dist/|tests/fixtures/)
 
-  # Type checking with mypy (disabled for now - too strict)
-  # Uncomment to enable strict type checking
-  # - repo: https://github.com/pre-commit/mirrors-mypy
-  #   rev: v1.8.0
-  #   hooks:
-  #     - id: mypy
-  #       additional_dependencies:
-  #         - pydantic>=2.0
-  #         - types-requests
-  #       args:
-  #         - '--ignore-missing-imports'
-  #         - '--no-strict-optional'
-  #         - '--warn-unused-ignores'
-  #       exclude: ^(tests/|examples/|venv/|\.venv/|build/|dist/)
+  # Type checking with mypy
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.8.0
+    hooks:
+      - id: mypy
+        additional_dependencies:
+          - pydantic>=2.0
+          - types-requests
+        args:
+          - '--ignore-missing-imports'
+          - '--no-strict-optional'
+          - '--warn-unused-ignores'
+        exclude: ^(tests/|examples/|venv/|\.venv/|build/|dist/)
 
   # Security checks
   - repo: https://github.com/PyCQA/bandit
diff --git a/sentience/__init__.py b/sentience/__init__.py
index 20c337c..76458db 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -14,9 +14,6 @@
 from .cloud_tracing import CloudTraceSink, SentienceLogger
 from .conversational_agent import ConversationalAgent
 from .expect import expect
-
-# Formatting (v0.12.0+)
-from .formatting import format_snapshot_for_llm
 from .generator import ScriptGenerator, generate
 from .inspector import Inspector, inspect
 from .llm_provider import (
@@ -55,12 +52,14 @@
 from .read import read
 from .recorder import Recorder, Trace, TraceStep, record
 from .screenshot import screenshot
+from .sentience_methods import AgentAction, SentienceMethod
 from .snapshot import snapshot
 from .text_search import find_text_rect
 from .tracer_factory import SENTIENCE_API_URL, create_tracer
 from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
 
 # Utilities (v0.12.0+)
+# Import from utils package (re-exports from submodules for backward compatibility)
 from .utils import (
     canonical_snapshot_loose,
     canonical_snapshot_strict,
@@ -68,6 +67,9 @@
     save_storage_state,
     sha256_digest,
 )
+
+# Formatting (v0.12.0+)
+from .utils.formatting import format_snapshot_for_llm
 from .wait import wait_for
 
 __version__ = "0.91.1"
@@ -150,4 +152,7 @@
     "format_snapshot_for_llm",
     # Agent Config (v0.12.0+)
     "AgentConfig",
+    # Enums
+    "SentienceMethod",
+    "AgentAction",
 ]
diff --git a/sentience/action_executor.py b/sentience/action_executor.py
new file mode 100644
index 0000000..c95f29b
--- /dev/null
+++ b/sentience/action_executor.py
@@ -0,0 +1,215 @@
+"""
+Action Executor for Sentience Agent.
+
+Handles parsing and execution of action commands (CLICK, TYPE, PRESS, FINISH).
+This separates action execution concerns from LLM interaction.
+"""
+
+import re
+from typing import Any, Union
+
+from .actions import click, click_async, press, press_async, type_text, type_text_async
+from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .models import Snapshot
+from .protocols import AsyncBrowserProtocol, BrowserProtocol
+
+
+class ActionExecutor:
+    """
+    Executes actions and handles parsing of action command strings.
+
+    This class encapsulates all action execution logic, making it easier to:
+    - Test action execution independently
+    - Add new action types in one place
+    - Handle action parsing errors consistently
+    """
+
+    def __init__(
+        self,
+        browser: SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol,
+    ):
+        """
+        Initialize action executor.
+
+        Args:
+            browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance
+                    (for testing, can use mock objects that implement BrowserProtocol)
+        """
+        self.browser = browser
+        # Check if browser is async - support both concrete types and protocols
+        # Check concrete types first (most reliable)
+        if isinstance(browser, AsyncSentienceBrowser):
+            self._is_async = True
+        elif isinstance(browser, SentienceBrowser):
+            self._is_async = False
+        else:
+            # For protocol-based browsers, check if methods are actually async
+            # This is more reliable than isinstance checks which can match both protocols
+            import inspect
+
+            start_method = getattr(browser, "start", None)
+            if start_method and inspect.iscoroutinefunction(start_method):
+                self._is_async = True
+            elif isinstance(browser, BrowserProtocol):
+                # If it implements BrowserProtocol and start is not async, it's sync
+                self._is_async = False
+            else:
+                # Default to sync for unknown types
+                self._is_async = False
+
+    def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
+        """
+        Parse action string and execute SDK call (synchronous).
+
+        Args:
+            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
+            snap: Current snapshot (for context, currently unused but kept for API consistency)
+
+        Returns:
+            Execution result dictionary with keys:
+            - success: bool
+            - action: str (e.g., "click", "type", "press", "finish")
+            - element_id: Optional[int] (for click/type actions)
+            - text: Optional[str] (for type actions)
+            - key: Optional[str] (for press actions)
+            - outcome: Optional[str] (action outcome)
+            - url_changed: Optional[bool] (for click actions)
+            - error: Optional[str] (if action failed)
+            - message: Optional[str] (for finish action)
+
+        Raises:
+            ValueError: If action format is unknown
+            RuntimeError: If called on async browser (use execute_async instead)
+        """
+        if self._is_async:
+            raise RuntimeError(
+                "ActionExecutor.execute() called on async browser. Use execute_async() instead."
+            )
+
+        # Parse CLICK(42)
+        if match := re.match(r"CLICK\s*\(\s*(\d+)\s*\)", action_str, re.IGNORECASE):
+            element_id = int(match.group(1))
+            result = click(self.browser, element_id)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "click",
+                "element_id": element_id,
+                "outcome": result.outcome,
+                "url_changed": result.url_changed,
+            }
+
+        # Parse TYPE(42, "hello world")
+        elif match := re.match(
+            r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
+            action_str,
+            re.IGNORECASE,
+        ):
+            element_id = int(match.group(1))
+            text = match.group(2)
+            result = type_text(self.browser, element_id, text)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "type",
+                "element_id": element_id,
+                "text": text,
+                "outcome": result.outcome,
+            }
+
+        # Parse PRESS("Enter")
+        elif match := re.match(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', action_str, re.IGNORECASE):
+            key = match.group(1)
+            result = press(self.browser, key)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "press",
+                "key": key,
+                "outcome": result.outcome,
+            }
+
+        # Parse FINISH()
+        elif re.match(r"FINISH\s*\(\s*\)", action_str, re.IGNORECASE):
+            return {
+                "success": True,
+                "action": "finish",
+                "message": "Task marked as complete",
+            }
+
+        else:
+            raise ValueError(
+                f"Unknown action format: {action_str}\n"
+                f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
+            )
+
+    async def execute_async(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
+        """
+        Parse action string and execute SDK call (asynchronous).
+
+        Args:
+            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
+            snap: Current snapshot (for context, currently unused but kept for API consistency)
+
+        Returns:
+            Execution result dictionary (same format as execute())
+
+        Raises:
+            ValueError: If action format is unknown
+            RuntimeError: If called on sync browser (use execute() instead)
+        """
+        if not self._is_async:
+            raise RuntimeError(
+                "ActionExecutor.execute_async() called on sync browser. Use execute() instead."
+            )
+
+        # Parse CLICK(42)
+        if match := re.match(r"CLICK\s*\(\s*(\d+)\s*\)", action_str, re.IGNORECASE):
+            element_id = int(match.group(1))
+            result = await click_async(self.browser, element_id)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "click",
+                "element_id": element_id,
+                "outcome": result.outcome,
+                "url_changed": result.url_changed,
+            }
+
+        # Parse TYPE(42, "hello world")
+        elif match := re.match(
+            r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
+            action_str,
+            re.IGNORECASE,
+        ):
+            element_id = int(match.group(1))
+            text = match.group(2)
+            result = await type_text_async(self.browser, element_id, text)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "type",
+                "element_id": element_id,
+                "text": text,
+                "outcome": result.outcome,
+            }
+
+        # Parse PRESS("Enter")
+        elif match := re.match(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', action_str, re.IGNORECASE):
+            key = match.group(1)
+            result = await press_async(self.browser, key)  # type: ignore
+            return {
+                "success": result.success,
+                "action": "press",
+                "key": key,
+                "outcome": result.outcome,
+            }
+
+        # Parse FINISH()
+        elif re.match(r"FINISH\s*\(\s*\)", action_str, re.IGNORECASE):
+            return {
+                "success": True,
+                "action": "finish",
+                "message": "Task marked as complete",
+            }
+
+        else:
+            raise ValueError(
+                f"Unknown action format: {action_str}\n"
+                f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
+            )
diff --git a/sentience/actions.py b/sentience/actions.py
index 50c26bc..b928b00 100644
--- a/sentience/actions.py
+++ b/sentience/actions.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 """
 Actions v1 - click, type, press
 """
@@ -5,7 +7,9 @@
 import time
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .browser_evaluator import BrowserEvaluator
 from .models import ActionResult, BBox, Snapshot
+from .sentience_methods import SentienceMethod
 from .snapshot import snapshot, snapshot_async
 
 
@@ -59,13 +63,8 @@ def click(  # noqa: C901
             else:
                 # Fallback to JS click if element not found in snapshot
                 try:
-                    success = browser.page.evaluate(
-                        """
-                        (id) => {
-                            return window.sentience.click(id);
-                        }
-                        """,
-                        element_id,
+                    success = BrowserEvaluator.invoke(
+                        browser.page, SentienceMethod.CLICK, element_id
                     )
                 except Exception:
                     # Navigation might have destroyed context, assume success if URL changed
@@ -73,27 +72,13 @@ def click(  # noqa: C901
         except Exception:
             # Fallback to JS click on error
             try:
-                success = browser.page.evaluate(
-                    """
-                    (id) => {
-                        return window.sentience.click(id);
-                    }
-                    """,
-                    element_id,
-                )
+                success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
             except Exception:
                 # Navigation might have destroyed context, assume success if URL changed
                 success = True
     else:
         # Legacy JS-based click
-        success = browser.page.evaluate(
-            """
-            (id) => {
-                return window.sentience.click(id);
-            }
-            """,
-            element_id,
-        )
+        success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
 
     # Wait a bit for navigation/DOM updates
     try:
diff --git a/sentience/agent.py b/sentience/agent.py
index 81e71cc..deafbd0 100644
--- a/sentience/agent.py
+++ b/sentience/agent.py
@@ -5,14 +5,15 @@
 
 import asyncio
 import hashlib
-import re
 import time
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any, Optional, Union
 
-from .actions import click, click_async, press, press_async, type_text, type_text_async
+from .action_executor import ActionExecutor
 from .agent_config import AgentConfig
 from .base_agent import BaseAgent, BaseAgentAsync
 from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .element_filter import ElementFilter
+from .llm_interaction_handler import LLMInteractionHandler
 from .llm_provider import LLMProvider, LLMResponse
 from .models import (
     ActionHistory,
@@ -24,12 +25,45 @@
     SnapshotOptions,
     TokenStats,
 )
+from .protocols import AsyncBrowserProtocol, BrowserProtocol
 from .snapshot import snapshot, snapshot_async
+from .trace_event_builder import TraceEventBuilder
 
 if TYPE_CHECKING:
     from .tracing import Tracer
 
 
+def _safe_tracer_call(
+    tracer: Optional["Tracer"], method_name: str, verbose: bool, *args, **kwargs
+) -> None:
+    """
+    Safely call tracer method, catching and logging errors without breaking execution.
+
+    Args:
+        tracer: Tracer instance or None
+        method_name: Name of tracer method to call (e.g., "emit", "emit_error")
+        verbose: Whether to print error messages
+        *args: Positional arguments for the tracer method
+        **kwargs: Keyword arguments for the tracer method
+    """
+    if not tracer:
+        return
+    try:
+        method = getattr(tracer, method_name)
+        if args and kwargs:
+            method(*args, **kwargs)
+        elif args:
+            method(*args)
+        elif kwargs:
+            method(**kwargs)
+        else:
+            method()
+    except Exception as tracer_error:
+        # Tracer errors should not break agent execution
+        if verbose:
+            print(f"⚠️  Tracer error (non-fatal): {tracer_error}")
+
+
 class SentienceAgent(BaseAgent):
     """
     High-level agent that combines Sentience SDK with any LLM provider.
@@ -56,7 +90,7 @@ class SentienceAgent(BaseAgent):
 
     def __init__(
         self,
-        browser: SentienceBrowser,
+        browser: SentienceBrowser | BrowserProtocol,
         llm: LLMProvider,
         default_snapshot_limit: int = 50,
         verbose: bool = True,
@@ -67,7 +101,8 @@ def __init__(
         Initialize Sentience Agent
 
         Args:
-            browser: SentienceBrowser instance
+            browser: SentienceBrowser instance or BrowserProtocol-compatible object
+                    (for testing, can use mock objects that implement BrowserProtocol)
             llm: LLM provider (OpenAIProvider, AnthropicProvider, etc.)
             default_snapshot_limit: Default maximum elements to include in context (default: 50)
             verbose: Print execution logs (default: True)
@@ -81,6 +116,10 @@ def __init__(
         self.tracer = tracer
         self.config = config or AgentConfig()
 
+        # Initialize handlers
+        self.llm_handler = LLMInteractionHandler(llm)
+        self.action_executor = ActionExecutor(browser)
+
         # Screenshot sequence counter
         # Execution history
         self.history: list[dict[str, Any]] = []
@@ -151,7 +190,10 @@ def act(  # noqa: C901
         # Emit step_start trace event if tracer is enabled
         if self.tracer:
             pre_url = self.browser.page.url if self.browser.page else None
-            self.tracer.emit_step_start(
+            _safe_tracer_call(
+                self.tracer,
+                "emit_step_start",
+                self.verbose,
                 step_id=step_id,
                 step_index=self._step_count,
                 goal=goal,
@@ -198,17 +240,8 @@ def act(  # noqa: C901
 
                 # Emit snapshot trace event if tracer is enabled
                 if self.tracer:
-                    # Include ALL elements with full data for DOM tree display
-                    # Use snap.elements (all elements) not filtered_elements
-                    elements_data = [el.model_dump() for el in snap.elements]
-
                     # Build snapshot event data
-                    snapshot_data = {
-                        "url": snap.url,
-                        "element_count": len(snap.elements),
-                        "timestamp": snap.timestamp,
-                        "elements": elements_data,  # Full element data for DOM tree
-                    }
+                    snapshot_data = TraceEventBuilder.build_snapshot_event(snap)
 
                     # Always include screenshot in trace event for studio viewer compatibility
                     # CloudTraceSink will extract and upload screenshots separately, then remove
@@ -229,7 +262,10 @@ def act(  # noqa: C901
                         if snap.screenshot_format:
                             snapshot_data["screenshot_format"] = snap.screenshot_format
 
-                    self.tracer.emit(
+                    _safe_tracer_call(
+                        self.tracer,
+                        "emit",
+                        self.verbose,
                         "snapshot",
                         snapshot_data,
                         step_id=step_id,
@@ -248,14 +284,17 @@ def act(  # noqa: C901
                 )
 
                 # 2. GROUND: Format elements for LLM context
-                context = self._build_context(filtered_snap, goal)
+                context = self.llm_handler.build_context(filtered_snap, goal)
 
                 # 3. THINK: Query LLM for next action
-                llm_response = self._query_llm(context, goal)
+                llm_response = self.llm_handler.query_llm(context, goal)
 
                 # Emit LLM query trace event if tracer is enabled
                 if self.tracer:
-                    self.tracer.emit(
+                    _safe_tracer_call(
+                        self.tracer,
+                        "emit",
+                        self.verbose,
                         "llm_query",
                         {
                             "prompt_tokens": llm_response.prompt_tokens,
@@ -273,10 +312,10 @@ def act(  # noqa: C901
                 self._track_tokens(goal, llm_response)
 
                 # Parse action from LLM response
-                action_str = self._extract_action_from_response(llm_response.content)
+                action_str = self.llm_handler.extract_action(llm_response.content)
 
                 # 4. EXECUTE: Parse and run action
-                result_dict = self._execute_action(action_str, filtered_snap)
+                result_dict = self.action_executor.execute(action_str, filtered_snap)
 
                 duration_ms = int((time.time() - start_time) * 1000)
 
@@ -316,7 +355,10 @@ def act(  # noqa: C901
                         for el in filtered_snap.elements[:50]
                     ]
 
-                    self.tracer.emit(
+                    _safe_tracer_call(
+                        self.tracer,
+                        "emit",
+                        self.verbose,
                         "action",
                         {
                             "action": result.action,
@@ -423,32 +465,41 @@ def act(  # noqa: C901
                     }
 
                     # Build complete step_end event
-                    step_end_data = {
-                        "v": 1,
-                        "step_id": step_id,
-                        "step_index": self._step_count,
-                        "goal": goal,
-                        "attempt": attempt,
-                        "pre": {
-                            "url": pre_url,
-                            "snapshot_digest": snapshot_digest,
-                        },
-                        "llm": llm_data,
-                        "exec": exec_data,
-                        "post": {
-                            "url": post_url,
-                        },
-                        "verify": verify_data,
-                    }
+                    step_end_data = TraceEventBuilder.build_step_end_event(
+                        step_id=step_id,
+                        step_index=self._step_count,
+                        goal=goal,
+                        attempt=attempt,
+                        pre_url=pre_url,
+                        post_url=post_url,
+                        snapshot_digest=snapshot_digest,
+                        llm_data=llm_data,
+                        exec_data=exec_data,
+                        verify_data=verify_data,
+                    )
 
-                    self.tracer.emit("step_end", step_end_data, step_id=step_id)
+                    _safe_tracer_call(
+                        self.tracer,
+                        "emit",
+                        self.verbose,
+                        "step_end",
+                        step_end_data,
+                        step_id=step_id,
+                    )
 
                 return result
 
             except Exception as e:
                 # Emit error trace event if tracer is enabled
                 if self.tracer:
-                    self.tracer.emit_error(step_id=step_id, error=str(e), attempt=attempt)
+                    _safe_tracer_call(
+                        self.tracer,
+                        "emit_error",
+                        self.verbose,
+                        step_id=step_id,
+                        error=str(e),
+                        attempt=attempt,
+                    )
 
                 if attempt < max_retries:
                     if self.verbose:
@@ -477,187 +528,6 @@ def act(  # noqa: C901
                     )
                     raise RuntimeError(f"Failed after {max_retries} retries: {e}")
 
-    def _build_context(self, snap: Snapshot, goal: str) -> str:
-        """
-        Convert snapshot elements to token-efficient prompt string
-
-        Format: [ID] <role> "text" {cues} @ (x,y) (Imp:score)
-
-        Args:
-            snap: Snapshot object
-            goal: User goal (for context)
-
-        Returns:
-            Formatted element context string
-        """
-        lines = []
-        # Note: elements are already filtered by filter_elements() in act()
-        for el in snap.elements:
-            # Extract visual cues
-            cues = []
-            if el.visual_cues.is_primary:
-                cues.append("PRIMARY")
-            if el.visual_cues.is_clickable:
-                cues.append("CLICKABLE")
-            if el.visual_cues.background_color_name:
-                cues.append(f"color:{el.visual_cues.background_color_name}")
-
-            # Format element line
-            cues_str = f" {{{','.join(cues)}}}" if cues else ""
-            text_preview = (
-                (el.text[:50] + "...") if el.text and len(el.text) > 50 else (el.text or "")
-            )
-
-            lines.append(
-                f'[{el.id}] <{el.role}> "{text_preview}"{cues_str} '
-                f"@ ({int(el.bbox.x)},{int(el.bbox.y)}) (Imp:{el.importance})"
-            )
-
-        return "\n".join(lines)
-
-    def _extract_action_from_response(self, response: str) -> str:
-        """
-        Extract action command from LLM response, handling cases where
-        the LLM adds extra explanation despite instructions.
-
-        Args:
-            response: Raw LLM response text
-
-        Returns:
-            Cleaned action command string
-        """
-        import re
-
-        # Remove markdown code blocks if present
-        response = re.sub(r"```[\w]*\n?", "", response)
-        response = response.strip()
-
-        # Try to find action patterns in the response
-        # Pattern matches: CLICK(123), TYPE(123, "text"), PRESS("key"), FINISH()
-        action_pattern = r'(CLICK\s*\(\s*\d+\s*\)|TYPE\s*\(\s*\d+\s*,\s*["\'].*?["\']\s*\)|PRESS\s*\(\s*["\'].*?["\']\s*\)|FINISH\s*\(\s*\))'
-
-        match = re.search(action_pattern, response, re.IGNORECASE)
-        if match:
-            return match.group(1)
-
-        # If no pattern match, return the original response (will likely fail parsing)
-        return response
-
-    def _query_llm(self, dom_context: str, goal: str) -> LLMResponse:
-        """
-        Query LLM with standardized prompt template
-
-        Args:
-            dom_context: Formatted element context
-            goal: User goal
-
-        Returns:
-            LLMResponse from LLM provider
-        """
-        system_prompt = f"""You are an AI web automation agent.
-
-GOAL: {goal}
-
-VISIBLE ELEMENTS (sorted by importance):
-{dom_context}
-
-VISUAL CUES EXPLAINED:
-- {{PRIMARY}}: Main call-to-action element on the page
-- {{CLICKABLE}}: Element is clickable
-- {{color:X}}: Background color name
-
-CRITICAL RESPONSE FORMAT:
-You MUST respond with ONLY ONE of these exact action formats:
-- CLICK(id) - Click element by ID
-- TYPE(id, "text") - Type text into element
-- PRESS("key") - Press keyboard key (Enter, Escape, Tab, ArrowDown, etc)
-- FINISH() - Task complete
-
-DO NOT include any explanation, reasoning, or natural language.
-DO NOT use markdown formatting or code blocks.
-DO NOT say "The next step is..." or anything similar.
-
-CORRECT Examples:
-CLICK(42)
-TYPE(15, "magic mouse")
-PRESS("Enter")
-FINISH()
-
-INCORRECT Examples (DO NOT DO THIS):
-"The next step is to click..."
-"I will type..."
-```CLICK(42)```
-"""
-
-        user_prompt = "Return the single action command:"
-
-        return self.llm.generate(system_prompt, user_prompt, temperature=0.0)
-
-    def _execute_action(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
-        """
-        Parse action string and execute SDK call
-
-        Args:
-            action_str: Action string from LLM (e.g., "CLICK(42)")
-            snap: Current snapshot (for context)
-
-        Returns:
-            Execution result dictionary
-        """
-        # Parse CLICK(42)
-        if match := re.match(r"CLICK\s*\(\s*(\d+)\s*\)", action_str, re.IGNORECASE):
-            element_id = int(match.group(1))
-            result = click(self.browser, element_id)
-            return {
-                "success": result.success,
-                "action": "click",
-                "element_id": element_id,
-                "outcome": result.outcome,
-                "url_changed": result.url_changed,
-            }
-
-        # Parse TYPE(42, "hello world")
-        elif match := re.match(
-            r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
-            action_str,
-            re.IGNORECASE,
-        ):
-            element_id = int(match.group(1))
-            text = match.group(2)
-            result = type_text(self.browser, element_id, text)
-            return {
-                "success": result.success,
-                "action": "type",
-                "element_id": element_id,
-                "text": text,
-                "outcome": result.outcome,
-            }
-
-        # Parse PRESS("Enter")
-        elif match := re.match(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', action_str, re.IGNORECASE):
-            key = match.group(1)
-            result = press(self.browser, key)
-            return {
-                "success": result.success,
-                "action": "press",
-                "key": key,
-                "outcome": result.outcome,
-            }
-
-        # Parse FINISH()
-        elif re.match(r"FINISH\s*\(\s*\)", action_str, re.IGNORECASE):
-            return {
-                "success": True,
-                "action": "finish",
-                "message": "Task marked as complete",
-            }
-
-        else:
-            raise ValueError(
-                f"Unknown action format: {action_str}\n"
-                f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
-            )
-
     def _track_tokens(self, goal: str, llm_response: LLMResponse):
         """
         Track token usage for analytics
@@ -721,8 +591,8 @@ def filter_elements(self, snapshot: Snapshot, goal: str | None = None) -> list[E
         """
         Filter elements from snapshot based on goal context.
 
-        This default implementation applies goal-based keyword matching to boost
-        relevant elements and filters out irrelevant ones.
+        This implementation uses ElementFilter to apply goal-based keyword matching
+        to boost relevant elements and filters out irrelevant ones.
 
         Args:
             snapshot: Current page snapshot
@@ -731,76 +601,7 @@ def filter_elements(self, snapshot: Snapshot, goal: str | None = None) -> list[E
         Returns:
             Filtered list of elements
         """
-        elements = snapshot.elements
-
-        # If no goal provided, return all elements (up to limit)
-        if not goal:
-            return elements[: self.default_snapshot_limit]
-
-        goal_lower = goal.lower()
-
-        # Extract keywords from goal
-        keywords = self._extract_keywords(goal_lower)
-
-        # Boost elements matching goal keywords
-        scored_elements = []
-        for el in elements:
-            score = el.importance
-
-            # Boost if element text matches goal
-            if el.text and any(kw in el.text.lower() for kw in keywords):
-                score += 0.3
-
-            # Boost if role matches goal intent
-            if "click" in goal_lower and el.visual_cues.is_clickable:
-                score += 0.2
-            if "type" in goal_lower and el.role in ["textbox", "searchbox"]:
-                score += 0.2
-            if "search" in goal_lower:
-                # Filter out non-interactive elements for search tasks
-                if el.role in ["link", "img"] and not el.visual_cues.is_primary:
-                    score -= 0.5
-
-            scored_elements.append((score, el))
-
-        # Re-sort by boosted score
-        scored_elements.sort(key=lambda x: x[0], reverse=True)
-        elements = [el for _, el in scored_elements]
-
-        return elements[: self.default_snapshot_limit]
-
-    def _extract_keywords(self, text: str) -> list[str]:
-        """
-        Extract meaningful keywords from goal text
-
-        Args:
-            text: Text to extract keywords from
-
-        Returns:
-            List of keywords
-        """
-        stopwords = {
-            "the",
-            "a",
-            "an",
-            "and",
-            "or",
-            "but",
-            "in",
-            "on",
-            "at",
-            "to",
-            "for",
-            "of",
-            "with",
-            "by",
-            "from",
-            "as",
-            "is",
-            "was",
-        }
-        words = text.split()
-        return [w for w in words if w not in stopwords and len(w) > 2]
+        return ElementFilter.filter_by_goal(snapshot, goal, self.default_snapshot_limit)
 
 
 class SentienceAgentAsync(BaseAgentAsync):
@@ -853,6 +654,10 @@ def __init__(
         self.tracer = tracer
         self.config = config or AgentConfig()
 
+        # Initialize handlers
+        self.llm_handler = LLMInteractionHandler(llm)
+        self.action_executor = ActionExecutor(browser)
+
         # Screenshot sequence counter
         # Execution history
         self.history: list[dict[str, Any]] = []
@@ -920,7 +725,10 @@ async def act(  # noqa: C901
         # Emit step_start trace event if tracer is enabled
         if self.tracer:
             pre_url = self.browser.page.url if self.browser.page else None
-            self.tracer.emit_step_start(
+            _safe_tracer_call(
+                self.tracer,
+                "emit_step_start",
+                self.verbose,
                 step_id=step_id,
                 step_index=self._step_count,
                 goal=goal,
@@ -970,17 +778,8 @@ async def act(  # noqa: C901
 
                 # Emit snapshot trace event if tracer is enabled
                 if self.tracer:
-                    # Include ALL elements with full data for DOM tree display
-                    # Use snap.elements (all elements) not filtered_elements
-                    elements_data = [el.model_dump() for el in snap.elements]
-
                     # Build snapshot event data
-                    snapshot_data = {
-                        "url": snap.url,
-                        "element_count": len(snap.elements),
-                        "timestamp": snap.timestamp,
-                        "elements": elements_data,  # Full element data for DOM tree
-                    }
+                    snapshot_data = TraceEventBuilder.build_snapshot_event(snap)
 
                     # Always include screenshot in trace event for studio viewer compatibility
                     # CloudTraceSink will extract and upload screenshots separately, then remove
@@ -1001,7 +800,10 @@ async def act(  # noqa: C901
                         if snap.screenshot_format:
                             snapshot_data["screenshot_format"] = snap.screenshot_format
 
-                    self.tracer.emit(
+                    _safe_tracer_call(
+                        self.tracer,
+                        "emit",
+                        self.verbose,
                         "snapshot",
                         snapshot_data,
                         step_id=step_id,
@@ -1020,14 +822,17 @@ async def act(  # noqa: C901
                 )
 
                 # 2. GROUND: Format elements for LLM context
-                context = self._build_context(filtered_snap, goal)
+                context = self.llm_handler.build_context(filtered_snap, goal)
 
                 # 3. THINK: Query LLM for next action
-                llm_response = self._query_llm(context, goal)
+                llm_response = self.llm_handler.query_llm(context, goal)
 
                 # Emit LLM query trace event if tracer is enabled
                 if self.tracer:
-                    self.tracer.emit(
+                    _safe_tracer_call(
+                        self.tracer,
+                        "emit",
+                        self.verbose,
                         "llm_query",
                         {
                             "prompt_tokens": llm_response.prompt_tokens,
@@ -1045,10 +850,10 @@ async def act(  # noqa: C901
                 self._track_tokens(goal, llm_response)
 
                 # Parse action from LLM response
-                action_str = self._extract_action_from_response(llm_response.content)
+                action_str = self.llm_handler.extract_action(llm_response.content)
 
                 # 4. EXECUTE: Parse and run action
-                result_dict = await self._execute_action(action_str, filtered_snap)
+                result_dict = await self.action_executor.execute_async(action_str, filtered_snap)
 
                 duration_ms = int((time.time() - start_time) * 1000)
 
@@ -1088,7 +893,10 @@ async def act(  # noqa: C901
                         for el in filtered_snap.elements[:50]
                     ]
 
-                    self.tracer.emit(
+                    _safe_tracer_call(
+                        self.tracer,
+                        "emit",
+                        self.verbose,
                         "action",
                         {
                             "action": result.action,
@@ -1195,32 +1003,41 @@ async def act(  # noqa: C901
                     }
 
                     # Build complete step_end event
-                    step_end_data = {
-                        "v": 1,
-                        "step_id": step_id,
-                        "step_index": self._step_count,
-                        "goal": goal,
-                        "attempt": attempt,
-                        "pre": {
-                            "url": pre_url,
-                            "snapshot_digest": snapshot_digest,
-                        },
-                        "llm": llm_data,
-                        "exec": exec_data,
-                        "post": {
-                            "url": post_url,
-                        },
-                        "verify": verify_data,
-                    }
+                    step_end_data = TraceEventBuilder.build_step_end_event(
+                        step_id=step_id,
+                        step_index=self._step_count,
+                        goal=goal,
+                        attempt=attempt,
+                        pre_url=pre_url,
+                        post_url=post_url,
+                        snapshot_digest=snapshot_digest,
+                        llm_data=llm_data,
+                        exec_data=exec_data,
+                        verify_data=verify_data,
+                    )
 
-                    self.tracer.emit("step_end", step_end_data, step_id=step_id)
+                    _safe_tracer_call(
+                        self.tracer,
+                        "emit",
+                        self.verbose,
+                        "step_end",
+                        step_end_data,
+                        step_id=step_id,
+                    )
 
                 return result
 
             except Exception as e:
                 # Emit error trace event if tracer is enabled
                 if self.tracer:
-                    self.tracer.emit_error(step_id=step_id, error=str(e), attempt=attempt)
+                    _safe_tracer_call(
+                        self.tracer,
+                        "emit_error",
+                        self.verbose,
+                        step_id=step_id,
+                        error=str(e),
+                        attempt=attempt,
+                    )
 
                 if attempt < max_retries:
                     if self.verbose:
@@ -1249,156 +1066,6 @@ async def act(  # noqa: C901
                     )
                     raise RuntimeError(f"Failed after {max_retries} retries: {e}")
 
-    def _build_context(self, snap: Snapshot, goal: str) -> str:
-        """Convert snapshot elements to token-efficient prompt string (same as sync version)"""
-        lines = []
-        # Note: elements are already filtered by filter_elements() in act()
-        for el in snap.elements:
-            # Extract visual cues
-            cues = []
-            if el.visual_cues.is_primary:
-                cues.append("PRIMARY")
-            if el.visual_cues.is_clickable:
-                cues.append("CLICKABLE")
-            if el.visual_cues.background_color_name:
-                cues.append(f"color:{el.visual_cues.background_color_name}")
-
-            # Format element line
-            cues_str = f" {{{','.join(cues)}}}" if cues else ""
-            text_preview = (
-                (el.text[:50] + "...") if el.text and len(el.text) > 50 else (el.text or "")
-            )
-
-            lines.append(
-                f'[{el.id}] <{el.role}> "{text_preview}"{cues_str} '
-                f"@ ({int(el.bbox.x)},{int(el.bbox.y)}) (Imp:{el.importance})"
-            )
-
-        return "\n".join(lines)
-
-    def _extract_action_from_response(self, response: str) -> str:
-        """Extract action command from LLM response (same as sync version)"""
-        # Remove markdown code blocks if present
-        response = re.sub(r"```[\w]*\n?", "", response)
-        response = response.strip()
-
-        # Try to find action patterns in the response
-        # Pattern matches: CLICK(123), TYPE(123, "text"), PRESS("key"), FINISH()
-        action_pattern = r'(CLICK\s*\(\s*\d+\s*\)|TYPE\s*\(\s*\d+\s*,\s*["\'].*?["\']\s*\)|PRESS\s*\(\s*["\'].*?["\']\s*\)|FINISH\s*\(\s*\))'
-
-        match = re.search(action_pattern, response, re.IGNORECASE)
-        if match:
-            return match.group(1)
-
-        # If no pattern match, return the original response (will likely fail parsing)
-        return response
-
-    def _query_llm(self, dom_context: str, goal: str) -> LLMResponse:
-        """Query LLM with standardized prompt template (same as sync version)"""
-        system_prompt = f"""You are an AI web automation agent.
-
-GOAL: {goal}
-
-VISIBLE ELEMENTS (sorted by importance):
-{dom_context}
-
-VISUAL CUES EXPLAINED:
-- {{PRIMARY}}: Main call-to-action element on the page
-- {{CLICKABLE}}: Element is clickable
-- {{color:X}}: Background color name
-
-CRITICAL RESPONSE FORMAT:
-You MUST respond with ONLY ONE of these exact action formats:
-- CLICK(id) - Click element by ID
-- TYPE(id, "text") - Type text into element
-- PRESS("key") - Press keyboard key (Enter, Escape, Tab, ArrowDown, etc)
-- FINISH() - Task complete
-
-DO NOT include any explanation, reasoning, or natural language.
-DO NOT use markdown formatting or code blocks.
-DO NOT say "The next step is..." or anything similar.
-
-CORRECT Examples:
-CLICK(42)
-TYPE(15, "magic mouse")
-PRESS("Enter")
-FINISH()
-
-INCORRECT Examples (DO NOT DO THIS):
-"The next step is to click..."
-"I will type..."
-```CLICK(42)```
-"""
-
-        user_prompt = "Return the single action command:"
-
-        return self.llm.generate(system_prompt, user_prompt, temperature=0.0)
-
-    async def _execute_action(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
-        """
-        Parse action string and execute SDK call (async)
-
-        Args:
-            action_str: Action string from LLM (e.g., "CLICK(42)")
-            snap: Current snapshot (for context)
-
-        Returns:
-            Execution result dictionary
-        """
-        # Parse CLICK(42)
-        if match := re.match(r"CLICK\s*\(\s*(\d+)\s*\)", action_str, re.IGNORECASE):
-            element_id = int(match.group(1))
-            result = await click_async(self.browser, element_id)
-            return {
-                "success": result.success,
-                "action": "click",
-                "element_id": element_id,
-                "outcome": result.outcome,
-                "url_changed": result.url_changed,
-            }
-
-        # Parse TYPE(42, "hello world")
-        elif match := re.match(
-            r'TYPE\s*\(\s*(\d+)\s*,\s*["\']([^"\']*)["\']\s*\)',
-            action_str,
-            re.IGNORECASE,
-        ):
-            element_id = int(match.group(1))
-            text = match.group(2)
-            result = await type_text_async(self.browser, element_id, text)
-            return {
-                "success": result.success,
-                "action": "type",
-                "element_id": element_id,
-                "text": text,
-                "outcome": result.outcome,
-            }
-
-        # Parse PRESS("Enter")
-        elif match := re.match(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', action_str, re.IGNORECASE):
-            key = match.group(1)
-            result = await press_async(self.browser, key)
-            return {
-                "success": result.success,
-                "action": "press",
-                "key": key,
-                "outcome": result.outcome,
-            }
-
-        # Parse FINISH()
-        elif re.match(r"FINISH\s*\(\s*\)", action_str, re.IGNORECASE):
-            return {
-                "success": True,
-                "action": "finish",
-                "message": "Task marked as complete",
-            }
-
-        else:
-            raise ValueError(
-                f"Unknown action format: {action_str}\n"
-                f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
-            )
-
     def _track_tokens(self, goal: str, llm_response: LLMResponse):
         """Track token usage for analytics (same as sync version)"""
         if llm_response.prompt_tokens:
@@ -1443,66 +1110,17 @@ def clear_history(self) -> None:
         }
 
     def filter_elements(self, snapshot: Snapshot, goal: str | None = None) -> list[Element]:
-        """Filter elements from snapshot based on goal context (same as sync version)"""
-        elements = snapshot.elements
-
-        # If no goal provided, return all elements (up to limit)
-        if not goal:
-            return elements[: self.default_snapshot_limit]
-
-        goal_lower = goal.lower()
-
-        # Extract keywords from goal
-        keywords = self._extract_keywords(goal_lower)
-
-        # Boost elements matching goal keywords
-        scored_elements = []
-        for el in elements:
-            score = el.importance
-
-            # Boost if element text matches goal
-            if el.text and any(kw in el.text.lower() for kw in keywords):
-                score += 0.3
-
-            # Boost if role matches goal intent
-            if "click" in goal_lower and el.visual_cues.is_clickable:
-                score += 0.2
-            if "type" in goal_lower and el.role in ["textbox", "searchbox"]:
-                score += 0.2
-            if "search" in goal_lower:
-                # Filter out non-interactive elements for search tasks
-                if el.role in ["link", "img"] and not el.visual_cues.is_primary:
-                    score -= 0.5
-
-            scored_elements.append((score, el))
-
-        # Re-sort by boosted score
-        scored_elements.sort(key=lambda x: x[0], reverse=True)
-        elements = [el for _, el in scored_elements]
-
-        return elements[: self.default_snapshot_limit]
-
-    def _extract_keywords(self, text: str) -> list[str]:
-        """Extract meaningful keywords from goal text (same as sync version)"""
-        stopwords = {
-            "the",
-            "a",
-            "an",
-            "and",
-            "or",
-            "but",
-            "in",
-            "on",
-            "at",
-            "to",
-            "for",
-            "of",
-            "with",
-            "by",
-            "from",
-            "as",
-            "is",
-            "was",
-        }
-        words = text.split()
-        return [w for w in words if w not in stopwords and len(w) > 2]
+        """
+        Filter elements from snapshot based on goal context.
+
+        This implementation uses ElementFilter to apply goal-based keyword matching
+        to boost relevant elements and filters out irrelevant ones.
+
+        Args:
+            snapshot: Current page snapshot
+            goal: User's goal (can inform filtering)
+
+        Returns:
+            Filtered list of elements
+        """
+        return ElementFilter.filter_by_goal(snapshot, goal, self.default_snapshot_limit)
diff --git a/sentience/base_agent.py b/sentience/base_agent.py
index a7c1e3c..43e00d2 100644
--- a/sentience/base_agent.py
+++ b/sentience/base_agent.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 """
 BaseAgent: Abstract base class for all Sentience agents
 Defines the interface that all agent implementations must follow
diff --git a/sentience/browser.py b/sentience/browser.py
index 4188e1d..a07dbdb 100644
--- a/sentience/browser.py
+++ b/sentience/browser.py
@@ -8,6 +8,7 @@
 import tempfile
 import time
 from pathlib import Path
+from typing import Optional, Union
 from urllib.parse import urlparse
 
 from playwright.async_api import BrowserContext as AsyncBrowserContext
diff --git a/sentience/browser_evaluator.py b/sentience/browser_evaluator.py
new file mode 100644
index 0000000..3cae2b4
--- /dev/null
+++ b/sentience/browser_evaluator.py
@@ -0,0 +1,299 @@
+"""
+Browser evaluation helper for common window.sentience API patterns.
+
+Consolidates repeated patterns for:
+- Waiting for extension injection
+- Calling window.sentience methods
+- Error handling with diagnostics
+"""
+
+from typing import Any, Optional, Union
+
+from playwright.async_api import Page as AsyncPage
+from playwright.sync_api import Page
+
+from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .sentience_methods import SentienceMethod
+
+
+class BrowserEvaluator:
+    """Helper class for common browser evaluation patterns"""
+
+    @staticmethod
+    def wait_for_extension(
+        page: Page | AsyncPage,
+        timeout_ms: int = 5000,
+    ) -> None:
+        """
+        Wait for window.sentience API to be available.
+
+        Args:
+            page: Playwright Page instance (sync or async)
+            timeout_ms: Timeout in milliseconds (default: 5000)
+
+        Raises:
+            RuntimeError: If extension fails to inject within timeout
+        """
+        if hasattr(page, "wait_for_function"):
+            # Sync page
+            try:
+                page.wait_for_function(
+                    "typeof window.sentience !== 'undefined'",
+                    timeout=timeout_ms,
+                )
+            except Exception as e:
+                diag = BrowserEvaluator._gather_diagnostics(page)
+                raise RuntimeError(
+                    f"Sentience extension failed to inject window.sentience API. "
+                    f"Is the extension loaded? Diagnostics: {diag}"
+                ) from e
+        else:
+            # Async page - should use async version
+            raise TypeError("Use wait_for_extension_async for async pages")
+
+    @staticmethod
+    async def wait_for_extension_async(
+        page: AsyncPage,
+        timeout_ms: int = 5000,
+    ) -> None:
+        """
+        Wait for window.sentience API to be available (async).
+
+        Args:
+            page: Playwright AsyncPage instance
+            timeout_ms: Timeout in milliseconds (default: 5000)
+
+        Raises:
+            RuntimeError: If extension fails to inject within timeout
+        """
+        try:
+            await page.wait_for_function(
+                "typeof window.sentience !== 'undefined'",
+                timeout=timeout_ms,
+            )
+        except Exception as e:
+            diag = await BrowserEvaluator._gather_diagnostics_async(page)
+            raise RuntimeError(
+                f"Sentience extension failed to inject window.sentience API. "
+                f"Is the extension loaded? Diagnostics: {diag}"
+            ) from e
+
+    @staticmethod
+    def _gather_diagnostics(page: Page | AsyncPage) -> dict[str, Any]:
+        """
+        Gather diagnostics about extension state.
+
+        Args:
+            page: Playwright Page instance
+
+        Returns:
+            Dictionary with diagnostic information
+        """
+        try:
+            if hasattr(page, "evaluate"):
+                # Sync page
+                return page.evaluate(
+                    """() => ({
+                        sentience_defined: typeof window.sentience !== 'undefined',
+                        extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
+                        url: window.location.href
+                    })"""
+                )
+            else:
+                return {"error": "Could not gather diagnostics - invalid page type"}
+        except Exception:
+            return {"error": "Could not gather diagnostics"}
+
+    @staticmethod
+    async def _gather_diagnostics_async(page: AsyncPage) -> dict[str, Any]:
+        """
+        Gather diagnostics about extension state (async).
+
+        Args:
+            page: Playwright AsyncPage instance
+
+        Returns:
+            Dictionary with diagnostic information
+        """
+        try:
+            return await page.evaluate(
+                """() => ({
+                    sentience_defined: typeof window.sentience !== 'undefined',
+                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
+                    url: window.location.href
+                })"""
+            )
+        except Exception:
+            return {"error": "Could not gather diagnostics"}
+
+    @staticmethod
+    def invoke(
+        page: Page,
+        method: SentienceMethod | str,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Any:
+        """
+        Invoke a window.sentience method with error handling (sync).
+
+        Args:
+            page: Playwright Page instance (sync)
+            method: SentienceMethod enum value or method name string (e.g., SentienceMethod.SNAPSHOT or "snapshot")
+            *args: Positional arguments to pass to the method
+            **kwargs: Keyword arguments to pass to the method
+
+        Returns:
+            Result from the method call
+
+        Raises:
+            RuntimeError: If method is not available or call fails
+
+        Example:
+            ```python
+            result = BrowserEvaluator.invoke(page, SentienceMethod.SNAPSHOT, limit=50)
+            success = BrowserEvaluator.invoke(page, SentienceMethod.CLICK, element_id)
+            ```
+        """
+        # Convert enum to string if needed
+        method_name = method.value if isinstance(method, SentienceMethod) else method
+
+        # Build JavaScript call
+        if args and kwargs:
+            # Both args and kwargs - use object spread
+            js_code = f"""
+            (args, kwargs) => {{
+                return window.sentience.{method_name}(...args, kwargs);
+            }}
+            """
+            result = page.evaluate(js_code, list(args), kwargs)
+        elif args:
+            # Only args
+            js_code = f"""
+            (args) => {{
+                return window.sentience.{method_name}(...args);
+            }}
+            """
+            result = page.evaluate(js_code, list(args))
+        elif kwargs:
+            # Only kwargs - pass as single object
+            js_code = f"""
+            (options) => {{
+                return window.sentience.{method_name}(options);
+            }}
+            """
+            result = page.evaluate(js_code, kwargs)
+        else:
+            # No arguments
+            js_code = f"""
+            () => {{
+                return window.sentience.{method_name}();
+            }}
+            """
+            result = page.evaluate(js_code)
+
+        return result
+
+    @staticmethod
+    async def invoke_async(
+        page: AsyncPage,
+        method: SentienceMethod | str,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Any:
+        """
+        Invoke a window.sentience method with error handling (async).
+
+        Args:
+            page: Playwright AsyncPage instance
+            method: SentienceMethod enum value or method name string (e.g., SentienceMethod.SNAPSHOT or "snapshot")
+            *args: Positional arguments to pass to the method
+            **kwargs: Keyword arguments to pass to the method
+
+        Returns:
+            Result from the method call
+
+        Raises:
+            RuntimeError: If method is not available or call fails
+
+        Example:
+            ```python
+            result = await BrowserEvaluator.invoke_async(page, SentienceMethod.SNAPSHOT, limit=50)
+            success = await BrowserEvaluator.invoke_async(page, SentienceMethod.CLICK, element_id)
+            ```
+        """
+        # Convert enum to string if needed
+        method_name = method.value if isinstance(method, SentienceMethod) else method
+
+        # Build JavaScript call
+        if args and kwargs:
+            js_code = f"""
+            (args, kwargs) => {{
+                return window.sentience.{method_name}(...args, kwargs);
+            }}
+            """
+            result = await page.evaluate(js_code, list(args), kwargs)
+        elif args:
+            js_code = f"""
+            (args) => {{
+                return window.sentience.{method_name}(...args);
+            }}
+            """
+            result = await page.evaluate(js_code, list(args))
+        elif kwargs:
+            js_code = f"""
+            (options) => {{
+                return window.sentience.{method_name}(options);
+            }}
+            """
+            result = await page.evaluate(js_code, kwargs)
+        else:
+            js_code = f"""
+            () => {{
+                return window.sentience.{method_name}();
+            }}
+            """
+            result = await page.evaluate(js_code)
+
+        return result
+
+    @staticmethod
+    def verify_method_exists(
+        page: Page,
+        method: SentienceMethod | str,
+    ) -> bool:
+        """
+        Verify that a window.sentience method exists.
+
+        Args:
+            page: Playwright Page instance (sync)
+            method: SentienceMethod enum value or method name string
+
+        Returns:
+            True if method exists, False otherwise
+        """
+        method_name = method.value if isinstance(method, SentienceMethod) else method
+        try:
+            return page.evaluate(f"typeof window.sentience.{method_name} !== 'undefined'")
+        except Exception:
+            return False
+
+    @staticmethod
+    async def verify_method_exists_async(
+        page: AsyncPage,
+        method: SentienceMethod | str,
+    ) -> bool:
+        """
+        Verify that a window.sentience method exists (async).
+
+        Args:
+            page: Playwright AsyncPage instance
+            method: SentienceMethod enum value or method name string
+
+        Returns:
+            True if method exists, False otherwise
+        """
+        method_name = method.value if isinstance(method, SentienceMethod) else method
+        try:
+            return await page.evaluate(f"typeof window.sentience.{method_name} !== 'undefined'")
+        except Exception:
+            return False
diff --git a/sentience/cloud_tracing.py b/sentience/cloud_tracing.py
index 55871c8..ab2d366 100644
--- a/sentience/cloud_tracing.py
+++ b/sentience/cloud_tracing.py
@@ -12,10 +12,12 @@
 from collections.abc import Callable
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
-from typing import Any, Protocol
+from typing import Any, Optional, Protocol, Union
 
 import requests
 
+from sentience.models import TraceStats
+from sentience.trace_file_manager import TraceFileManager
 from sentience.tracing import TraceSink
 
 
@@ -97,6 +99,7 @@ def __init__(
         # Use persistent cache directory instead of temp file
         # This ensures traces survive process crashes
         cache_dir = Path.home() / ".sentience" / "traces" / "pending"
+        # Create directory if it doesn't exist (ensure_directory is for file paths, not dirs)
         cache_dir.mkdir(parents=True, exist_ok=True)
 
         # Persistent file (survives process crash)
@@ -123,9 +126,7 @@ def emit(self, event: dict[str, Any]) -> None:
         if self._closed:
             raise RuntimeError("CloudTraceSink is closed")
 
-        json_str = json.dumps(event, ensure_ascii=False)
-        self._trace_file.write(json_str + "\n")
-        self._trace_file.flush()  # Ensure written to disk
+        TraceFileManager.write_event(self._trace_file, event)
 
     def close(
         self,
@@ -146,9 +147,25 @@ def close(
 
         self._closed = True
 
-        # Close file first
+        # Flush and sync file to disk before closing to ensure all data is written
+        # This is critical on CI systems where file system operations may be slower
+        self._trace_file.flush()
+        try:
+            # Force OS to write buffered data to disk
+            os.fsync(self._trace_file.fileno())
+        except (OSError, AttributeError):
+            # Some file handles don't support fsync (e.g., StringIO in tests)
+            # This is fine - flush() is usually sufficient
+            pass
         self._trace_file.close()
 
+        # Ensure file exists and has content before proceeding
+        if not self._path.exists() or self._path.stat().st_size == 0:
+            # No events were emitted, nothing to upload
+            if self.logger:
+                self.logger.warning("No trace events to upload (file is empty or missing)")
+            return
+
         # Generate index after closing file
         self._generate_index()
 
@@ -384,7 +401,9 @@ def _upload_index(self) -> None:
             if self.logger:
                 self.logger.warning(f"Error uploading trace index: {e}")
 
-    def _infer_final_status_from_trace(self) -> str:
+    def _infer_final_status_from_trace(
+        self, events: list[dict[str, Any]], run_end: dict[str, Any] | None
+    ) -> str:
         """
         Infer final status from trace events by reading the trace file.
 
@@ -435,103 +454,44 @@ def _infer_final_status_from_trace(self) -> str:
             # If we can't read the trace, default to unknown
             return "unknown"
 
-    def _extract_stats_from_trace(self) -> dict[str, Any]:
+    def _extract_stats_from_trace(self) -> TraceStats:
         """
         Extract execution statistics from trace file.
 
         Returns:
-            Dictionary with stats fields for /v1/traces/complete
+            TraceStats with stats fields for /v1/traces/complete
         """
         try:
-            # Read trace file to extract stats
-            with open(self._path, encoding="utf-8") as f:
-                events = []
-                for line in f:
-                    line = line.strip()
-                    if not line:
-                        continue
-                    try:
-                        event = json.loads(line)
-                        events.append(event)
-                    except json.JSONDecodeError:
-                        continue
-
-            if not events:
-                return {
-                    "total_steps": 0,
-                    "total_events": 0,
-                    "duration_ms": None,
-                    "final_status": "unknown",
-                    "started_at": None,
-                    "ended_at": None,
-                }
-
-            # Find run_start and run_end events
-            run_start = next((e for e in events if e.get("type") == "run_start"), None)
-            run_end = next((e for e in events if e.get("type") == "run_end"), None)
-
-            # Extract timestamps
-            started_at: str | None = None
-            ended_at: str | None = None
-            if run_start:
-                started_at = run_start.get("ts")
-            if run_end:
-                ended_at = run_end.get("ts")
-
-            # Calculate duration
-            duration_ms: int | None = None
-            if started_at and ended_at:
-                try:
-                    from datetime import datetime
-
-                    start_dt = datetime.fromisoformat(started_at.replace("Z", "+00:00"))
-                    end_dt = datetime.fromisoformat(ended_at.replace("Z", "+00:00"))
-                    delta = end_dt - start_dt
-                    duration_ms = int(delta.total_seconds() * 1000)
-                except Exception:
-                    pass
-
-            # Count steps (from step_start events, only first attempt)
-            step_indices = set()
-            for event in events:
-                if event.get("type") == "step_start":
-                    step_index = event.get("data", {}).get("step_index")
-                    if step_index is not None:
-                        step_indices.add(step_index)
-            total_steps = len(step_indices) if step_indices else 0
-
-            # If run_end has steps count, use that (more accurate)
-            if run_end:
-                steps_from_end = run_end.get("data", {}).get("steps")
-                if steps_from_end is not None:
-                    total_steps = max(total_steps, steps_from_end)
-
-            # Count total events
-            total_events = len(events)
-
-            # Infer final status
-            final_status = self._infer_final_status_from_trace()
-
-            return {
-                "total_steps": total_steps,
-                "total_events": total_events,
-                "duration_ms": duration_ms,
-                "final_status": final_status,
-                "started_at": started_at,
-                "ended_at": ended_at,
-            }
+            # Check if file exists before reading
+            if not self._path.exists():
+                if self.logger:
+                    self.logger.warning(f"Trace file not found: {self._path}")
+                return TraceStats(
+                    total_steps=0,
+                    total_events=0,
+                    duration_ms=None,
+                    final_status="unknown",
+                    started_at=None,
+                    ended_at=None,
+                )
 
+            # Read trace file to extract stats
+            events = TraceFileManager.read_events(self._path)
+            # Use TraceFileManager to extract stats (with custom status inference)
+            return TraceFileManager.extract_stats(
+                events, infer_status_func=self._infer_final_status_from_trace
+            )
         except Exception as e:
             if self.logger:
                 self.logger.warning(f"Error extracting stats from trace: {e}")
-            return {
-                "total_steps": 0,
-                "total_events": 0,
-                "duration_ms": None,
-                "final_status": "unknown",
-                "started_at": None,
-                "ended_at": None,
-            }
+            return TraceStats(
+                total_steps=0,
+                total_events=0,
+                duration_ms=None,
+                final_status="unknown",
+                started_at=None,
+                ended_at=None,
+            )
 
     def _complete_trace(self) -> None:
         """
@@ -547,22 +507,21 @@ def _complete_trace(self) -> None:
             # Extract stats from trace file
             stats = self._extract_stats_from_trace()
 
-            # Add file size fields
-            stats.update(
-                {
-                    "trace_file_size_bytes": self.trace_file_size_bytes,
-                    "screenshot_total_size_bytes": self.screenshot_total_size_bytes,
-                    "screenshot_count": self.screenshot_count,
-                    "index_file_size_bytes": self.index_file_size_bytes,
-                }
-            )
+            # Build completion payload with stats and file size fields
+            completion_payload = {
+                **stats.model_dump(),  # Convert TraceStats to dict
+                "trace_file_size_bytes": self.trace_file_size_bytes,
+                "screenshot_total_size_bytes": self.screenshot_total_size_bytes,
+                "screenshot_count": self.screenshot_count,
+                "index_file_size_bytes": self.index_file_size_bytes,
+            }
 
             response = requests.post(
                 f"{self.api_url}/v1/traces/complete",
                 headers={"Authorization": f"Bearer {self.api_key}"},
                 json={
                     "run_id": self.run_id,
-                    "stats": stats,
+                    "stats": completion_payload,
                 },
                 timeout=10,
             )
@@ -593,28 +552,26 @@ def _extract_screenshots_from_trace(self) -> dict[int, dict[str, Any]]:
         sequence = 0
 
         try:
-            with open(self._path, encoding="utf-8") as f:
-                for line in f:
-                    line = line.strip()
-                    if not line:
-                        continue
+            # Check if file exists before reading
+            if not self._path.exists():
+                if self.logger:
+                    self.logger.warning(f"Trace file not found: {self._path}")
+                return screenshots
 
-                    try:
-                        event = json.loads(line)
-                        # Check if this is a snapshot event with screenshot
-                        if event.get("type") == "snapshot":
-                            data = event.get("data", {})
-                            screenshot_base64 = data.get("screenshot_base64")
-
-                            if screenshot_base64:
-                                sequence += 1
-                                screenshots[sequence] = {
-                                    "base64": screenshot_base64,
-                                    "format": data.get("screenshot_format", "jpeg"),
-                                    "step_id": event.get("step_id"),
-                                }
-                    except json.JSONDecodeError:
-                        continue
+            events = TraceFileManager.read_events(self._path)
+            for event in events:
+                # Check if this is a snapshot event with screenshot
+                if event.get("type") == "snapshot":
+                    data = event.get("data", {})
+                    screenshot_base64 = data.get("screenshot_base64")
+
+                    if screenshot_base64:
+                        sequence += 1
+                        screenshots[sequence] = {
+                            "base64": screenshot_base64,
+                            "format": data.get("screenshot_format", "jpeg"),
+                            "step_id": event.get("step_id"),
+                        }
         except Exception as e:
             if self.logger:
                 self.logger.error(f"Error extracting screenshots: {e}")
@@ -629,34 +586,32 @@ def _create_cleaned_trace(self, output_path: Path) -> None:
             output_path: Path to write cleaned trace file
         """
         try:
-            with (
-                open(self._path, encoding="utf-8") as infile,
-                open(output_path, "w", encoding="utf-8") as outfile,
-            ):
-                for line in infile:
-                    line = line.strip()
-                    if not line:
-                        continue
+            # Check if file exists before reading
+            if not self._path.exists():
+                if self.logger:
+                    self.logger.warning(f"Trace file not found: {self._path}")
+                # Create empty cleaned trace file
+                output_path.parent.mkdir(parents=True, exist_ok=True)
+                output_path.touch()
+                return
 
-                    try:
-                        event = json.loads(line)
-                        # Remove screenshot_base64 from snapshot events
-                        if event.get("type") == "snapshot":
-                            data = event.get("data", {})
-                            if "screenshot_base64" in data:
-                                # Create copy without screenshot fields
-                                cleaned_data = {
-                                    k: v
-                                    for k, v in data.items()
-                                    if k not in ("screenshot_base64", "screenshot_format")
-                                }
-                                event["data"] = cleaned_data
-
-                        # Write cleaned event
-                        outfile.write(json.dumps(event, ensure_ascii=False) + "\n")
-                    except json.JSONDecodeError:
-                        # Skip invalid lines
-                        continue
+            events = TraceFileManager.read_events(self._path)
+            with open(output_path, "w", encoding="utf-8") as outfile:
+                for event in events:
+                    # Remove screenshot_base64 from snapshot events
+                    if event.get("type") == "snapshot":
+                        data = event.get("data", {})
+                        if "screenshot_base64" in data:
+                            # Create copy without screenshot fields
+                            cleaned_data = {
+                                k: v
+                                for k, v in data.items()
+                                if k not in ("screenshot_base64", "screenshot_format")
+                            }
+                            event["data"] = cleaned_data
+
+                    # Write cleaned event
+                    TraceFileManager.write_event(outfile, event)
         except Exception as e:
             if self.logger:
                 self.logger.error(f"Error creating cleaned trace: {e}")
diff --git a/sentience/conversational_agent.py b/sentience/conversational_agent.py
index c207f04..f9f2fc8 100644
--- a/sentience/conversational_agent.py
+++ b/sentience/conversational_agent.py
@@ -5,12 +5,13 @@
 
 import json
 import time
-from typing import Any
+from typing import Any, Union
 
 from .agent import SentienceAgent
 from .browser import SentienceBrowser
 from .llm_provider import LLMProvider
-from .models import Snapshot, SnapshotOptions
+from .models import ExtractionResult, Snapshot, SnapshotOptions, StepExecutionResult
+from .protocols import BrowserProtocol
 from .snapshot import snapshot
 
 
@@ -29,12 +30,18 @@ class ConversationalAgent:
          The top result is from amazon.com selling the Apple Magic Mouse 2 for $79."
     """
 
-    def __init__(self, browser: SentienceBrowser, llm: LLMProvider, verbose: bool = True):
+    def __init__(
+        self,
+        browser: SentienceBrowser | BrowserProtocol,
+        llm: LLMProvider,
+        verbose: bool = True,
+    ):
         """
         Initialize conversational agent
 
         Args:
-            browser: SentienceBrowser instance
+            browser: SentienceBrowser instance or BrowserProtocol-compatible object
+                    (for testing, can use mock objects that implement BrowserProtocol)
             llm: LLM provider (OpenAI, Anthropic, LocalLLM, etc.)
             verbose: Print step-by-step execution logs (default: True)
         """
@@ -90,7 +97,7 @@ def execute(self, user_input: str) -> str:
             step_result = self._execute_step(step)
             execution_results.append(step_result)
 
-            if not step_result.get("success", False):
+            if not step_result.success:
                 # Early exit on failure
                 if self.verbose:
                     print(f"⚠️  Step failed: {step['description']}")
@@ -203,7 +210,7 @@ def _create_plan(self, user_input: str) -> dict[str, Any]:
                 "expected_outcome": "Complete user request",
             }
 
-    def _execute_step(self, step: dict[str, Any]) -> dict[str, Any]:
+    def _execute_step(self, step: dict[str, Any]) -> StepExecutionResult:
         """
         Execute a single atomic step from the plan
 
@@ -230,46 +237,42 @@ def _execute_step(self, step: dict[str, Any]) -> dict[str, Any]:
                 self.execution_context["current_url"] = url
                 time.sleep(1)  # Brief wait for page to settle
 
-                return {"success": True, "action": action, "data": {"url": url}}
+                return StepExecutionResult(success=True, action=action, data={"url": url})
 
             elif action == "FIND_AND_CLICK":
                 element_desc = params["element_description"]
                 # Use technical agent to find and click (returns AgentActionResult)
                 result = self.technical_agent.act(f"Click the {element_desc}")
-                return {
-                    "success": result.success,  # Use attribute access
-                    "action": action,
-                    "data": result.model_dump(),  # Convert to dict for flexibility
-                }
+                return StepExecutionResult(
+                    success=result.success,
+                    action=action,
+                    data=result.model_dump(),  # Convert to dict for flexibility
+                )
 
             elif action == "FIND_AND_TYPE":
                 element_desc = params["element_description"]
                 text = params["text"]
                 # Use technical agent to find input and type (returns AgentActionResult)
                 result = self.technical_agent.act(f"Type '{text}' into {element_desc}")
-                return {
-                    "success": result.success,  # Use attribute access
-                    "action": action,
-                    "data": {"text": text, "result": result.model_dump()},
-                }
+                return StepExecutionResult(
+                    success=result.success,
+                    action=action,
+                    data={"text": text, "result": result.model_dump()},
+                )
 
             elif action == "PRESS_KEY":
                 key = params["key"]
                 result = self.technical_agent.act(f"Press {key} key")
-                return {
-                    "success": result.success,  # Use attribute access
-                    "action": action,
-                    "data": {"key": key, "result": result.model_dump()},
-                }
+                return StepExecutionResult(
+                    success=result.success,
+                    action=action,
+                    data={"key": key, "result": result.model_dump()},
+                )
 
             elif action == "WAIT":
                 duration = params.get("duration", 2.0)
                 time.sleep(duration)
-                return {
-                    "success": True,
-                    "action": action,
-                    "data": {"duration": duration},
-                }
+                return StepExecutionResult(success=True, action=action, data={"duration": duration})
 
             elif action == "EXTRACT_INFO":
                 info_type = params["info_type"]
@@ -279,21 +282,28 @@ def _execute_step(self, step: dict[str, Any]) -> dict[str, Any]:
                 # Use LLM to extract specific information
                 extracted = self._extract_information(snap, info_type)
 
-                return {
-                    "success": True,
-                    "action": action,
-                    "data": {"extracted": extracted, "info_type": info_type},
-                }
+                return StepExecutionResult(
+                    success=True,
+                    action=action,
+                    data={
+                        "extracted": (
+                            extracted.model_dump()
+                            if isinstance(extracted, ExtractionResult)
+                            else extracted
+                        ),
+                        "info_type": info_type,
+                    },
+                )
 
             elif action == "VERIFY":
                 condition = params["condition"]
                 # Verify condition using current page state
                 is_verified = self._verify_condition(condition)
-                return {
-                    "success": is_verified,
-                    "action": action,
-                    "data": {"condition": condition, "verified": is_verified},
-                }
+                return StepExecutionResult(
+                    success=is_verified,
+                    action=action,
+                    data={"condition": condition, "verified": is_verified},
+                )
 
             else:
                 raise ValueError(f"Unknown action: {action}")
@@ -301,9 +311,9 @@ def _execute_step(self, step: dict[str, Any]) -> dict[str, Any]:
         except Exception as e:
             if self.verbose:
                 print(f"❌ Step failed: {e}")
-            return {"success": False, "action": action, "error": str(e)}
+            return StepExecutionResult(success=False, action=action, error=str(e))
 
-    def _extract_information(self, snap: Snapshot, info_type: str) -> dict[str, Any]:
+    def _extract_information(self, snap: Snapshot, info_type: str) -> ExtractionResult:
         """
         Extract specific information from snapshot using LLM
 
@@ -403,14 +413,38 @@ def _synthesize_response(
             Human-readable response string
         """
         # Build summary of what happened
-        successful_steps = [r for r in execution_results if r.get("success")]
-        failed_steps = [r for r in execution_results if not r.get("success")]
+        successful_steps = [
+            r
+            for r in execution_results
+            if (isinstance(r, StepExecutionResult) and r.success)
+            or (isinstance(r, dict) and r.get("success", False))
+        ]
+        failed_steps = [
+            r
+            for r in execution_results
+            if (isinstance(r, StepExecutionResult) and not r.success)
+            or (isinstance(r, dict) and not r.get("success", False))
+        ]
 
         # Extract key data
         extracted_data = []
         for result in execution_results:
-            if result.get("action") == "EXTRACT_INFO":
-                extracted_data.append(result.get("data", {}).get("extracted", {}))
+            if isinstance(result, StepExecutionResult):
+                action = result.action
+                data = result.data
+            else:
+                action = result.get("action")
+                data = result.get("data", {})
+
+            if action == "EXTRACT_INFO":
+                extracted = data.get("extracted", {})
+                if isinstance(extracted, dict):
+                    extracted_data.append(extracted)
+                else:
+                    # If it's an ExtractionResult model, convert to dict
+                    extracted_data.append(
+                        extracted.model_dump() if hasattr(extracted, "model_dump") else extracted
+                    )
 
         # Use LLM to create natural response
         system_prompt = """You are a helpful assistant that summarizes web automation results
diff --git a/sentience/element_filter.py b/sentience/element_filter.py
new file mode 100644
index 0000000..a6256c7
--- /dev/null
+++ b/sentience/element_filter.py
@@ -0,0 +1,134 @@
+"""
+Element filtering utilities for agent-based element selection.
+
+This module provides centralized element filtering logic to reduce duplication
+across agent implementations.
+"""
+
+from typing import Optional
+
+from .models import Element, Snapshot
+
+
+class ElementFilter:
+    """
+    Centralized element filtering logic for agent-based element selection.
+
+    Provides static methods for filtering elements based on:
+    - Importance scores
+    - Goal-based keyword matching
+    - Role and visual properties
+    """
+
+    # Common stopwords for keyword extraction
+    STOPWORDS = {
+        "the",
+        "a",
+        "an",
+        "and",
+        "or",
+        "but",
+        "in",
+        "on",
+        "at",
+        "to",
+        "for",
+        "of",
+        "with",
+        "by",
+        "from",
+        "as",
+        "is",
+        "was",
+    }
+
+    @staticmethod
+    def filter_by_importance(
+        snapshot: Snapshot,
+        max_elements: int = 50,
+    ) -> list[Element]:
+        """
+        Filter elements by importance score (simple top-N selection).
+
+        Args:
+            snapshot: Current page snapshot
+            max_elements: Maximum number of elements to return
+
+        Returns:
+            Top N elements sorted by importance score
+        """
+        elements = snapshot.elements
+        # Elements are already sorted by importance in snapshot
+        return elements[:max_elements]
+
+    @staticmethod
+    def filter_by_goal(
+        snapshot: Snapshot,
+        goal: str | None,
+        max_elements: int = 100,
+    ) -> list[Element]:
+        """
+        Filter elements from snapshot based on goal context.
+
+        Applies goal-based keyword matching to boost relevant elements
+        and filters out irrelevant ones.
+
+        Args:
+            snapshot: Current page snapshot
+            goal: User's goal (can inform filtering)
+            max_elements: Maximum number of elements to return
+
+        Returns:
+            Filtered list of elements sorted by boosted importance score
+        """
+        elements = snapshot.elements
+
+        # If no goal provided, return all elements (up to limit)
+        if not goal:
+            return elements[:max_elements]
+
+        goal_lower = goal.lower()
+
+        # Extract keywords from goal
+        keywords = ElementFilter._extract_keywords(goal_lower)
+
+        # Boost elements matching goal keywords
+        scored_elements = []
+        for el in elements:
+            score = el.importance
+
+            # Boost if element text matches goal
+            if el.text and any(kw in el.text.lower() for kw in keywords):
+                score += 0.3
+
+            # Boost if role matches goal intent
+            if "click" in goal_lower and el.visual_cues.is_clickable:
+                score += 0.2
+            if "type" in goal_lower and el.role in ["textbox", "searchbox"]:
+                score += 0.2
+            if "search" in goal_lower:
+                # Filter out non-interactive elements for search tasks
+                if el.role in ["link", "img"] and not el.visual_cues.is_primary:
+                    score -= 0.5
+
+            scored_elements.append((score, el))
+
+        # Re-sort by boosted score
+        scored_elements.sort(key=lambda x: x[0], reverse=True)
+        elements = [el for _, el in scored_elements]
+
+        return elements[:max_elements]
+
+    @staticmethod
+    def _extract_keywords(text: str) -> list[str]:
+        """
+        Extract meaningful keywords from goal text.
+
+        Args:
+            text: Text to extract keywords from
+
+        Returns:
+            List of keywords (non-stopwords, length > 2)
+        """
+        words = text.split()
+        return [w for w in words if w not in ElementFilter.STOPWORDS and len(w) > 2]
diff --git a/sentience/formatting.py b/sentience/formatting.py
index f8961c5..b8dd653 100644
--- a/sentience/formatting.py
+++ b/sentience/formatting.py
@@ -1,59 +1,15 @@
 """
 Snapshot formatting utilities for LLM prompts.
 
-Provides functions to convert Sentience snapshots into text format suitable
-for LLM consumption.
-"""
-
-from typing import List
-
-from .models import Snapshot
-
-
-def format_snapshot_for_llm(snap: Snapshot, limit: int = 50) -> str:
-    """
-    Convert snapshot elements to text format for LLM consumption.
-
-    This is the canonical way Sentience formats DOM state for LLMs.
-    The format includes element ID, role, text preview, visual cues,
-    position, and importance score.
+DEPRECATED: This module is maintained for backward compatibility only.
+New code should import from sentience.utils.formatting or sentience directly:
 
-    Args:
-        snap: Snapshot object with elements
-        limit: Maximum number of elements to include (default: 50)
-
-    Returns:
-        Formatted string with one element per line
-
-    Example:
-        >>> snap = snapshot(browser)
-        >>> formatted = format_snapshot_for_llm(snap, limit=10)
-        >>> print(formatted)
-        [1] <button> "Sign In" {PRIMARY,CLICKABLE} @ (100,50) (Imp:10)
-        [2] <input> "Email address" @ (100,100) (Imp:8)
-        ...
-    """
-    lines: list[str] = []
-
-    for el in snap.elements[:limit]:
-        # Build visual cues string
-        cues = []
-        if getattr(el.visual_cues, "is_primary", False):
-            cues.append("PRIMARY")
-        if getattr(el.visual_cues, "is_clickable", False):
-            cues.append("CLICKABLE")
-
-        cues_str = f" {{{','.join(cues)}}}" if cues else ""
-
-        # Format text preview (truncate to 50 chars)
-        text_preview = el.text or ""
-        if len(text_preview) > 50:
-            text_preview = text_preview[:50] + "..."
+    from sentience.utils.formatting import format_snapshot_for_llm
+    # or
+    from sentience import format_snapshot_for_llm
+"""
 
-        # Build element line: [ID] <role> "text" {cues} @ (x,y) (Imp:score)
-        lines.append(
-            f'[{el.id}] <{el.role}> "{text_preview}"{cues_str} '
-            f"@ ({int(el.bbox.x)},{int(el.bbox.y)}) (Imp:{el.importance})"
-        )
+# Re-export from new location for backward compatibility
+from .utils.formatting import format_snapshot_for_llm
 
-    return "\n".join(lines)
+__all__ = ["format_snapshot_for_llm"]
diff --git a/sentience/inspector.py b/sentience/inspector.py
index 8a84c9f..e8839d8 100644
--- a/sentience/inspector.py
+++ b/sentience/inspector.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 """
 Inspector tool - helps developers see what the agent "sees"
 """
diff --git a/sentience/llm_interaction_handler.py b/sentience/llm_interaction_handler.py
new file mode 100644
index 0000000..008e155
--- /dev/null
+++ b/sentience/llm_interaction_handler.py
@@ -0,0 +1,146 @@
+"""
+LLM Interaction Handler for Sentience Agent.
+
+Handles all LLM-related operations: context building, querying, and response parsing.
+This separates LLM interaction concerns from action execution.
+"""
+
+import re
+from typing import Optional
+
+from .llm_provider import LLMProvider, LLMResponse
+from .models import Snapshot
+
+
+class LLMInteractionHandler:
+    """
+    Handles LLM queries and response parsing for Sentience Agent.
+
+    This class encapsulates all LLM interaction logic, making it easier to:
+    - Test LLM interactions independently
+    - Swap LLM providers without changing agent code
+    - Modify prompt templates in one place
+    """
+
+    def __init__(self, llm: LLMProvider):
+        """
+        Initialize LLM interaction handler.
+
+        Args:
+            llm: LLM provider instance (OpenAIProvider, AnthropicProvider, etc.)
+        """
+        self.llm = llm
+
+    def build_context(self, snap: Snapshot, goal: str | None = None) -> str:
+        """
+        Convert snapshot elements to token-efficient prompt string.
+
+        Format: [ID] <role> "text" {cues} @ (x,y) (Imp:score)
+
+        Args:
+            snap: Snapshot object
+            goal: Optional user goal (for context, currently unused but kept for API consistency)
+
+        Returns:
+            Formatted element context string
+        """
+        lines = []
+        for el in snap.elements:
+            # Extract visual cues
+            cues = []
+            if el.visual_cues.is_primary:
+                cues.append("PRIMARY")
+            if el.visual_cues.is_clickable:
+                cues.append("CLICKABLE")
+            if el.visual_cues.background_color_name:
+                cues.append(f"color:{el.visual_cues.background_color_name}")
+
+            # Format element line
+            cues_str = f" {{{','.join(cues)}}}" if cues else ""
+            text_preview = (
+                (el.text[:50] + "...") if el.text and len(el.text) > 50 else (el.text or "")
+            )
+
+            lines.append(
+                f'[{el.id}] <{el.role}> "{text_preview}"{cues_str} '
+                f"@ ({int(el.bbox.x)},{int(el.bbox.y)}) (Imp:{el.importance})"
+            )
+
+        return "\n".join(lines)
+
+    def query_llm(self, dom_context: str, goal: str) -> LLMResponse:
+        """
+        Query LLM with standardized prompt template.
+
+        Args:
+            dom_context: Formatted element context from build_context()
+            goal: User goal
+
+        Returns:
+            LLMResponse from LLM provider
+        """
+        system_prompt = f"""You are an AI web automation agent.
+
+GOAL: {goal}
+
+VISIBLE ELEMENTS (sorted by importance):
+{dom_context}
+
+VISUAL CUES EXPLAINED:
+- {{PRIMARY}}: Main call-to-action element on the page
+- {{CLICKABLE}}: Element is clickable
+- {{color:X}}: Background color name
+
+CRITICAL RESPONSE FORMAT:
+You MUST respond with ONLY ONE of these exact action formats:
+- CLICK(id) - Click element by ID
+- TYPE(id, "text") - Type text into element
+- PRESS("key") - Press keyboard key (Enter, Escape, Tab, ArrowDown, etc)
+- FINISH() - Task complete
+
+DO NOT include any explanation, reasoning, or natural language.
+DO NOT use markdown formatting or code blocks.
+DO NOT say "The next step is..." or anything similar.
+
+CORRECT Examples:
+CLICK(42)
+TYPE(15, "magic mouse")
+PRESS("Enter")
+FINISH()
+
+INCORRECT Examples (DO NOT DO THIS):
+"The next step is to click..."
+"I will type..."
+```CLICK(42)```
+"""
+
+        user_prompt = "Return the single action command:"
+
+        return self.llm.generate(system_prompt, user_prompt, temperature=0.0)
+
+    def extract_action(self, response: str) -> str:
+        """
+        Extract action command from LLM response.
+
+        Handles cases where the LLM adds extra explanation despite instructions.
+
+        Args:
+            response: Raw LLM response text
+
+        Returns:
+            Cleaned action command string (e.g., "CLICK(42)", "TYPE(15, \"text\")")
+        """
+        # Remove markdown code blocks if present
+        response = re.sub(r"```[\w]*\n?", "", response)
+        response = response.strip()
+
+        # Try to find action patterns in the response
+        # Pattern matches: CLICK(123), TYPE(123, "text"), PRESS("key"), FINISH()
+        action_pattern = r'(CLICK\s*\(\s*\d+\s*\)|TYPE\s*\(\s*\d+\s*,\s*["\'].*?["\']\s*\)|PRESS\s*\(\s*["\'].*?["\']\s*\)|FINISH\s*\(\s*\))'
+
+        match = re.search(action_pattern, response, re.IGNORECASE)
+        if match:
+            return match.group(1)
+
+        # If no pattern match, return the original response (will likely fail parsing)
+        return response
diff --git a/sentience/llm_provider.py b/sentience/llm_provider.py
index 6758c1c..650f17f 100644
--- a/sentience/llm_provider.py
+++ b/sentience/llm_provider.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 """
 LLM Provider abstraction layer for Sentience SDK
 Enables "Bring Your Own Brain" (BYOB) pattern - plug in any LLM provider
@@ -6,6 +8,9 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 
+from .llm_provider_utils import get_api_key_from_env, handle_provider_error, require_package
+from .llm_response_builder import LLMResponseBuilder
+
 
 @dataclass
 class LLMResponse:
@@ -31,6 +36,15 @@ class LLMProvider(ABC):
     - Any other completion API
     """
 
+    def __init__(self, model: str):
+        """
+        Initialize LLM provider with model name.
+
+        Args:
+            model: Model identifier (e.g., "gpt-4o", "claude-3-sonnet")
+        """
+        self._model_name = model
+
     @abstractmethod
     def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
         """
@@ -95,13 +109,16 @@ def __init__(
             base_url: Custom API base URL (for compatible APIs)
             organization: OpenAI organization ID
         """
-        try:
-            from openai import OpenAI
-        except ImportError:
-            raise ImportError("OpenAI package not installed. Install with: pip install openai")
+        super().__init__(model)  # Initialize base class with model name
+
+        OpenAI = require_package(
+            "openai",
+            "openai",
+            "OpenAI",
+            "pip install openai",
+        )
 
         self.client = OpenAI(api_key=api_key, base_url=base_url, organization=organization)
-        self._model_name = model
 
     def generate(
         self,
@@ -148,12 +165,15 @@ def generate(
         api_params.update(kwargs)
 
         # Call OpenAI API
-        response = self.client.chat.completions.create(**api_params)
+        try:
+            response = self.client.chat.completions.create(**api_params)
+        except Exception as e:
+            handle_provider_error(e, "OpenAI", "generate response")
 
         choice = response.choices[0]
         usage = response.usage
 
-        return LLMResponse(
+        return LLMResponseBuilder.from_openai_format(
             content=choice.message.content,
             prompt_tokens=usage.prompt_tokens if usage else None,
             completion_tokens=usage.completion_tokens if usage else None,
@@ -191,15 +211,16 @@ def __init__(self, api_key: str | None = None, model: str = "claude-3-5-sonnet-2
             api_key: Anthropic API key (or set ANTHROPIC_API_KEY env var)
             model: Model name (claude-3-opus, claude-3-sonnet, claude-3-haiku, etc.)
         """
-        try:
-            from anthropic import Anthropic
-        except ImportError:
-            raise ImportError(
-                "Anthropic package not installed. Install with: pip install anthropic"
-            )
+        super().__init__(model)  # Initialize base class with model name
+
+        Anthropic = require_package(
+            "anthropic",
+            "anthropic",
+            "Anthropic",
+            "pip install anthropic",
+        )
 
         self.client = Anthropic(api_key=api_key)
-        self._model_name = model
 
     def generate(
         self,
@@ -237,21 +258,19 @@ def generate(
         api_params.update(kwargs)
 
         # Call Anthropic API
-        response = self.client.messages.create(**api_params)
+        try:
+            response = self.client.messages.create(**api_params)
+        except Exception as e:
+            handle_provider_error(e, "Anthropic", "generate response")
 
         content = response.content[0].text if response.content else ""
 
-        return LLMResponse(
+        return LLMResponseBuilder.from_anthropic_format(
             content=content,
-            prompt_tokens=response.usage.input_tokens if hasattr(response, "usage") else None,
-            completion_tokens=response.usage.output_tokens if hasattr(response, "usage") else None,
-            total_tokens=(
-                (response.usage.input_tokens + response.usage.output_tokens)
-                if hasattr(response, "usage")
-                else None
-            ),
+            input_tokens=response.usage.input_tokens if hasattr(response, "usage") else None,
+            output_tokens=response.usage.output_tokens if hasattr(response, "usage") else None,
             model_name=response.model,
-            finish_reason=response.stop_reason,
+            stop_reason=response.stop_reason,
         )
 
     def supports_json_mode(self) -> bool:
@@ -285,13 +304,16 @@ def __init__(self, api_key: str | None = None, model: str = "glm-4-plus"):
             api_key: Zhipu AI API key (or set GLM_API_KEY env var)
             model: Model name (glm-4-plus, glm-4, glm-4-air, glm-4-flash, etc.)
         """
-        try:
-            from zhipuai import ZhipuAI
-        except ImportError:
-            raise ImportError("ZhipuAI package not installed. Install with: pip install zhipuai")
+        super().__init__(model)  # Initialize base class with model name
+
+        ZhipuAI = require_package(
+            "zhipuai",
+            "zhipuai",
+            "ZhipuAI",
+            "pip install zhipuai",
+        )
 
         self.client = ZhipuAI(api_key=api_key)
-        self._model_name = model
 
     def generate(
         self,
@@ -333,12 +355,15 @@ def generate(
         api_params.update(kwargs)
 
         # Call GLM API
-        response = self.client.chat.completions.create(**api_params)
+        try:
+            response = self.client.chat.completions.create(**api_params)
+        except Exception as e:
+            handle_provider_error(e, "GLM", "generate response")
 
         choice = response.choices[0]
         usage = response.usage
 
-        return LLMResponse(
+        return LLMResponseBuilder.from_openai_format(
             content=choice.message.content,
             prompt_tokens=usage.prompt_tokens if usage else None,
             completion_tokens=usage.completion_tokens if usage else None,
@@ -378,25 +403,20 @@ def __init__(self, api_key: str | None = None, model: str = "gemini-2.0-flash-ex
             api_key: Google API key (or set GEMINI_API_KEY or GOOGLE_API_KEY env var)
             model: Model name (gemini-2.0-flash-exp, gemini-1.5-pro, gemini-1.5-flash, etc.)
         """
-        try:
-            import google.generativeai as genai
-        except ImportError:
-            raise ImportError(
-                "Google Generative AI package not installed. Install with: pip install google-generativeai"
-            )
+        super().__init__(model)  # Initialize base class with model name
 
-        # Configure API key
+        genai = require_package(
+            "google-generativeai",
+            "google.generativeai",
+            install_command="pip install google-generativeai",
+        )
+
+        # Configure API key (check parameter first, then environment variables)
+        api_key = get_api_key_from_env(["GEMINI_API_KEY", "GOOGLE_API_KEY"], api_key)
         if api_key:
             genai.configure(api_key=api_key)
-        else:
-            import os
-
-            api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
-            if api_key:
-                genai.configure(api_key=api_key)
 
         self.genai = genai
-        self._model_name = model
         self.model = genai.GenerativeModel(model)
 
     def generate(
@@ -435,7 +455,10 @@ def generate(
         generation_config.update(kwargs)
 
         # Call Gemini API
-        response = self.model.generate_content(full_prompt, generation_config=generation_config)
+        try:
+            response = self.model.generate_content(full_prompt, generation_config=generation_config)
+        except Exception as e:
+            handle_provider_error(e, "Gemini", "generate response")
 
         # Extract content
         content = response.text if response.text else ""
@@ -450,13 +473,12 @@ def generate(
             completion_tokens = response.usage_metadata.candidates_token_count
             total_tokens = response.usage_metadata.total_token_count
 
-        return LLMResponse(
+        return LLMResponseBuilder.from_gemini_format(
             content=content,
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
             total_tokens=total_tokens,
             model_name=self._model_name,
-            finish_reason=None,  # Gemini uses different finish reason format
         )
 
     def supports_json_mode(self) -> bool:
@@ -503,6 +525,9 @@ def __init__(
             load_in_8bit: Use 8-bit quantization (saves 50% memory)
             torch_dtype: Data type ("auto", "float16", "bfloat16", "float32")
         """
+        super().__init__(model_name)  # Initialize base class with model name
+
+        # Import required packages with consistent error handling
         try:
             import torch
             from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
@@ -512,8 +537,6 @@ def __init__(
                 "Install with: pip install transformers torch"
             )
 
-        self._model_name = model_name
-
         # Load tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
@@ -620,11 +643,10 @@ def generate(
         generated_tokens = outputs[0][input_length:]
         response_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
 
-        return LLMResponse(
+        return LLMResponseBuilder.from_local_format(
             content=response_text,
             prompt_tokens=input_length,
             completion_tokens=len(generated_tokens),
-            total_tokens=input_length + len(generated_tokens),
             model_name=self._model_name,
         )
 
diff --git a/sentience/llm_provider_utils.py b/sentience/llm_provider_utils.py
new file mode 100644
index 0000000..fdae52b
--- /dev/null
+++ b/sentience/llm_provider_utils.py
@@ -0,0 +1,120 @@
+"""
+LLM Provider utility functions for common initialization and error handling.
+
+This module provides helper functions to reduce duplication across LLM provider implementations.
+"""
+
+import os
+from collections.abc import Callable
+from typing import Any, Optional, TypeVar
+
+T = TypeVar("T")
+
+
+def require_package(
+    package_name: str,
+    module_name: str,
+    class_name: str | None = None,
+    install_command: str | None = None,
+) -> Any:
+    """
+    Import a package with consistent error handling.
+
+    Args:
+        package_name: Name of the package (for error messages)
+        module_name: Module name to import (e.g., "openai", "google.generativeai")
+        class_name: Optional class name to import from module (e.g., "OpenAI")
+        install_command: Installation command (defaults to "pip install {package_name}")
+
+    Returns:
+        Imported module or class
+
+    Raises:
+        ImportError: If package is not installed, with helpful message
+
+    Example:
+        >>> OpenAI = require_package("openai", "openai", "OpenAI", "pip install openai")
+        >>> genai = require_package("google-generativeai", "google.generativeai", install_command="pip install google-generativeai")
+    """
+    if install_command is None:
+        install_command = f"pip install {package_name}"
+
+    try:
+        if class_name:
+            # Import specific class: from module import class
+            module = __import__(module_name, fromlist=[class_name])
+            return getattr(module, class_name)
+        else:
+            # Import entire module
+            return __import__(module_name)
+    except ImportError:
+        raise ImportError(f"{package_name} package not installed. Install with: {install_command}")
+
+
+def get_api_key_from_env(
+    env_vars: list[str],
+    api_key: str | None = None,
+) -> str | None:
+    """
+    Get API key from parameter or environment variables.
+
+    Args:
+        env_vars: List of environment variable names to check (in order)
+        api_key: Optional API key parameter (takes precedence)
+
+    Returns:
+        API key string or None if not found
+
+    Example:
+        >>> key = get_api_key_from_env(["OPENAI_API_KEY"], api_key="sk-...")
+        >>> # Returns "sk-..." if provided, otherwise checks OPENAI_API_KEY env var
+    """
+    if api_key:
+        return api_key
+
+    for env_var in env_vars:
+        value = os.getenv(env_var)
+        if value:
+            return value
+
+    return None
+
+
+def handle_provider_error(
+    error: Exception,
+    provider_name: str,
+    operation: str = "operation",
+) -> None:
+    """
+    Standardize error handling for LLM provider operations.
+
+    Args:
+        error: Exception that occurred
+        provider_name: Name of the provider (e.g., "OpenAI", "Anthropic")
+        operation: Description of the operation that failed
+
+    Raises:
+        RuntimeError: With standardized error message
+
+    Example:
+        >>> try:
+        ...     response = client.chat.completions.create(...)
+        ... except Exception as e:
+        ...     handle_provider_error(e, "OpenAI", "generate response")
+    """
+    error_msg = str(error)
+    if "api key" in error_msg.lower() or "authentication" in error_msg.lower():
+        raise RuntimeError(
+            f"{provider_name} API key is invalid or missing. "
+            f"Please check your API key configuration."
+        ) from error
+    elif "rate limit" in error_msg.lower() or "429" in error_msg:
+        raise RuntimeError(
+            f"{provider_name} rate limit exceeded. Please try again later."
+        ) from error
+    elif "model" in error_msg.lower() and "not found" in error_msg.lower():
+        raise RuntimeError(
+            f"{provider_name} model not found. Please check the model name."
+        ) from error
+    else:
+        raise RuntimeError(f"{provider_name} {operation} failed: {error_msg}") from error
diff --git a/sentience/llm_response_builder.py b/sentience/llm_response_builder.py
new file mode 100644
index 0000000..a93a282
--- /dev/null
+++ b/sentience/llm_response_builder.py
@@ -0,0 +1,153 @@
+"""
+LLM Response building utilities for consistent response construction.
+
+This module provides helper functions for building LLMResponse objects
+from various provider API responses.
+"""
+
+from typing import Any, Optional
+
+# Import LLMResponse here to avoid circular dependency
+# We import it inside functions to break the cycle
+
+
+class LLMResponseBuilder:
+    """
+    Helper for building LLMResponse objects with consistent structure.
+
+    Provides static methods for building responses from different provider formats.
+    """
+
+    @staticmethod
+    def from_openai_format(
+        content: str,
+        prompt_tokens: int | None = None,
+        completion_tokens: int | None = None,
+        total_tokens: int | None = None,
+        model_name: str | None = None,
+        finish_reason: str | None = None,
+    ) -> "LLMResponse":
+        """
+        Build LLMResponse from OpenAI-style API response.
+
+        Args:
+            content: Response text content
+            prompt_tokens: Number of prompt tokens
+            completion_tokens: Number of completion tokens
+            total_tokens: Total tokens (or sum of prompt + completion)
+            model_name: Model identifier
+            finish_reason: Finish reason (stop, length, etc.)
+
+        Returns:
+            LLMResponse object
+        """
+        from .llm_provider import LLMResponse  # Import here to avoid circular dependency
+
+        return LLMResponse(
+            content=content,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens
+            or (
+                (prompt_tokens + completion_tokens) if prompt_tokens and completion_tokens else None
+            ),
+            model_name=model_name,
+            finish_reason=finish_reason,
+        )
+
+    @staticmethod
+    def from_anthropic_format(
+        content: str,
+        input_tokens: int | None = None,
+        output_tokens: int | None = None,
+        model_name: str | None = None,
+        stop_reason: str | None = None,
+    ) -> "LLMResponse":
+        """
+        Build LLMResponse from Anthropic-style API response.
+
+        Args:
+            content: Response text content
+            input_tokens: Number of input tokens
+            output_tokens: Number of output tokens
+            model_name: Model identifier
+            stop_reason: Stop reason (end_turn, max_tokens, etc.)
+
+        Returns:
+            LLMResponse object
+        """
+        from .llm_provider import LLMResponse  # Import here to avoid circular dependency
+
+        return LLMResponse(
+            content=content,
+            prompt_tokens=input_tokens,
+            completion_tokens=output_tokens,
+            total_tokens=(input_tokens + output_tokens) if input_tokens and output_tokens else None,
+            model_name=model_name,
+            finish_reason=stop_reason,
+        )
+
+    @staticmethod
+    def from_gemini_format(
+        content: str,
+        prompt_tokens: int | None = None,
+        completion_tokens: int | None = None,
+        total_tokens: int | None = None,
+        model_name: str | None = None,
+    ) -> "LLMResponse":
+        """
+        Build LLMResponse from Gemini-style API response.
+
+        Args:
+            content: Response text content
+            prompt_tokens: Number of prompt tokens
+            completion_tokens: Number of completion tokens
+            total_tokens: Total tokens
+            model_name: Model identifier
+
+        Returns:
+            LLMResponse object
+        """
+        from .llm_provider import LLMResponse  # Import here to avoid circular dependency
+
+        return LLMResponse(
+            content=content,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens
+            or (
+                (prompt_tokens + completion_tokens) if prompt_tokens and completion_tokens else None
+            ),
+            model_name=model_name,
+            finish_reason=None,  # Gemini uses different finish reason format
+        )
+
+    @staticmethod
+    def from_local_format(
+        content: str,
+        prompt_tokens: int,
+        completion_tokens: int,
+        model_name: str,
+    ) -> "LLMResponse":
+        """
+        Build LLMResponse from local model generation.
+
+        Args:
+            content: Response text content
+            prompt_tokens: Number of prompt tokens
+            completion_tokens: Number of completion tokens
+            model_name: Model identifier
+
+        Returns:
+            LLMResponse object
+        """
+        from .llm_provider import LLMResponse  # Import here to avoid circular dependency
+
+        return LLMResponse(
+            content=content,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+            model_name=model_name,
+            finish_reason=None,
+        )
diff --git a/sentience/models.py b/sentience/models.py
index a16b035..db68aa1 100644
--- a/sentience/models.py
+++ b/sentience/models.py
@@ -3,7 +3,7 @@
 """
 
 from dataclasses import dataclass
-from typing import Literal, Optional
+from typing import Any, Literal, Optional
 
 from pydantic import BaseModel, Field
 
@@ -413,6 +413,45 @@ class TextRectSearchResult(BaseModel):
     error: str | None = Field(None, description="Error message if status is 'error'")
 
 
+class ReadResult(BaseModel):
+    """Result of read() or read_async() operation"""
+
+    status: Literal["success", "error"]
+    url: str
+    format: Literal["raw", "text", "markdown"]
+    content: str
+    length: int
+    error: str | None = None
+
+
+class TraceStats(BaseModel):
+    """Execution statistics for trace completion"""
+
+    total_steps: int
+    total_events: int
+    duration_ms: int | None = None
+    final_status: Literal["success", "failure", "partial", "unknown"]
+    started_at: str | None = None
+    ended_at: str | None = None
+
+
+class StepExecutionResult(BaseModel):
+    """Result of executing a single step in ConversationalAgent"""
+
+    success: bool
+    action: str
+    data: dict[str, Any]  # Flexible data field for step-specific results
+    error: str | None = None
+
+
+class ExtractionResult(BaseModel):
+    """Result of extracting information from a page"""
+
+    found: bool
+    data: dict[str, Any]  # Extracted data fields
+    summary: str  # Brief description of what was found
+
+
 @dataclass
 class ScreenshotMetadata:
     """
diff --git a/sentience/overlay.py b/sentience/overlay.py
index 2529f38..f347e07 100644
--- a/sentience/overlay.py
+++ b/sentience/overlay.py
@@ -2,7 +2,7 @@
 Visual overlay utilities - show/clear element highlights in browser
 """
 
-from typing import Any
+from typing import Any, Optional
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
 from .models import Element, Snapshot
diff --git a/sentience/protocols.py b/sentience/protocols.py
new file mode 100644
index 0000000..8369907
--- /dev/null
+++ b/sentience/protocols.py
@@ -0,0 +1,228 @@
+"""
+Protocol definitions for testability and dependency injection.
+
+These protocols define the minimal interface required by agent classes,
+enabling better testability through mocking while maintaining type safety.
+"""
+
+from typing import TYPE_CHECKING, Any, Optional, Protocol, runtime_checkable
+
+if TYPE_CHECKING:
+    from playwright.async_api import Page as AsyncPage
+    from playwright.sync_api import Page
+
+    from .models import Snapshot
+
+
+@runtime_checkable
+class PageProtocol(Protocol):
+    """
+    Protocol for Playwright Page operations used by agents.
+
+    This protocol defines the minimal interface required from Playwright's Page object.
+    Agents use this interface to interact with the browser page.
+    """
+
+    @property
+    def url(self) -> str:
+        """Current page URL."""
+        ...
+
+    def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any:
+        """
+        Evaluate JavaScript in the page context.
+
+        Args:
+            script: JavaScript code to evaluate
+            *args: Arguments to pass to the script
+            **kwargs: Keyword arguments to pass to the script
+
+        Returns:
+            Result of the JavaScript evaluation
+        """
+        ...
+
+    def goto(self, url: str, **kwargs: Any) -> Any | None:
+        """
+        Navigate to a URL.
+
+        Args:
+            url: URL to navigate to
+            **kwargs: Additional navigation options
+
+        Returns:
+            Response object or None
+        """
+        ...
+
+    def wait_for_timeout(self, timeout: int) -> None:
+        """
+        Wait for a specified timeout.
+
+        Args:
+            timeout: Timeout in milliseconds
+        """
+        ...
+
+    def wait_for_load_state(self, state: str = "load", timeout: int | None = None) -> None:
+        """
+        Wait for page load state.
+
+        Args:
+            state: Load state to wait for (e.g., "load", "domcontentloaded", "networkidle")
+            timeout: Optional timeout in milliseconds
+        """
+        ...
+
+
+@runtime_checkable
+class BrowserProtocol(Protocol):
+    """
+    Protocol for browser operations used by agents.
+
+    This protocol defines the minimal interface required from SentienceBrowser.
+    Agents use this interface to interact with the browser and take snapshots.
+
+    Note: SentienceBrowser naturally implements this protocol, so no changes
+    are required to existing code. This protocol enables better testability
+    through mocking.
+    """
+
+    @property
+    def page(self) -> PageProtocol | None:
+        """
+        Current Playwright Page object.
+
+        Returns:
+            Page object if browser is started, None otherwise
+        """
+        ...
+
+    def start(self) -> None:
+        """Start the browser session."""
+        ...
+
+    def close(self, output_path: str | None = None) -> str | None:
+        """
+        Close the browser session.
+
+        Args:
+            output_path: Optional path to save browser state/output
+
+        Returns:
+            Path to saved output or None
+        """
+        ...
+
+    def goto(self, url: str) -> None:
+        """
+        Navigate to a URL.
+
+        Args:
+            url: URL to navigate to
+        """
+        ...
+
+
+@runtime_checkable
+class AsyncPageProtocol(Protocol):
+    """
+    Protocol for async Playwright Page operations.
+
+    Similar to PageProtocol but for async operations.
+    """
+
+    @property
+    def url(self) -> str:
+        """Current page URL."""
+        ...
+
+    async def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any:
+        """
+        Evaluate JavaScript in the page context (async).
+
+        Args:
+            script: JavaScript code to evaluate
+            *args: Arguments to pass to the script
+            **kwargs: Keyword arguments to pass to the script
+
+        Returns:
+            Result of the JavaScript evaluation
+        """
+        ...
+
+    async def goto(self, url: str, **kwargs: Any) -> Any | None:
+        """
+        Navigate to a URL (async).
+
+        Args:
+            url: URL to navigate to
+            **kwargs: Additional navigation options
+
+        Returns:
+            Response object or None
+        """
+        ...
+
+    async def wait_for_timeout(self, timeout: int) -> None:
+        """
+        Wait for a specified timeout (async).
+
+        Args:
+            timeout: Timeout in milliseconds
+        """
+        ...
+
+    async def wait_for_load_state(self, state: str = "load", timeout: int | None = None) -> None:
+        """
+        Wait for page load state (async).
+
+        Args:
+            state: Load state to wait for (e.g., "load", "domcontentloaded", "networkidle")
+            timeout: Optional timeout in milliseconds
+        """
+        ...
+
+
+@runtime_checkable
+class AsyncBrowserProtocol(Protocol):
+    """
+    Protocol for async browser operations.
+
+    Similar to BrowserProtocol but for async operations.
+    """
+
+    @property
+    def page(self) -> AsyncPageProtocol | None:
+        """
+        Current Playwright AsyncPage object.
+
+        Returns:
+            AsyncPage object if browser is started, None otherwise
+        """
+        ...
+
+    async def start(self) -> None:
+        """Start the browser session (async)."""
+        ...
+
+    async def close(self, output_path: str | None = None) -> str | None:
+        """
+        Close the browser session (async).
+
+        Args:
+            output_path: Optional path to save browser state/output
+
+        Returns:
+            Path to saved output or None
+        """
+        ...
+
+    async def goto(self, url: str) -> None:
+        """
+        Navigate to a URL (async).
+
+        Args:
+            url: URL to navigate to
+        """
+        ...
diff --git a/sentience/query.py b/sentience/query.py
index 26f10ce..f77537c 100644
--- a/sentience/query.py
+++ b/sentience/query.py
@@ -3,7 +3,7 @@
 """
 
 import re
-from typing import Any
+from typing import Any, Optional
 
 from .models import Element, Snapshot
 
diff --git a/sentience/read.py b/sentience/read.py
index 59cf82b..6d95534 100644
--- a/sentience/read.py
+++ b/sentience/read.py
@@ -5,13 +5,14 @@
 from typing import Literal
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .models import ReadResult
 
 
 def read(
     browser: SentienceBrowser,
     output_format: Literal["raw", "text", "markdown"] = "raw",
     enhance_markdown: bool = True,
-) -> dict:
+) -> ReadResult:
     """
     Read page content as raw HTML, text, or markdown
 
@@ -93,14 +94,15 @@ def read(
         {"format": output_format},
     )
 
-    return result
+    # Convert dict result to ReadResult model
+    return ReadResult(**result)
 
 
 async def read_async(
     browser: AsyncSentienceBrowser,
     output_format: Literal["raw", "text", "markdown"] = "raw",
     enhance_markdown: bool = True,
-) -> dict:
+) -> ReadResult:
     """
     Read page content as raw HTML, text, or markdown (async)
 
@@ -182,4 +184,5 @@ async def read_async(
         {"format": output_format},
     )
 
-    return result
+    # Convert dict result to ReadResult model
+    return ReadResult(**result)
diff --git a/sentience/recorder.py b/sentience/recorder.py
index c5297ee..3f921af 100644
--- a/sentience/recorder.py
+++ b/sentience/recorder.py
@@ -4,7 +4,7 @@
 
 import json
 from datetime import datetime
-from typing import Any
+from typing import Any, Optional
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
 from .models import Element, Snapshot
diff --git a/sentience/screenshot.py b/sentience/screenshot.py
index 9361615..9357c30 100644
--- a/sentience/screenshot.py
+++ b/sentience/screenshot.py
@@ -3,7 +3,7 @@
 """
 
 import base64
-from typing import Any, Literal
+from typing import Any, Literal, Optional
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
 
diff --git a/sentience/sentience_methods.py b/sentience/sentience_methods.py
new file mode 100644
index 0000000..e9a6697
--- /dev/null
+++ b/sentience/sentience_methods.py
@@ -0,0 +1,86 @@
+"""
+Enums for Sentience API methods and agent actions.
+
+This module provides type-safe enums for:
+1. window.sentience API methods (extension-level)
+2. Agent action types (high-level automation commands)
+"""
+
+from enum import Enum
+
+
+class SentienceMethod(str, Enum):
+    """
+    Enum for window.sentience API methods.
+
+    These are the actual methods available on the window.sentience object
+    injected by the Chrome extension.
+    """
+
+    # Core snapshot and element discovery
+    SNAPSHOT = "snapshot"
+    """Take a snapshot of the current page with element geometry and metadata."""
+
+    # Element interaction
+    CLICK = "click"
+    """Click an element by its ID from the snapshot registry."""
+
+    # Content extraction
+    READ = "read"
+    """Read page content as raw HTML, text, or markdown."""
+
+    FIND_TEXT_RECT = "findTextRect"
+    """Find exact pixel coordinates of text occurrences on the page."""
+
+    # Visual overlay
+    SHOW_OVERLAY = "showOverlay"
+    """Show visual overlay highlighting elements with importance scores."""
+
+    CLEAR_OVERLAY = "clearOverlay"
+    """Clear the visual overlay."""
+
+    # Developer tools
+    START_RECORDING = "startRecording"
+    """Start recording mode for golden set collection (developer tool)."""
+
+    def __str__(self) -> str:
+        """Return the method name as a string."""
+        return self.value
+
+
+class AgentAction(str, Enum):
+    """
+    Enum for high-level agent action types.
+
+    These are the action commands that agents can execute. They may use
+    one or more window.sentience methods or Playwright APIs directly.
+    """
+
+    # Element interaction
+    CLICK = "click"
+    """Click an element by ID. Uses window.sentience.click() or Playwright mouse.click()."""
+
+    TYPE = "type"
+    """Type text into an input element. Uses Playwright keyboard.type() directly."""
+
+    PRESS = "press"
+    """Press a keyboard key (Enter, Escape, Tab, etc.). Uses Playwright keyboard.press()."""
+
+    # Navigation
+    NAVIGATE = "navigate"
+    """Navigate to a URL. Uses Playwright page.goto() directly."""
+
+    SCROLL = "scroll"
+    """Scroll the page or an element. Uses Playwright page.mouse.wheel() or element.scrollIntoView()."""
+
+    # Completion
+    FINISH = "finish"
+    """Signal that the agent task is complete. No browser action, just status update."""
+
+    # Wait/verification
+    WAIT = "wait"
+    """Wait for a condition or duration. Uses Playwright wait_for_* methods."""
+
+    def __str__(self) -> str:
+        """Return the action name as a string."""
+        return self.value
diff --git a/sentience/snapshot.py b/sentience/snapshot.py
index 786161f..6f8e4fd 100644
--- a/sentience/snapshot.py
+++ b/sentience/snapshot.py
@@ -11,7 +11,9 @@
 import requests
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .browser_evaluator import BrowserEvaluator
 from .models import Snapshot, SnapshotOptions
+from .sentience_methods import SentienceMethod
 
 # Maximum payload size for API requests (10MB server limit)
 MAX_PAYLOAD_BYTES = 10 * 1024 * 1024
@@ -94,28 +96,7 @@ def _snapshot_via_extension(
     # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
     # The new architecture loads injected_api.js asynchronously, so window.sentience
     # may not be immediately available after page load
-    try:
-        browser.page.wait_for_function(
-            "typeof window.sentience !== 'undefined'",
-            timeout=5000,  # 5 second timeout
-        )
-    except Exception as e:
-        # Gather diagnostics if wait fails
-        try:
-            diag = browser.page.evaluate(
-                """() => ({
-                    sentience_defined: typeof window.sentience !== 'undefined',
-                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
-                    url: window.location.href
-                })"""
-            )
-        except Exception:
-            diag = {"error": "Could not gather diagnostics"}
-
-        raise RuntimeError(
-            f"Sentience extension failed to inject window.sentience API. "
-            f"Is the extension loaded? Diagnostics: {diag}"
-        ) from e
+    BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
 
     # Build options dict for extension API (exclude save_trace/trace_path)
     ext_options: dict[str, Any] = {}
@@ -182,26 +163,14 @@ def _snapshot_via_api(
 
     # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
     # Even for API mode, we need the extension to collect raw data locally
-    try:
-        browser.page.wait_for_function("typeof window.sentience !== 'undefined'", timeout=5000)
-    except Exception as e:
-        raise RuntimeError(
-            "Sentience extension failed to inject. Cannot collect raw data for API processing."
-        ) from e
+    BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
 
     # Step 1: Get raw data from local extension (always happens locally)
     raw_options: dict[str, Any] = {}
     if options.screenshot is not False:
         raw_options["screenshot"] = options.screenshot
 
-    raw_result = browser.page.evaluate(
-        """
-        (options) => {
-            return window.sentience.snapshot(options);
-        }
-        """,
-        raw_options,
-    )
+    raw_result = BrowserEvaluator.invoke(browser.page, SentienceMethod.SNAPSHOT, **raw_options)
 
     # Save trace if requested (save raw data before API processing)
     if options.save_trace:
diff --git a/sentience/text_search.py b/sentience/text_search.py
index f4cd0d7..d0a5b3b 100644
--- a/sentience/text_search.py
+++ b/sentience/text_search.py
@@ -3,6 +3,7 @@
 """
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .browser_evaluator import BrowserEvaluator
 from .models import TextRectSearchResult
 
 
@@ -91,43 +92,14 @@ def find_text_rect(
     # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture)
     # The new architecture loads injected_api.js asynchronously, so window.sentience
     # may not be immediately available after page load
-    try:
-        browser.page.wait_for_function(
-            "typeof window.sentience !== 'undefined'",
-            timeout=5000,  # 5 second timeout
-        )
-    except Exception as e:
-        # Gather diagnostics if wait fails
-        try:
-            diag = browser.page.evaluate(
-                """() => ({
-                    sentience_defined: typeof window.sentience !== 'undefined',
-                    extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
-                    url: window.location.href
-                })"""
-            )
-        except Exception:
-            diag = {"error": "Could not gather diagnostics"}
-
-        raise RuntimeError(
-            f"Sentience extension failed to inject window.sentience API. "
-            f"Is the extension loaded? Diagnostics: {diag}"
-        ) from e
+    BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
 
     # Verify findTextRect method exists (for older extension versions that don't have it)
-    try:
-        has_find_text_rect = browser.page.evaluate(
-            "typeof window.sentience.findTextRect !== 'undefined'"
+    if not BrowserEvaluator.verify_method_exists(browser.page, SentienceMethod.FIND_TEXT_RECT):
+        raise RuntimeError(
+            "window.sentience.findTextRect is not available. "
+            "Please update the Sentience extension to the latest version."
         )
-        if not has_find_text_rect:
-            raise RuntimeError(
-                "window.sentience.findTextRect is not available. "
-                "Please update the Sentience extension to the latest version."
-            )
-    except RuntimeError:
-        raise
-    except Exception as e:
-        raise RuntimeError(f"Failed to verify findTextRect availability: {e}") from e
 
     # Call the extension's findTextRect method
     result_dict = browser.page.evaluate(
diff --git a/sentience/trace_event_builder.py b/sentience/trace_event_builder.py
new file mode 100644
index 0000000..3d4dfb5
--- /dev/null
+++ b/sentience/trace_event_builder.py
@@ -0,0 +1,96 @@
+"""
+Trace event building utilities for agent-based tracing.
+
+This module provides centralized trace event building logic to reduce duplication
+across agent implementations.
+"""
+
+from typing import Any, Optional
+
+from .models import AgentActionResult, Element, Snapshot
+
+
+class TraceEventBuilder:
+    """
+    Helper for building trace events with consistent structure.
+
+    Provides static methods for building common trace event types:
+    - snapshot_taken events
+    - step_end events
+    """
+
+    @staticmethod
+    def build_snapshot_event(
+        snapshot: Snapshot,
+        include_all_elements: bool = True,
+    ) -> dict[str, Any]:
+        """
+        Build snapshot_taken trace event data.
+
+        Args:
+            snapshot: Snapshot to build event from
+            include_all_elements: If True, include all elements (for DOM tree display).
+                                 If False, use filtered elements only.
+
+        Returns:
+            Dictionary with snapshot event data
+        """
+        # Include ALL elements with full data for DOM tree display
+        # Use snap.elements (all elements) not filtered_elements
+        elements_data = [el.model_dump() for el in snapshot.elements]
+
+        return {
+            "url": snapshot.url,
+            "element_count": len(snapshot.elements),
+            "timestamp": snapshot.timestamp,
+            "elements": elements_data,  # Full element data for DOM tree
+        }
+
+    @staticmethod
+    def build_step_end_event(
+        step_id: str,
+        step_index: int,
+        goal: str,
+        attempt: int,
+        pre_url: str,
+        post_url: str,
+        snapshot_digest: str | None,
+        llm_data: dict[str, Any],
+        exec_data: dict[str, Any],
+        verify_data: dict[str, Any],
+    ) -> dict[str, Any]:
+        """
+        Build step_end trace event data.
+
+        Args:
+            step_id: Unique step identifier
+            step_index: Step index (0-based)
+            goal: User's goal for this step
+            attempt: Attempt number (0-based)
+            pre_url: URL before action execution
+            post_url: URL after action execution
+            snapshot_digest: Digest of snapshot before action
+            llm_data: LLM interaction data
+            exec_data: Action execution data
+            verify_data: Verification data
+
+        Returns:
+            Dictionary with step_end event data
+        """
+        return {
+            "v": 1,
+            "step_id": step_id,
+            "step_index": step_index,
+            "goal": goal,
+            "attempt": attempt,
+            "pre": {
+                "url": pre_url,
+                "snapshot_digest": snapshot_digest,
+            },
+            "llm": llm_data,
+            "exec": exec_data,
+            "post": {
+                "url": post_url,
+            },
+            "verify": verify_data,
+        }
diff --git a/sentience/trace_file_manager.py b/sentience/trace_file_manager.py
new file mode 100644
index 0000000..0bba017
--- /dev/null
+++ b/sentience/trace_file_manager.py
@@ -0,0 +1,197 @@
+"""
+Trace file management utilities for consistent file operations.
+
+This module provides helper functions for common trace file operations
+shared between JsonlTraceSink and CloudTraceSink.
+"""
+
+import json
+from collections.abc import Callable
+from pathlib import Path
+from typing import Any, Optional
+
+from .models import TraceStats
+
+
+class TraceFileManager:
+    """
+    Helper for common trace file operations.
+
+    Provides static methods for file operations shared across trace sinks.
+    """
+
+    @staticmethod
+    def write_event(file_handle: Any, event: dict[str, Any]) -> None:
+        """
+        Write a trace event to a file handle as JSONL.
+
+        Args:
+            file_handle: Open file handle (must be writable)
+            event: Event dictionary to write
+        """
+        json_str = json.dumps(event, ensure_ascii=False)
+        file_handle.write(json_str + "\n")
+        file_handle.flush()  # Ensure written to disk
+
+    @staticmethod
+    def ensure_directory(path: Path) -> None:
+        """
+        Ensure the parent directory of a path exists.
+
+        Args:
+            path: File path whose parent directory should exist
+        """
+        path.parent.mkdir(parents=True, exist_ok=True)
+
+    @staticmethod
+    def read_events(path: Path) -> list[dict[str, Any]]:
+        """
+        Read all events from a JSONL trace file.
+
+        Args:
+            path: Path to JSONL trace file
+
+        Returns:
+            List of event dictionaries
+
+        Raises:
+            FileNotFoundError: If file doesn't exist
+            json.JSONDecodeError: If file contains invalid JSON
+        """
+        events = []
+        with open(path, encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    event = json.loads(line)
+                    events.append(event)
+                except json.JSONDecodeError:
+                    # Skip invalid lines but continue reading
+                    continue
+        return events
+
+    @staticmethod
+    def extract_stats(
+        events: list[dict[str, Any]],
+        infer_status_func: None | (
+            Callable[[list[dict[str, Any]], dict[str, Any] | None], str]
+        ) = None,
+    ) -> TraceStats:
+        """
+        Extract execution statistics from trace events.
+
+        This is a common operation shared between JsonlTraceSink and CloudTraceSink.
+
+        Args:
+            events: List of trace event dictionaries
+            infer_status_func: Optional function to infer final_status from events.
+                             If None, uses default inference logic.
+
+        Returns:
+            TraceStats with execution statistics
+        """
+        if not events:
+            return TraceStats(
+                total_steps=0,
+                total_events=0,
+                duration_ms=None,
+                final_status="unknown",
+                started_at=None,
+                ended_at=None,
+            )
+
+        # Find run_start and run_end events
+        run_start = next((e for e in events if e.get("type") == "run_start"), None)
+        run_end = next((e for e in events if e.get("type") == "run_end"), None)
+
+        # Extract timestamps
+        started_at: str | None = None
+        ended_at: str | None = None
+        if run_start:
+            started_at = run_start.get("ts")
+        if run_end:
+            ended_at = run_end.get("ts")
+
+        # Calculate duration
+        duration_ms: int | None = None
+        if started_at and ended_at:
+            try:
+                from datetime import datetime
+
+                start_dt = datetime.fromisoformat(started_at.replace("Z", "+00:00"))
+                end_dt = datetime.fromisoformat(ended_at.replace("Z", "+00:00"))
+                delta = end_dt - start_dt
+                duration_ms = int(delta.total_seconds() * 1000)
+            except Exception:
+                pass
+
+        # Count steps (from step_start events, only first attempt)
+        step_indices = set()
+        for event in events:
+            if event.get("type") == "step_start":
+                step_index = event.get("data", {}).get("step_index")
+                if step_index is not None:
+                    step_indices.add(step_index)
+        total_steps = len(step_indices) if step_indices else 0
+
+        # If run_end has steps count, use that (more accurate)
+        if run_end:
+            steps_from_end = run_end.get("data", {}).get("steps")
+            if steps_from_end is not None:
+                total_steps = max(total_steps, steps_from_end)
+
+        # Count total events
+        total_events = len(events)
+
+        # Infer final status
+        if infer_status_func:
+            final_status = infer_status_func(events, run_end)
+        else:
+            final_status = TraceFileManager._infer_final_status(events, run_end)
+
+        return TraceStats(
+            total_steps=total_steps,
+            total_events=total_events,
+            duration_ms=duration_ms,
+            final_status=final_status,
+            started_at=started_at,
+            ended_at=ended_at,
+        )
+
+    @staticmethod
+    def _infer_final_status(
+        events: list[dict[str, Any]],
+        run_end: dict[str, Any] | None,
+    ) -> str:
+        """
+        Infer final status from trace events.
+
+        Args:
+            events: List of trace event dictionaries
+            run_end: Optional run_end event dictionary
+
+        Returns:
+            Final status string: "success", "failure", "partial", or "unknown"
+        """
+        # Check for run_end event with status
+        if run_end:
+            status = run_end.get("data", {}).get("status")
+            if status in ("success", "failure", "partial", "unknown"):
+                return status
+
+        # Infer from error events
+        has_errors = any(e.get("type") == "error" for e in events)
+        if has_errors:
+            step_ends = [e for e in events if e.get("type") == "step_end"]
+            if step_ends:
+                return "partial"
+            else:
+                return "failure"
+        else:
+            step_ends = [e for e in events if e.get("type") == "step_end"]
+            if step_ends:
+                return "success"
+            else:
+                return "unknown"
diff --git a/sentience/trace_indexing/indexer.py b/sentience/trace_indexing/indexer.py
index 842baf0..444086c 100644
--- a/sentience/trace_indexing/indexer.py
+++ b/sentience/trace_indexing/indexer.py
@@ -7,7 +7,7 @@
 import os
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any
+from typing import Any, Optional
 
 from .index_schema import (
     ActionInfo,
diff --git a/sentience/tracer_factory.py b/sentience/tracer_factory.py
index d1b0472..f2e9f57 100644
--- a/sentience/tracer_factory.py
+++ b/sentience/tracer_factory.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 """
 Tracer factory with automatic tier detection.
 
@@ -24,6 +26,10 @@ def create_tracer(
     api_url: str | None = None,
     logger: SentienceLogger | None = None,
     upload_trace: bool = False,
+    goal: str | None = None,
+    agent_type: str | None = None,
+    llm_model: str | None = None,
+    start_url: str | None = None,
 ) -> Tracer:
     """
     Create tracer with automatic tier detection.
@@ -42,13 +48,26 @@ def create_tracer(
         upload_trace: Enable cloud trace upload (default: False). When True and api_key
                       is provided, traces will be uploaded to cloud. When False, traces
                       are saved locally only.
+        goal: User's goal/objective for this trace run. This will be displayed as the
+              trace name in the frontend. Should be descriptive and action-oriented.
+              Example: "Add wireless headphones to cart on Amazon"
+        agent_type: Type of agent running (e.g., "SentienceAgent", "CustomAgent")
+        llm_model: LLM model used (e.g., "gpt-4-turbo", "claude-3-5-sonnet")
+        start_url: Starting URL of the agent run (e.g., "https://amazon.com")
 
     Returns:
         Tracer configured with appropriate sink
 
     Example:
-        >>> # Pro tier user
-        >>> tracer = create_tracer(api_key="sk_pro_xyz", run_id="demo")
+        >>> # Pro tier user with goal
+        >>> tracer = create_tracer(
+        ...     api_key="sk_pro_xyz",
+        ...     run_id="demo",
+        ...     goal="Add headphones to cart",
+        ...     agent_type="SentienceAgent",
+        ...     llm_model="gpt-4-turbo",
+        ...     start_url="https://amazon.com"
+        ... )
         >>> # Returns: Tracer with CloudTraceSink
         >>>
         >>> # Free tier user
@@ -73,11 +92,28 @@ def create_tracer(
     # 1. Try to initialize Cloud Sink (Pro/Enterprise tier) if upload enabled
     if api_key and upload_trace:
         try:
+            # Build metadata object for trace initialization
+            # Only include non-empty fields to avoid sending empty strings
+            metadata: dict[str, str] = {}
+            if goal and goal.strip():
+                metadata["goal"] = goal.strip()
+            if agent_type and agent_type.strip():
+                metadata["agent_type"] = agent_type.strip()
+            if llm_model and llm_model.strip():
+                metadata["llm_model"] = llm_model.strip()
+            if start_url and start_url.strip():
+                metadata["start_url"] = start_url.strip()
+
+            # Build request payload
+            payload: dict[str, Any] = {"run_id": run_id}
+            if metadata:
+                payload["metadata"] = metadata
+
             # Request pre-signed upload URL from backend
             response = requests.post(
                 f"{api_url}/v1/traces/init",
                 headers={"Authorization": f"Bearer {api_key}"},
-                json={"run_id": run_id},
+                json=payload,
                 timeout=10,
             )
 
diff --git a/sentience/tracing.py b/sentience/tracing.py
index 39a9bb7..fc0405c 100644
--- a/sentience/tracing.py
+++ b/sentience/tracing.py
@@ -10,7 +10,10 @@
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
-from typing import Any
+from typing import Any, Optional
+
+from .models import TraceStats
+from .trace_file_manager import TraceFileManager
 
 
 @dataclass
@@ -88,7 +91,7 @@ def __init__(self, path: str | Path):
             path: File path to write traces to
         """
         self.path = Path(path)
-        self.path.parent.mkdir(parents=True, exist_ok=True)
+        TraceFileManager.ensure_directory(self.path)
 
         # Open file in append mode with line buffering
         self._file = open(self.path, "a", encoding="utf-8", buffering=1)
@@ -100,8 +103,7 @@ def emit(self, event: dict[str, Any]) -> None:
         Args:
             event: Event dictionary
         """
-        json_str = json.dumps(event, ensure_ascii=False)
-        self._file.write(json_str + "\n")
+        TraceFileManager.write_event(self._file, event)
 
     def close(self) -> None:
         """Close the file and generate index."""
@@ -111,119 +113,26 @@ def close(self) -> None:
         # Generate index after closing file
         self._generate_index()
 
-    def get_stats(self) -> dict[str, Any]:
+    def get_stats(self) -> TraceStats:
         """
         Extract execution statistics from trace file (for local traces).
 
         Returns:
-            Dictionary with stats fields (same format as Tracer.get_stats())
+            TraceStats with execution statistics
         """
         try:
             # Read trace file to extract stats
-            with open(self.path, encoding="utf-8") as f:
-                events = []
-                for line in f:
-                    line = line.strip()
-                    if not line:
-                        continue
-                    try:
-                        event = json.loads(line)
-                        events.append(event)
-                    except json.JSONDecodeError:
-                        continue
-
-            if not events:
-                return {
-                    "total_steps": 0,
-                    "total_events": 0,
-                    "duration_ms": None,
-                    "final_status": "unknown",
-                    "started_at": None,
-                    "ended_at": None,
-                }
-
-            # Find run_start and run_end events
-            run_start = next((e for e in events if e.get("type") == "run_start"), None)
-            run_end = next((e for e in events if e.get("type") == "run_end"), None)
-
-            # Extract timestamps
-            started_at: str | None = None
-            ended_at: str | None = None
-            if run_start:
-                started_at = run_start.get("ts")
-            if run_end:
-                ended_at = run_end.get("ts")
-
-            # Calculate duration
-            duration_ms: int | None = None
-            if started_at and ended_at:
-                try:
-                    from datetime import datetime
-
-                    start_dt = datetime.fromisoformat(started_at.replace("Z", "+00:00"))
-                    end_dt = datetime.fromisoformat(ended_at.replace("Z", "+00:00"))
-                    delta = end_dt - start_dt
-                    duration_ms = int(delta.total_seconds() * 1000)
-                except Exception:
-                    pass
-
-            # Count steps (from step_start events, only first attempt)
-            step_indices = set()
-            for event in events:
-                if event.get("type") == "step_start":
-                    step_index = event.get("data", {}).get("step_index")
-                    if step_index is not None:
-                        step_indices.add(step_index)
-            total_steps = len(step_indices) if step_indices else 0
-
-            # If run_end has steps count, use that (more accurate)
-            if run_end:
-                steps_from_end = run_end.get("data", {}).get("steps")
-                if steps_from_end is not None:
-                    total_steps = max(total_steps, steps_from_end)
-
-            # Count total events
-            total_events = len(events)
-
-            # Infer final status
-            final_status = "unknown"
-            # Check for run_end event with status
-            if run_end:
-                status = run_end.get("data", {}).get("status")
-                if status in ("success", "failure", "partial", "unknown"):
-                    final_status = status
-            else:
-                # Infer from error events
-                has_errors = any(e.get("type") == "error" for e in events)
-                if has_errors:
-                    step_ends = [e for e in events if e.get("type") == "step_end"]
-                    if step_ends:
-                        final_status = "partial"
-                    else:
-                        final_status = "failure"
-                else:
-                    step_ends = [e for e in events if e.get("type") == "step_end"]
-                    if step_ends:
-                        final_status = "success"
-
-            return {
-                "total_steps": total_steps,
-                "total_events": total_events,
-                "duration_ms": duration_ms,
-                "final_status": final_status,
-                "started_at": started_at,
-                "ended_at": ended_at,
-            }
-
+            events = TraceFileManager.read_events(self.path)
+            return TraceFileManager.extract_stats(events)
         except Exception:
-            return {
-                "total_steps": 0,
-                "total_events": 0,
-                "duration_ms": None,
-                "final_status": "unknown",
-                "started_at": None,
-                "ended_at": None,
-            }
+            return TraceStats(
+                total_steps=0,
+                total_events=0,
+                duration_ms=None,
+                final_status="unknown",
+                started_at=None,
+                ended_at=None,
+            )
 
     def _generate_index(self) -> None:
         """Generate trace index file (automatic on close)."""
@@ -431,26 +340,26 @@ def set_final_status(self, status: str) -> None:
             )
         self.final_status = status
 
-    def get_stats(self) -> dict[str, Any]:
+    def get_stats(self) -> TraceStats:
         """
         Get execution statistics for trace completion.
 
         Returns:
-            Dictionary with stats fields for /v1/traces/complete
+            TraceStats with execution statistics
         """
         duration_ms: int | None = None
         if self.started_at and self.ended_at:
             delta = self.ended_at - self.started_at
             duration_ms = int(delta.total_seconds() * 1000)
 
-        return {
-            "total_steps": self.total_steps,
-            "total_events": self.total_events,
-            "duration_ms": duration_ms,
-            "final_status": self.final_status,
-            "started_at": self.started_at.isoformat() + "Z" if self.started_at else None,
-            "ended_at": self.ended_at.isoformat() + "Z" if self.ended_at else None,
-        }
+        return TraceStats(
+            total_steps=self.total_steps,
+            total_events=self.total_events,
+            duration_ms=duration_ms,
+            final_status=self.final_status,
+            started_at=self.started_at.isoformat() + "Z" if self.started_at else None,
+            ended_at=self.ended_at.isoformat() + "Z" if self.ended_at else None,
+        )
 
     def _infer_final_status(self) -> None:
         """
diff --git a/sentience/utils.py b/sentience/utils.py
index 286d0af..86014b6 100644
--- a/sentience/utils.py
+++ b/sentience/utils.py
@@ -12,7 +12,7 @@
 import re
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any
+from typing import Any, Optional
 
 from playwright.sync_api import BrowserContext
 
diff --git a/sentience/utils/__init__.py b/sentience/utils/__init__.py
new file mode 100644
index 0000000..7f8f303
--- /dev/null
+++ b/sentience/utils/__init__.py
@@ -0,0 +1,40 @@
+"""
+Utility functions for Sentience SDK.
+
+This module re-exports all utility functions from submodules for backward compatibility.
+Users can continue using:
+    from sentience.utils import compute_snapshot_digests, canonical_snapshot_strict
+    from sentience import canonical_snapshot_strict, format_snapshot_for_llm
+"""
+
+# Re-export all functions from submodules for backward compatibility
+from .browser import save_storage_state
+from .element import (
+    BBox,
+    ElementFingerprint,
+    canonical_snapshot_loose,
+    canonical_snapshot_strict,
+    compute_snapshot_digests,
+    extract_element_fingerprint,
+    normalize_bbox,
+    normalize_text_strict,
+    sha256_digest,
+)
+from .formatting import format_snapshot_for_llm
+
+__all__ = [
+    # Browser utilities
+    "save_storage_state",
+    # Element/digest utilities
+    "BBox",
+    "ElementFingerprint",
+    "canonical_snapshot_loose",
+    "canonical_snapshot_strict",
+    "compute_snapshot_digests",
+    "extract_element_fingerprint",
+    "normalize_bbox",
+    "normalize_text_strict",
+    "sha256_digest",
+    # Formatting utilities
+    "format_snapshot_for_llm",
+]
diff --git a/sentience/utils/browser.py b/sentience/utils/browser.py
new file mode 100644
index 0000000..20a2132
--- /dev/null
+++ b/sentience/utils/browser.py
@@ -0,0 +1,46 @@
+"""
+Browser-related utilities for Sentience SDK.
+
+Provides functions for managing browser storage state (cookies, localStorage).
+"""
+
+import json
+from pathlib import Path
+
+from playwright.sync_api import BrowserContext
+
+
+def save_storage_state(context: BrowserContext, file_path: str | Path) -> None:
+    """
+    Save current browser storage state (cookies + localStorage) to a file.
+
+    This is useful for capturing a logged-in session to reuse later.
+
+    Args:
+        context: Playwright BrowserContext
+        file_path: Path to save the storage state JSON file
+
+    Example:
+        ```python
+        from sentience import SentienceBrowser, save_storage_state
+
+        browser = SentienceBrowser()
+        browser.start()
+
+        # User logs in manually or via agent
+        browser.goto("https://example.com")
+        # ... login happens ...
+
+        # Save session for later
+        save_storage_state(browser.context, "auth.json")
+        ```
+
+    Raises:
+        IOError: If file cannot be written
+    """
+    storage_state = context.storage_state()
+    file_path_obj = Path(file_path)
+    file_path_obj.parent.mkdir(parents=True, exist_ok=True)
+    with open(file_path_obj, "w") as f:
+        json.dump(storage_state, f, indent=2)
+    print(f"✅ [Sentience] Saved storage state to {file_path_obj}")
diff --git a/sentience/utils/element.py b/sentience/utils/element.py
new file mode 100644
index 0000000..6269f7f
--- /dev/null
+++ b/sentience/utils/element.py
@@ -0,0 +1,257 @@
+"""
+Element manipulation and digest utilities for Sentience SDK.
+
+Provides functions to compute stable digests of snapshots for deterministic diff.
+Two digest strategies:
+- strict: includes structure + normalized text
+- loose: structure only (no text) - detects layout changes vs content changes
+"""
+
+import hashlib
+import json
+import re
+from dataclasses import dataclass
+from typing import Any, Optional
+
+
+@dataclass
+class BBox:
+    """Bounding box with normalized coordinates."""
+
+    x: int
+    y: int
+    width: int
+    height: int
+
+    @classmethod
+    def from_dict(cls, bbox_dict: dict[str, Any]) -> "BBox":
+        """Create BBox from dictionary."""
+        return cls(
+            x=int(bbox_dict.get("x", 0)),
+            y=int(bbox_dict.get("y", 0)),
+            width=int(bbox_dict.get("width", 0)),
+            height=int(bbox_dict.get("height", 0)),
+        )
+
+    def to_normalized(self, bucket_size: int = 2) -> list[int]:
+        """
+        Normalize bbox to fixed-size buckets to ignore minor jitter.
+
+        Args:
+            bucket_size: Pixel bucket size (default 2px)
+
+        Returns:
+            List of [x, y, width, height] rounded to buckets
+        """
+        return [
+            round(self.x / bucket_size) * bucket_size,
+            round(self.y / bucket_size) * bucket_size,
+            round(self.width / bucket_size) * bucket_size,
+            round(self.height / bucket_size) * bucket_size,
+        ]
+
+
+@dataclass
+class ElementFingerprint:
+    """Normalized element data for digest computation."""
+
+    id: int
+    role: str
+    bbox: list[int]  # Normalized
+    clickable: int  # 0 or 1
+    primary: int  # 0 or 1
+    text: str = ""  # Empty for loose digest
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        data = {
+            "id": self.id,
+            "role": self.role,
+            "bbox": self.bbox,
+            "clickable": self.clickable,
+            "primary": self.primary,
+        }
+        if self.text:  # Only include text if non-empty
+            data["text"] = self.text
+        return data
+
+
+def normalize_text_strict(text: str | None, max_length: int = 80) -> str:
+    """
+    Normalize text for strict digest (structure + content).
+
+    Rules:
+    - Lowercase
+    - Trim and collapse whitespace
+    - Cap length at max_length
+    - Replace digit runs with '#'
+    - Normalize currency: $79.99 -> $#
+    - Normalize time patterns: 12:34 -> #:#
+
+    Args:
+        text: Input text
+        max_length: Maximum text length (default 80)
+
+    Returns:
+        Normalized text string
+    """
+    if not text:
+        return ""
+
+    # Lowercase and trim
+    text = text.strip().lower()
+
+    # Collapse whitespace
+    text = " ".join(text.split())
+
+    # Cap length
+    text = text[:max_length]
+
+    # Replace digit runs with #
+    text = re.sub(r"\d+", "#", text)
+
+    # Normalize currency
+    text = re.sub(r"\$\s*#", "$#", text)
+
+    # Normalize time patterns (HH:MM or similar)
+    text = re.sub(r"#:#", "#:#", text)
+
+    # Normalize date patterns (YYYY-MM-DD or similar)
+    text = re.sub(r"#-#-#", "#-#-#", text)
+
+    return text
+
+
+def normalize_bbox(bbox: dict[str, Any] | BBox, bucket_size: int = 2) -> list[int]:
+    """
+    Round bbox to fixed-size buckets to ignore jitter.
+
+    Args:
+        bbox: BBox object or dict with x, y, width, height
+        bucket_size: Pixel bucket size (default 2px)
+
+    Returns:
+        List of [x, y, width, height] rounded to buckets
+    """
+    if isinstance(bbox, BBox):
+        return bbox.to_normalized(bucket_size)
+
+    bbox_obj = BBox.from_dict(bbox)
+    return bbox_obj.to_normalized(bucket_size)
+
+
+def extract_element_fingerprint(
+    element: dict[str, Any],
+    include_text: bool = True,
+) -> ElementFingerprint:
+    """
+    Extract normalized fingerprint from element dict.
+
+    Args:
+        element: Element dict from snapshot
+        include_text: Whether to include normalized text (False for loose digest)
+
+    Returns:
+        ElementFingerprint with normalized data
+    """
+    # Extract basic fields
+    element_id = element.get("id", 0)
+    role = element.get("role", "unknown")
+
+    # Extract and normalize bbox
+    bbox_data = element.get("bbox", {})
+    bbox_normalized = normalize_bbox(bbox_data)
+
+    # Extract visual cues
+    visual_cues = element.get("visual_cues", {})
+    clickable = 1 if visual_cues.get("is_clickable", False) else 0
+    primary = 1 if visual_cues.get("is_primary", False) else 0
+
+    # Extract and normalize text (if requested)
+    text = ""
+    if include_text:
+        raw_text = element.get("text", "")
+        text = normalize_text_strict(raw_text)
+
+    return ElementFingerprint(
+        id=element_id,
+        role=role,
+        bbox=bbox_normalized,
+        clickable=clickable,
+        primary=primary,
+        text=text,
+    )
+
+
+def canonical_snapshot_strict(elements: list[dict[str, Any]]) -> str:
+    """
+    Create strict snapshot digest (structure + normalized text).
+
+    Args:
+        elements: List of element dicts from snapshot
+
+    Returns:
+        Canonical JSON string for hashing
+    """
+    fingerprints = []
+
+    for element in sorted(elements, key=lambda e: e.get("id", 0)):
+        fingerprint = extract_element_fingerprint(element, include_text=True)
+        fingerprints.append(fingerprint.to_dict())
+
+    return json.dumps(fingerprints, sort_keys=True, ensure_ascii=False)
+
+
+def canonical_snapshot_loose(elements: list[dict[str, Any]]) -> str:
+    """
+    Create loose snapshot digest (structure only, no text).
+
+    This is more resistant to content churn (prices, ads, timestamps).
+    Use for detecting structural changes vs content changes.
+
+    Args:
+        elements: List of element dicts from snapshot
+
+    Returns:
+        Canonical JSON string for hashing
+    """
+    fingerprints = []
+
+    for element in sorted(elements, key=lambda e: e.get("id", 0)):
+        fingerprint = extract_element_fingerprint(element, include_text=False)
+        fingerprints.append(fingerprint.to_dict())
+
+    return json.dumps(fingerprints, sort_keys=True, ensure_ascii=False)
+
+
+def sha256_digest(canonical_str: str) -> str:
+    """
+    Compute SHA256 hash with 'sha256:' prefix.
+
+    Args:
+        canonical_str: Canonical string to hash
+
+    Returns:
+        Hash string with format: "sha256:<hex>"
+    """
+    hash_obj = hashlib.sha256(canonical_str.encode("utf-8"))
+    return f"sha256:{hash_obj.hexdigest()}"
+
+
+def compute_snapshot_digests(elements: list[dict[str, Any]]) -> dict[str, str]:
+    """
+    Compute both strict and loose digests for a snapshot.
+
+    Args:
+        elements: List of element dicts from snapshot
+
+    Returns:
+        Dict with 'strict' and 'loose' digest strings
+    """
+    canonical_strict = canonical_snapshot_strict(elements)
+    canonical_loose = canonical_snapshot_loose(elements)
+
+    return {
+        "strict": sha256_digest(canonical_strict),
+        "loose": sha256_digest(canonical_loose),
+    }
diff --git a/sentience/utils/formatting.py b/sentience/utils/formatting.py
new file mode 100644
index 0000000..5b2ef19
--- /dev/null
+++ b/sentience/utils/formatting.py
@@ -0,0 +1,59 @@
+"""
+Snapshot formatting utilities for LLM prompts.
+
+Provides functions to convert Sentience snapshots into text format suitable
+for LLM consumption.
+"""
+
+from typing import List
+
+from ..models import Snapshot
+
+
+def format_snapshot_for_llm(snap: Snapshot, limit: int = 50) -> str:
+    """
+    Convert snapshot elements to text format for LLM consumption.
+
+    This is the canonical way Sentience formats DOM state for LLMs.
+    The format includes element ID, role, text preview, visual cues,
+    position, and importance score.
+
+    Args:
+        snap: Snapshot object with elements
+        limit: Maximum number of elements to include (default: 50)
+
+    Returns:
+        Formatted string with one element per line
+
+    Example:
+        >>> snap = snapshot(browser)
+        >>> formatted = format_snapshot_for_llm(snap, limit=10)
+        >>> print(formatted)
+        [1] <button> "Sign In" {PRIMARY,CLICKABLE} @ (100,50) (Imp:10)
+        [2] <input> "Email address" @ (100,100) (Imp:8)
+        ...
+    """
+    lines: list[str] = []
+
+    for el in snap.elements[:limit]:
+        # Build visual cues string
+        cues = []
+        if getattr(el.visual_cues, "is_primary", False):
+            cues.append("PRIMARY")
+        if getattr(el.visual_cues, "is_clickable", False):
+            cues.append("CLICKABLE")
+
+        cues_str = f" {{{','.join(cues)}}}" if cues else ""
+
+        # Format text preview (truncate to 50 chars)
+        text_preview = el.text or ""
+        if len(text_preview) > 50:
+            text_preview = text_preview[:50] + "..."
+
+        # Build element line: [ID] <role> "text" {cues} @ (x,y) (Imp:score)
+        lines.append(
+            f'[{el.id}] <{el.role}> "{text_preview}"{cues_str} '
+            f"@ ({int(el.bbox.x)},{int(el.bbox.y)}) (Imp:{el.importance})"
+        )
+
+    return "\n".join(lines)
diff --git a/sentience/wait.py b/sentience/wait.py
index d42e899..f122fb9 100644
--- a/sentience/wait.py
+++ b/sentience/wait.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 """
 Wait functionality - wait_for element matching selector
 """
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 0000000..bc27454
--- /dev/null
+++ b/tests/integration/__init__.py
@@ -0,0 +1,6 @@
+"""
+Integration tests for Sentience SDK.
+
+These tests use real browser instances to test end-to-end functionality
+and catch real-world bugs that mocks might miss.
+"""
diff --git a/tests/integration/test_agent_workflows.py b/tests/integration/test_agent_workflows.py
new file mode 100644
index 0000000..f3bd0fc
--- /dev/null
+++ b/tests/integration/test_agent_workflows.py
@@ -0,0 +1,423 @@
+"""
+Integration tests for SentienceAgent workflows.
+
+Tests multi-step agent scenarios and error recovery without requiring real browser.
+Uses mocks to simulate realistic browser behavior.
+"""
+
+from unittest.mock import Mock, patch
+
+import pytest
+
+from sentience.agent import SentienceAgent
+from sentience.llm_provider import LLMProvider, LLMResponse
+from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues
+from sentience.protocols import BrowserProtocol, PageProtocol
+
+
+class MockLLMProvider(LLMProvider):
+    """Mock LLM provider for integration testing"""
+
+    def __init__(self, responses=None):
+        self.responses = responses or []
+        self.call_count = 0
+        self.calls = []
+
+    def generate(self, system_prompt: str, user_prompt: str, **kwargs):
+        self.calls.append({"system": system_prompt, "user": user_prompt, "kwargs": kwargs})
+
+        if self.responses:
+            response = self.responses[self.call_count % len(self.responses)]
+        else:
+            response = "CLICK(1)"
+
+        self.call_count += 1
+
+        return LLMResponse(
+            content=response,
+            prompt_tokens=100,
+            completion_tokens=20,
+            total_tokens=120,
+            model_name="mock-model",
+        )
+
+    def supports_json_mode(self) -> bool:
+        return True
+
+    @property
+    def model_name(self) -> str:
+        return "mock-model"
+
+
+class MockPage(PageProtocol):
+    """Mock page that implements PageProtocol"""
+
+    def __init__(self, url: str = "https://example.com"):
+        self._url = url
+
+    @property
+    def url(self) -> str:
+        return self._url
+
+    def evaluate(self, script: str, *args, **kwargs):
+        return {}
+
+    def goto(self, url: str, **kwargs):
+        self._url = url
+
+    def wait_for_timeout(self, timeout: int):
+        pass
+
+    def wait_for_load_state(self, state: str = "load", timeout: int | None = None):
+        pass
+
+    def wait_for_function(self, expression: str, timeout: int | None = None):
+        pass
+
+
+class MockBrowser(BrowserProtocol):
+    """Mock browser for integration testing"""
+
+    def __init__(self):
+        self._page = MockPage()
+        self._started = False
+        self.api_key = None  # Required by snapshot function
+        self.api_url = None  # Required by snapshot function
+        self._context = Mock()  # Mock context for storage state
+
+    def start(self):
+        self._started = True
+
+    @property
+    def page(self) -> PageProtocol | None:
+        return self._page if self._started else None
+
+    def goto(self, url: str):
+        if self._page:
+            self._page.goto(url)
+
+    def close(self, output_path=None):
+        self._started = False
+        return output_path
+
+    @property
+    def context(self):
+        return self._context
+
+
+def create_mock_snapshot(elements=None):
+    """Create a mock snapshot for testing"""
+    if elements is None:
+        elements = [
+            Element(
+                id=1,
+                role="button",
+                text="Click Me",
+                importance=900,
+                bbox=BBox(x=100, y=200, width=80, height=30),
+                visual_cues=VisualCues(is_primary=True, is_clickable=True),
+            ),
+            Element(
+                id=2,
+                role="input",
+                text="Search",
+                importance=800,
+                bbox=BBox(x=100, y=250, width=200, height=30),
+                visual_cues=VisualCues(is_primary=False, is_clickable=True),
+            ),
+        ]
+    return Snapshot(
+        status="success",
+        timestamp="2024-12-24T10:00:00Z",
+        url="https://example.com",
+        viewport=Viewport(width=1920, height=1080),
+        elements=elements,
+    )
+
+
+class TestAgentMultiStepWorkflows:
+    """Test multi-step agent workflows"""
+
+    def test_agent_multi_step_click_then_type(self):
+        """Test agent performing multiple actions in sequence."""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(2)", 'TYPE(2, "search query")'])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.agent.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+            patch("sentience.action_executor.type_text") as mock_type,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            mock_click.return_value = ActionResult(
+                success=True, duration_ms=150, outcome="dom_updated"
+            )
+            mock_type.return_value = ActionResult(
+                success=True, duration_ms=200, outcome="dom_updated"
+            )
+
+            # First action: click input
+            result1 = agent.act("Click the search input", max_retries=0)
+            assert result1.success is True
+            assert result1.action == "click"
+            assert mock_click.call_count == 1
+
+            # Second action: type into input
+            result2 = agent.act("Type search query into the input", max_retries=0)
+            assert result2.success is True
+            assert result2.action == "type"
+            assert mock_type.call_count == 1
+
+            # Verify history tracks both actions
+            assert len(agent.history) == 2
+
+    def test_agent_workflow_with_retry(self):
+        """Test agent workflow with retry on failure."""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.agent.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            # First call raises exception (triggers retry), second succeeds
+            mock_click.side_effect = [
+                RuntimeError("Element not found"),
+                ActionResult(success=True, duration_ms=150, outcome="dom_updated"),
+            ]
+
+            result = agent.act("Click the button", max_retries=1)
+
+            assert result.success is True
+            assert mock_click.call_count == 2
+            assert len(agent.history) == 1  # Only successful attempt recorded
+
+    def test_agent_workflow_url_change(self):
+        """Test agent workflow that causes URL change."""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.agent.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            mock_click.return_value = ActionResult(
+                success=True, duration_ms=150, outcome="navigated", url_changed=True
+            )
+
+            result = agent.act("Click the link", max_retries=0)
+
+            assert result.success is True
+            assert result.url_changed is True
+            assert result.action == "click"
+
+    def test_agent_workflow_finish_action(self):
+        """Test agent workflow that finishes successfully."""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["FINISH()"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with patch("sentience.agent.snapshot") as mock_snapshot:
+            mock_snapshot.return_value = create_mock_snapshot()
+
+            result = agent.act("Task is complete", max_retries=0)
+
+            assert result.success is True
+            assert result.action == "finish"
+            assert len(agent.history) == 1
+
+    def test_agent_workflow_token_tracking(self):
+        """Test that token usage is tracked across workflow."""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)", "CLICK(2)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.agent.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            mock_click.return_value = ActionResult(
+                success=True, duration_ms=150, outcome="dom_updated"
+            )
+
+            # Perform two actions
+            agent.act("Click first button", max_retries=0)
+            agent.act("Click second button", max_retries=0)
+
+            # Check token stats
+            stats = agent.get_token_stats()
+            assert stats.total_tokens > 0
+            assert stats.total_prompt_tokens > 0
+            assert stats.total_completion_tokens > 0
+            assert len(stats.by_action) == 2  # Two actions tracked
+
+
+class TestAgentErrorRecovery:
+    """Test agent error recovery scenarios"""
+
+    def test_agent_recovery_after_snapshot_failure(self):
+        """Test agent recovers after snapshot failure."""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.agent.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult, Snapshot
+
+            # First snapshot fails, second succeeds
+            failed_snapshot = Snapshot(
+                status="error",
+                error="Network timeout",
+                url="https://example.com",
+                viewport=Viewport(width=1920, height=1080),
+                elements=[],
+            )
+            mock_snapshot.side_effect = [
+                failed_snapshot,
+                create_mock_snapshot(),
+            ]
+            mock_click.return_value = ActionResult(
+                success=True, duration_ms=150, outcome="dom_updated"
+            )
+
+            # Should raise on first attempt, succeed on retry
+            with pytest.raises(RuntimeError, match="Snapshot failed"):
+                agent.act("Click button", max_retries=0)
+
+            # With retry, should succeed
+            result = agent.act("Click button", max_retries=1)
+            assert result.success is True
+
+    def test_agent_recovery_after_action_failure(self):
+        """Test agent recovers after action failure."""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)", "CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.agent.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            # First action fails, second succeeds
+            mock_click.side_effect = [
+                RuntimeError("Element not found"),
+                ActionResult(success=True, duration_ms=150, outcome="dom_updated"),
+            ]
+
+            result = agent.act("Click button", max_retries=1)
+
+            assert result.success is True
+            assert mock_click.call_count == 2
+
+    def test_agent_handles_max_retries_exceeded(self):
+        """Test agent handles max retries exceeded."""
+        browser = MockBrowser()
+        browser.start()
+        # Need multiple responses for multiple retries
+        llm = MockLLMProvider(responses=["CLICK(1)", "CLICK(1)", "CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.agent.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            # Raise exception to trigger retry logic (agent only retries on exceptions, not failed results)
+            mock_click.side_effect = RuntimeError("Action failed")
+
+            with pytest.raises(RuntimeError, match="Failed after"):
+                agent.act("Click button", max_retries=2)
+
+            # Should have attempted 3 times (initial + 2 retries)
+            # Each attempt calls snapshot, LLM, and click
+            assert mock_click.call_count == 3
+            assert mock_snapshot.call_count >= 3
+            assert llm.call_count >= 3
+
+
+class TestAgentStateManagement:
+    """Test agent state management across actions"""
+
+    def test_agent_history_preservation(self):
+        """Test that agent history is preserved across actions."""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)", "CLICK(2)", "FINISH()"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.agent.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            mock_click.return_value = ActionResult(
+                success=True, duration_ms=150, outcome="dom_updated"
+            )
+
+            # Perform multiple actions
+            agent.act("Click first", max_retries=0)
+            agent.act("Click second", max_retries=0)
+            agent.act("Finish", max_retries=0)
+
+            # Verify history contains all actions
+            assert len(agent.history) == 3
+            assert agent.history[0]["goal"] == "Click first"
+            assert agent.history[1]["goal"] == "Click second"
+            assert agent.history[2]["goal"] == "Finish"
+
+    def test_agent_step_count_increments(self):
+        """Test that step count increments across actions."""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)", "CLICK(2)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.agent.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            mock_click.return_value = ActionResult(
+                success=True, duration_ms=150, outcome="dom_updated"
+            )
+
+            initial_count = agent._step_count
+
+            agent.act("First action", max_retries=0)
+            assert agent._step_count == initial_count + 1
+
+            agent.act("Second action", max_retries=0)
+            assert agent._step_count == initial_count + 2
diff --git a/tests/test_agent.py b/tests/test_agent.py
index 259042a..8a8c7e8 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -174,7 +174,7 @@ def test_agent_build_context():
     agent = SentienceAgent(browser, llm, verbose=False)
 
     snap = create_mock_snapshot()
-    context = agent._build_context(snap, "test goal")
+    context = agent.llm_handler.build_context(snap, "test goal")
 
     # Should contain both elements
     assert "[1]" in context
@@ -196,15 +196,15 @@ def test_agent_execute_click_action():
 
     snap = create_mock_snapshot()
 
-    # Mock click function
-    with patch("sentience.agent.click") as mock_click:
+    # Mock click function via ActionExecutor
+    with patch("sentience.action_executor.click") as mock_click:
         from sentience.models import ActionResult
 
         mock_click.return_value = ActionResult(
             success=True, duration_ms=150, outcome="dom_updated", url_changed=False
         )
 
-        result = agent._execute_action("CLICK(1)", snap)
+        result = agent.action_executor.execute("CLICK(1)", snap)
 
         assert result["success"] is True
         assert result["action"] == "click"
@@ -220,13 +220,13 @@ def test_agent_execute_type_action():
 
     snap = create_mock_snapshot()
 
-    # Mock type_text function
-    with patch("sentience.agent.type_text") as mock_type:
+    # Mock type_text function via ActionExecutor
+    with patch("sentience.action_executor.type_text") as mock_type:
         from sentience.models import ActionResult
 
         mock_type.return_value = ActionResult(success=True, duration_ms=200, outcome="dom_updated")
 
-        result = agent._execute_action('TYPE(2, "hello world")', snap)
+        result = agent.action_executor.execute('TYPE(2, "hello world")', snap)
 
         assert result["success"] is True
         assert result["action"] == "type"
@@ -243,13 +243,13 @@ def test_agent_execute_press_action():
 
     snap = create_mock_snapshot()
 
-    # Mock press function
-    with patch("sentience.agent.press") as mock_press:
+    # Mock press function via ActionExecutor
+    with patch("sentience.action_executor.press") as mock_press:
         from sentience.models import ActionResult
 
         mock_press.return_value = ActionResult(success=True, duration_ms=50, outcome="dom_updated")
 
-        result = agent._execute_action('PRESS("Enter")', snap)
+        result = agent.action_executor.execute('PRESS("Enter")', snap)
 
         assert result["success"] is True
         assert result["action"] == "press"
@@ -264,7 +264,7 @@ def test_agent_execute_finish_action():
     agent = SentienceAgent(browser, llm, verbose=False)
 
     snap = create_mock_snapshot()
-    result = agent._execute_action("FINISH()", snap)
+    result = agent.action_executor.execute("FINISH()", snap)
 
     assert result["success"] is True
     assert result["action"] == "finish"
@@ -279,7 +279,7 @@ def test_agent_execute_invalid_action():
     snap = create_mock_snapshot()
 
     with pytest.raises(ValueError, match="Unknown action format"):
-        agent._execute_action("INVALID_ACTION", snap)
+        agent.action_executor.execute("INVALID_ACTION", snap)
 
 
 def test_agent_act_full_cycle():
@@ -291,7 +291,7 @@ def test_agent_act_full_cycle():
     # Mock snapshot and click
     with (
         patch("sentience.agent.snapshot") as mock_snapshot,
-        patch("sentience.agent.click") as mock_click,
+        patch("sentience.action_executor.click") as mock_click,
     ):
         from sentience.models import ActionResult
 
@@ -389,7 +389,7 @@ def test_agent_retry_on_failure():
     # Mock snapshot and click (click will fail)
     with (
         patch("sentience.agent.snapshot") as mock_snapshot,
-        patch("sentience.agent.click") as mock_click,
+        patch("sentience.action_executor.click") as mock_click,
     ):
         mock_snapshot.return_value = create_mock_snapshot()
         # Simulate click failure
@@ -411,9 +411,9 @@ def test_agent_action_parsing_variations():
     snap = create_mock_snapshot()
 
     with (
-        patch("sentience.agent.click") as mock_click,
-        patch("sentience.agent.type_text") as mock_type,
-        patch("sentience.agent.press") as mock_press,
+        patch("sentience.action_executor.click") as mock_click,
+        patch("sentience.action_executor.type_text") as mock_type,
+        patch("sentience.action_executor.press") as mock_press,
     ):
         from sentience.models import ActionResult
 
@@ -423,11 +423,11 @@ def test_agent_action_parsing_variations():
         mock_press.return_value = mock_result
 
         # Test variations
-        agent._execute_action("click(1)", snap)  # lowercase
-        agent._execute_action("CLICK( 1 )", snap)  # extra spaces
-        agent._execute_action("TYPE(2, 'single quotes')", snap)  # single quotes
-        agent._execute_action("PRESS('Enter')", snap)  # single quotes
-        agent._execute_action("finish()", snap)  # lowercase finish
+        agent.action_executor.execute("click(1)", snap)  # lowercase
+        agent.action_executor.execute("CLICK( 1 )", snap)  # extra spaces
+        agent.action_executor.execute("TYPE(2, 'single quotes')", snap)  # single quotes
+        agent.action_executor.execute("PRESS('Enter')", snap)  # single quotes
+        agent.action_executor.execute("finish()", snap)  # lowercase finish
 
         assert mock_click.call_count == 2
         assert mock_type.call_count == 1
@@ -441,29 +441,28 @@ def test_agent_extract_action_from_llm_response():
     agent = SentienceAgent(browser, llm, verbose=False)
 
     # Test clean action (should pass through)
-    assert agent._extract_action_from_response("CLICK(42)") == "CLICK(42)"
-    assert agent._extract_action_from_response('TYPE(15, "test")') == 'TYPE(15, "test")'
-    assert agent._extract_action_from_response('PRESS("Enter")') == 'PRESS("Enter")'
-    assert agent._extract_action_from_response("FINISH()") == "FINISH()"
+    assert agent.llm_handler.extract_action("CLICK(42)") == "CLICK(42)"
+    assert agent.llm_handler.extract_action('TYPE(15, "test")') == 'TYPE(15, "test")'
+    assert agent.llm_handler.extract_action('PRESS("Enter")') == 'PRESS("Enter")'
+    assert agent.llm_handler.extract_action("FINISH()") == "FINISH()"
 
     # Test with natural language prefix (the bug case)
     assert (
-        agent._extract_action_from_response("The next step is to click the button. CLICK(42)")
+        agent.llm_handler.extract_action("The next step is to click the button. CLICK(42)")
         == "CLICK(42)"
     )
     assert (
-        agent._extract_action_from_response(
+        agent.llm_handler.extract_action(
             'The next step is to type "Sentience AI agent SDK" into the search field. TYPE(15, "Sentience AI agent SDK")'
         )
         == 'TYPE(15, "Sentience AI agent SDK")'
     )
 
     # Test with markdown code blocks
-    assert agent._extract_action_from_response("```\nCLICK(42)\n```") == "CLICK(42)"
+    assert agent.llm_handler.extract_action("```\nCLICK(42)\n```") == "CLICK(42)"
     assert (
-        agent._extract_action_from_response('```python\nTYPE(15, "test")\n```')
-        == 'TYPE(15, "test")'
+        agent.llm_handler.extract_action('```python\nTYPE(15, "test")\n```') == 'TYPE(15, "test")'
     )
 
     # Test with explanation after action
-    assert agent._extract_action_from_response("CLICK(42) to submit the form") == "CLICK(42)"
+    assert agent.llm_handler.extract_action("CLICK(42) to submit the form") == "CLICK(42)"
diff --git a/tests/test_async_api.py b/tests/test_async_api.py
index 26e69ad..fdff935 100644
--- a/tests/test_async_api.py
+++ b/tests/test_async_api.py
@@ -358,24 +358,23 @@ async def test_async_read():
 
         # Test raw HTML format
         result = await read_async(browser, output_format="raw")
-        assert result["status"] == "success"
-        assert "content" in result
-        assert "url" in result
-        assert "format" in result
-        assert result["format"] == "raw"
-        assert len(result["content"]) > 0
+        assert result.status == "success"
+        assert result.content is not None
+        assert result.url is not None
+        assert result.format == "raw"
+        assert len(result.content) > 0
 
         # Test text format
         result = await read_async(browser, output_format="text")
-        assert result["status"] == "success"
-        assert result["format"] == "text"
-        assert len(result["content"]) > 0
+        assert result.status == "success"
+        assert result.format == "text"
+        assert len(result.content) > 0
 
         # Test markdown format (may fallback to extension's markdown)
         result = await read_async(browser, output_format="markdown")
-        assert result["status"] == "success"
-        assert result["format"] == "markdown"
-        assert len(result["content"]) > 0
+        assert result.status == "success"
+        assert result.format == "markdown"
+        assert len(result.content) > 0
 
 
 @pytest.mark.asyncio
@@ -515,6 +514,9 @@ async def test_sentience_agent_async_initialization():
 
     # Create a simple mock LLM provider
     class MockLLMProvider(LLMProvider):
+        def __init__(self):
+            super().__init__("mock-model")
+
         def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
             return LLMResponse(
                 content="CLICK(1)",
diff --git a/tests/test_cloud_tracing.py b/tests/test_cloud_tracing.py
index be424c8..31888f0 100644
--- a/tests/test_cloud_tracing.py
+++ b/tests/test_cloud_tracing.py
@@ -6,6 +6,7 @@
 import os
 import tempfile
 import time
+import uuid
 from pathlib import Path
 from unittest.mock import MagicMock, Mock, patch
 
@@ -19,10 +20,25 @@
 class TestCloudTraceSink:
     """Test CloudTraceSink functionality."""
 
+    @pytest.fixture(autouse=True)
+    def mock_home_dir(self):
+        """
+        Automatically patch Path.home() to use a temporary directory for all tests.
+        This isolates file operations and prevents FileNotFoundError on CI runners.
+        """
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            mock_home = Path(tmp_dir)
+
+            # Patch Path.home in the cloud_tracing module
+            with patch("sentience.cloud_tracing.Path.home", return_value=mock_home):
+                # Also patch it in the current test module if used directly
+                with patch("pathlib.Path.home", return_value=mock_home):
+                    yield mock_home
+
     def test_cloud_trace_sink_upload_success(self):
         """Test CloudTraceSink successfully uploads trace to cloud."""
         upload_url = "https://sentience.nyc3.digitaloceanspaces.com/user123/run456/trace.jsonl.gz"
-        run_id = "test-run-123"
+        run_id = f"test-run-{uuid.uuid4().hex[:8]}"
 
         with patch("sentience.cloud_tracing.requests.put") as mock_put:
             # Mock successful response
@@ -69,7 +85,7 @@ def test_cloud_trace_sink_upload_success(self):
     def test_cloud_trace_sink_upload_failure_preserves_trace(self, capsys):
         """Test CloudTraceSink preserves trace locally on upload failure."""
         upload_url = "https://sentience.nyc3.digitaloceanspaces.com/user123/run456/trace.jsonl.gz"
-        run_id = "test-run-456"
+        run_id = f"test-run-{uuid.uuid4().hex[:8]}"
 
         with patch("sentience.cloud_tracing.requests.put") as mock_put:
             # Mock failed response
@@ -103,11 +119,14 @@ def test_cloud_trace_sink_upload_failure_preserves_trace(self, capsys):
     def test_cloud_trace_sink_emit_after_close_raises(self):
         """Test CloudTraceSink raises error when emitting after close."""
         upload_url = "https://test.com/upload"
-        sink = CloudTraceSink(upload_url, run_id="test-run-789")
+        run_id = f"test-run-{uuid.uuid4().hex[:8]}"
+        sink = CloudTraceSink(upload_url, run_id=run_id)
+        # Emit at least one event so file exists
+        sink.emit({"v": 1, "type": "test", "seq": 1})
         sink.close()
 
         with pytest.raises(RuntimeError, match="CloudTraceSink is closed"):
-            sink.emit({"v": 1, "type": "test", "seq": 1})
+            sink.emit({"v": 1, "type": "test", "seq": 2})
 
     def test_cloud_trace_sink_context_manager(self):
         """Test CloudTraceSink works as context manager."""
@@ -115,7 +134,8 @@ def test_cloud_trace_sink_context_manager(self):
             mock_put.return_value = Mock(status_code=200)
 
             upload_url = "https://test.com/upload"
-            with CloudTraceSink(upload_url, run_id="test-run-context") as sink:
+            run_id = f"test-run-{uuid.uuid4().hex[:8]}"
+            with CloudTraceSink(upload_url, run_id=run_id) as sink:
                 sink.emit({"v": 1, "type": "test", "seq": 1})
 
             # Verify upload was called
@@ -124,7 +144,7 @@ def test_cloud_trace_sink_context_manager(self):
     def test_cloud_trace_sink_network_error_graceful_degradation(self, capsys):
         """Test CloudTraceSink handles network errors gracefully."""
         upload_url = "https://sentience.nyc3.digitaloceanspaces.com/user123/run456/trace.jsonl.gz"
-        run_id = "test-run-network-error"
+        run_id = f"test-run-{uuid.uuid4().hex[:8]}"
 
         with patch("sentience.cloud_tracing.requests.put") as mock_put:
             # Simulate network error
@@ -133,29 +153,26 @@ def test_cloud_trace_sink_network_error_graceful_degradation(self, capsys):
             sink = CloudTraceSink(upload_url, run_id=run_id)
             sink.emit({"v": 1, "type": "test", "seq": 1})
 
+            # Close triggers upload (which will fail due to network error)
             # Should not raise, just print warning
             sink.close()
 
             captured = capsys.readouterr()
-            assert "❌" in captured.out
-            assert "Error uploading trace" in captured.out
+            assert "❌" in captured.out or "Error uploading trace" in captured.out
 
             # Verify file was preserved
             cache_dir = Path.home() / ".sentience" / "traces" / "pending"
             trace_path = cache_dir / f"{run_id}.jsonl"
             assert trace_path.exists(), "Trace file should be preserved on network error"
 
-            # Cleanup
-            if trace_path.exists():
-                os.remove(trace_path)
-
     def test_cloud_trace_sink_multiple_close_safe(self):
         """Test CloudTraceSink.close() is idempotent."""
         with patch("sentience.cloud_tracing.requests.put") as mock_put:
             mock_put.return_value = Mock(status_code=200)
 
             upload_url = "https://test.com/upload"
-            sink = CloudTraceSink(upload_url, run_id="test-run-multiple-close")
+            run_id = f"test-run-{uuid.uuid4().hex[:8]}"
+            sink = CloudTraceSink(upload_url, run_id=run_id)
             sink.emit({"v": 1, "type": "test", "seq": 1})
 
             # Close multiple times
@@ -169,7 +186,7 @@ def test_cloud_trace_sink_multiple_close_safe(self):
     def test_cloud_trace_sink_persistent_cache_directory(self):
         """Test CloudTraceSink uses persistent cache directory instead of temp file."""
         upload_url = "https://test.com/upload"
-        run_id = "test-run-persistent"
+        run_id = f"test-run-{uuid.uuid4().hex[:8]}"
 
         sink = CloudTraceSink(upload_url, run_id=run_id)
         sink.emit({"v": 1, "type": "test", "seq": 1})
@@ -188,7 +205,7 @@ def test_cloud_trace_sink_persistent_cache_directory(self):
     def test_cloud_trace_sink_non_blocking_close(self):
         """Test CloudTraceSink.close(blocking=False) returns immediately."""
         upload_url = "https://test.com/upload"
-        run_id = "test-run-nonblocking"
+        run_id = f"test-run-{uuid.uuid4().hex[:8]}"
 
         with patch("sentience.cloud_tracing.requests.put") as mock_put:
             mock_put.return_value = Mock(status_code=200)
@@ -213,7 +230,7 @@ def test_cloud_trace_sink_non_blocking_close(self):
     def test_cloud_trace_sink_progress_callback(self):
         """Test CloudTraceSink.close() with progress callback."""
         upload_url = "https://test.com/upload"
-        run_id = "test-run-progress"
+        run_id = f"test-run-{uuid.uuid4().hex[:8]}"
         progress_calls = []
 
         def progress_callback(uploaded: int, total: int):
@@ -235,7 +252,7 @@ def progress_callback(uploaded: int, total: int):
     def test_cloud_trace_sink_uploads_screenshots_after_trace(self):
         """Test that CloudTraceSink uploads screenshots after trace upload succeeds."""
         upload_url = "https://sentience.nyc3.digitaloceanspaces.com/user123/run456/trace.jsonl.gz"
-        run_id = "test-screenshot-integration-1"
+        run_id = f"test-run-{uuid.uuid4().hex[:8]}"
         api_key = "sk_test_123"
 
         # Create test screenshot
@@ -383,34 +400,45 @@ class TestTracerFactory:
 
     def test_create_tracer_pro_tier_success(self, capsys):
         """Test create_tracer returns CloudTraceSink for Pro tier."""
-        with patch("sentience.tracer_factory.requests.post") as mock_post:
-            with patch("sentience.cloud_tracing.requests.put") as mock_put:
-                # Mock API response
-                mock_response = Mock()
-                mock_response.status_code = 200
-                mock_response.json.return_value = {
-                    "upload_url": "https://sentience.nyc3.digitaloceanspaces.com/upload"
-                }
-                mock_post.return_value = mock_response
-
-                # Mock upload response
-                mock_put.return_value = Mock(status_code=200)
-
-                tracer = create_tracer(
-                    api_key="sk_pro_test123", run_id="test-run", upload_trace=True
-                )
+        # Patch orphaned trace recovery to avoid extra API calls
+        with patch("sentience.tracer_factory._recover_orphaned_traces"):
+            with patch("sentience.tracer_factory.requests.post") as mock_post:
+                with patch("sentience.cloud_tracing.requests.put") as mock_put:
+                    # Mock API response
+                    mock_response = Mock()
+                    mock_response.status_code = 200
+                    mock_response.json.return_value = {
+                        "upload_url": "https://sentience.nyc3.digitaloceanspaces.com/upload"
+                    }
+                    mock_post.return_value = mock_response
 
-                # Verify Pro tier message
-                captured = capsys.readouterr()
-                assert "☁️  [Sentience] Cloud tracing enabled (Pro tier)" in captured.out
+                    # Mock upload response
+                    mock_put.return_value = Mock(status_code=200)
 
-                # Verify tracer works
-                assert tracer.run_id == "test-run"
-                assert isinstance(tracer.sink, CloudTraceSink)
-                assert tracer.sink.run_id == "test-run"  # Verify run_id is passed
+                    run_id = f"test-run-{uuid.uuid4().hex[:8]}"
+                    tracer = create_tracer(
+                        api_key="sk_pro_test123", run_id=run_id, upload_trace=True
+                    )
 
-                # Cleanup
-                tracer.close()
+                    # Verify Pro tier message
+                    captured = capsys.readouterr()
+                    assert "☁️  [Sentience] Cloud tracing enabled (Pro tier)" in captured.out
+
+                    # Verify tracer works
+                    assert tracer.run_id == run_id
+                    # Check if sink is CloudTraceSink (it should be)
+                    assert isinstance(
+                        tracer.sink, CloudTraceSink
+                    ), f"Expected CloudTraceSink, got {type(tracer.sink)}"
+                    assert tracer.sink.run_id == run_id  # Verify run_id is passed
+
+                    # Verify the init API was called (only once, since orphaned recovery is patched)
+                    assert mock_post.called
+                    assert mock_post.call_count == 1
+
+                    # Cleanup - emit at least one event so file exists before close
+                    tracer.emit("test", {"v": 1, "seq": 1})
+                    tracer.close()
 
     def test_create_tracer_free_tier_fallback(self, capsys):
         """Test create_tracer falls back to local for free tier."""
diff --git a/tests/test_conversational_agent.py b/tests/test_conversational_agent.py
index 29e8d20..43af436 100644
--- a/tests/test_conversational_agent.py
+++ b/tests/test_conversational_agent.py
@@ -193,8 +193,8 @@ def test_execute_navigate_step():
 
     result = agent._execute_step(step)
 
-    assert result["success"] is True
-    assert result["action"] == "NAVIGATE"
+    assert result.success is True
+    assert result.action == "NAVIGATE"
     browser.page.goto.assert_called_once()
     # Should have added https://
     assert "https://google.com" in str(browser.page.goto.call_args)
@@ -212,10 +212,10 @@ def test_execute_find_and_click_step():
         "parameters": {"element_description": "button"},
     }
 
-    # Patch at the agent module level where it's imported
+    # Patch at the action_executor level where click is actually called
     with (
         patch("sentience.agent.snapshot") as mock_snapshot,
-        patch("sentience.agent.click") as mock_click,
+        patch("sentience.action_executor.click") as mock_click,
     ):
         from sentience.models import ActionResult
 
@@ -224,7 +224,7 @@ def test_execute_find_and_click_step():
 
         result = agent._execute_step(step)
 
-        assert result["action"] == "FIND_AND_CLICK"
+        assert result.action == "FIND_AND_CLICK"
         # Technical agent should have been called
         assert len(agent.technical_agent.history) > 0
 
@@ -241,10 +241,10 @@ def test_execute_find_and_type_step():
         "parameters": {"element_description": "search box", "text": "magic mouse"},
     }
 
-    # Patch at the agent module level where it's imported
+    # Patch at the action_executor level where type_text is actually called
     with (
         patch("sentience.agent.snapshot") as mock_snapshot,
-        patch("sentience.agent.type_text") as mock_type,
+        patch("sentience.action_executor.type_text") as mock_type,
     ):
         from sentience.models import ActionResult
 
@@ -253,8 +253,8 @@ def test_execute_find_and_type_step():
 
         result = agent._execute_step(step)
 
-        assert result["action"] == "FIND_AND_TYPE"
-        assert result["data"]["text"] == "magic mouse"
+        assert result.action == "FIND_AND_TYPE"
+        assert result.data["text"] == "magic mouse"
 
 
 def test_execute_wait_step():
@@ -271,9 +271,9 @@ def test_execute_wait_step():
 
     result = agent._execute_step(step)
 
-    assert result["success"] is True
-    assert result["action"] == "WAIT"
-    assert result["data"]["duration"] == 0.1
+    assert result.success is True
+    assert result.action == "WAIT"
+    assert result.data["duration"] == 0.1
 
 
 def test_execute_extract_info_step():
@@ -298,9 +298,13 @@ def test_execute_extract_info_step():
 
         result = agent._execute_step(step)
 
-        assert result["success"] is True
-        assert result["action"] == "EXTRACT_INFO"
-        assert result["data"]["extracted"]["found"] is True
+        assert result.success is True
+        assert result.action == "EXTRACT_INFO"
+        extracted = result.data["extracted"]
+        if isinstance(extracted, dict):
+            assert extracted["found"] is True
+        else:
+            assert extracted.found is True
 
 
 def test_execute_verify_step():
@@ -323,9 +327,9 @@ def test_execute_verify_step():
 
         result = agent._execute_step(step)
 
-        assert result["success"] is True
-        assert result["action"] == "VERIFY"
-        assert result["data"]["verified"] is True
+        assert result.success is True
+        assert result.action == "VERIFY"
+        assert result.data["verified"] is True
 
 
 def test_synthesize_response():
diff --git a/tests/test_llm_provider_utils.py b/tests/test_llm_provider_utils.py
new file mode 100644
index 0000000..f5f89dc
--- /dev/null
+++ b/tests/test_llm_provider_utils.py
@@ -0,0 +1,96 @@
+"""Tests for sentience.llm_provider_utils module"""
+
+import os
+from unittest.mock import patch
+
+import pytest
+
+from sentience.llm_provider_utils import (
+    get_api_key_from_env,
+    handle_provider_error,
+    require_package,
+)
+
+
+def test_require_package_success():
+    """Test require_package successfully imports existing package."""
+    # Test with a package that should exist
+    json_module = require_package("json", "json", install_command="pip install json")
+    assert json_module is not None
+    # Verify it's actually the json module
+    assert hasattr(json_module, "dumps")
+
+
+def test_require_package_import_error():
+    """Test require_package raises ImportError for missing package."""
+    with pytest.raises(ImportError, match="nonexistent-package.*not installed"):
+        require_package(
+            "nonexistent-package",
+            "nonexistent_package",
+            install_command="pip install nonexistent-package",
+        )
+
+
+def test_require_package_with_class():
+    """Test require_package imports specific class."""
+    # json doesn't have a class, but we can test the mechanism
+    json_module = require_package("json", "json", install_command="pip install json")
+    assert json_module is not None
+
+
+def test_get_api_key_from_env_with_param():
+    """Test get_api_key_from_env returns parameter if provided."""
+    key = get_api_key_from_env(["TEST_API_KEY"], api_key="provided-key")
+    assert key == "provided-key"
+
+
+def test_get_api_key_from_env_from_env_var():
+    """Test get_api_key_from_env reads from environment variable."""
+    with patch.dict(os.environ, {"TEST_API_KEY": "env-key-value"}):
+        key = get_api_key_from_env(["TEST_API_KEY"])
+        assert key == "env-key-value"
+
+
+def test_get_api_key_from_env_multiple_vars():
+    """Test get_api_key_from_env checks multiple environment variables."""
+    # Remove FIRST_KEY if it exists, set SECOND_KEY
+    with patch.dict(os.environ, {"SECOND_KEY": "second-value"}, clear=False):
+        # Remove FIRST_KEY if it exists
+        os.environ.pop("FIRST_KEY", None)
+        key = get_api_key_from_env(["FIRST_KEY", "SECOND_KEY"])
+        assert key == "second-value"
+
+
+def test_get_api_key_from_env_not_found():
+    """Test get_api_key_from_env returns None if not found."""
+    with patch.dict(os.environ, {}, clear=True):
+        key = get_api_key_from_env(["NONEXISTENT_KEY"])
+        assert key is None
+
+
+def test_handle_provider_error_api_key():
+    """Test handle_provider_error handles API key errors."""
+    error = Exception("Invalid API key provided")
+    with pytest.raises(RuntimeError, match="API key is invalid or missing"):
+        handle_provider_error(error, "OpenAI", "generate response")
+
+
+def test_handle_provider_error_rate_limit():
+    """Test handle_provider_error handles rate limit errors."""
+    error = Exception("Rate limit exceeded: 429")
+    with pytest.raises(RuntimeError, match="rate limit exceeded"):
+        handle_provider_error(error, "Anthropic", "generate response")
+
+
+def test_handle_provider_error_model_not_found():
+    """Test handle_provider_error handles model not found errors."""
+    error = Exception("Model 'gpt-999' not found")
+    with pytest.raises(RuntimeError, match="model not found"):
+        handle_provider_error(error, "OpenAI", "generate response")
+
+
+def test_handle_provider_error_generic():
+    """Test handle_provider_error handles generic errors."""
+    error = Exception("Network timeout")
+    with pytest.raises(RuntimeError, match="Gemini generate response failed: Network timeout"):
+        handle_provider_error(error, "Gemini", "generate response")
diff --git a/tests/test_llm_response_builder.py b/tests/test_llm_response_builder.py
new file mode 100644
index 0000000..9ac2f14
--- /dev/null
+++ b/tests/test_llm_response_builder.py
@@ -0,0 +1,95 @@
+"""
+Tests for LLMResponseBuilder helper class.
+"""
+
+import pytest
+
+from sentience.llm_provider import LLMResponse
+from sentience.llm_response_builder import LLMResponseBuilder
+
+
+class TestLLMResponseBuilder:
+    """Test LLMResponseBuilder helper methods"""
+
+    def test_from_openai_format(self):
+        """Test building response from OpenAI format"""
+        response = LLMResponseBuilder.from_openai_format(
+            content="Hello, world!",
+            prompt_tokens=10,
+            completion_tokens=5,
+            total_tokens=15,
+            model_name="gpt-4o",
+            finish_reason="stop",
+        )
+
+        assert isinstance(response, LLMResponse)
+        assert response.content == "Hello, world!"
+        assert response.prompt_tokens == 10
+        assert response.completion_tokens == 5
+        assert response.total_tokens == 15
+        assert response.model_name == "gpt-4o"
+        assert response.finish_reason == "stop"
+
+    def test_from_openai_format_auto_total(self):
+        """Test OpenAI format with auto-calculated total_tokens"""
+        response = LLMResponseBuilder.from_openai_format(
+            content="Test",
+            prompt_tokens=5,
+            completion_tokens=3,
+            model_name="gpt-4o",
+        )
+
+        assert response.total_tokens == 8  # Auto-calculated
+
+    def test_from_anthropic_format(self):
+        """Test building response from Anthropic format"""
+        response = LLMResponseBuilder.from_anthropic_format(
+            content="Claude response",
+            input_tokens=12,
+            output_tokens=8,
+            model_name="claude-3-sonnet",
+            stop_reason="end_turn",
+        )
+
+        assert isinstance(response, LLMResponse)
+        assert response.content == "Claude response"
+        assert response.prompt_tokens == 12
+        assert response.completion_tokens == 8
+        assert response.total_tokens == 20
+        assert response.model_name == "claude-3-sonnet"
+        assert response.finish_reason == "end_turn"
+
+    def test_from_gemini_format(self):
+        """Test building response from Gemini format"""
+        response = LLMResponseBuilder.from_gemini_format(
+            content="Gemini response",
+            prompt_tokens=15,
+            completion_tokens=7,
+            total_tokens=22,
+            model_name="gemini-2.0-flash-exp",
+        )
+
+        assert isinstance(response, LLMResponse)
+        assert response.content == "Gemini response"
+        assert response.prompt_tokens == 15
+        assert response.completion_tokens == 7
+        assert response.total_tokens == 22
+        assert response.model_name == "gemini-2.0-flash-exp"
+        assert response.finish_reason is None
+
+    def test_from_local_format(self):
+        """Test building response from local model format"""
+        response = LLMResponseBuilder.from_local_format(
+            content="Local model response",
+            prompt_tokens=20,
+            completion_tokens=10,
+            model_name="Qwen/Qwen2.5-3B-Instruct",
+        )
+
+        assert isinstance(response, LLMResponse)
+        assert response.content == "Local model response"
+        assert response.prompt_tokens == 20
+        assert response.completion_tokens == 10
+        assert response.total_tokens == 30
+        assert response.model_name == "Qwen/Qwen2.5-3B-Instruct"
+        assert response.finish_reason is None
diff --git a/tests/test_read.py b/tests/test_read.py
index 699144f..328eea1 100644
--- a/tests/test_read.py
+++ b/tests/test_read.py
@@ -13,12 +13,12 @@ def test_read_text():
 
         result = read(browser, output_format="text")
 
-        assert result["status"] == "success"
-        assert result["format"] == "text"
-        assert "content" in result
-        assert "length" in result
-        assert len(result["content"]) > 0
-        assert result["url"] == "https://example.com/"
+        assert result.status == "success"
+        assert result.format == "text"
+        assert result.content is not None
+        assert result.length is not None
+        assert len(result.content) > 0
+        assert result.url == "https://example.com/"
 
 
 def test_read_markdown():
@@ -29,12 +29,12 @@ def test_read_markdown():
 
         result = read(browser, output_format="markdown")
 
-        assert result["status"] == "success"
-        assert result["format"] == "markdown"
-        assert "content" in result
-        assert "length" in result
-        assert len(result["content"]) > 0
-        assert result["url"] == "https://example.com/"
+        assert result.status == "success"
+        assert result.format == "markdown"
+        assert result.content is not None
+        assert result.length is not None
+        assert len(result.content) > 0
+        assert result.url == "https://example.com/"
 
 
 def test_read_markdown_enhanced():
@@ -46,18 +46,18 @@ def test_read_markdown_enhanced():
         # Test with enhancement (default)
         result_enhanced = read(browser, output_format="markdown", enhance_markdown=True)
 
-        assert result_enhanced["status"] == "success"
-        assert result_enhanced["format"] == "markdown"
-        assert len(result_enhanced["content"]) > 0
+        assert result_enhanced.status == "success"
+        assert result_enhanced.format == "markdown"
+        assert len(result_enhanced.content) > 0
 
         # Test without enhancement
         result_basic = read(browser, output_format="markdown", enhance_markdown=False)
 
-        assert result_basic["status"] == "success"
-        assert result_basic["format"] == "markdown"
-        assert len(result_basic["content"]) > 0
+        assert result_basic.status == "success"
+        assert result_basic.format == "markdown"
+        assert len(result_basic.content) > 0
 
         # Enhanced markdown should be different (and likely better formatted)
         # Note: They might be similar for simple pages, but enhanced should handle more cases
-        assert isinstance(result_enhanced["content"], str)
-        assert isinstance(result_basic["content"], str)
+        assert isinstance(result_enhanced.content, str)
+        assert isinstance(result_basic.content, str)
diff --git a/tests/test_trace_file_manager.py b/tests/test_trace_file_manager.py
new file mode 100644
index 0000000..3774299
--- /dev/null
+++ b/tests/test_trace_file_manager.py
@@ -0,0 +1,114 @@
+"""
+Tests for TraceFileManager helper class.
+"""
+
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from sentience.trace_file_manager import TraceFileManager
+
+
+class TestTraceFileManager:
+    """Test TraceFileManager helper methods"""
+
+    def test_write_event(self):
+        """Test writing event to file handle"""
+        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".jsonl") as f:
+            temp_path = Path(f.name)
+
+        try:
+            with open(temp_path, "w", encoding="utf-8") as file_handle:
+                event = {"type": "test", "data": {"key": "value"}}
+                TraceFileManager.write_event(file_handle, event)
+
+            # Read back and verify
+            with open(temp_path, encoding="utf-8") as f:
+                line = f.read().strip()
+                assert line
+                parsed = json.loads(line)
+                assert parsed == event
+        finally:
+            temp_path.unlink()
+
+    def test_ensure_directory(self):
+        """Test ensuring directory exists"""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            test_path = Path(tmpdir) / "nested" / "path" / "file.jsonl"
+            TraceFileManager.ensure_directory(test_path)
+
+            assert test_path.parent.exists()
+            assert test_path.parent.is_dir()
+
+    def test_read_events(self):
+        """Test reading events from JSONL file"""
+        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".jsonl") as f:
+            temp_path = Path(f.name)
+
+        try:
+            # Write test events
+            events = [
+                {"type": "event1", "data": {"key1": "value1"}},
+                {"type": "event2", "data": {"key2": "value2"}},
+                {"type": "event3", "data": {"key3": "value3"}},
+            ]
+
+            with open(temp_path, "w", encoding="utf-8") as f:
+                for event in events:
+                    TraceFileManager.write_event(f, event)
+
+            # Read back
+            read_events = TraceFileManager.read_events(temp_path)
+
+            assert len(read_events) == 3
+            assert read_events == events
+        finally:
+            temp_path.unlink()
+
+    def test_read_events_skips_empty_lines(self):
+        """Test that empty lines are skipped when reading"""
+        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".jsonl") as f:
+            temp_path = Path(f.name)
+
+        try:
+            # Write events with empty lines
+            with open(temp_path, "w", encoding="utf-8") as f:
+                TraceFileManager.write_event(f, {"type": "event1"})
+                f.write("\n")  # Empty line
+                f.write("  \n")  # Whitespace-only line
+                TraceFileManager.write_event(f, {"type": "event2"})
+
+            read_events = TraceFileManager.read_events(temp_path)
+
+            assert len(read_events) == 2
+            assert read_events[0]["type"] == "event1"
+            assert read_events[1]["type"] == "event2"
+        finally:
+            temp_path.unlink()
+
+    def test_read_events_handles_invalid_json(self):
+        """Test that invalid JSON lines are skipped"""
+        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".jsonl") as f:
+            temp_path = Path(f.name)
+
+        try:
+            # Write valid and invalid events
+            with open(temp_path, "w", encoding="utf-8") as f:
+                TraceFileManager.write_event(f, {"type": "event1"})
+                f.write("invalid json line\n")
+                TraceFileManager.write_event(f, {"type": "event2"})
+
+            read_events = TraceFileManager.read_events(temp_path)
+
+            assert len(read_events) == 2
+            assert read_events[0]["type"] == "event1"
+            assert read_events[1]["type"] == "event2"
+        finally:
+            temp_path.unlink()
+
+    def test_read_events_file_not_found(self):
+        """Test that FileNotFoundError is raised for non-existent file"""
+        with pytest.raises(FileNotFoundError):
+            TraceFileManager.read_events(Path("/nonexistent/file.jsonl"))
diff --git a/tests/test_trace_file_manager_extract_stats.py b/tests/test_trace_file_manager_extract_stats.py
new file mode 100644
index 0000000..cb15431
--- /dev/null
+++ b/tests/test_trace_file_manager_extract_stats.py
@@ -0,0 +1,166 @@
+"""Tests for TraceFileManager.extract_stats method"""
+
+from datetime import datetime, timezone
+
+import pytest
+
+from sentience.models import TraceStats
+from sentience.trace_file_manager import TraceFileManager
+
+
+def test_extract_stats_empty_events():
+    """Test extract_stats with empty events list."""
+    stats = TraceFileManager.extract_stats([])
+    assert stats.total_steps == 0
+    assert stats.total_events == 0
+    assert stats.duration_ms is None
+    assert stats.final_status == "unknown"
+    assert stats.started_at is None
+    assert stats.ended_at is None
+
+
+def test_extract_stats_with_run_start_and_end():
+    """Test extract_stats calculates duration from run_start and run_end."""
+    from datetime import timedelta
+
+    start_time = datetime.now(timezone.utc)
+    # Make end_time 5 seconds later using timedelta
+    end_time = start_time + timedelta(seconds=5)
+
+    events = [
+        {
+            "type": "run_start",
+            "ts": start_time.isoformat().replace("+00:00", "Z"),
+            "data": {},
+        },
+        {
+            "type": "step_start",
+            "data": {"step_index": 0},
+        },
+        {
+            "type": "step_end",
+            "data": {},
+        },
+        {
+            "type": "run_end",
+            "ts": end_time.isoformat().replace("+00:00", "Z"),
+            "data": {"steps": 1},
+        },
+    ]
+
+    stats = TraceFileManager.extract_stats(events)
+    assert stats.total_steps == 1
+    assert stats.total_events == 4
+    assert stats.duration_ms is not None
+    assert stats.duration_ms >= 5000  # At least 5 seconds
+    assert stats.started_at == start_time.isoformat().replace("+00:00", "Z")
+    assert stats.ended_at == end_time.isoformat().replace("+00:00", "Z")
+    assert stats.final_status == "success"  # Has step_end, no errors
+
+
+def test_extract_stats_counts_steps():
+    """Test extract_stats correctly counts steps from step_start events."""
+    events = [
+        {"type": "run_start", "ts": "2024-01-01T00:00:00Z", "data": {}},
+        {"type": "step_start", "data": {"step_index": 0}},
+        {"type": "step_end", "data": {}},
+        {"type": "step_start", "data": {"step_index": 1}},
+        {"type": "step_end", "data": {}},
+        {"type": "step_start", "data": {"step_index": 2}},
+        {"type": "step_end", "data": {}},
+        {"type": "run_end", "ts": "2024-01-01T00:01:00Z", "data": {"steps": 3}},
+    ]
+
+    stats = TraceFileManager.extract_stats(events)
+    assert stats.total_steps == 3
+    assert stats.total_events == 8
+
+
+def test_extract_stats_infers_status_success():
+    """Test extract_stats infers success status from step_end events."""
+    events = [
+        {"type": "run_start", "ts": "2024-01-01T00:00:00Z", "data": {}},
+        {"type": "step_start", "data": {"step_index": 0}},
+        {"type": "step_end", "data": {}},
+        {"type": "run_end", "ts": "2024-01-01T00:01:00Z", "data": {}},
+    ]
+
+    stats = TraceFileManager.extract_stats(events)
+    assert stats.final_status == "success"
+
+
+def test_extract_stats_infers_status_failure():
+    """Test extract_stats infers failure status from error events."""
+    events = [
+        {"type": "run_start", "ts": "2024-01-01T00:00:00Z", "data": {}},
+        {"type": "step_start", "data": {"step_index": 0}},
+        {"type": "error", "data": {"message": "Something went wrong"}},
+        {"type": "run_end", "ts": "2024-01-01T00:01:00Z", "data": {}},
+    ]
+
+    stats = TraceFileManager.extract_stats(events)
+    assert stats.final_status == "failure"
+
+
+def test_extract_stats_infers_status_partial():
+    """Test extract_stats infers partial status from errors with step_end."""
+    events = [
+        {"type": "run_start", "ts": "2024-01-01T00:00:00Z", "data": {}},
+        {"type": "step_start", "data": {"step_index": 0}},
+        {"type": "step_end", "data": {}},
+        {"type": "step_start", "data": {"step_index": 1}},
+        {"type": "error", "data": {"message": "Step 2 failed"}},
+        {"type": "run_end", "ts": "2024-01-01T00:01:00Z", "data": {}},
+    ]
+
+    stats = TraceFileManager.extract_stats(events)
+    assert stats.final_status == "partial"
+
+
+def test_extract_stats_uses_run_end_status():
+    """Test extract_stats uses status from run_end event if present."""
+    events = [
+        {"type": "run_start", "ts": "2024-01-01T00:00:00Z", "data": {}},
+        {"type": "step_start", "data": {"step_index": 0}},
+        {"type": "error", "data": {"message": "Error"}},
+        {
+            "type": "run_end",
+            "ts": "2024-01-01T00:01:00Z",
+            "data": {"status": "partial"},  # Explicit status overrides inference
+        },
+    ]
+
+    stats = TraceFileManager.extract_stats(events)
+    assert stats.final_status == "partial"  # Uses run_end status, not inferred "failure"
+
+
+def test_extract_stats_with_custom_inference():
+    """Test extract_stats uses custom status inference function."""
+
+    def custom_inference(events, run_end):
+        # Return a valid status value
+        return "partial"
+
+    events = [
+        {"type": "run_start", "ts": "2024-01-01T00:00:00Z", "data": {}},
+        {"type": "step_start", "data": {"step_index": 0}},
+        {"type": "step_end", "data": {}},
+        {"type": "run_end", "ts": "2024-01-01T00:01:00Z", "data": {}},
+    ]
+
+    stats = TraceFileManager.extract_stats(events, infer_status_func=custom_inference)
+    assert stats.final_status == "partial"  # Uses custom inference instead of default "success"
+
+
+def test_extract_stats_no_timestamps():
+    """Test extract_stats handles missing timestamps gracefully."""
+    events = [
+        {"type": "step_start", "data": {"step_index": 0}},
+        {"type": "step_end", "data": {}},
+    ]
+
+    stats = TraceFileManager.extract_stats(events)
+    assert stats.total_steps == 1
+    assert stats.duration_ms is None
+    assert stats.started_at is None
+    assert stats.ended_at is None
diff --git a/tests/test_tracing.py b/tests/test_tracing.py
index bc99603..7a3c254 100644
--- a/tests/test_tracing.py
+++ b/tests/test_tracing.py
@@ -223,13 +223,13 @@ def test_tracer_stats_tracking():
 
             # Get stats
             stats = tracer.get_stats()
-            assert stats["total_steps"] == 2
-            assert stats["total_events"] == 4
-            assert stats["final_status"] == "unknown"
-            assert stats["started_at"] is not None
-            assert stats["ended_at"] is not None
-            assert stats["duration_ms"] is not None
-            assert stats["duration_ms"] >= 0
+            assert stats.total_steps == 2
+            assert stats.total_events == 4
+            assert stats.final_status == "unknown"
+            assert stats.started_at is not None
+            assert stats.ended_at is not None
+            assert stats.duration_ms is not None
+            assert stats.duration_ms >= 0
 
 
 def test_tracer_set_final_status():
@@ -285,12 +285,12 @@ def test_jsonl_trace_sink_get_stats():
 
         # Get stats from sink
         stats = sink.get_stats()
-        assert stats["total_steps"] == 2
-        assert stats["total_events"] == 4
-        assert stats["final_status"] == "success"
-        assert stats["started_at"] is not None
-        assert stats["ended_at"] is not None
-        assert stats["duration_ms"] is not None
+        assert stats.total_steps == 2
+        assert stats.total_events == 4
+        assert stats.final_status == "success"
+        assert stats.started_at is not None
+        assert stats.ended_at is not None
+        assert stats.duration_ms is not None
 
 
 def test_tracer_auto_infers_final_status():
@@ -319,8 +319,8 @@ def test_tracer_auto_infers_final_status():
 
         # Verify stats reflect the inferred status
         stats = tracer.get_stats()
-        assert stats["final_status"] == "success"
-        assert stats["total_steps"] == 2
+        assert stats.final_status == "success"
+        assert stats.total_steps == 2
 
 
 def test_tracer_auto_infers_final_status_with_errors():
@@ -347,7 +347,7 @@ def test_tracer_auto_infers_final_status_with_errors():
 
         # Verify stats reflect the inferred status
         stats = tracer.get_stats()
-        assert stats["final_status"] == "partial"
+        assert stats.final_status == "partial"
 
 
 def test_tracer_auto_infers_final_status_failure():
@@ -370,7 +370,7 @@ def test_tracer_auto_infers_final_status_failure():
 
         # Verify stats reflect the inferred status
         stats = tracer.get_stats()
-        assert stats["final_status"] == "failure"
+        assert stats.final_status == "failure"
 
 
 def test_tracer_auto_infer_does_not_override_explicit_status():
@@ -397,7 +397,7 @@ def test_tracer_auto_infer_does_not_override_explicit_status():
 
         # Verify stats reflect the explicit status
         stats = tracer.get_stats()
-        assert stats["final_status"] == "partial"
+        assert stats.final_status == "partial"
 
 
 def test_tracer_close_sets_final_status_automatically():
@@ -427,8 +427,8 @@ def test_tracer_close_sets_final_status_automatically():
 
         # Verify stats reflect the inferred status
         stats = tracer.get_stats()
-        assert stats["final_status"] == "success"
-        assert stats["total_steps"] == 2
+        assert stats.final_status == "success"
+        assert stats.total_steps == 2
 
 
 def test_tracer_close_sets_final_status_in_run_end_event():
diff --git a/tests/test_utils_browser.py b/tests/test_utils_browser.py
new file mode 100644
index 0000000..145c888
--- /dev/null
+++ b/tests/test_utils_browser.py
@@ -0,0 +1,151 @@
+"""
+Unit tests for sentience.utils.browser module.
+
+Tests browser storage state saving functionality.
+"""
+
+import json
+import tempfile
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+import pytest
+
+from sentience.utils.browser import save_storage_state
+
+
+class TestSaveStorageState:
+    """Tests for save_storage_state function."""
+
+    def test_save_storage_state_creates_file(self):
+        """Test that save_storage_state creates a file with storage state."""
+        # Create a mock BrowserContext
+        mock_context = Mock()
+        mock_context.storage_state.return_value = {
+            "cookies": [
+                {
+                    "name": "session_id",
+                    "value": "abc123",
+                    "domain": "example.com",
+                    "path": "/",
+                }
+            ],
+            "origins": [
+                {
+                    "origin": "https://example.com",
+                    "localStorage": [{"name": "user_pref", "value": "dark_mode"}],
+                }
+            ],
+        }
+
+        # Use temporary file
+        with tempfile.TemporaryDirectory() as tmpdir:
+            file_path = Path(tmpdir) / "storage.json"
+
+            # Call function
+            save_storage_state(mock_context, file_path)
+
+            # Verify file was created
+            assert file_path.exists()
+
+            # Verify content
+            with open(file_path) as f:
+                data = json.load(f)
+
+            assert "cookies" in data
+            assert "origins" in data
+            assert len(data["cookies"]) == 1
+            assert data["cookies"][0]["name"] == "session_id"
+
+    def test_save_storage_state_creates_parent_directories(self):
+        """Test that save_storage_state creates parent directories if needed."""
+        mock_context = Mock()
+        mock_context.storage_state.return_value = {"cookies": [], "origins": []}
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create nested path
+            file_path = Path(tmpdir) / "nested" / "deep" / "storage.json"
+
+            # Should not raise error
+            save_storage_state(mock_context, file_path)
+
+            # Verify file was created
+            assert file_path.exists()
+            assert file_path.parent.exists()
+
+    def test_save_storage_state_with_string_path(self):
+        """Test that save_storage_state accepts string paths."""
+        mock_context = Mock()
+        mock_context.storage_state.return_value = {"cookies": [], "origins": []}
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            file_path = str(Path(tmpdir) / "storage.json")
+
+            save_storage_state(mock_context, file_path)
+
+            assert Path(file_path).exists()
+
+    def test_save_storage_state_calls_context_storage_state(self):
+        """Test that save_storage_state calls context.storage_state()."""
+        mock_context = Mock()
+        mock_context.storage_state.return_value = {"cookies": [], "origins": []}
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            file_path = Path(tmpdir) / "storage.json"
+
+            save_storage_state(mock_context, file_path)
+
+            # Verify storage_state was called
+            mock_context.storage_state.assert_called_once()
+
+    def test_save_storage_state_json_format(self):
+        """Test that saved file is valid JSON with indentation."""
+        mock_context = Mock()
+        mock_context.storage_state.return_value = {
+            "cookies": [{"name": "test", "value": "value"}],
+            "origins": [],
+        }
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            file_path = Path(tmpdir) / "storage.json"
+
+            save_storage_state(mock_context, file_path)
+
+            # Verify JSON is valid and formatted
+            with open(file_path) as f:
+                content = f.read()
+                # Should have indentation (contains newlines)
+                assert "\n" in content
+                # Should be valid JSON
+                data = json.loads(content)
+                assert isinstance(data, dict)
+
+    def test_save_storage_state_handles_empty_state(self):
+        """Test that save_storage_state handles empty storage state."""
+        mock_context = Mock()
+        mock_context.storage_state.return_value = {"cookies": [], "origins": []}
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            file_path = Path(tmpdir) / "storage.json"
+
+            save_storage_state(mock_context, file_path)
+
+            with open(file_path) as f:
+                data = json.load(f)
+
+            assert data == {"cookies": [], "origins": []}
+
+    def test_save_storage_state_prints_success_message(self, capsys):
+        """Test that save_storage_state prints success message."""
+        mock_context = Mock()
+        mock_context.storage_state.return_value = {"cookies": [], "origins": []}
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            file_path = Path(tmpdir) / "storage.json"
+
+            save_storage_state(mock_context, file_path)
+
+            captured = capsys.readouterr()
+            assert "✅" in captured.out
+            assert "Saved storage state" in captured.out
+            assert str(file_path) in captured.out
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
new file mode 100644
index 0000000..75b552f
--- /dev/null
+++ b/tests/unit/__init__.py
@@ -0,0 +1,6 @@
+"""
+Unit tests for Sentience SDK.
+
+These tests use mocks and protocols to test logic in isolation,
+without requiring real browser instances.
+"""
diff --git a/tests/unit/test_agent_errors.py b/tests/unit/test_agent_errors.py
new file mode 100644
index 0000000..4287683
--- /dev/null
+++ b/tests/unit/test_agent_errors.py
@@ -0,0 +1,447 @@
+"""
+Unit tests for agent error handling and edge cases.
+
+These tests use mocked browsers to test error conditions that are
+difficult to reproduce with real browsers.
+"""
+
+from typing import Any
+from unittest.mock import Mock, patch
+
+import pytest
+
+from sentience.agent import SentienceAgent
+from sentience.llm_provider import LLMProvider, LLMResponse
+from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues
+from sentience.protocols import BrowserProtocol, PageProtocol
+
+
+class MockLLMProvider(LLMProvider):
+    """Mock LLM provider for testing"""
+
+    def __init__(self, responses=None):
+        super().__init__("mock-model")
+        self.responses = responses or []
+        self.call_count = 0
+
+    @property
+    def model_name(self) -> str:
+        return "mock-model"
+
+    def supports_json_mode(self) -> bool:
+        return False
+
+    def generate(self, system_prompt: str, user_prompt: str, **kwargs):
+        self.call_count += 1
+        if self.responses:
+            response = self.responses[self.call_count % len(self.responses)]
+        else:
+            response = "CLICK(1)"
+        return LLMResponse(
+            content=response,
+            prompt_tokens=100,
+            completion_tokens=20,
+            total_tokens=120,
+            model_name="mock-model",
+        )
+
+
+class MockPage(PageProtocol):
+    """Mock page that implements PageProtocol (sync version)"""
+
+    def __init__(self, url: str = "https://example.com"):
+        self._url = url
+
+    @property
+    def url(self) -> str:
+        return self._url
+
+    def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any:
+        # Return proper snapshot structure when snapshot is called
+        # The script is a function that calls window.sentience.snapshot(options)
+        if "window.sentience.snapshot" in script or (
+            "snapshot" in script.lower() and "options" in script
+        ):
+            # Check if args contain options (for empty snapshot tests)
+            options = kwargs.get("options") or (args[0] if args else {})
+            limit = options.get("limit", 50) if isinstance(options, dict) else 50
+
+            # Return elements based on limit (0 for empty snapshot tests)
+            elements = []
+            if limit > 0:
+                elements = [
+                    {
+                        "id": 1,
+                        "role": "button",
+                        "text": "Click Me",
+                        "importance": 900,
+                        "bbox": {"x": 100, "y": 200, "width": 80, "height": 30},
+                        "visual_cues": {
+                            "is_primary": True,
+                            "is_clickable": True,
+                            "background_color_name": "blue",
+                        },
+                        "in_viewport": True,
+                        "is_occluded": False,
+                        "z_index": 10,
+                    }
+                ]
+
+            # Snapshot model expects 'elements' not 'raw_elements'
+            return {
+                "status": "success",
+                "timestamp": "2024-12-24T10:00:00Z",
+                "url": self._url,
+                "viewport": {"width": 1920, "height": 1080},
+                "elements": elements,  # Use 'elements' for Snapshot model
+                "raw_elements": elements,  # Also include for compatibility
+            }
+        # For wait_for_function calls
+        if "wait_for_function" in script or "typeof window.sentience" in script:
+            return True
+        return {}
+
+    def goto(self, url: str, **kwargs: Any) -> Any:
+        self._url = url
+        return None
+
+    def wait_for_timeout(self, timeout: int) -> None:
+        pass
+
+    def wait_for_load_state(self, state: str = "load", timeout: int | None = None) -> None:
+        pass
+
+    def wait_for_function(self, expression: str, timeout: int | None = None) -> None:
+        """Add wait_for_function to make it detectable as sync page"""
+        pass
+
+
+class MockBrowser(BrowserProtocol):
+    """Mock browser that implements BrowserProtocol"""
+
+    def __init__(self, page: MockPage | None = None, api_key: str | None = None):
+        self._page = page or MockPage()
+        self._started = False
+        self.api_key = api_key  # Required by snapshot function
+        self.api_url = None  # Required by snapshot function
+
+    @property
+    def page(self) -> MockPage | None:
+        return self._page if self._started else None
+
+    def start(self) -> None:
+        self._started = True
+
+    def close(self, output_path: str | None = None) -> str | None:
+        self._started = False
+        return output_path
+
+    def goto(self, url: str) -> None:
+        if self._page:
+            self._page.goto(url)
+
+
+def create_mock_snapshot():
+    """Create mock snapshot with test elements"""
+    elements = [
+        Element(
+            id=1,
+            role="button",
+            text="Click Me",
+            importance=900,
+            bbox=BBox(x=100, y=200, width=80, height=30),
+            visual_cues=VisualCues(
+                is_primary=True, is_clickable=True, background_color_name="blue"
+            ),
+            in_viewport=True,
+            is_occluded=False,
+            z_index=10,
+        ),
+    ]
+    return Snapshot(
+        status="success",
+        timestamp="2024-12-24T10:00:00Z",
+        url="https://example.com",
+        viewport=Viewport(width=1920, height=1080),
+        elements=elements,
+    )
+
+
+class TestAgentErrorHandling:
+    """Test agent error handling scenarios"""
+
+    def test_agent_handles_snapshot_timeout(self):
+        """Test agent handles snapshot timeout gracefully"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider()
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        # Mock snapshot to raise timeout
+        with patch("sentience.agent.snapshot") as mock_snapshot:
+            from playwright._impl._errors import TimeoutError
+
+            mock_snapshot.side_effect = TimeoutError("Snapshot timeout")
+
+            with pytest.raises(RuntimeError, match="Failed after"):
+                agent.act("Click the button", max_retries=0)
+
+    def test_agent_handles_network_failure(self):
+        """Test agent handles network failure during snapshot"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider()
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        # Mock snapshot to raise network error
+        # Patch at the agent module level since that's where it's imported
+        with patch("sentience.agent.snapshot") as mock_snapshot:
+            mock_snapshot.side_effect = ConnectionError("Network failure")
+
+            with pytest.raises(RuntimeError, match="Failed after"):
+                agent.act("Click the button", max_retries=0)
+
+    def test_agent_handles_empty_snapshot(self):
+        """Test agent handles empty snapshot (no elements)"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        # Create empty snapshot
+        empty_snap = Snapshot(
+            status="success",
+            timestamp="2024-12-24T10:00:00Z",
+            url="https://example.com",
+            viewport=Viewport(width=1920, height=1080),
+            elements=[],
+        )
+
+        with (
+            patch("sentience.snapshot.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = empty_snap
+            mock_click.return_value = ActionResult(success=False, duration_ms=100, outcome="error")
+
+            # Agent should still attempt action even with empty snapshot
+            result = agent.act("Click the button", max_retries=0)
+            assert result.success is False
+
+    def test_agent_handles_malformed_llm_response(self):
+        """Test agent handles malformed LLM response"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["INVALID_RESPONSE_FORMAT"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (patch("sentience.snapshot.snapshot") as mock_snapshot,):
+            mock_snapshot.return_value = create_mock_snapshot()
+
+            # Action executor should raise ValueError for invalid format
+            with pytest.raises(RuntimeError, match="Failed after"):
+                agent.act("Click the button", max_retries=0)
+
+    def test_agent_handles_browser_not_started(self):
+        """Test agent handles browser not started error"""
+        browser = MockBrowser()  # Not started
+        llm = MockLLMProvider()
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        # Snapshot should fail because browser.page is None
+        with patch("sentience.snapshot.snapshot") as mock_snapshot:
+            mock_snapshot.side_effect = RuntimeError("Browser not started")
+
+            with pytest.raises(RuntimeError, match="Failed after"):
+                agent.act("Click the button", max_retries=0)
+
+    def test_agent_handles_action_timeout(self):
+        """Test agent handles action execution timeout"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.snapshot.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from playwright._impl._errors import TimeoutError
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            mock_click.side_effect = TimeoutError("Action timeout")
+
+            with pytest.raises(RuntimeError, match="Failed after"):
+                agent.act("Click the button", max_retries=0)
+
+    def test_agent_handles_url_change_during_action(self):
+        """Test agent handles URL change during action execution"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.snapshot.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            # Simulate URL change after click
+            mock_click.return_value = ActionResult(
+                success=True, duration_ms=150, outcome="navigated", url_changed=True
+            )
+
+            result = agent.act("Click the button", max_retries=0)
+            assert result.success is True
+            assert result.url_changed is True
+
+    def test_agent_retry_on_transient_error(self):
+        """Test agent retries on transient errors"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.snapshot.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            # First call fails, second succeeds
+            mock_click.side_effect = [
+                RuntimeError("Transient error"),
+                ActionResult(success=True, duration_ms=150, outcome="dom_updated"),
+            ]
+
+            result = agent.act("Click the button", max_retries=1)
+            assert result.success is True
+            assert mock_click.call_count == 2
+
+
+class TestAgentEdgeCases:
+    """Test agent edge case scenarios"""
+
+    def test_agent_handles_zero_elements_in_snapshot(self):
+        """Test agent handles snapshot with zero elements"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["FINISH()"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        empty_snap = Snapshot(
+            status="success",
+            timestamp="2024-12-24T10:00:00Z",
+            url="https://example.com",
+            viewport=Viewport(width=1920, height=1080),
+            elements=[],
+        )
+
+        with patch("sentience.snapshot.snapshot") as mock_snapshot:
+            mock_snapshot.return_value = empty_snap
+
+            # Agent should handle empty snapshot and finish
+            result = agent.act("Complete task", max_retries=0)
+            assert result.action == "finish"
+            assert result.success is True
+
+    def test_agent_handles_unicode_in_actions(self):
+        """Test agent handles unicode characters in goals and actions"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=['TYPE(1, "你好世界")'])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.snapshot.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.type_text") as mock_type,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            mock_type.return_value = ActionResult(
+                success=True, duration_ms=200, outcome="dom_updated"
+            )
+
+            result = agent.act("Type 你好世界", max_retries=0)
+            assert result.success is True
+            assert result.action == "type"
+
+    def test_agent_handles_special_characters_in_goal(self):
+        """Test agent handles special characters in goal text"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.snapshot.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            mock_click.return_value = ActionResult(
+                success=True, duration_ms=150, outcome="dom_updated"
+            )
+
+            # Test with special characters
+            result = agent.act('Click the "Submit" button (with quotes)', max_retries=0)
+            assert result.success is True
+
+    def test_agent_preserves_state_on_retry(self):
+        """Test agent preserves state correctly during retries"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)"])
+        agent = SentienceAgent(browser, llm, verbose=False)
+
+        with (
+            patch("sentience.snapshot.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            # First attempt fails, second succeeds
+            mock_click.side_effect = [
+                RuntimeError("First attempt failed"),
+                ActionResult(success=True, duration_ms=150, outcome="dom_updated"),
+            ]
+
+            result = agent.act("Click the button", max_retries=1)
+            assert result.success is True
+            # History should have both attempts
+            assert len(agent.history) == 1  # Only successful attempt is recorded
+            assert agent.history[0]["attempt"] == 1  # Final successful attempt
+
+    def test_agent_handles_tracer_errors_gracefully(self):
+        """Test agent continues execution even if tracer fails"""
+        browser = MockBrowser()
+        browser.start()
+        llm = MockLLMProvider(responses=["CLICK(1)"])
+        # Create a tracer that raises errors
+        mock_tracer = Mock()
+        mock_tracer.emit.side_effect = RuntimeError("Tracer error")
+
+        agent = SentienceAgent(browser, llm, verbose=False, tracer=mock_tracer)
+
+        with (
+            patch("sentience.snapshot.snapshot") as mock_snapshot,
+            patch("sentience.action_executor.click") as mock_click,
+        ):
+            from sentience.models import ActionResult
+
+            mock_snapshot.return_value = create_mock_snapshot()
+            mock_click.return_value = ActionResult(
+                success=True, duration_ms=150, outcome="dom_updated"
+            )
+
+            # Agent should still complete action despite tracer error
+            result = agent.act("Click the button", max_retries=0)
+            assert result.success is True