SentienceAPI · rcholic · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -32,6 +32,24 @@ jobs:
     - name: Install dependencies
       run: |
         pip install -e ".[dev]"
+        pip install pre-commit mypy types-requests
+
+    - name: Lint with pre-commit
+      continue-on-error: true
+      run: |
+        pre-commit run --all-files
+
+    - name: Type check with mypy
+      continue-on-error: true
+      run: |
+        mypy sentience --ignore-missing-imports --no-strict-optional
+
+    - name: Check code style
+      continue-on-error: true
+      run: |
+        black --check sentience tests --line-length=100
+        isort --check-only --profile black sentience tests
+        flake8 sentience tests --max-line-length=100 --extend-ignore=E203,W503,E501 --max-complexity=15
 
     - name: Build extension (if needed)
       if: runner.os != 'Windows'

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -50,20 +50,19 @@ repos:
           - '--max-complexity=15'
         exclude: ^(venv/|\.venv/|build/|dist/|tests/fixtures/)
 
-  # Type checking with mypy (disabled for now - too strict)
-  # Uncomment to enable strict type checking
-  # - repo: https://github.com/pre-commit/mirrors-mypy
-  #   rev: v1.8.0
-  #   hooks:
-  #     - id: mypy
-  #       additional_dependencies:
-  #         - pydantic>=2.0
-  #         - types-requests
-  #       args:
-  #         - '--ignore-missing-imports'
-  #         - '--no-strict-optional'
-  #         - '--warn-unused-ignores'
-  #       exclude: ^(tests/|examples/|venv/|\.venv/|build/|dist/)
+  # Type checking with mypy
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.8.0
+    hooks:
+      - id: mypy
+        additional_dependencies:
+          - pydantic>=2.0
+          - types-requests
+        args:
+          - '--ignore-missing-imports'
+          - '--no-strict-optional'
+          - '--warn-unused-ignores'
+        exclude: ^(tests/|examples/|venv/|\.venv/|build/|dist/)
 
   # Security checks
   - repo: https://github.com/PyCQA/bandit

diff --git a/sentience/__init__.py b/sentience/__init__.py
@@ -14,9 +14,6 @@
 from .cloud_tracing import CloudTraceSink, SentienceLogger
 from .conversational_agent import ConversationalAgent
 from .expect import expect
-
-# Formatting (v0.12.0+)
-from .formatting import format_snapshot_for_llm
 from .generator import ScriptGenerator, generate
 from .inspector import Inspector, inspect
 from .llm_provider import (
@@ -55,19 +52,24 @@
 from .read import read
 from .recorder import Recorder, Trace, TraceStep, record
 from .screenshot import screenshot
+from .sentience_methods import AgentAction, SentienceMethod
 from .snapshot import snapshot
 from .text_search import find_text_rect
 from .tracer_factory import SENTIENCE_API_URL, create_tracer
 from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
 
 # Utilities (v0.12.0+)
+# Import from utils package (re-exports from submodules for backward compatibility)
 from .utils import (
     canonical_snapshot_loose,
     canonical_snapshot_strict,
     compute_snapshot_digests,
     save_storage_state,
     sha256_digest,
 )
+
+# Formatting (v0.12.0+)
+from .utils.formatting import format_snapshot_for_llm
 from .wait import wait_for
 
 __version__ = "0.91.1"
@@ -150,4 +152,7 @@
     "format_snapshot_for_llm",
     # Agent Config (v0.12.0+)
     "AgentConfig",
+    # Enums
+    "SentienceMethod",
+    "AgentAction",
 ]
diff --git a/sentience/action_executor.py b/sentience/action_executor.py
@@ -0,0 +1,190 @@
+"""
+Action Executor for Sentience Agent.
+
+Handles parsing and execution of action commands (CLICK, TYPE, PRESS, FINISH).
+This separates action execution concerns from LLM interaction.
+"""
+
+import inspect
+import re
+from typing import Any, Union
+
+from .actions import click, click_async, press, press_async, type_text, type_text_async
+from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .models import Snapshot
+from .protocols import AsyncBrowserProtocol, BrowserProtocol
+
+
+class ActionExecutor:
+    """
+    Executes actions and handles parsing of action command strings.
+
+    This class encapsulates all action execution logic, making it easier to:
+    - Test action execution independently
+    - Add new action types in one place
+    - Handle action parsing errors consistently
+
+    Parsing is shared by execute() and execute_async(); only the SDK call differs.
+    """
+
+    # Patterns are applied with .match(), so they are anchored at the start only.
+    _CLICK_RE = re.compile(r"CLICK\s*\(\s*(\d+)\s*\)", re.IGNORECASE)
+    # TYPE text is tried in two forms, in this order:
+    #   plain  - contains no quote characters at all (the original grammar)
+    #   quoted - closed by the same quote that opened it, so it may contain the
+    #            other quote style, e.g. TYPE(3, "what's new"); no unescaping is done
+    _TYPE_RE = re.compile(
+        r"TYPE\s*\(\s*(?P<id>\d+)\s*,\s*"
+        r"(?:[\"'](?P<plain>[^\"']*)[\"']|(?P<quote>[\"'])(?P<quoted>.*?)(?P=quote))"
+        r"\s*\)",
+        re.IGNORECASE | re.DOTALL,
+    )
+    _PRESS_RE = re.compile(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', re.IGNORECASE)
+    _FINISH_RE = re.compile(r"FINISH\s*\(\s*\)", re.IGNORECASE)
+
+    def __init__(
+        self,
+        browser: SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol,
+    ):
+        """
+        Initialize action executor.
+
+        Args:
+            browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance
+                    (for testing, can use mock objects that implement BrowserProtocol)
+        """
+        self.browser = browser
+        # Check concrete types first (most reliable)
+        if isinstance(browser, AsyncSentienceBrowser):
+            self._is_async = True
+        elif isinstance(browser, SentienceBrowser):
+            self._is_async = False
+        else:
+            # Protocol-based or mock browsers: async only when start() is a coroutine
+            # function; everything else, including unknown types, is treated as sync.
+            start_method = getattr(browser, "start", None)
+            self._is_async = bool(start_method and inspect.iscoroutinefunction(start_method))
+
+    @classmethod
+    def _parse_action(cls, action_str: str) -> dict[str, Any]:
+        """
+        Parse an action string into its action name and arguments.
+
+        Args:
+            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
+
+        Returns:
+            Dictionary with "action" followed by the parsed arguments
+            ("element_id", "text" or "key", depending on the action)
+
+        Raises:
+            ValueError: If action format is unknown
+        """
+        if match := cls._CLICK_RE.match(action_str):
+            return {"action": "click", "element_id": int(match.group(1))}
+
+        if match := cls._TYPE_RE.match(action_str):
+            plain = match.group("plain")
+            text = plain if plain is not None else match.group("quoted")
+            return {"action": "type", "element_id": int(match.group("id")), "text": text}
+
+        if match := cls._PRESS_RE.match(action_str):
+            return {"action": "press", "key": match.group(1)}
+
+        if cls._FINISH_RE.match(action_str):
+            return {"action": "finish"}
+
+        raise ValueError(
+            f"Unknown action format: {action_str}\n"
+            f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
+        )
+
+    @staticmethod
+    def _build_result(parsed: dict[str, Any], result: Any) -> dict[str, Any]:
+        """
+        Combine parsed arguments and the SDK result into the execution result dictionary.
+
+        Args:
+            parsed: Output of _parse_action()
+            result: Value returned by the SDK call; None for FINISH, which makes no call
+        """
+        if parsed["action"] == "finish":
+            return {"success": True, **parsed, "message": "Task marked as complete"}
+
+        execution = {"success": result.success, **parsed, "outcome": result.outcome}
+        if parsed["action"] == "click":
+            execution["url_changed"] = result.url_changed
+        return execution
+
+    def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
+        """
+        Parse action string and execute SDK call (synchronous).
+
+        Args:
+            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
+            snap: Current snapshot (for context, currently unused but kept for API consistency)
+
+        Returns:
+            Execution result dictionary with keys:
+            - success: bool
+            - action: str (e.g., "click", "type", "press", "finish")
+            - element_id: Optional[int] (for click/type actions)
+            - text: Optional[str] (for type actions)
+            - key: Optional[str] (for press actions)
+            - outcome: Optional[str] (action outcome)
+            - url_changed: Optional[bool] (for click actions)
+            - error: Optional[str] (if action failed)
+            - message: Optional[str] (for finish action)
+
+        Raises:
+            ValueError: If action format is unknown
+            RuntimeError: If called on async browser (use execute_async instead)
+        """
+        if self._is_async:
+            raise RuntimeError(
+                "ActionExecutor.execute() called on async browser. Use execute_async() instead."
+            )
+
+        parsed = self._parse_action(action_str)
+        action = parsed["action"]
+        result: Any = None  # FINISH makes no SDK call
+        if action == "click":
+            result = click(self.browser, parsed["element_id"])  # type: ignore
+        elif action == "type":
+            element_id, text = parsed["element_id"], parsed["text"]
+            result = type_text(self.browser, element_id, text)  # type: ignore
+        elif action == "press":
+            result = press(self.browser, parsed["key"])  # type: ignore
+        return self._build_result(parsed, result)
+
+    async def execute_async(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
+        """
+        Parse action string and execute SDK call (asynchronous).
+
+        Args:
+            action_str: Action string from LLM (e.g., "CLICK(42)", "TYPE(15, \"text\")")
+            snap: Current snapshot (for context, currently unused but kept for API consistency)
+
+        Returns:
+            Execution result dictionary (same format as execute())
+
+        Raises:
+            ValueError: If action format is unknown
+            RuntimeError: If called on sync browser (use execute() instead)
+        """
+        if not self._is_async:
+            raise RuntimeError(
+                "ActionExecutor.execute_async() called on sync browser. Use execute() instead."
+            )
+
+        parsed = self._parse_action(action_str)
+        action = parsed["action"]
+        result: Any = None  # FINISH makes no SDK call
+        if action == "click":
+            result = await click_async(self.browser, parsed["element_id"])  # type: ignore
+        elif action == "type":
+            element_id, text = parsed["element_id"], parsed["text"]
+            result = await type_text_async(self.browser, element_id, text)  # type: ignore
+        elif action == "press":
+            result = await press_async(self.browser, parsed["key"])  # type: ignore
+        return self._build_result(parsed, result)
diff --git a/sentience/actions.py b/sentience/actions.py
@@ -1,11 +1,14 @@
 """
 Actions v1 - click, type, press
 """
 
 import time
+from typing import Optional
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .browser_evaluator import BrowserEvaluator
 from .models import ActionResult, BBox, Snapshot
+from .sentience_methods import SentienceMethod
 from .snapshot import snapshot, snapshot_async
 
 
@@ -59,41 +63,22 @@ def click(  # noqa: C901
             else:
                 # Fallback to JS click if element not found in snapshot
                 try:
-                    success = browser.page.evaluate(
-                        """
-                        (id) => {
-                            return window.sentience.click(id);
-                        }
-                        """,
-                        element_id,
+                    success = BrowserEvaluator.invoke(
+                        browser.page, SentienceMethod.CLICK, element_id
                     )
                 except Exception:
                     # Navigation might have destroyed context, assume success if URL changed
                     success = True
         except Exception:
             # Fallback to JS click on error
             try:
-                success = browser.page.evaluate(
-                    """
-                    (id) => {
-                        return window.sentience.click(id);
-                    }
-                    """,
-                    element_id,
-                )
+                success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
             except Exception:
                 # Navigation might have destroyed context, assume success if URL changed
                 success = True
     else:
         # Legacy JS-based click
-        success = browser.page.evaluate(
-            """
-            (id) => {
-                return window.sentience.click(id);
-            }
-            """,
-            element_id,
-        )
+        success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
 
     # Wait a bit for navigation/DOM updates
     try: