From 275ad8caf5ad79cc87c50d289b5a94715f0eb078 Mon Sep 17 00:00:00 2001 From: rcholic Date: Fri, 2 Jan 2026 14:40:54 -0800 Subject: [PATCH 1/3] Phase 5: BrowserProtocol PageProtocl for mocking mor unit tests --- sentience/action_executor.py | 11 +- sentience/agent.py | 8 +- sentience/conversational_agent.py | 10 +- sentience/element_filter.py | 2 +- sentience/protocols.py | 231 ++++++++++++++++ tests/integration/__init__.py | 7 + tests/unit/__init__.py | 7 + tests/unit/test_agent_errors.py | 443 ++++++++++++++++++++++++++++++ 8 files changed, 709 insertions(+), 10 deletions(-) create mode 100644 sentience/protocols.py create mode 100644 tests/integration/__init__.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/test_agent_errors.py diff --git a/sentience/action_executor.py b/sentience/action_executor.py index 104e255..75c9585 100644 --- a/sentience/action_executor.py +++ b/sentience/action_executor.py @@ -6,11 +6,12 @@ """ import re -from typing import Any +from typing import Any, Union from .actions import click, click_async, press, press_async, type_text, type_text_async from .browser import AsyncSentienceBrowser, SentienceBrowser from .models import Snapshot +from .protocols import AsyncBrowserProtocol, BrowserProtocol class ActionExecutor: @@ -23,15 +24,17 @@ class ActionExecutor: - Handle action parsing errors consistently """ - def __init__(self, browser: SentienceBrowser | AsyncSentienceBrowser): + def __init__(self, browser: Union[SentienceBrowser, AsyncSentienceBrowser, BrowserProtocol, AsyncBrowserProtocol]): """ Initialize action executor. Args: - browser: SentienceBrowser or AsyncSentienceBrowser instance + browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance + (for testing, can use mock objects that implement BrowserProtocol) """ self.browser = browser - self._is_async = isinstance(browser, AsyncSentienceBrowser) + # Check if browser is async - support both concrete types and protocols + self._is_async = isinstance(browser, (AsyncSentienceBrowser, AsyncBrowserProtocol)) def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]: """ diff --git a/sentience/agent.py b/sentience/agent.py index 3238f30..e7aedf3 100644 --- a/sentience/agent.py +++ b/sentience/agent.py @@ -6,7 +6,7 @@ import asyncio import hashlib import time -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Optional, Union from .action_executor import ActionExecutor from .agent_config import AgentConfig @@ -15,6 +15,7 @@ from .element_filter import ElementFilter from .llm_interaction_handler import LLMInteractionHandler from .llm_provider import LLMProvider, LLMResponse +from .protocols import AsyncBrowserProtocol, BrowserProtocol from .models import ( ActionHistory, ActionTokenUsage, @@ -58,7 +59,7 @@ class SentienceAgent(BaseAgent): def __init__( self, - browser: SentienceBrowser, + browser: Union[SentienceBrowser, BrowserProtocol], llm: LLMProvider, default_snapshot_limit: int = 50, verbose: bool = True, @@ -69,7 +70,8 @@ def __init__( Initialize Sentience Agent Args: - browser: SentienceBrowser instance + browser: SentienceBrowser instance or BrowserProtocol-compatible object + (for testing, can use mock objects that implement BrowserProtocol) llm: LLM provider (OpenAIProvider, AnthropicProvider, etc.) default_snapshot_limit: Default maximum elements to include in context (default: 50) verbose: Print execution logs (default: True) diff --git a/sentience/conversational_agent.py b/sentience/conversational_agent.py index 238d4c5..d2cdb5a 100644 --- a/sentience/conversational_agent.py +++ b/sentience/conversational_agent.py @@ -7,9 +7,12 @@ import time from typing import Any +from typing import Union + from .agent import SentienceAgent from .browser import SentienceBrowser from .llm_provider import LLMProvider +from .protocols import BrowserProtocol from .models import ExtractionResult, Snapshot, SnapshotOptions, StepExecutionResult from .snapshot import snapshot @@ -29,12 +32,15 @@ class ConversationalAgent: The top result is from amazon.com selling the Apple Magic Mouse 2 for $79." """ - def __init__(self, browser: SentienceBrowser, llm: LLMProvider, verbose: bool = True): + def __init__( + self, browser: Union[SentienceBrowser, BrowserProtocol], llm: LLMProvider, verbose: bool = True + ): """ Initialize conversational agent Args: - browser: SentienceBrowser instance + browser: SentienceBrowser instance or BrowserProtocol-compatible object + (for testing, can use mock objects that implement BrowserProtocol) llm: LLM provider (OpenAI, Anthropic, LocalLLM, etc.) verbose: Print step-by-step execution logs (default: True) """ diff --git a/sentience/element_filter.py b/sentience/element_filter.py index df117b9..a6256c7 100644 --- a/sentience/element_filter.py +++ b/sentience/element_filter.py @@ -65,7 +65,7 @@ def filter_by_importance( def filter_by_goal( snapshot: Snapshot, goal: str | None, - max_elements: int = 50, + max_elements: int = 100, ) -> list[Element]: """ Filter elements from snapshot based on goal context. diff --git a/sentience/protocols.py b/sentience/protocols.py new file mode 100644 index 0000000..b289231 --- /dev/null +++ b/sentience/protocols.py @@ -0,0 +1,231 @@ +""" +Protocol definitions for testability and dependency injection. + +These protocols define the minimal interface required by agent classes, +enabling better testability through mocking while maintaining type safety. +""" + +from typing import TYPE_CHECKING, Any, Optional, Protocol, runtime_checkable + +if TYPE_CHECKING: + from playwright.async_api import Page as AsyncPage + from playwright.sync_api import Page + + from .models import Snapshot + + +@runtime_checkable +class PageProtocol(Protocol): + """ + Protocol for Playwright Page operations used by agents. + + This protocol defines the minimal interface required from Playwright's Page object. + Agents use this interface to interact with the browser page. + """ + + @property + def url(self) -> str: + """Current page URL.""" + ... + + def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any: + """ + Evaluate JavaScript in the page context. + + Args: + script: JavaScript code to evaluate + *args: Arguments to pass to the script + **kwargs: Keyword arguments to pass to the script + + Returns: + Result of the JavaScript evaluation + """ + ... + + def goto(self, url: str, **kwargs: Any) -> Optional[Any]: + """ + Navigate to a URL. + + Args: + url: URL to navigate to + **kwargs: Additional navigation options + + Returns: + Response object or None + """ + ... + + def wait_for_timeout(self, timeout: int) -> None: + """ + Wait for a specified timeout. + + Args: + timeout: Timeout in milliseconds + """ + ... + + def wait_for_load_state(self, state: str = "load", timeout: Optional[int] = None) -> None: + """ + Wait for page load state. + + Args: + state: Load state to wait for (e.g., "load", "domcontentloaded", "networkidle") + timeout: Optional timeout in milliseconds + """ + ... + + +@runtime_checkable +class BrowserProtocol(Protocol): + """ + Protocol for browser operations used by agents. + + This protocol defines the minimal interface required from SentienceBrowser. + Agents use this interface to interact with the browser and take snapshots. + + Note: SentienceBrowser naturally implements this protocol, so no changes + are required to existing code. This protocol enables better testability + through mocking. + """ + + @property + def page(self) -> Optional[PageProtocol]: + """ + Current Playwright Page object. + + Returns: + Page object if browser is started, None otherwise + """ + ... + + def start(self) -> None: + """Start the browser session.""" + ... + + def close(self, output_path: Optional[str] = None) -> Optional[str]: + """ + Close the browser session. + + Args: + output_path: Optional path to save browser state/output + + Returns: + Path to saved output or None + """ + ... + + def goto(self, url: str) -> None: + """ + Navigate to a URL. + + Args: + url: URL to navigate to + """ + ... + + +@runtime_checkable +class AsyncPageProtocol(Protocol): + """ + Protocol for async Playwright Page operations. + + Similar to PageProtocol but for async operations. + """ + + @property + def url(self) -> str: + """Current page URL.""" + ... + + async def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any: + """ + Evaluate JavaScript in the page context (async). + + Args: + script: JavaScript code to evaluate + *args: Arguments to pass to the script + **kwargs: Keyword arguments to pass to the script + + Returns: + Result of the JavaScript evaluation + """ + ... + + async def goto(self, url: str, **kwargs: Any) -> Optional[Any]: + """ + Navigate to a URL (async). + + Args: + url: URL to navigate to + **kwargs: Additional navigation options + + Returns: + Response object or None + """ + ... + + async def wait_for_timeout(self, timeout: int) -> None: + """ + Wait for a specified timeout (async). + + Args: + timeout: Timeout in milliseconds + """ + ... + + async def wait_for_load_state( + self, state: str = "load", timeout: Optional[int] = None + ) -> None: + """ + Wait for page load state (async). + + Args: + state: Load state to wait for (e.g., "load", "domcontentloaded", "networkidle") + timeout: Optional timeout in milliseconds + """ + ... + + +@runtime_checkable +class AsyncBrowserProtocol(Protocol): + """ + Protocol for async browser operations. + + Similar to BrowserProtocol but for async operations. + """ + + @property + def page(self) -> Optional[AsyncPageProtocol]: + """ + Current Playwright AsyncPage object. + + Returns: + AsyncPage object if browser is started, None otherwise + """ + ... + + async def start(self) -> None: + """Start the browser session (async).""" + ... + + async def close(self, output_path: Optional[str] = None) -> Optional[str]: + """ + Close the browser session (async). + + Args: + output_path: Optional path to save browser state/output + + Returns: + Path to saved output or None + """ + ... + + async def goto(self, url: str) -> None: + """ + Navigate to a URL (async). + + Args: + url: URL to navigate to + """ + ... + diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..79f0bc0 --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1,7 @@ +""" +Integration tests for Sentience SDK. + +These tests use real browser instances to test end-to-end functionality +and catch real-world bugs that mocks might miss. +""" + diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..bfdecf4 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1,7 @@ +""" +Unit tests for Sentience SDK. + +These tests use mocks and protocols to test logic in isolation, +without requiring real browser instances. +""" + diff --git a/tests/unit/test_agent_errors.py b/tests/unit/test_agent_errors.py new file mode 100644 index 0000000..ae35238 --- /dev/null +++ b/tests/unit/test_agent_errors.py @@ -0,0 +1,443 @@ +""" +Unit tests for agent error handling and edge cases. + +These tests use mocked browsers to test error conditions that are +difficult to reproduce with real browsers. +""" + +from typing import Any +from unittest.mock import Mock, patch + +import pytest + +from sentience.agent import SentienceAgent +from sentience.llm_provider import LLMProvider, LLMResponse +from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues +from sentience.protocols import BrowserProtocol, PageProtocol + + +class MockLLMProvider(LLMProvider): + """Mock LLM provider for testing""" + + def __init__(self, responses=None): + super().__init__("mock-model") + self.responses = responses or [] + self.call_count = 0 + + @property + def model_name(self) -> str: + return "mock-model" + + def supports_json_mode(self) -> bool: + return False + + def generate(self, system_prompt: str, user_prompt: str, **kwargs): + self.call_count += 1 + if self.responses: + response = self.responses[self.call_count % len(self.responses)] + else: + response = "CLICK(1)" + return LLMResponse( + content=response, + prompt_tokens=100, + completion_tokens=20, + total_tokens=120, + model_name="mock-model", + ) + + +class MockPage(PageProtocol): + """Mock page that implements PageProtocol (sync version)""" + + def __init__(self, url: str = "https://example.com"): + self._url = url + + @property + def url(self) -> str: + return self._url + + def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any: + # Return proper snapshot structure when snapshot is called + # The script is a function that calls window.sentience.snapshot(options) + if "window.sentience.snapshot" in script or ("snapshot" in script.lower() and "options" in script): + # Check if args contain options (for empty snapshot tests) + options = kwargs.get("options") or (args[0] if args else {}) + limit = options.get("limit", 50) if isinstance(options, dict) else 50 + + # Return elements based on limit (0 for empty snapshot tests) + elements = [] + if limit > 0: + elements = [ + { + "id": 1, + "role": "button", + "text": "Click Me", + "importance": 900, + "bbox": {"x": 100, "y": 200, "width": 80, "height": 30}, + "visual_cues": { + "is_primary": True, + "is_clickable": True, + "background_color_name": "blue", + }, + "in_viewport": True, + "is_occluded": False, + "z_index": 10, + } + ] + + # Snapshot model expects 'elements' not 'raw_elements' + return { + "status": "success", + "timestamp": "2024-12-24T10:00:00Z", + "url": self._url, + "viewport": {"width": 1920, "height": 1080}, + "elements": elements, # Use 'elements' for Snapshot model + "raw_elements": elements, # Also include for compatibility + } + # For wait_for_function calls + if "wait_for_function" in script or "typeof window.sentience" in script: + return True + return {} + + def goto(self, url: str, **kwargs: Any) -> Any: + self._url = url + return None + + def wait_for_timeout(self, timeout: int) -> None: + pass + + def wait_for_load_state(self, state: str = "load", timeout: int | None = None) -> None: + pass + + def wait_for_function(self, expression: str, timeout: int | None = None) -> None: + """Add wait_for_function to make it detectable as sync page""" + pass + + +class MockBrowser(BrowserProtocol): + """Mock browser that implements BrowserProtocol""" + + def __init__(self, page: MockPage | None = None, api_key: str | None = None): + self._page = page or MockPage() + self._started = False + self.api_key = api_key # Required by snapshot function + self.api_url = None # Required by snapshot function + + @property + def page(self) -> MockPage | None: + return self._page if self._started else None + + def start(self) -> None: + self._started = True + + def close(self, output_path: str | None = None) -> str | None: + self._started = False + return output_path + + def goto(self, url: str) -> None: + if self._page: + self._page.goto(url) + + +def create_mock_snapshot(): + """Create mock snapshot with test elements""" + elements = [ + Element( + id=1, + role="button", + text="Click Me", + importance=900, + bbox=BBox(x=100, y=200, width=80, height=30), + visual_cues=VisualCues( + is_primary=True, is_clickable=True, background_color_name="blue" + ), + in_viewport=True, + is_occluded=False, + z_index=10, + ), + ] + return Snapshot( + status="success", + timestamp="2024-12-24T10:00:00Z", + url="https://example.com", + viewport=Viewport(width=1920, height=1080), + elements=elements, + ) + + +class TestAgentErrorHandling: + """Test agent error handling scenarios""" + + def test_agent_handles_snapshot_timeout(self): + """Test agent handles snapshot timeout gracefully""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider() + agent = SentienceAgent(browser, llm, verbose=False) + + # Mock snapshot to raise timeout + with patch("sentience.agent.snapshot") as mock_snapshot: + from playwright._impl._errors import TimeoutError + + mock_snapshot.side_effect = TimeoutError("Snapshot timeout") + + with pytest.raises(RuntimeError, match="Failed after"): + agent.act("Click the button", max_retries=0) + + def test_agent_handles_network_failure(self): + """Test agent handles network failure during snapshot""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider() + agent = SentienceAgent(browser, llm, verbose=False) + + # Mock snapshot to raise network error + with patch("sentience.snapshot.snapshot") as mock_snapshot: + mock_snapshot.side_effect = ConnectionError("Network failure") + + with pytest.raises(RuntimeError, match="Failed after"): + agent.act("Click the button", max_retries=0) + + def test_agent_handles_empty_snapshot(self): + """Test agent handles empty snapshot (no elements)""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider(responses=["CLICK(1)"]) + agent = SentienceAgent(browser, llm, verbose=False) + + # Create empty snapshot + empty_snap = Snapshot( + status="success", + timestamp="2024-12-24T10:00:00Z", + url="https://example.com", + viewport=Viewport(width=1920, height=1080), + elements=[], + ) + + with ( + patch("sentience.agent.snapshot") as mock_snapshot, + patch("sentience.action_executor.click") as mock_click, + ): + from sentience.models import ActionResult + + mock_snapshot.return_value = empty_snap + mock_click.return_value = ActionResult( + success=False, duration_ms=100, outcome="Element not found" + ) + + # Agent should still attempt action even with empty snapshot + result = agent.act("Click the button", max_retries=0) + assert result.success is False + + def test_agent_handles_malformed_llm_response(self): + """Test agent handles malformed LLM response""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider(responses=["INVALID_RESPONSE_FORMAT"]) + agent = SentienceAgent(browser, llm, verbose=False) + + with ( + patch("sentience.snapshot.snapshot") as mock_snapshot, + ): + mock_snapshot.return_value = create_mock_snapshot() + + # Action executor should raise ValueError for invalid format + with pytest.raises(RuntimeError, match="Failed after"): + agent.act("Click the button", max_retries=0) + + def test_agent_handles_browser_not_started(self): + """Test agent handles browser not started error""" + browser = MockBrowser() # Not started + llm = MockLLMProvider() + agent = SentienceAgent(browser, llm, verbose=False) + + # Snapshot should fail because browser.page is None + with patch("sentience.snapshot.snapshot") as mock_snapshot: + mock_snapshot.side_effect = RuntimeError("Browser not started") + + with pytest.raises(RuntimeError, match="Failed after"): + agent.act("Click the button", max_retries=0) + + def test_agent_handles_action_timeout(self): + """Test agent handles action execution timeout""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider(responses=["CLICK(1)"]) + agent = SentienceAgent(browser, llm, verbose=False) + + with ( + patch("sentience.snapshot.snapshot") as mock_snapshot, + patch("sentience.action_executor.click") as mock_click, + ): + from playwright._impl._errors import TimeoutError + + mock_snapshot.return_value = create_mock_snapshot() + mock_click.side_effect = TimeoutError("Action timeout") + + with pytest.raises(RuntimeError, match="Failed after"): + agent.act("Click the button", max_retries=0) + + def test_agent_handles_url_change_during_action(self): + """Test agent handles URL change during action execution""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider(responses=["CLICK(1)"]) + agent = SentienceAgent(browser, llm, verbose=False) + + with ( + patch("sentience.snapshot.snapshot") as mock_snapshot, + patch("sentience.action_executor.click") as mock_click, + ): + from sentience.models import ActionResult + + mock_snapshot.return_value = create_mock_snapshot() + # Simulate URL change after click + mock_click.return_value = ActionResult( + success=True, duration_ms=150, outcome="navigated", url_changed=True + ) + + result = agent.act("Click the button", max_retries=0) + assert result.success is True + assert result.url_changed is True + + def test_agent_retry_on_transient_error(self): + """Test agent retries on transient errors""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider(responses=["CLICK(1)"]) + agent = SentienceAgent(browser, llm, verbose=False) + + with ( + patch("sentience.snapshot.snapshot") as mock_snapshot, + patch("sentience.action_executor.click") as mock_click, + ): + from sentience.models import ActionResult + + mock_snapshot.return_value = create_mock_snapshot() + # First call fails, second succeeds + mock_click.side_effect = [ + RuntimeError("Transient error"), + ActionResult(success=True, duration_ms=150, outcome="dom_updated"), + ] + + result = agent.act("Click the button", max_retries=1) + assert result.success is True + assert mock_click.call_count == 2 + + +class TestAgentEdgeCases: + """Test agent edge case scenarios""" + + def test_agent_handles_zero_elements_in_snapshot(self): + """Test agent handles snapshot with zero elements""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider(responses=["FINISH()"]) + agent = SentienceAgent(browser, llm, verbose=False) + + empty_snap = Snapshot( + status="success", + timestamp="2024-12-24T10:00:00Z", + url="https://example.com", + viewport=Viewport(width=1920, height=1080), + elements=[], + ) + + with patch("sentience.snapshot.snapshot") as mock_snapshot: + mock_snapshot.return_value = empty_snap + + # Agent should handle empty snapshot and finish + result = agent.act("Complete task", max_retries=0) + assert result.action == "finish" + assert result.success is True + + def test_agent_handles_unicode_in_actions(self): + """Test agent handles unicode characters in goals and actions""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider(responses=['TYPE(1, "你好世界")']) + agent = SentienceAgent(browser, llm, verbose=False) + + with ( + patch("sentience.snapshot.snapshot") as mock_snapshot, + patch("sentience.action_executor.type_text") as mock_type, + ): + from sentience.models import ActionResult + + mock_snapshot.return_value = create_mock_snapshot() + mock_type.return_value = ActionResult(success=True, duration_ms=200, outcome="dom_updated") + + result = agent.act("Type 你好世界", max_retries=0) + assert result.success is True + assert result.action == "type" + + def test_agent_handles_special_characters_in_goal(self): + """Test agent handles special characters in goal text""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider(responses=["CLICK(1)"]) + agent = SentienceAgent(browser, llm, verbose=False) + + with ( + patch("sentience.snapshot.snapshot") as mock_snapshot, + patch("sentience.action_executor.click") as mock_click, + ): + from sentience.models import ActionResult + + mock_snapshot.return_value = create_mock_snapshot() + mock_click.return_value = ActionResult(success=True, duration_ms=150, outcome="dom_updated") + + # Test with special characters + result = agent.act('Click the "Submit" button (with quotes)', max_retries=0) + assert result.success is True + + def test_agent_preserves_state_on_retry(self): + """Test agent preserves state correctly during retries""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider(responses=["CLICK(1)"]) + agent = SentienceAgent(browser, llm, verbose=False) + + with ( + patch("sentience.snapshot.snapshot") as mock_snapshot, + patch("sentience.action_executor.click") as mock_click, + ): + from sentience.models import ActionResult + + mock_snapshot.return_value = create_mock_snapshot() + # First attempt fails, second succeeds + mock_click.side_effect = [ + RuntimeError("First attempt failed"), + ActionResult(success=True, duration_ms=150, outcome="dom_updated"), + ] + + result = agent.act("Click the button", max_retries=1) + assert result.success is True + # History should have both attempts + assert len(agent.history) == 1 # Only successful attempt is recorded + assert agent.history[0]["attempt"] == 1 # Final successful attempt + + def test_agent_handles_tracer_errors_gracefully(self): + """Test agent continues execution even if tracer fails""" + browser = MockBrowser() + browser.start() + llm = MockLLMProvider(responses=["CLICK(1)"]) + # Create a tracer that raises errors + mock_tracer = Mock() + mock_tracer.emit.side_effect = RuntimeError("Tracer error") + + agent = SentienceAgent(browser, llm, verbose=False, tracer=mock_tracer) + + with ( + patch("sentience.snapshot.snapshot") as mock_snapshot, + patch("sentience.action_executor.click") as mock_click, + ): + from sentience.models import ActionResult + + mock_snapshot.return_value = create_mock_snapshot() + mock_click.return_value = ActionResult(success=True, duration_ms=150, outcome="dom_updated") + + # Agent should still complete action despite tracer error + result = agent.act("Click the button", max_retries=0) + assert result.success is True + From 4496ee8ebedc4f074119bc9c92440b833359c927 Mon Sep 17 00:00:00 2001 From: rcholic Date: Fri, 2 Jan 2026 14:41:09 -0800 Subject: [PATCH 2/3] Phase 5: BrowserProtocol PageProtocl for mocking mor unit tests --- sentience/action_executor.py | 7 ++++++- sentience/agent.py | 4 ++-- sentience/conversational_agent.py | 11 ++++++----- sentience/protocols.py | 19 ++++++++----------- tests/integration/__init__.py | 1 - tests/unit/__init__.py | 1 - tests/unit/test_agent_errors.py | 25 +++++++++++++++---------- 7 files changed, 37 insertions(+), 31 deletions(-) diff --git a/sentience/action_executor.py b/sentience/action_executor.py index 75c9585..f3b4752 100644 --- a/sentience/action_executor.py +++ b/sentience/action_executor.py @@ -24,7 +24,12 @@ class ActionExecutor: - Handle action parsing errors consistently """ - def __init__(self, browser: Union[SentienceBrowser, AsyncSentienceBrowser, BrowserProtocol, AsyncBrowserProtocol]): + def __init__( + self, + browser: ( + SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol + ), + ): """ Initialize action executor. diff --git a/sentience/agent.py b/sentience/agent.py index e7aedf3..cf69878 100644 --- a/sentience/agent.py +++ b/sentience/agent.py @@ -15,7 +15,6 @@ from .element_filter import ElementFilter from .llm_interaction_handler import LLMInteractionHandler from .llm_provider import LLMProvider, LLMResponse -from .protocols import AsyncBrowserProtocol, BrowserProtocol from .models import ( ActionHistory, ActionTokenUsage, @@ -26,6 +25,7 @@ SnapshotOptions, TokenStats, ) +from .protocols import AsyncBrowserProtocol, BrowserProtocol from .snapshot import snapshot, snapshot_async from .trace_event_builder import TraceEventBuilder @@ -59,7 +59,7 @@ class SentienceAgent(BaseAgent): def __init__( self, - browser: Union[SentienceBrowser, BrowserProtocol], + browser: SentienceBrowser | BrowserProtocol, llm: LLMProvider, default_snapshot_limit: int = 50, verbose: bool = True, diff --git a/sentience/conversational_agent.py b/sentience/conversational_agent.py index d2cdb5a..f9f2fc8 100644 --- a/sentience/conversational_agent.py +++ b/sentience/conversational_agent.py @@ -5,15 +5,13 @@ import json import time -from typing import Any - -from typing import Union +from typing import Any, Union from .agent import SentienceAgent from .browser import SentienceBrowser from .llm_provider import LLMProvider -from .protocols import BrowserProtocol from .models import ExtractionResult, Snapshot, SnapshotOptions, StepExecutionResult +from .protocols import BrowserProtocol from .snapshot import snapshot @@ -33,7 +31,10 @@ class ConversationalAgent: """ def __init__( - self, browser: Union[SentienceBrowser, BrowserProtocol], llm: LLMProvider, verbose: bool = True + self, + browser: SentienceBrowser | BrowserProtocol, + llm: LLMProvider, + verbose: bool = True, ): """ Initialize conversational agent diff --git a/sentience/protocols.py b/sentience/protocols.py index b289231..8369907 100644 --- a/sentience/protocols.py +++ b/sentience/protocols.py @@ -42,7 +42,7 @@ def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any: """ ... - def goto(self, url: str, **kwargs: Any) -> Optional[Any]: + def goto(self, url: str, **kwargs: Any) -> Any | None: """ Navigate to a URL. @@ -64,7 +64,7 @@ def wait_for_timeout(self, timeout: int) -> None: """ ... - def wait_for_load_state(self, state: str = "load", timeout: Optional[int] = None) -> None: + def wait_for_load_state(self, state: str = "load", timeout: int | None = None) -> None: """ Wait for page load state. @@ -89,7 +89,7 @@ class BrowserProtocol(Protocol): """ @property - def page(self) -> Optional[PageProtocol]: + def page(self) -> PageProtocol | None: """ Current Playwright Page object. @@ -102,7 +102,7 @@ def start(self) -> None: """Start the browser session.""" ... - def close(self, output_path: Optional[str] = None) -> Optional[str]: + def close(self, output_path: str | None = None) -> str | None: """ Close the browser session. @@ -151,7 +151,7 @@ async def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any: """ ... - async def goto(self, url: str, **kwargs: Any) -> Optional[Any]: + async def goto(self, url: str, **kwargs: Any) -> Any | None: """ Navigate to a URL (async). @@ -173,9 +173,7 @@ async def wait_for_timeout(self, timeout: int) -> None: """ ... - async def wait_for_load_state( - self, state: str = "load", timeout: Optional[int] = None - ) -> None: + async def wait_for_load_state(self, state: str = "load", timeout: int | None = None) -> None: """ Wait for page load state (async). @@ -195,7 +193,7 @@ class AsyncBrowserProtocol(Protocol): """ @property - def page(self) -> Optional[AsyncPageProtocol]: + def page(self) -> AsyncPageProtocol | None: """ Current Playwright AsyncPage object. @@ -208,7 +206,7 @@ async def start(self) -> None: """Start the browser session (async).""" ... - async def close(self, output_path: Optional[str] = None) -> Optional[str]: + async def close(self, output_path: str | None = None) -> str | None: """ Close the browser session (async). @@ -228,4 +226,3 @@ async def goto(self, url: str) -> None: url: URL to navigate to """ ... - diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 79f0bc0..bc27454 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -4,4 +4,3 @@ These tests use real browser instances to test end-to-end functionality and catch real-world bugs that mocks might miss. """ - diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index bfdecf4..75b552f 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -4,4 +4,3 @@ These tests use mocks and protocols to test logic in isolation, without requiring real browser instances. """ - diff --git a/tests/unit/test_agent_errors.py b/tests/unit/test_agent_errors.py index ae35238..8b251cc 100644 --- a/tests/unit/test_agent_errors.py +++ b/tests/unit/test_agent_errors.py @@ -59,11 +59,13 @@ def url(self) -> str: def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any: # Return proper snapshot structure when snapshot is called # The script is a function that calls window.sentience.snapshot(options) - if "window.sentience.snapshot" in script or ("snapshot" in script.lower() and "options" in script): + if "window.sentience.snapshot" in script or ( + "snapshot" in script.lower() and "options" in script + ): # Check if args contain options (for empty snapshot tests) options = kwargs.get("options") or (args[0] if args else {}) limit = options.get("limit", 50) if isinstance(options, dict) else 50 - + # Return elements based on limit (0 for empty snapshot tests) elements = [] if limit > 0: @@ -84,7 +86,7 @@ def evaluate(self, script: str, *args: Any, **kwargs: Any) -> Any: "z_index": 10, } ] - + # Snapshot model expects 'elements' not 'raw_elements' return { "status": "success", @@ -236,9 +238,7 @@ def test_agent_handles_malformed_llm_response(self): llm = MockLLMProvider(responses=["INVALID_RESPONSE_FORMAT"]) agent = SentienceAgent(browser, llm, verbose=False) - with ( - patch("sentience.snapshot.snapshot") as mock_snapshot, - ): + with (patch("sentience.snapshot.snapshot") as mock_snapshot,): mock_snapshot.return_value = create_mock_snapshot() # Action executor should raise ValueError for invalid format @@ -365,7 +365,9 @@ def test_agent_handles_unicode_in_actions(self): from sentience.models import ActionResult mock_snapshot.return_value = create_mock_snapshot() - mock_type.return_value = ActionResult(success=True, duration_ms=200, outcome="dom_updated") + mock_type.return_value = ActionResult( + success=True, duration_ms=200, outcome="dom_updated" + ) result = agent.act("Type 你好世界", max_retries=0) assert result.success is True @@ -385,7 +387,9 @@ def test_agent_handles_special_characters_in_goal(self): from sentience.models import ActionResult mock_snapshot.return_value = create_mock_snapshot() - mock_click.return_value = ActionResult(success=True, duration_ms=150, outcome="dom_updated") + mock_click.return_value = ActionResult( + success=True, duration_ms=150, outcome="dom_updated" + ) # Test with special characters result = agent.act('Click the "Submit" button (with quotes)', max_retries=0) @@ -435,9 +439,10 @@ def test_agent_handles_tracer_errors_gracefully(self): from sentience.models import ActionResult mock_snapshot.return_value = create_mock_snapshot() - mock_click.return_value = ActionResult(success=True, duration_ms=150, outcome="dom_updated") + mock_click.return_value = ActionResult( + success=True, duration_ms=150, outcome="dom_updated" + ) # Agent should still complete action despite tracer error result = agent.act("Click the button", max_retries=0) assert result.success is True - From 43cfc23f6ad56bbc00ec6bd06c92282e0de9ada8 Mon Sep 17 00:00:00 2001 From: rcholic Date: Fri, 2 Jan 2026 15:05:14 -0800 Subject: [PATCH 3/3] Phase 5: fixed new tests --- sentience/action_executor.py | 24 +++++-- sentience/agent.py | 107 ++++++++++++++++++++++++++++---- tests/unit/test_agent_errors.py | 9 ++- 3 files changed, 119 insertions(+), 21 deletions(-) diff --git a/sentience/action_executor.py b/sentience/action_executor.py index f3b4752..c95f29b 100644 --- a/sentience/action_executor.py +++ b/sentience/action_executor.py @@ -26,9 +26,7 @@ class ActionExecutor: def __init__( self, - browser: ( - SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol - ), + browser: SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol, ): """ Initialize action executor. @@ -39,7 +37,25 @@ def __init__( """ self.browser = browser # Check if browser is async - support both concrete types and protocols - self._is_async = isinstance(browser, (AsyncSentienceBrowser, AsyncBrowserProtocol)) + # Check concrete types first (most reliable) + if isinstance(browser, AsyncSentienceBrowser): + self._is_async = True + elif isinstance(browser, SentienceBrowser): + self._is_async = False + else: + # For protocol-based browsers, check if methods are actually async + # This is more reliable than isinstance checks which can match both protocols + import inspect + + start_method = getattr(browser, "start", None) + if start_method and inspect.iscoroutinefunction(start_method): + self._is_async = True + elif isinstance(browser, BrowserProtocol): + # If it implements BrowserProtocol and start is not async, it's sync + self._is_async = False + else: + # Default to sync for unknown types + self._is_async = False def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]: """ diff --git a/sentience/agent.py b/sentience/agent.py index cf69878..deafbd0 100644 --- a/sentience/agent.py +++ b/sentience/agent.py @@ -33,6 +33,37 @@ from .tracing import Tracer +def _safe_tracer_call( + tracer: Optional["Tracer"], method_name: str, verbose: bool, *args, **kwargs +) -> None: + """ + Safely call tracer method, catching and logging errors without breaking execution. + + Args: + tracer: Tracer instance or None + method_name: Name of tracer method to call (e.g., "emit", "emit_error") + verbose: Whether to print error messages + *args: Positional arguments for the tracer method + **kwargs: Keyword arguments for the tracer method + """ + if not tracer: + return + try: + method = getattr(tracer, method_name) + if args and kwargs: + method(*args, **kwargs) + elif args: + method(*args) + elif kwargs: + method(**kwargs) + else: + method() + except Exception as tracer_error: + # Tracer errors should not break agent execution + if verbose: + print(f"⚠️ Tracer error (non-fatal): {tracer_error}") + + class SentienceAgent(BaseAgent): """ High-level agent that combines Sentience SDK with any LLM provider. @@ -159,7 +190,10 @@ def act( # noqa: C901 # Emit step_start trace event if tracer is enabled if self.tracer: pre_url = self.browser.page.url if self.browser.page else None - self.tracer.emit_step_start( + _safe_tracer_call( + self.tracer, + "emit_step_start", + self.verbose, step_id=step_id, step_index=self._step_count, goal=goal, @@ -228,7 +262,10 @@ def act( # noqa: C901 if snap.screenshot_format: snapshot_data["screenshot_format"] = snap.screenshot_format - self.tracer.emit( + _safe_tracer_call( + self.tracer, + "emit", + self.verbose, "snapshot", snapshot_data, step_id=step_id, @@ -254,7 +291,10 @@ def act( # noqa: C901 # Emit LLM query trace event if tracer is enabled if self.tracer: - self.tracer.emit( + _safe_tracer_call( + self.tracer, + "emit", + self.verbose, "llm_query", { "prompt_tokens": llm_response.prompt_tokens, @@ -315,7 +355,10 @@ def act( # noqa: C901 for el in filtered_snap.elements[:50] ] - self.tracer.emit( + _safe_tracer_call( + self.tracer, + "emit", + self.verbose, "action", { "action": result.action, @@ -435,14 +478,28 @@ def act( # noqa: C901 verify_data=verify_data, ) - self.tracer.emit("step_end", step_end_data, step_id=step_id) + _safe_tracer_call( + self.tracer, + "emit", + self.verbose, + "step_end", + step_end_data, + step_id=step_id, + ) return result except Exception as e: # Emit error trace event if tracer is enabled if self.tracer: - self.tracer.emit_error(step_id=step_id, error=str(e), attempt=attempt) + _safe_tracer_call( + self.tracer, + "emit_error", + self.verbose, + step_id=step_id, + error=str(e), + attempt=attempt, + ) if attempt < max_retries: if self.verbose: @@ -668,7 +725,10 @@ async def act( # noqa: C901 # Emit step_start trace event if tracer is enabled if self.tracer: pre_url = self.browser.page.url if self.browser.page else None - self.tracer.emit_step_start( + _safe_tracer_call( + self.tracer, + "emit_step_start", + self.verbose, step_id=step_id, step_index=self._step_count, goal=goal, @@ -740,7 +800,10 @@ async def act( # noqa: C901 if snap.screenshot_format: snapshot_data["screenshot_format"] = snap.screenshot_format - self.tracer.emit( + _safe_tracer_call( + self.tracer, + "emit", + self.verbose, "snapshot", snapshot_data, step_id=step_id, @@ -766,7 +829,10 @@ async def act( # noqa: C901 # Emit LLM query trace event if tracer is enabled if self.tracer: - self.tracer.emit( + _safe_tracer_call( + self.tracer, + "emit", + self.verbose, "llm_query", { "prompt_tokens": llm_response.prompt_tokens, @@ -827,7 +893,10 @@ async def act( # noqa: C901 for el in filtered_snap.elements[:50] ] - self.tracer.emit( + _safe_tracer_call( + self.tracer, + "emit", + self.verbose, "action", { "action": result.action, @@ -947,14 +1016,28 @@ async def act( # noqa: C901 verify_data=verify_data, ) - self.tracer.emit("step_end", step_end_data, step_id=step_id) + _safe_tracer_call( + self.tracer, + "emit", + self.verbose, + "step_end", + step_end_data, + step_id=step_id, + ) return result except Exception as e: # Emit error trace event if tracer is enabled if self.tracer: - self.tracer.emit_error(step_id=step_id, error=str(e), attempt=attempt) + _safe_tracer_call( + self.tracer, + "emit_error", + self.verbose, + step_id=step_id, + error=str(e), + attempt=attempt, + ) if attempt < max_retries: if self.verbose: diff --git a/tests/unit/test_agent_errors.py b/tests/unit/test_agent_errors.py index 8b251cc..4287683 100644 --- a/tests/unit/test_agent_errors.py +++ b/tests/unit/test_agent_errors.py @@ -194,7 +194,8 @@ def test_agent_handles_network_failure(self): agent = SentienceAgent(browser, llm, verbose=False) # Mock snapshot to raise network error - with patch("sentience.snapshot.snapshot") as mock_snapshot: + # Patch at the agent module level since that's where it's imported + with patch("sentience.agent.snapshot") as mock_snapshot: mock_snapshot.side_effect = ConnectionError("Network failure") with pytest.raises(RuntimeError, match="Failed after"): @@ -217,15 +218,13 @@ def test_agent_handles_empty_snapshot(self): ) with ( - patch("sentience.agent.snapshot") as mock_snapshot, + patch("sentience.snapshot.snapshot") as mock_snapshot, patch("sentience.action_executor.click") as mock_click, ): from sentience.models import ActionResult mock_snapshot.return_value = empty_snap - mock_click.return_value = ActionResult( - success=False, duration_ms=100, outcome="Element not found" - ) + mock_click.return_value = ActionResult(success=False, duration_ms=100, outcome="error") # Agent should still attempt action even with empty snapshot result = agent.act("Click the button", max_retries=0)