diff --git a/sentience/agent.py b/sentience/agent.py index 8639e01..404e507 100644 --- a/sentience/agent.py +++ b/sentience/agent.py @@ -149,23 +149,8 @@ def act( # noqa: C901 if snap_opts.goal is None: snap_opts.goal = goal - # Convert screenshot config to dict if needed - screenshot_param = snap_opts.screenshot - if isinstance(snap_opts.screenshot, ScreenshotConfig): - screenshot_param = { - "format": snap_opts.screenshot.format, - "quality": snap_opts.screenshot.quality, - } - - # Call snapshot with converted parameters - snap = snapshot( - self.browser, - screenshot=screenshot_param, - limit=snap_opts.limit, - filter=snap_opts.filter.model_dump() if snap_opts.filter else None, - use_api=snap_opts.use_api, - goal=snap_opts.goal, # Pass goal to snapshot - ) + # Call snapshot with options object (matches TypeScript API) + snap = snapshot(self.browser, snap_opts) if snap.status != "success": raise RuntimeError(f"Snapshot failed: {snap.error}") diff --git a/sentience/conversational_agent.py b/sentience/conversational_agent.py index 29fc58d..c207f04 100644 --- a/sentience/conversational_agent.py +++ b/sentience/conversational_agent.py @@ -10,7 +10,7 @@ from .agent import SentienceAgent from .browser import SentienceBrowser from .llm_provider import LLMProvider -from .models import Snapshot +from .models import Snapshot, SnapshotOptions from .snapshot import snapshot @@ -274,7 +274,7 @@ def _execute_step(self, step: dict[str, Any]) -> dict[str, Any]: elif action == "EXTRACT_INFO": info_type = params["info_type"] # Get current page snapshot and extract info - snap = snapshot(self.browser, limit=50) + snap = snapshot(self.browser, SnapshotOptions(limit=50)) # Use LLM to extract specific information extracted = self._extract_information(snap, info_type) @@ -361,7 +361,7 @@ def _verify_condition(self, condition: str) -> bool: True if condition is met, False otherwise """ try: - snap = snapshot(self.browser, limit=30) + snap = snapshot(self.browser, SnapshotOptions(limit=30)) # Build context elements_text = "\n".join([f"{el.role}: {el.text}" for el in snap.elements[:20]]) diff --git a/sentience/snapshot.py b/sentience/snapshot.py index 5644634..986a9bd 100644 --- a/sentience/snapshot.py +++ b/sentience/snapshot.py @@ -5,7 +5,7 @@ import json import os import time -from typing import Any +from typing import Any, Optional import requests @@ -41,41 +41,33 @@ def _save_trace_to_file(raw_elements: list[dict[str, Any]], trace_path: str | No def snapshot( browser: SentienceBrowser, - screenshot: bool | None = None, - limit: int | None = None, - filter: dict[str, Any] | None = None, - use_api: bool | None = None, - save_trace: bool = False, - trace_path: str | None = None, - show_overlay: bool = False, + options: Optional[SnapshotOptions] = None, ) -> Snapshot: """ Take a snapshot of the current page Args: browser: SentienceBrowser instance - screenshot: Whether to capture screenshot (bool or dict with format/quality) - limit: Limit number of elements returned - filter: Filter options (min_area, allowed_roles, min_z_index) - use_api: Force use of server-side API if True, local extension if False. - If None, uses API if api_key is set, otherwise uses local extension. - save_trace: Whether to save raw_elements to JSON for benchmarking/training - trace_path: Path to save trace file. If None, uses "trace_{timestamp}.json" - show_overlay: Show visual overlay highlighting elements in browser + options: Snapshot options (screenshot, limit, filter, etc.) + If None, uses default options. Returns: Snapshot object + + Example: + # Basic snapshot with defaults + snap = snapshot(browser) + + # With options + snap = snapshot(browser, SnapshotOptions( + screenshot=True, + limit=100, + show_overlay=True + )) """ - # Build SnapshotOptions from individual parameters - options = SnapshotOptions( - screenshot=screenshot if screenshot is not None else False, - limit=limit if limit is not None else 50, - filter=filter, - use_api=use_api, - save_trace=save_trace, - trace_path=trace_path, - show_overlay=show_overlay, - ) + # Use default options if none provided + if options is None: + options = SnapshotOptions() # Determine if we should use server-side API should_use_api = ( diff --git a/sentience/wait.py b/sentience/wait.py index 5b7e099..f2b0b9a 100644 --- a/sentience/wait.py +++ b/sentience/wait.py @@ -5,7 +5,7 @@ import time from .browser import SentienceBrowser -from .models import WaitResult +from .models import WaitResult, SnapshotOptions from .query import find from .snapshot import snapshot @@ -46,7 +46,7 @@ def wait_for( while time.time() - start_time < timeout: # Take snapshot (may be local extension or remote API) - snap = snapshot(browser, use_api=use_api) + snap = snapshot(browser, SnapshotOptions(use_api=use_api)) # Try to find element element = find(snap, selector) diff --git a/tests/test_snapshot.py b/tests/test_snapshot.py index 2d73ea4..658ab4f 100644 --- a/tests/test_snapshot.py +++ b/tests/test_snapshot.py @@ -5,6 +5,7 @@ import pytest from sentience import SentienceBrowser, snapshot +from sentience.models import SnapshotOptions @pytest.mark.requires_extension @@ -110,7 +111,7 @@ def test_snapshot_with_goal(): browser.page.wait_for_load_state("networkidle") # Test snapshot with goal - snap = snapshot(browser, goal="Find the main heading") + snap = snapshot(browser, SnapshotOptions(goal="Find the main heading")) assert snap.status == "success" assert snap.url == "https://example.com/"