diff --git a/THIRD_PARTY_LICENSES.md b/THIRD_PARTY_LICENSES.md new file mode 100644 index 0000000..0c7bba6 --- /dev/null +++ b/THIRD_PARTY_LICENSES.md @@ -0,0 +1,5 @@ +# Third-Party Licenses + +This product optionally depends on third-party open source software: + +- **browser-use** (MIT License) — https://github.com/browser-use/browser-use diff --git a/examples/browser-use/README.md b/examples/browser-use/README.md new file mode 100644 index 0000000..5299d51 --- /dev/null +++ b/examples/browser-use/README.md @@ -0,0 +1,166 @@ +# Sentience + browser-use Integration + +This directory contains examples for integrating [Sentience](https://github.com/SentienceAPI/sentience-python) with [browser-use](https://github.com/browser-use/browser-use). + +## What is browser-use? + +[browser-use](https://github.com/browser-use/browser-use) is an open-source framework for building AI agents that can interact with web browsers. Sentience enhances browser-use by providing: + +- **Semantic element detection** — Accurate element identification using visual and structural cues +- **Token-slashed DOM context** — Reduces tokens by ~80% compared to raw DOM dumps +- **Importance-ranked elements** — Elements sorted by actionability for better LLM targeting +- **Ordinal task support** — "Click the 3rd item" works reliably with dominant group detection + +## Installation + +Install both packages together using the optional dependency: + +```bash +pip install "sentienceapi[browser-use]" +``` + +Or install separately: + +```bash +pip install sentienceapi browser-use +``` + +## Quick Start + +### Using SentienceContext (Recommended) + +`SentienceContext` provides a high-level API for getting compact, ranked DOM context: + +```python +from browser_use import BrowserSession, BrowserProfile +from sentience import get_extension_dir +from sentience.backends import SentienceContext, TopElementSelector + +# Setup browser with Sentience extension +profile = BrowserProfile( + 
args=[f"--load-extension={get_extension_dir()}"], +) +session = BrowserSession(browser_profile=profile) +await session.start() + +# Create context builder +ctx = SentienceContext( + max_elements=60, + top_element_selector=TopElementSelector( + by_importance=60, # Top N by importance score + from_dominant_group=15, # Top N from dominant group + by_position=10, # Top N by page position + ), +) + +# Build context from browser session +await session.navigate("https://news.ycombinator.com") +state = await ctx.build( + session, + goal="Find the first Show HN post", + wait_for_extension_ms=5000, +) + +if state: + print(f"URL: {state.url}") + print(f"Elements: {len(state.snapshot.elements)}") + print(f"Prompt block:\n{state.prompt_block}") +``` + +### Using Low-Level APIs + +For fine-grained control over snapshots and actions: + +```python +from sentience import find, query, get_extension_dir +from sentience.backends import BrowserUseAdapter, snapshot, click, type_text + +# Create adapter and backend +adapter = BrowserUseAdapter(session) +backend = await adapter.create_backend() + +# Take snapshot +snap = await snapshot(backend) + +# Find and interact with elements +search_box = find(snap, 'role=textbox[name*="Search"]') +if search_box: + await click(backend, search_box.bbox) + await type_text(backend, "Sentience AI") +``` + +## Examples + +| File | Description | +|------|-------------| +| [integration.py](integration.py) | Complete integration example with SentienceContext | + +## Output Format + +The `SentienceContext.build()` method returns a `SentienceContextState` with: + +- `url` — Current page URL +- `snapshot` — Full Sentience snapshot with all elements +- `prompt_block` — Compact LLM-ready context block + +The prompt block format: +``` +Elements: ID|role|text|imp|is_primary|docYq|ord|DG|href +Rules: ordinal→DG=1 then ord asc; otherwise imp desc. Use click(ID)/input_text(ID,...). 
+1|link|Show HN: My Project|85|1|2|0|1|ycombinato +2|link|Ask HN: Best practices|80|0|3|1|1|ycombinato +... +``` + +Fields: +- `ID` — Element ID for actions +- `role` — Semantic role (button, link, textbox, etc.) +- `text` — Truncated element text (max 30 chars) +- `imp` — Importance score (0-100) +- `is_primary` — 1 if primary CTA, 0 otherwise +- `docYq` — Quantized Y position (doc_y / 200) +- `ord` — Ordinal rank within dominant group, or "-" +- `DG` — 1 if in dominant group, 0 otherwise +- `href` — Compressed href token + +## API Reference + +### SentienceContext + +```python +SentienceContext( + sentience_api_key: str | None = None, # API key for gateway mode + use_api: bool | None = None, # Force API vs extension mode + max_elements: int = 60, # Max elements to fetch + show_overlay: bool = False, # Show visual overlay + top_element_selector: TopElementSelector | None = None, +) +``` + +### TopElementSelector + +```python +TopElementSelector( + by_importance: int = 60, # Top N by importance score + from_dominant_group: int = 15, # Top N from dominant group + by_position: int = 10, # Top N by page position +) +``` + +### SentienceContext.build() + +```python +await ctx.build( + browser_session, # browser-use BrowserSession + goal: str | None = None, # Task description for reranking + wait_for_extension_ms: int = 5000, # Extension load timeout + retries: int = 2, # Retry attempts + retry_delay_s: float = 1.0, # Delay between retries +) -> SentienceContextState | None +``` + +## License + +Sentience SDK is dual-licensed under MIT and Apache-2.0. + +browser-use is licensed under MIT. See [THIRD_PARTY_LICENSES.md](../../THIRD_PARTY_LICENSES.md). diff --git a/examples/browser-use/integration.py b/examples/browser-use/integration.py new file mode 100644 index 0000000..cddadf7 --- /dev/null +++ b/examples/browser-use/integration.py @@ -0,0 +1,338 @@ +""" +Example: Using Sentience with browser-use for element grounding. 
def find_playwright_browser() -> str | None:
    """
    Locate the newest Playwright-managed Chromium executable on macOS.

    Looks under ``~/Library/Caches/ms-playwright`` for either the
    "Google Chrome for Testing" or the "Chromium" app bundle. Patterns are
    tried in that order; within a pattern the build directory with the
    highest *numeric* revision wins (``chromium-1000`` beats ``chromium-999``,
    which a plain string sort would get wrong).

    Returns:
        Path of the executable as a string, or ``None`` when nothing was
        found (browser-use is then expected to install a browser itself).
    """
    playwright_path = Path.home() / "Library/Caches/ms-playwright"
    chromium_patterns = [
        playwright_path
        / "chromium-*/chrome-mac*/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing",
        playwright_path / "chromium-*/chrome-mac*/Chromium.app/Contents/MacOS/Chromium",
    ]

    def revision_key(path: str) -> tuple[int, str]:
        # First path component below the cache root is "chromium-<revision>".
        build_dir = Path(path).relative_to(playwright_path).parts[0]
        revision = build_dir.rpartition("-")[2]
        # Non-numeric suffixes sort below every real revision; the path itself
        # is the tie-breaker so the result stays deterministic.
        return (int(revision) if revision.isdigit() else -1, path)

    for pattern in chromium_patterns:
        matches = glob.glob(str(pattern))
        if not matches:
            continue
        executable_path = max(matches, key=revision_key)  # Use latest version
        if Path(executable_path).exists():
            print(f"Found Playwright browser: {executable_path}")
            return executable_path

    print("Playwright browser not found, browser-use will try to install it")
    return None
async def example_with_sentience_context() -> None:
    """
    Walk through the SentienceContext workflow and print its configuration.

    Only the SentienceContext object itself is constructed here. Every step
    that needs a live browser-use session is kept as commented-out reference
    code, so the function runs without browser-use installed and simply
    prints the chosen settings plus instructions for enabling the real flow.
    """
    # Uncomment when running with browser-use installed:
    # from browser_use import Agent, BrowserSession, ChatBrowserUse

    banner = "=" * 60
    print(banner)
    print("SentienceContext Integration Example")
    print(banner)

    # -------------------------------------------------------------------------
    # STEP 1: Start browser-use with the Sentience extension loaded
    # -------------------------------------------------------------------------
    #
    # browser_profile = get_browser_profile_with_sentience()
    # session = BrowserSession(browser_profile=browser_profile)
    # await session.start()

    # -------------------------------------------------------------------------
    # STEP 2: Create the SentienceContext
    # -------------------------------------------------------------------------
    #
    # Constructor options used by this example:
    #
    #   - sentience_api_key: optional API key for gateway mode
    #   - max_elements: maximum elements to fetch (default: 60)
    #   - show_overlay: show a visual overlay on elements (default: False)
    #   - top_element_selector: element selection strategy
    selector = TopElementSelector(
        by_importance=60,  # Top N by importance score
        from_dominant_group=15,  # Top N from dominant group (for ordinal tasks)
        by_position=10,  # Top N by position (top of page)
    )
    ctx = SentienceContext(
        max_elements=60,
        show_overlay=False,
        top_element_selector=selector,
    )

    # -------------------------------------------------------------------------
    # STEP 3: Build context from the browser session
    # -------------------------------------------------------------------------
    #
    # build() does three things:
    #   1. Waits for the Sentience extension to load (configurable timeout)
    #   2. Takes a snapshot using the extension
    #   3. Formats elements into a compact prompt block
    #
    # await session.navigate("https://news.ycombinator.com")
    #
    # state = await ctx.build(
    #     session,
    #     goal="Find the first Show HN post",  # Optional: helps with reranking
    #     wait_for_extension_ms=5000,  # Wait up to 5s for extension
    #     retries=2,  # Retry on failure
    # )
    #
    # if state:
    #     print(f"URL: {state.url}")
    #     print(f"Elements: {len(state.snapshot.elements)}")
    #     print(f"Prompt block preview:\n{state.prompt_block[:500]}...")

    # -------------------------------------------------------------------------
    # STEP 4: Feed the prompt block to an LLM agent
    # -------------------------------------------------------------------------
    #
    # The prompt_block consists of:
    #   - Header: "Elements: ID|role|text|imp|is_primary|docYq|ord|DG|href"
    #   - Rules for interpreting the data
    #   - Compact element list
    #
    # It can be injected into an agent's context:
    #
    # llm = ChatBrowserUse()
    # agent = Agent(
    #     task="Click the first Show HN post",
    #     llm=llm,
    #     browser_profile=browser_profile,
    #     use_vision=False,  # Sentience provides semantic geometry
    # )
    #
    # # Inject Sentience context into the agent (method depends on browser-use API)
    # agent.add_context(state.prompt_block)

    # -------------------------------------------------------------------------
    # STEP 5: Direct element interaction (alternative to an agent)
    # -------------------------------------------------------------------------
    #
    # Sentience's direct action APIs can be used instead:
    #
    # adapter = BrowserUseAdapter(session)
    # backend = await adapter.create_backend()
    #
    # # Take snapshot
    # snap = await snapshot(backend)
    #
    # # Find element by semantic query
    # show_hn_link = find(snap, 'role=link[name*="Show HN"]')
    # if show_hn_link:
    #     await click(backend, show_hn_link.bbox)
    #
    # # Type into an input
    # search_box = find(snap, 'role=textbox')
    # if search_box:
    #     await click(backend, search_box.bbox)
    #     await type_text(backend, "Sentience AI")

    # -------------------------------------------------------------------------
    # Report the configuration that was just built
    # -------------------------------------------------------------------------
    active_selector = ctx._selector
    config_report = (
        "",
        "SentienceContext Configuration:",
        f" max_elements: {ctx._max_elements}",
        f" show_overlay: {ctx._show_overlay}",
        " top_element_selector:",
        f" by_importance: {active_selector.by_importance}",
        f" from_dominant_group: {active_selector.from_dominant_group}",
        f" by_position: {active_selector.by_position}",
        "",
    )
    for report_line in config_report:
        print(report_line)

    print("Extension path:", get_extension_dir())

    how_to_run = (
        "",
        "To run with a real browser:",
        ' 1. pip install "sentienceapi[browser-use]" python-dotenv',
        " 2. Uncomment the browser-use imports and code sections",
        " 3. Run: python examples/browser-use/integration.py",
    )
    for instruction in how_to_run:
        print(instruction)
+ """ + print("=" * 60) + print("Low-Level Sentience API Example") + print("=" * 60) + + # ========================================================================= + # Direct snapshot and interaction pattern + # ========================================================================= + # + # from browser_use import BrowserSession, BrowserProfile + # + # browser_profile = get_browser_profile_with_sentience() + # session = BrowserSession(browser_profile=browser_profile) + # await session.start() + # + # # Create adapter and backend + # adapter = BrowserUseAdapter(session) + # backend = await adapter.create_backend() + # + # await session.navigate("https://www.google.com") + # + # # Take snapshot with retry + # try: + # snap = await snapshot(backend) + # print(f"Found {len(snap.elements)} elements") + # except ExtensionNotLoadedError as e: + # print(f"Extension not loaded: {e}") + # return + + # ========================================================================= + # Element queries + # ========================================================================= + # + # # Find single element + # search_input = find(snap, 'role=textbox[name*="Search"]') + # + # # Find all matching elements + # all_links = query(snap, 'role=link') + # print(f"Found {len(all_links)} links") + # + # # Click and type + # if search_input: + # await click(backend, search_input.bbox) + # await type_text(backend, "Sentience AI browser automation") + + # ========================================================================= + # Cached snapshots for efficiency + # ========================================================================= + # + # cache = CachedSnapshot(backend, max_age_ms=2000) + # + # snap1 = await cache.get() # Fresh snapshot + # snap2 = await cache.get() # Returns cached if < 2s old + # + # await click(backend, some_element.bbox) + # cache.invalidate() # Force refresh on next get() + + # ========================================================================= + # 
async def main() -> None:
    """Run each example coroutine in order, printing a blank line between them."""
    demos = (example_with_sentience_context, example_low_level_api)
    for position, demo in enumerate(demos):
        if position:
            # Visual separator between consecutive examples.
            print()
        await demo()
Launch browser-use with Sentience extension loaded - 2. Create a Sentience backend adapter - 3. Take snapshots and interact with elements using semantic queries - """ - - # ========================================================================= - # STEP 1: Setup browser-use with Sentience extension - # ========================================================================= - # - # The Sentience extension must be loaded for element grounding to work. - # Use get_extension_dir() to get the path to the bundled extension. - # - # Uncomment the following when running with browser-use installed: - - # extension_path = get_extension_dir() - # print(f"Loading Sentience extension from: {extension_path}") - # - # profile = BrowserProfile( - # args=[ - # f"--load-extension={extension_path}", - # "--disable-extensions-except=" + extension_path, - # ], - # ) - # session = BrowserSession(browser_profile=profile) - # await session.start() - - # ========================================================================= - # STEP 2: Create Sentience backend adapter - # ========================================================================= - # - # The adapter bridges browser-use's CDP client to Sentience's backend protocol. 
- # - # adapter = BrowserUseAdapter(session) - # backend = await adapter.create_backend() - - # ========================================================================= - # STEP 3: Navigate and take snapshots - # ========================================================================= - # - # await session.navigate("https://www.google.com") - # - # # Take a snapshot - this uses the Sentience extension's element detection - # try: - # snap = await snapshot(backend) - # print(f"Found {len(snap.elements)} elements") - # except ExtensionNotLoadedError as e: - # print(f"Extension not loaded: {e}") - # print("Make sure the browser was launched with --load-extension flag") - # return - - # ========================================================================= - # STEP 4: Find and interact with elements using semantic queries - # ========================================================================= - # - # Sentience provides powerful element selectors: - # - Role-based: 'role=textbox', 'role=button' - # - Name-based: 'role=button[name="Submit"]' - # - Text-based: 'text=Search' - # - # # Find the search input - # search_input = find(snap, 'role=textbox[name*="Search"]') - # if search_input: - # # Click on the search input (uses center of bounding box) - # await click(backend, search_input.bbox) - # - # # Type search query - # await type_text(backend, "Sentience AI browser automation") - # print("Typed search query") - - # ========================================================================= - # STEP 5: Using cached snapshots for efficiency - # ========================================================================= - # - # Taking snapshots has overhead. 
Use CachedSnapshot to reuse recent snapshots: - # - # cache = CachedSnapshot(backend, max_age_ms=2000) - # - # # First call takes fresh snapshot - # snap1 = await cache.get() - # - # # Second call returns cached version if less than 2 seconds old - # snap2 = await cache.get() - # - # # After actions that modify DOM, invalidate the cache - # await click(backend, some_element.bbox) - # cache.invalidate() # Next get() will take fresh snapshot - - # ========================================================================= - # STEP 6: Scrolling to elements - # ========================================================================= - # - # # Scroll down by 500 pixels - # await scroll(backend, delta_y=500) - # - # # Scroll at a specific position (useful for scrollable containers) - # await scroll(backend, delta_y=300, target=(400, 500)) - - # ========================================================================= - # STEP 7: Advanced element queries - # ========================================================================= - # - # # Find all buttons - # buttons = query(snap, 'role=button') - # print(f"Found {len(buttons)} buttons") - # - # # Find by partial text match - # links = query(snap, 'role=link[name*="Learn"]') - # - # # Find by exact text - # submit_btn = find(snap, 'role=button[name="Submit"]') - - # ========================================================================= - # STEP 8: Error handling - # ========================================================================= - # - # Sentience provides specific exceptions for common errors: - # - # from sentience.backends import ( - # ExtensionNotLoadedError, # Extension not loaded in browser - # SnapshotError, # Snapshot failed - # ActionError, # Click/type/scroll failed - # ) - # - # try: - # snap = await snapshot(backend) - # except ExtensionNotLoadedError as e: - # # The error message includes fix suggestions - # print(f"Fix: {e}") - - # 
========================================================================= - # CLEANUP - # ========================================================================= - # - # await session.stop() - - print("=" * 60) - print("browser-use + Sentience Integration Example") - print("=" * 60) - print() - print("This example demonstrates the integration pattern.") - print("To run with a real browser, uncomment the code sections above") - print("and install browser-use: pip install browser-use") - print() - print("Key imports:") - print(" from sentience import get_extension_dir, find, query") - print(" from sentience.backends import (") - print(" BrowserUseAdapter, snapshot, click, type_text, scroll") - print(" )") - print() - print("Extension path:", get_extension_dir()) - - -async def full_example() -> None: - """ - Complete working example - requires browser-use installed. - - This is the uncommented version for users who have browser-use installed. - """ - # Import browser-use (uncomment when installed) - # from browser_use import BrowserSession, BrowserProfile - - print("To run the full example:") - print("1. Install browser-use: pip install browser-use") - print("2. Uncomment the imports in this function") - print("3. 
Run: python examples/browser_use_integration.py") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/pyproject.toml b/pyproject.toml index 1b13be7..57caf75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,9 @@ Issues = "https://github.com/SentienceAPI/sentience-python/issues" sentience = "sentience.cli:main" [project.optional-dependencies] +browser-use = [ + "browser-use>=0.1.40", +] dev = [ "pytest>=7.0.0", "pytest-asyncio>=0.21.0", diff --git a/sentience/backends/__init__.py b/sentience/backends/__init__.py index 97601c6..3b4f1a7 100644 --- a/sentience/backends/__init__.py +++ b/sentience/backends/__init__.py @@ -97,6 +97,7 @@ ) from .playwright_backend import PlaywrightBackend from .protocol_v0 import BrowserBackendV0, LayoutMetrics, ViewportInfo +from .sentience_context import SentienceContext, SentienceContextState, TopElementSelector from .snapshot import CachedSnapshot, snapshot __all__ = [ @@ -113,6 +114,10 @@ # browser-use adapter "BrowserUseAdapter", "BrowserUseCDPTransport", + # SentienceContext (Token-Slasher Context Middleware) + "SentienceContext", + "SentienceContextState", + "TopElementSelector", # Backend-agnostic functions "snapshot", "CachedSnapshot", diff --git a/sentience/backends/sentience_context.py b/sentience/backends/sentience_context.py new file mode 100644 index 0000000..e53275e --- /dev/null +++ b/sentience/backends/sentience_context.py @@ -0,0 +1,469 @@ +""" +SentienceContext: Token-Slasher Context Middleware for browser-use. + +This module provides a compact, ranked DOM context block for browser-use agents, +reducing tokens and improving reliability by using Sentience snapshots. 
@dataclass
class SentienceContextState:
    """Sentience context state with snapshot and formatted prompt block."""

    url: str
    snapshot: Snapshot
    prompt_block: str


@dataclass
class TopElementSelector:
    """
    Configuration for element selection strategy.

    The selector uses a 3-way merge to pick elements for the LLM context:
    1. Top N by importance score (most actionable elements)
    2. Top N from dominant group (for ordinal tasks like "click 3rd item")
    3. Top N by position (elements at top of page, lowest doc_y)

    Elements are deduplicated across all three sources.

    Raises:
        ValueError: If any count is negative. The counts are used as slice
            bounds (``elements[:N]``), so a negative value would silently
            select "everything except the last |N|" instead of failing.
    """

    by_importance: int = 60
    """Number of top elements to select by importance score (descending)."""

    from_dominant_group: int = 15
    """Number of top elements to select from the dominant group (for ordinal tasks)."""

    by_position: int = 10
    """Number of top elements to select by position (lowest doc_y = top of page)."""

    def __post_init__(self) -> None:
        """Reject negative counts; zero is allowed and disables that source."""
        for name in ("by_importance", "from_dominant_group", "by_position"):
            value = getattr(self, name)
            if value < 0:
                raise ValueError(f"{name} must be >= 0, got {value}")
+ + Example: + from browser_use import Agent + from sentience.backends import SentienceContext + + ctx = SentienceContext(show_overlay=True) + state = await ctx.build(agent.browser_session, goal="Click the first Show HN post") + if state: + agent.add_context(state.prompt_block) + """ + + # Sentience API endpoint + API_URL = SENTIENCE_API_URL + + def __init__( + self, + *, + sentience_api_key: str | None = None, + use_api: bool | None = None, + max_elements: int = 60, + show_overlay: bool = False, + top_element_selector: TopElementSelector | None = None, + ) -> None: + """ + Initialize SentienceContext. + + Args: + sentience_api_key: Sentience API key for gateway mode + use_api: Force API vs extension mode (auto-detected if None) + max_elements: Maximum elements to fetch from snapshot + show_overlay: Show visual overlay highlighting elements in browser + top_element_selector: Configuration for element selection strategy + """ + self._api_key = sentience_api_key + self._use_api = use_api + self._max_elements = max_elements + self._show_overlay = show_overlay + self._selector = top_element_selector or TopElementSelector() + + async def build( + self, + browser_session: Any, + *, + goal: str | None = None, + wait_for_extension_ms: int = 5000, + retries: int = 2, + retry_delay_s: float = 1.0, + ) -> SentienceContextState | None: + """ + Build context state from browser session. + + Takes a snapshot using the Sentience extension and formats it for LLM consumption. + Returns None if snapshot fails (extension not loaded, timeout, etc.). 
+ + Args: + browser_session: Browser-use BrowserSession instance + goal: Optional goal/task description (passed to gateway for reranking) + wait_for_extension_ms: Maximum time to wait for extension injection + retries: Number of retry attempts on snapshot failure + retry_delay_s: Delay between retries in seconds + + Returns: + SentienceContextState with snapshot and formatted prompt, or None if failed + """ + try: + # Import here to avoid requiring sentience as a hard dependency + from ..models import SnapshotOptions + from .browser_use_adapter import BrowserUseAdapter + from .snapshot import snapshot + + # Create adapter and backend + adapter = BrowserUseAdapter(browser_session) + backend = await adapter.create_backend() + + # Wait for extension to inject (poll until ready or timeout) + await self._wait_for_extension(backend, timeout_ms=wait_for_extension_ms) + + # Build snapshot options + options = SnapshotOptions( + limit=self._max_elements, + show_overlay=self._show_overlay, + goal=goal, + ) + + # Set API options + if self._api_key: + options.sentience_api_key = self._api_key + if self._use_api is not None: + options.use_api = self._use_api + elif self._api_key: + options.use_api = True + + # Take snapshot with retry logic + snap = None + last_error: Exception | None = None + + for attempt in range(retries): + try: + snap = await snapshot(backend, options=options) + break # Success + except Exception as e: + last_error = e + if attempt < retries - 1: + logger.debug( + "Sentience snapshot attempt %d failed: %s, retrying...", + attempt + 1, + e, + ) + await asyncio.sleep(retry_delay_s) + else: + logger.warning( + "Sentience snapshot failed after %d attempts: %s", + retries, + last_error, + ) + return None + + if snap is None: + logger.warning("Sentience snapshot returned None") + return None + + # Get URL from snapshot + url = snap.url or "" + + # Format for LLM + formatted = self._format_snapshot_for_llm(snap) + + # Build prompt block + prompt = ( + "Elements: 
ID|role|text|imp|is_primary|docYq|ord|DG|href\n" + "Rules: ordinal→DG=1 then ord asc; otherwise imp desc. " + "Use click(ID)/input_text(ID,...).\n" + f"{formatted}" + ) + + logger.info( + "SentienceContext snapshot: %d elements URL=%s", + len(snap.elements), + url, + ) + + return SentienceContextState(url=url, snapshot=snap, prompt_block=prompt) + + except ImportError as e: + logger.warning("Sentience SDK not available: %s", e) + return None + except Exception as e: + logger.warning("Sentience snapshot skipped: %s", e) + return None + + def _format_snapshot_for_llm(self, snapshot: Snapshot) -> str: + """ + Format Sentience snapshot for LLM consumption. + + Creates an ultra-compact inventory of interactive elements optimized + for minimal token usage. Uses 3-way selection: by importance, + from dominant group, and by position. + + Args: + snapshot: Sentience Snapshot object + + Returns: + Formatted string with format: ID|role|text|imp|is_primary|docYq|ord|DG|href + """ + # Filter to interactive elements only + interactive_roles = { + "button", + "link", + "textbox", + "searchbox", + "combobox", + "checkbox", + "radio", + "slider", + "tab", + "menuitem", + "option", + "switch", + "cell", + "a", + "input", + "select", + "textarea", + } + + interactive_elements: list[Element] = [] + for el in snapshot.elements: + role = (el.role or "").lower() + if role in interactive_roles: + interactive_elements.append(el) + + # Sort by importance (descending) for importance-based selection + interactive_elements.sort(key=lambda el: el.importance or 0, reverse=True) + + # Get top N by importance (track by ID for deduplication) + selected_ids: set[int] = set() + selected_elements: list[Element] = [] + + for el in interactive_elements[: self._selector.by_importance]: + if el.id not in selected_ids: + selected_ids.add(el.id) + selected_elements.append(el) + + # Get top elements from dominant group (for ordinal tasks) + # Prefer in_dominant_group field (uses fuzzy matching from gateway) 
+ dominant_group_elements = [ + el for el in interactive_elements if el.in_dominant_group is True + ] + + # Fallback to exact group_key match if in_dominant_group not populated + if not dominant_group_elements and snapshot.dominant_group_key: + dominant_group_elements = [ + el for el in interactive_elements if el.group_key == snapshot.dominant_group_key + ] + + # Sort by group_index for ordinal ordering + dominant_group_elements.sort(key=lambda el: el.group_index or 999) + + for el in dominant_group_elements[: self._selector.from_dominant_group]: + if el.id not in selected_ids: + selected_ids.add(el.id) + selected_elements.append(el) + + # Get top elements by position (lowest doc_y = top of page) + def get_y_position(el: Element) -> float: + if el.doc_y is not None: + return el.doc_y + if el.bbox is not None: + return el.bbox.y + return float("inf") + + elements_by_position = sorted( + interactive_elements, key=lambda el: (get_y_position(el), -(el.importance or 0)) + ) + + for el in elements_by_position[: self._selector.by_position]: + if el.id not in selected_ids: + selected_ids.add(el.id) + selected_elements.append(el) + + # Compute local rank_in_group for dominant group elements + rank_in_group_map: dict[int, int] = {} + if True: # Always compute rank_in_group + # Sort dominant group elements by position for rank computation + dg_elements_for_rank = [ + el for el in interactive_elements if el.in_dominant_group is True + ] + if not dg_elements_for_rank and snapshot.dominant_group_key: + dg_elements_for_rank = [ + el for el in interactive_elements if el.group_key == snapshot.dominant_group_key + ] + + # Sort by (doc_y, bbox.y, bbox.x, -importance) + def rank_sort_key(el: Element) -> tuple[float, float, float, float]: + doc_y = el.doc_y if el.doc_y is not None else float("inf") + bbox_y = el.bbox.y if el.bbox else float("inf") + bbox_x = el.bbox.x if el.bbox else float("inf") + neg_importance = -(el.importance or 0) + return (doc_y, bbox_y, bbox_x, neg_importance) 
+ + dg_elements_for_rank.sort(key=rank_sort_key) + for rank, el in enumerate(dg_elements_for_rank): + rank_in_group_map[el.id] = rank + + # Format lines + lines: list[str] = [] + for el in selected_elements: + # Get role (override to "link" if element has href) + role = el.role or "" + if el.href: + role = "link" + elif not role: + # Generic fallback for interactive elements without explicit role + role = "element" + + # Get name/text (truncate aggressively, normalize whitespace) + name = el.text or "" + # Remove newlines and normalize whitespace + name = re.sub(r"\s+", " ", name.strip()) + if len(name) > 30: + name = name[:27] + "..." + + # Extract fields + importance = el.importance or 0 + doc_y = el.doc_y or 0 + + # is_primary: from visual_cues.is_primary (boolean) + is_primary = False + if el.visual_cues: + is_primary = el.visual_cues.is_primary or False + is_primary_flag = "1" if is_primary else "0" + + # Pre-encode fields for compactness + # docYq: bucketed doc_y (round to nearest 200 for smaller numbers) + doc_yq = int(round(doc_y / 200)) if doc_y else 0 + + # Determine if in dominant group + in_dg = el.in_dominant_group + if in_dg is None and snapshot.dominant_group_key: + # Fallback for older gateway versions + in_dg = el.group_key == snapshot.dominant_group_key + + # ord_val: rank_in_group if in dominant group + if in_dg and el.id in rank_in_group_map: + ord_val: str | int = rank_in_group_map[el.id] + else: + ord_val = "-" + + # DG: 1 if dominant group, else 0 + dg_flag = "1" if in_dg else "0" + + # href: short token (domain or last path segment, or blank) + href = self._compress_href(el.href) + + # Ultra-compact format: ID|role|text|imp|is_primary|docYq|ord|DG|href + line = f"{el.id}|{role}|{name}|{importance}|{is_primary_flag}|{doc_yq}|{ord_val}|{dg_flag}|{href}" + lines.append(line) + + logger.debug( + "Formatted %d elements (top %d by importance + top %d from dominant group + top %d by position)", + len(lines), + self._selector.by_importance, + 
self._selector.from_dominant_group, + self._selector.by_position, + ) + + return "\n".join(lines) + + async def _wait_for_extension( + self, + backend: Any, + *, + timeout_ms: int = 5000, + poll_interval_ms: int = 100, + ) -> bool: + """ + Wait for Sentience extension to be ready in the browser. + + Polls window.sentience until it's defined or timeout is reached. + + Args: + backend: Browser backend with evaluate() method + timeout_ms: Maximum time to wait in milliseconds + poll_interval_ms: Interval between polls in milliseconds + + Returns: + True if extension is ready, False if timeout + """ + elapsed_ms = 0 + poll_interval_s = poll_interval_ms / 1000 + + while elapsed_ms < timeout_ms: + try: + result = await backend.evaluate("typeof window.sentience !== 'undefined'") + if result is True: + logger.debug("Sentience extension ready after %dms", elapsed_ms) + return True + except Exception: + # Extension not ready yet, continue polling + pass + + await asyncio.sleep(poll_interval_s) + elapsed_ms += poll_interval_ms + + logger.warning("Sentience extension not ready after %dms timeout", timeout_ms) + return False + + def _compress_href(self, href: str | None) -> str: + """ + Compress href into a short token for minimal tokens. 
+ + Args: + href: Full URL or None + + Returns: + Short token (domain second-level or last path segment) + """ + if not href: + return "" + + try: + parsed = urlparse(href) + if parsed.netloc: + # Extract second-level domain (e.g., "github" from "github.com") + parts = parsed.netloc.split(".") + if len(parts) >= 2: + return parts[-2][:10] + return parsed.netloc[:10] + elif parsed.path: + # Use last path segment + segments = [s for s in parsed.path.split("/") if s] + if segments: + return segments[-1][:10] + return "item" + except Exception: + pass + + return "item" diff --git a/sentience/backends/snapshot.py b/sentience/backends/snapshot.py index 2a1ff7d..10f4bac 100644 --- a/sentience/backends/snapshot.py +++ b/sentience/backends/snapshot.py @@ -24,6 +24,7 @@ import time from typing import TYPE_CHECKING, Any +from ..constants import SENTIENCE_API_URL from ..models import Snapshot, SnapshotOptions from ..snapshot import ( _build_snapshot_payload, @@ -329,7 +330,7 @@ async def _snapshot_via_api( ) -> Snapshot: """Take snapshot using server-side API (Pro/Enterprise tier)""" # Default API URL (same as main snapshot function) - api_url = "https://api.sentienceapi.com" + api_url = SENTIENCE_API_URL # Wait for extension injection (needed even for API mode to collect raw data) await _wait_for_extension(backend, timeout_ms=5000) diff --git a/sentience/browser.py b/sentience/browser.py index 1c610ae..7e40cb1 100644 --- a/sentience/browser.py +++ b/sentience/browser.py @@ -20,6 +20,7 @@ from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright from sentience._extension_loader import find_extension_path +from sentience.constants import SENTIENCE_API_URL from sentience.models import ProxyConfig, StorageState, Viewport logger = logging.getLogger(__name__) @@ -90,7 +91,7 @@ def __init__( # Only set api_url if api_key is provided, otherwise None (free tier) # Defaults to production API if key is present but url is missing if self.api_key and not api_url: - 
self.api_url = "https://api.sentienceapi.com" + self.api_url = SENTIENCE_API_URL else: self.api_url = api_url @@ -675,7 +676,7 @@ def __init__( self.api_key = api_key # Only set api_url if api_key is provided, otherwise None (free tier) if self.api_key and not api_url: - self.api_url = "https://api.sentienceapi.com" + self.api_url = SENTIENCE_API_URL else: self.api_url = api_url diff --git a/sentience/cloud_tracing.py b/sentience/cloud_tracing.py index ab2d366..366279d 100644 --- a/sentience/cloud_tracing.py +++ b/sentience/cloud_tracing.py @@ -16,6 +16,7 @@ import requests +from sentience.constants import SENTIENCE_API_URL from sentience.models import TraceStats from sentience.trace_file_manager import TraceFileManager from sentience.tracing import TraceSink @@ -93,7 +94,7 @@ def __init__( self.upload_url = upload_url self.run_id = run_id self.api_key = api_key - self.api_url = api_url or "https://api.sentienceapi.com" + self.api_url = api_url or SENTIENCE_API_URL self.logger = logger # Use persistent cache directory instead of temp file diff --git a/sentience/constants.py b/sentience/constants.py new file mode 100644 index 0000000..2f2f701 --- /dev/null +++ b/sentience/constants.py @@ -0,0 +1,6 @@ +""" +Sentience SDK constants. 
+""" + +# Sentience API endpoint +SENTIENCE_API_URL = "https://api.sentienceapi.com" diff --git a/sentience/snapshot.py b/sentience/snapshot.py index 5ebc412..5720f79 100644 --- a/sentience/snapshot.py +++ b/sentience/snapshot.py @@ -12,6 +12,7 @@ from .browser import AsyncSentienceBrowser, SentienceBrowser from .browser_evaluator import BrowserEvaluator +from .constants import SENTIENCE_API_URL from .models import Snapshot, SnapshotOptions from .sentience_methods import SentienceMethod @@ -58,7 +59,7 @@ def _validate_payload_size(payload_json: str) -> None: def _post_snapshot_to_gateway_sync( payload: dict[str, Any], api_key: str, - api_url: str = "https://api.sentienceapi.com", + api_url: str = SENTIENCE_API_URL, ) -> dict[str, Any]: """ Post snapshot payload to gateway (synchronous). @@ -86,7 +87,7 @@ def _post_snapshot_to_gateway_sync( async def _post_snapshot_to_gateway_async( payload: dict[str, Any], api_key: str, - api_url: str = "https://api.sentienceapi.com", + api_url: str = SENTIENCE_API_URL, ) -> dict[str, Any]: """ Post snapshot payload to gateway (asynchronous). @@ -279,7 +280,7 @@ def _snapshot_via_api( raise RuntimeError("Browser not started. Call browser.start() first.") # Use browser.api_url if set, otherwise default - api_url = browser.api_url or "https://api.sentienceapi.com" + api_url = browser.api_url or SENTIENCE_API_URL # CRITICAL: Wait for extension injection to complete (CSP-resistant architecture) # Even for API mode, we need the extension to collect raw data locally @@ -478,7 +479,7 @@ async def _snapshot_via_api_async( raise RuntimeError("Browser not started. 
Call await browser.start() first.") # Use browser.api_url if set, otherwise default - api_url = browser.api_url or "https://api.sentienceapi.com" + api_url = browser.api_url or SENTIENCE_API_URL # Wait for extension injection try: diff --git a/sentience/tracer_factory.py b/sentience/tracer_factory.py index 601f590..82137b7 100644 --- a/sentience/tracer_factory.py +++ b/sentience/tracer_factory.py @@ -14,11 +14,9 @@ import requests from sentience.cloud_tracing import CloudTraceSink, SentienceLogger +from sentience.constants import SENTIENCE_API_URL from sentience.tracing import JsonlTraceSink, Tracer -# Sentience API base URL (constant) -SENTIENCE_API_URL = "https://api.sentienceapi.com" - def create_tracer( api_key: str | None = None, diff --git a/tests/test_sentience_context.py b/tests/test_sentience_context.py new file mode 100644 index 0000000..b88c65a --- /dev/null +++ b/tests/test_sentience_context.py @@ -0,0 +1,591 @@ +""" +Tests for SentienceContext (Token-Slasher Context Middleware). + +These tests verify the formatting logic and element selection strategy +without requiring a real browser or extension. 
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from sentience.backends import SentienceContext, SentienceContextState, TopElementSelector
from sentience.constants import SENTIENCE_API_URL
from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues

# Column positions within one formatted line:
# ID|role|text|imp|is_primary|docYq|ord|DG|href
COL_ID, COL_ROLE, COL_TEXT, COL_IMP, COL_PRIMARY, COL_DOCYQ, COL_ORD, COL_DG, COL_HREF = range(9)


def make_element(
    id: int,
    role: str = "button",
    text: str = "",
    importance: int = 50,
    bbox: BBox | None = None,
    visual_cues: VisualCues | None = None,
    doc_y: float | None = None,
    group_key: str | None = None,
    group_index: int | None = None,
    in_dominant_group: bool | None = None,
    href: str | None = None,
) -> Element:
    """Helper to create test elements with defaults."""
    return Element(
        id=id,
        role=role,
        text=text,
        importance=importance,
        bbox=bbox or BBox(x=0, y=0, width=100, height=30),
        visual_cues=visual_cues or VisualCues(is_primary=False, is_clickable=True),
        doc_y=doc_y,
        group_key=group_key,
        group_index=group_index,
        in_dominant_group=in_dominant_group,
        href=href,
    )


def make_snapshot(
    elements: list[Element],
    dominant_group_key: str | None = None,
) -> Snapshot:
    """Helper to create test snapshots."""
    return Snapshot(
        status="success",
        url="https://example.com",
        viewport=Viewport(width=1920, height=1080),
        elements=elements,
        dominant_group_key=dominant_group_key,
    )


def _format_lines(ctx: SentienceContext, elements: list[Element]) -> list[str]:
    """Run ``_format_snapshot_for_llm`` on *elements* and split the result into lines."""
    return ctx._format_snapshot_for_llm(make_snapshot(elements)).strip().split("\n")


def _ids(lines: list[str]) -> list[int]:
    """Return the element id (first column) of every formatted line."""
    return [int(line.split("|")[COL_ID]) for line in lines]


class TestSentienceContextInit:
    """Tests for SentienceContext initialization."""

    def test_default_values(self) -> None:
        """Test default configuration values."""
        ctx = SentienceContext()

        assert ctx._api_key is None
        assert ctx._max_elements == 60
        assert ctx._show_overlay is False
        assert ctx._selector.by_importance == 60
        assert ctx._selector.from_dominant_group == 15
        assert ctx._selector.by_position == 10

    def test_custom_values(self) -> None:
        """Test custom configuration values."""
        ctx = SentienceContext(
            sentience_api_key="test-key",
            max_elements=100,
            show_overlay=True,
            top_element_selector=TopElementSelector(
                by_importance=30,
                from_dominant_group=10,
                by_position=5,
            ),
        )

        assert ctx._api_key == "test-key"
        assert ctx._max_elements == 100
        assert ctx._show_overlay is True
        assert ctx._selector.by_importance == 30
        assert ctx._selector.from_dominant_group == 10
        assert ctx._selector.by_position == 5

    def test_api_url_constant(self) -> None:
        """Test API URL is a class constant."""
        assert SentienceContext.API_URL == SENTIENCE_API_URL

    def test_top_element_selector_defaults(self) -> None:
        """Test TopElementSelector has correct defaults."""
        selector = TopElementSelector()
        assert selector.by_importance == 60
        assert selector.from_dominant_group == 15
        assert selector.by_position == 10


class TestFormatSnapshotForLLM:
    """Tests for _format_snapshot_for_llm method."""

    def test_basic_formatting(self) -> None:
        """Test basic element formatting."""
        ctx = SentienceContext(
            top_element_selector=TopElementSelector(
                by_importance=10, from_dominant_group=5, by_position=5
            )
        )

        lines = _format_lines(
            ctx,
            [
                make_element(id=1, role="button", text="Click me", importance=80),
                make_element(
                    id=2, role="link", text="Go home", importance=60, href="https://example.com"
                ),
            ],
        )

        assert len(lines) == 2
        # First five columns: id, role, text, importance, is_primary (False -> "0")
        assert lines[0].split("|")[: COL_PRIMARY + 1] == ["1", "button", "Click me", "80", "0"]

    def test_is_primary_flag(self) -> None:
        """Test is_primary flag is correctly extracted from visual_cues."""
        ctx = SentienceContext(
            top_element_selector=TopElementSelector(
                by_importance=10, from_dominant_group=5, by_position=5
            )
        )

        lines = _format_lines(
            ctx,
            [
                make_element(
                    id=1,
                    role="button",
                    text="Primary CTA",
                    importance=90,
                    visual_cues=VisualCues(is_primary=True, is_clickable=True),
                ),
                make_element(
                    id=2,
                    role="button",
                    text="Secondary",
                    importance=70,
                    visual_cues=VisualCues(is_primary=False, is_clickable=True),
                ),
            ],
        )

        assert lines[0].split("|")[COL_PRIMARY] == "1"
        assert lines[1].split("|")[COL_PRIMARY] == "0"

    def test_role_link_when_href(self) -> None:
        """Test role is overridden to 'link' when element has href."""
        ctx = SentienceContext()

        lines = _format_lines(
            ctx,
            [
                make_element(
                    id=1,
                    role="button",
                    text="Button with href",
                    importance=80,
                    href="https://example.com",
                ),
            ],
        )

        assert lines[0].split("|")[COL_ROLE] == "link"

    def test_whitespace_normalization(self) -> None:
        """Test whitespace and newlines are normalized in text."""
        ctx = SentienceContext(top_element_selector=TopElementSelector(by_importance=10))

        lines = _format_lines(
            ctx,
            [make_element(id=1, role="button", text="Line1\nLine2\tTabbed Spaces", importance=80)],
        )

        # All whitespace should be normalized to single spaces
        assert lines[0].split("|")[COL_TEXT] == "Line1 Line2 Tabbed Spaces"

    def test_text_truncation(self) -> None:
        """Test long text is truncated to 30 chars."""
        ctx = SentienceContext(top_element_selector=TopElementSelector(by_importance=10))

        lines = _format_lines(
            ctx, [make_element(id=1, role="button", text="A" * 50, importance=80)]
        )
        text = lines[0].split("|")[COL_TEXT]

        # 27 chars of text + "..."
        assert len(text) == 30
        assert text.endswith("...")

    def test_generic_role_fallback(self) -> None:
        """Test an explicit role is kept when the element has no text and no href."""
        ctx = SentienceContext(top_element_selector=TopElementSelector(by_importance=10))

        # NOTE(review): the generic "element" role cannot be produced from
        # here - elements with an empty role are dropped by the
        # interactive-role filter before formatting - so this only checks
        # that a real role survives empty text.
        lines = _format_lines(ctx, [make_element(id=1, role="link", text="", importance=80)])

        assert lines[0].split("|")[COL_ROLE] == "link"

    def test_dominant_group_flag(self) -> None:
        """Test DG flag is set correctly for dominant group elements."""
        ctx = SentienceContext(
            top_element_selector=TopElementSelector(by_importance=10, from_dominant_group=5)
        )

        lines = _format_lines(
            ctx,
            [
                make_element(
                    id=1, role="link", text="In DG", importance=80, in_dominant_group=True
                ),
                make_element(
                    id=2, role="link", text="Not in DG", importance=70, in_dominant_group=False
                ),
            ],
        )

        assert lines[0].split("|")[COL_DG] == "1"
        assert lines[1].split("|")[COL_DG] == "0"

    def test_rank_in_group_computation(self) -> None:
        """Test rank_in_group is computed locally for dominant group elements."""
        ctx = SentienceContext(
            top_element_selector=TopElementSelector(by_importance=10, from_dominant_group=10),
        )

        lines = _format_lines(
            ctx,
            [
                make_element(
                    id=1,
                    role="link",
                    text="Third",
                    importance=70,
                    doc_y=300.0,
                    in_dominant_group=True,
                ),
                make_element(
                    id=2,
                    role="link",
                    text="First",
                    importance=80,
                    doc_y=100.0,
                    in_dominant_group=True,
                ),
                make_element(
                    id=3,
                    role="link",
                    text="Second",
                    importance=90,
                    doc_y=200.0,
                    in_dominant_group=True,
                ),
                make_element(
                    id=4,
                    role="button",
                    text="Not in DG",
                    importance=95,
                    doc_y=50.0,
                    in_dominant_group=False,
                ),
            ],
        )

        ord_by_id = {
            int(cols[COL_ID]): cols[COL_ORD] for cols in (line.split("|") for line in lines)
        }

        # Dominant-group elements are ranked 0, 1, 2 by doc_y; the element
        # outside the group gets "-".
        assert ord_by_id == {2: "0", 3: "1", 1: "2", 4: "-"}

    def test_href_compression(self) -> None:
        """Test href is compressed to short token."""
        ctx = SentienceContext(top_element_selector=TopElementSelector(by_importance=10))

        lines = _format_lines(
            ctx,
            [
                make_element(
                    id=1,
                    role="link",
                    text="GitHub",
                    importance=80,
                    href="https://github.com/user/repo",
                ),
                make_element(id=2, role="link", text="Local", importance=70, href="/api/items/123"),
            ],
        )

        assert lines[0].split("|")[COL_HREF] == "github"  # second-level domain
        assert lines[1].split("|")[COL_HREF] == "123"  # last path segment


class TestCompressHref:
    """Tests for _compress_href method."""

    def test_full_url_extracts_domain(self) -> None:
        """Test full URL extracts second-level domain (truncated to 10 chars)."""
        ctx = SentienceContext()

        # "ycombinator" is cut to 10 characters
        assert ctx._compress_href("https://news.ycombinator.com/item?id=123") == "ycombinato"
        assert ctx._compress_href("https://github.com/user/repo") == "github"
        assert ctx._compress_href("https://www.example.com/page") == "example"

    def test_relative_url_extracts_last_segment(self) -> None:
        """Test relative URL extracts last path segment."""
        ctx = SentienceContext()

        assert ctx._compress_href("/api/items/123") == "123"
        assert ctx._compress_href("/products/widget") == "widget"

    def test_empty_href(self) -> None:
        """Test empty href returns empty string."""
        ctx = SentienceContext()

        assert ctx._compress_href("") == ""
        assert ctx._compress_href(None) == ""

    def test_long_domain_truncated(self) -> None:
        """Test long domain is truncated to 10 chars."""
        ctx = SentienceContext()

        result = ctx._compress_href("https://verylongdomainname.com/page")
        assert len(result) <= 10


class TestElementSelection:
    """Tests for element selection strategy (3-way merge)."""

    def test_top_by_importance(self) -> None:
        """Test elements are selected by importance."""
        ctx = SentienceContext(
            top_element_selector=TopElementSelector(
                by_importance=2, from_dominant_group=0, by_position=0
            )
        )

        lines = _format_lines(
            ctx,
            [
                make_element(id=1, role="button", importance=50),
                make_element(id=2, role="button", importance=100),
                make_element(id=3, role="button", importance=75),
                make_element(id=4, role="button", importance=25),
            ],
        )

        # Exactly the two most important elements
        assert sorted(_ids(lines)) == [2, 3]

    def test_top_from_dominant_group(self) -> None:
        """Test elements from dominant group are included."""
        ctx = SentienceContext(
            top_element_selector=TopElementSelector(
                by_importance=1, from_dominant_group=2, by_position=0
            )
        )

        lines = _format_lines(
            ctx,
            [
                make_element(id=1, role="button", importance=100),  # Top by importance
                make_element(
                    id=2, role="link", importance=30, in_dominant_group=True, group_index=0
                ),
                make_element(
                    id=3, role="link", importance=20, in_dominant_group=True, group_index=1
                ),
                make_element(id=4, role="link", importance=40, in_dominant_group=False),
            ],
        )

        # 1 (importance) + 2 and 3 (dominant group); 4 is in neither selection
        assert sorted(_ids(lines)) == [1, 2, 3]

    def test_top_by_position(self) -> None:
        """Test elements at top of page (lowest doc_y) are included."""
        ctx = SentienceContext(
            top_element_selector=TopElementSelector(
                by_importance=0, from_dominant_group=0, by_position=2
            )
        )

        lines = _format_lines(
            ctx,
            [
                make_element(id=1, role="button", importance=50, doc_y=500.0),
                make_element(id=2, role="button", importance=30, doc_y=100.0),
                make_element(id=3, role="button", importance=40, doc_y=200.0),
                make_element(id=4, role="button", importance=60, doc_y=800.0),
            ],
        )

        # The two elements with the lowest doc_y (100 and 200)
        assert sorted(_ids(lines)) == [2, 3]

    def test_deduplication(self) -> None:
        """Test elements are not duplicated when selected by multiple criteria."""
        ctx = SentienceContext(
            top_element_selector=TopElementSelector(
                by_importance=2, from_dominant_group=2, by_position=2
            )
        )

        # Element 1 qualifies for all three criteria
        lines = _format_lines(
            ctx,
            [
                make_element(
                    id=1,
                    role="button",
                    importance=100,
                    doc_y=50.0,
                    in_dominant_group=True,
                    group_index=0,
                ),
                make_element(id=2, role="button", importance=80, doc_y=100.0),
                make_element(
                    id=3,
                    role="link",
                    importance=30,
                    doc_y=200.0,
                    in_dominant_group=True,
                    group_index=1,
                ),
            ],
        )

        assert _ids(lines).count(1) == 1


class TestBuildMethod:
    """Tests for the build() async method.

    NOTE(review): these tests replace ``build`` with a stub, so they pin the
    shape of the contract (a SentienceContextState or None) but do not run
    the real implementation. Exercising the real method needs its internal
    imports mocked.
    """

    @pytest.mark.asyncio
    async def test_build_returns_context_state(self) -> None:
        """Test a (stubbed) build() hands back a SentienceContextState."""
        ctx = SentienceContext()
        mock_snap = make_snapshot(
            [
                make_element(id=1, role="button", text="Click", importance=80),
            ]
        )
        expected = SentienceContextState(
            url="https://example.com",
            snapshot=mock_snap,
            prompt_block="Elements: ID|role|text|imp|is_primary|docYq|ord|DG|href\n1|button|Click|80|0|0|-|0|",
        )
        mock_session = MagicMock()

        with patch.object(ctx, "build", AsyncMock(return_value=expected)) as stub:
            result = await ctx.build(mock_session, goal="Test goal")

        stub.assert_awaited_once_with(mock_session, goal="Test goal")
        assert result is not None
        assert isinstance(result, SentienceContextState)
        assert result.url == "https://example.com"
        assert "ID|role|text|imp|is_primary|docYq|ord|DG|href" in result.prompt_block

    @pytest.mark.asyncio
    async def test_build_handles_exception_gracefully(self) -> None:
        """Test a (stubbed) failing build() yields None."""
        ctx = SentienceContext()
        mock_session = MagicMock()

        # Simulates the outcome of build()'s exception-handling path.
        with patch.object(ctx, "build", AsyncMock(return_value=None)):
            result = await ctx.build(mock_session)

        assert result is None

    def test_context_state_has_correct_structure(self) -> None:
        """Test SentienceContextState dataclass structure."""
        mock_snap = make_snapshot([make_element(id=1, role="button", importance=80)])

        state = SentienceContextState(
            url="https://test.com",
            snapshot=mock_snap,
            prompt_block="test prompt",
        )

        assert state.url == "https://test.com"
        assert state.snapshot is mock_snap
        assert state.prompt_block == "test prompt"


class TestInteractiveRoleFiltering:
    """Tests for interactive role filtering."""

    def test_only_interactive_roles_included(self) -> None:
        """Test only interactive roles are included in output."""
        ctx = SentienceContext(top_element_selector=TopElementSelector(by_importance=10))

        lines = _format_lines(
            ctx,
            [
                make_element(id=1, role="button", importance=80),
                make_element(id=2, role="link", importance=70),
                make_element(id=3, role="heading", importance=90),  # Not interactive
                make_element(id=4, role="textbox", importance=60),
                make_element(id=5, role="paragraph", importance=85),  # Not interactive
            ],
        )

        # button, link and textbox survive; heading and paragraph are filtered out
        assert sorted(_ids(lines)) == [1, 2, 4]