Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 206 additions & 0 deletions examples/browser_use_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
"""
Example: Using Sentience with browser-use for element grounding.
This example demonstrates how to integrate Sentience's semantic element
detection with browser-use, enabling accurate click/type/scroll operations
using Sentience's snapshot-based grounding instead of coordinate estimation.
Requirements:
pip install browser-use sentienceapi
Usage:
python examples/browser_use_integration.py
"""

import asyncio

# Sentience imports
from sentience import find, get_extension_dir, query
from sentience.backends import (
BrowserUseAdapter,
CachedSnapshot,
ExtensionNotLoadedError,
click,
scroll,
snapshot,
type_text,
)

# browser-use imports (install via: pip install browser-use)
# from browser_use import BrowserSession, BrowserProfile


async def main() -> None:
    """
    Walk through the Sentience + browser-use integration pattern.

    The live-browser steps are kept below as commented-out reference code so
    that this script runs without browser-use installed. As written, the
    coroutine only prints a short summary of the pattern followed by the
    extension directory returned by ``get_extension_dir()``.

    Workflow illustrated by the commented sections:
        1. Launch browser-use with the Sentience extension loaded
        2. Create a Sentience backend adapter
        3. Take snapshots and interact with elements using semantic queries
    """

    # =========================================================================
    # STEP 1: Setup browser-use with Sentience extension
    # =========================================================================
    #
    # The Sentience extension must be loaded for element grounding to work.
    # Use get_extension_dir() to get the path to the bundled extension.
    #
    # Uncomment the following when running with browser-use installed:

    # extension_path = get_extension_dir()
    # print(f"Loading Sentience extension from: {extension_path}")
    #
    # profile = BrowserProfile(
    #     args=[
    #         f"--load-extension={extension_path}",
    #         "--disable-extensions-except=" + extension_path,
    #     ],
    # )
    # session = BrowserSession(browser_profile=profile)
    # await session.start()

    # =========================================================================
    # STEP 2: Create Sentience backend adapter
    # =========================================================================
    #
    # The adapter bridges browser-use's CDP client to Sentience's backend protocol.
    #
    # adapter = BrowserUseAdapter(session)
    # backend = await adapter.create_backend()

    # =========================================================================
    # STEP 3: Navigate and take snapshots
    # =========================================================================
    #
    # await session.navigate("https://www.google.com")
    #
    # # Take a snapshot - this uses the Sentience extension's element detection
    # try:
    #     snap = await snapshot(backend)
    #     print(f"Found {len(snap.elements)} elements")
    # except ExtensionNotLoadedError as e:
    #     print(f"Extension not loaded: {e}")
    #     print("Make sure the browser was launched with --load-extension flag")
    #     return

    # =========================================================================
    # STEP 4: Find and interact with elements using semantic queries
    # =========================================================================
    #
    # Sentience provides powerful element selectors:
    # - Role-based: 'role=textbox', 'role=button'
    # - Name-based: 'role=button[name="Submit"]'
    # - Text-based: 'text=Search'
    #
    # # Find the search input
    # search_input = find(snap, 'role=textbox[name*="Search"]')
    # if search_input:
    #     # Click on the search input (uses center of bounding box)
    #     await click(backend, search_input.bbox)
    #
    #     # Type search query
    #     await type_text(backend, "Sentience AI browser automation")
    #     print("Typed search query")

    # =========================================================================
    # STEP 5: Using cached snapshots for efficiency
    # =========================================================================
    #
    # Taking snapshots has overhead. Use CachedSnapshot to reuse recent snapshots:
    #
    # cache = CachedSnapshot(backend, max_age_ms=2000)
    #
    # # First call takes fresh snapshot
    # snap1 = await cache.get()
    #
    # # Second call returns cached version if less than 2 seconds old
    # snap2 = await cache.get()
    #
    # # After actions that modify DOM, invalidate the cache
    # # (some_element is a placeholder for an element found via find()/query())
    # await click(backend, some_element.bbox)
    # cache.invalidate()  # Next get() will take fresh snapshot

    # =========================================================================
    # STEP 6: Scrolling to elements
    # =========================================================================
    #
    # # Scroll down by 500 pixels
    # await scroll(backend, delta_y=500)
    #
    # # Scroll at a specific position (useful for scrollable containers)
    # await scroll(backend, delta_y=300, target=(400, 500))

    # =========================================================================
    # STEP 7: Advanced element queries
    # =========================================================================
    #
    # # Find all buttons
    # buttons = query(snap, 'role=button')
    # print(f"Found {len(buttons)} buttons")
    #
    # # Find by partial text match
    # links = query(snap, 'role=link[name*="Learn"]')
    #
    # # Find by exact text
    # submit_btn = find(snap, 'role=button[name="Submit"]')

    # =========================================================================
    # STEP 8: Error handling
    # =========================================================================
    #
    # Sentience provides specific exceptions for common errors:
    #
    # from sentience.backends import (
    #     ExtensionNotLoadedError,  # Extension not loaded in browser
    #     SnapshotError,            # Snapshot failed
    #     ActionError,              # Click/type/scroll failed
    # )
    #
    # try:
    #     snap = await snapshot(backend)
    # except ExtensionNotLoadedError as e:
    #     # The error message includes fix suggestions
    #     print(f"Fix: {e}")

    # =========================================================================
    # CLEANUP
    # =========================================================================
    #
    # await session.stop()

    # Everything below is what actually executes: a static summary banner,
    # emitted one line per print() call (empty strings yield blank lines).
    rule = "=" * 60
    summary_lines = (
        rule,
        "browser-use + Sentience Integration Example",
        rule,
        "",
        "This example demonstrates the integration pattern.",
        "To run with a real browser, uncomment the code sections above",
        "and install browser-use: pip install browser-use",
        "",
        "Key imports:",
        " from sentience import get_extension_dir, find, query",
        " from sentience.backends import (",
        " BrowserUseAdapter, snapshot, click, type_text, scroll",
        " )",
        "",
    )
    for text in summary_lines:
        print(text)

    # Resolved last so the banner is printed before the extension lookup runs.
    print("Extension path:", get_extension_dir())


async def full_example() -> None:
    """
    Print the steps needed to run this example against a real browser.

    This coroutine is a placeholder: it performs no browser automation and
    only writes setup instructions to stdout. The browser-use import it
    refers to is left commented out below.
    """
    # Import browser-use (uncomment when installed)
    # from browser_use import BrowserSession, BrowserProfile

    instructions = (
        "To run the full example:",
        "1. Install browser-use: pip install browser-use",
        "2. Uncomment the imports in this function",
        "3. Run: python examples/browser_use_integration.py",
    )
    for step in instructions:
        print(step)


# Script entry point: when this file is executed directly, drive the async
# main() coroutine to completion with asyncio.run().
if __name__ == "__main__":
    asyncio.run(main())
20 changes: 19 additions & 1 deletion sentience/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,25 @@
from .agent_config import AgentConfig
from .agent_runtime import AgentRuntime

# Backend-agnostic actions (aliased to avoid conflict with existing actions)
# Browser backends (for browser-use integration)
from .backends import (
BrowserBackendV0,
BrowserUseAdapter,
BrowserUseCDPTransport,
CachedSnapshot,
CDPBackendV0,
CDPTransport,
LayoutMetrics,
PlaywrightBackend,
ViewportInfo,
)
from .backends import click as backend_click
from .backends import scroll as backend_scroll
from .backends import scroll_to_element as backend_scroll_to_element
from .backends import snapshot as backend_snapshot
from .backends import type_text as backend_type_text
from .backends import wait_for_stable as backend_wait_for_stable

# Agent Layer (Phase 1 & 2)
from .base_agent import BaseAgent
Expand Down Expand Up @@ -109,7 +118,7 @@
from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
from .wait import wait_for

__version__ = "0.92.3"
__version__ = "0.93.0"

__all__ = [
# Extension helpers (for browser-use integration)
Expand All @@ -123,10 +132,19 @@
"BrowserBackendV0",
"CDPTransport",
"CDPBackendV0",
"PlaywrightBackend",
"BrowserUseAdapter",
"BrowserUseCDPTransport",
"ViewportInfo",
"LayoutMetrics",
"backend_snapshot",
"CachedSnapshot",
# Backend-agnostic actions (prefixed to avoid conflicts)
"backend_click",
"backend_type_text",
"backend_scroll",
"backend_scroll_to_element",
"backend_wait_for_stable",
# Core SDK
"SentienceBrowser",
"Snapshot",
Expand Down
102 changes: 94 additions & 8 deletions sentience/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,29 @@
Sentience actions (click, type, scroll) to work with different browser
automation frameworks.
Supported backends:
- PlaywrightBackend: Default backend using Playwright (existing SentienceBrowser)
- CDPBackendV0: CDP-based backend for browser-use integration
Supported Backends
------------------
**PlaywrightBackend**
Wraps Playwright Page objects. Use this when integrating with existing
SentienceBrowser or Playwright-based code.
**CDPBackendV0**
Low-level CDP (Chrome DevTools Protocol) backend. Use this when you have
direct access to a CDP client and session.
**BrowserUseAdapter**
High-level adapter for browser-use framework. Automatically creates a
CDPBackendV0 from a BrowserSession.
Quick Start with browser-use
----------------------------
.. code-block:: python
For browser-use integration:
from browser_use import BrowserSession, BrowserProfile
from sentience import get_extension_dir
from sentience.backends import BrowserUseAdapter, CDPBackendV0
from sentience import get_extension_dir, find
from sentience.backends import BrowserUseAdapter, snapshot, click, type_text
# Setup browser-use with Sentience extension
profile = BrowserProfile(args=[f"--load-extension={get_extension_dir()}"])
Expand All @@ -23,13 +38,66 @@
adapter = BrowserUseAdapter(session)
backend = await adapter.create_backend()
# Use backend for precise operations
await backend.mouse_click(100, 200)
# Take snapshot and interact with elements
snap = await snapshot(backend)
search_box = find(snap, 'role=textbox[name*="Search"]')
await click(backend, search_box.bbox)
await type_text(backend, "Sentience AI")
Snapshot Caching
----------------
Use CachedSnapshot to reduce redundant snapshot calls in action loops:
.. code-block:: python
from sentience.backends import CachedSnapshot
cache = CachedSnapshot(backend, max_age_ms=2000)
snap1 = await cache.get() # Takes fresh snapshot
snap2 = await cache.get() # Returns cached if < 2s old
await click(backend, element.bbox)
cache.invalidate() # Force refresh on next get()
Error Handling
--------------
The module provides specific exceptions for common failure modes:
- ``ExtensionNotLoadedError``: Extension not loaded in browser launch args
- ``SnapshotError``: window.sentience.snapshot() failed
- ``ActionError``: Click/type/scroll operation failed
All exceptions inherit from ``SentienceBackendError`` and include helpful
fix suggestions in their error messages.
.. code-block:: python
from sentience.backends import ExtensionNotLoadedError, snapshot
try:
snap = await snapshot(backend)
except ExtensionNotLoadedError as e:
print(f"Fix suggestion: {e}")
"""

from .actions import click, scroll, scroll_to_element, type_text, wait_for_stable
from .browser_use_adapter import BrowserUseAdapter, BrowserUseCDPTransport
from .cdp_backend import CDPBackendV0, CDPTransport
from .exceptions import (
ActionError,
BackendEvalError,
ExtensionDiagnostics,
ExtensionInjectionError,
ExtensionNotLoadedError,
SentienceBackendError,
SnapshotError,
)
from .playwright_backend import PlaywrightBackend
from .protocol_v0 import BrowserBackendV0, LayoutMetrics, ViewportInfo
from .snapshot import CachedSnapshot, snapshot

__all__ = [
# Protocol
Expand All @@ -40,7 +108,25 @@
# CDP Backend
"CDPTransport",
"CDPBackendV0",
# Playwright Backend
"PlaywrightBackend",
# browser-use adapter
"BrowserUseAdapter",
"BrowserUseCDPTransport",
# Backend-agnostic functions
"snapshot",
"CachedSnapshot",
"click",
"type_text",
"scroll",
"scroll_to_element",
"wait_for_stable",
# Exceptions
"SentienceBackendError",
"ExtensionNotLoadedError",
"ExtensionInjectionError",
"ExtensionDiagnostics",
"BackendEvalError",
"SnapshotError",
"ActionError",
]
Loading