Skip to content

Commit eb45da1

Browse files
author
SentienceDEV
authored
Merge pull request #127 from SentienceAPI/browser_use_support
Phase 1: Controller for snapshot support in Browser use
2 parents 7159f28 + d2122e8 commit eb45da1

20 files changed

+3967
-179
lines changed
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
"""
2+
Example: Using Sentience with browser-use for element grounding.
3+
4+
This example demonstrates how to integrate Sentience's semantic element
5+
detection with browser-use, enabling accurate click/type/scroll operations
6+
using Sentience's snapshot-based grounding instead of coordinate estimation.
7+
8+
Requirements:
9+
pip install browser-use sentienceapi
10+
11+
Usage:
12+
python examples/browser_use_integration.py
13+
"""
14+
15+
import asyncio
16+
17+
# Sentience imports
18+
from sentience import find, get_extension_dir, query
19+
from sentience.backends import (
20+
BrowserUseAdapter,
21+
CachedSnapshot,
22+
ExtensionNotLoadedError,
23+
click,
24+
scroll,
25+
snapshot,
26+
type_text,
27+
)
28+
29+
# browser-use imports (install via: pip install browser-use)
30+
# from browser_use import BrowserSession, BrowserProfile
31+
32+
33+
async def main() -> None:
    """
    Print an overview of the browser-use + Sentience integration pattern.

    The live-browser walkthrough (launch browser-use with the Sentience
    extension, create a backend adapter, take snapshots, act on elements)
    is kept below as commented-out reference code, so this script runs
    without browser-use installed. As written, the coroutine only prints a
    banner, the key imports, and the value returned by get_extension_dir().
    """

    # -------------------------------------------------------------------------
    # STEP 1: Setup browser-use with Sentience extension
    # -------------------------------------------------------------------------
    #
    # Element grounding only works when the Sentience extension is loaded.
    # get_extension_dir() gives the path to the bundled extension.
    #
    # Uncomment the following when running with browser-use installed:

    # extension_path = get_extension_dir()
    # print(f"Loading Sentience extension from: {extension_path}")
    #
    # profile = BrowserProfile(
    #     args=[
    #         f"--load-extension={extension_path}",
    #         f"--disable-extensions-except={extension_path}",
    #     ],
    # )
    # session = BrowserSession(browser_profile=profile)
    # await session.start()

    # -------------------------------------------------------------------------
    # STEP 2: Create Sentience backend adapter
    # -------------------------------------------------------------------------
    #
    # The adapter bridges browser-use's CDP client to Sentience's backend
    # protocol.
    #
    # adapter = BrowserUseAdapter(session)
    # backend = await adapter.create_backend()

    # -------------------------------------------------------------------------
    # STEP 3: Navigate and take snapshots
    # -------------------------------------------------------------------------
    #
    # await session.navigate("https://www.google.com")
    #
    # # Take a snapshot - this uses the Sentience extension's element detection
    # try:
    #     snap = await snapshot(backend)
    #     print(f"Found {len(snap.elements)} elements")
    # except ExtensionNotLoadedError as e:
    #     print(f"Extension not loaded: {e}")
    #     print("Make sure the browser was launched with --load-extension flag")
    #     return

    # -------------------------------------------------------------------------
    # STEP 4: Find and interact with elements using semantic queries
    # -------------------------------------------------------------------------
    #
    # Selector forms shown in this example:
    #   - Role-based: 'role=textbox', 'role=button'
    #   - Name-based: 'role=button[name="Submit"]'
    #   - Text-based: 'text=Search'
    #
    # # Find the search input
    # search_input = find(snap, 'role=textbox[name*="Search"]')
    # if search_input:
    #     # Click on the search input (uses center of bounding box)
    #     await click(backend, search_input.bbox)
    #
    #     # Type search query
    #     await type_text(backend, "Sentience AI browser automation")
    #     print("Typed search query")

    # -------------------------------------------------------------------------
    # STEP 5: Using cached snapshots for efficiency
    # -------------------------------------------------------------------------
    #
    # Taking snapshots has overhead. Use CachedSnapshot to reuse recent ones:
    #
    # cache = CachedSnapshot(backend, max_age_ms=2000)
    #
    # # First call takes fresh snapshot
    # snap1 = await cache.get()
    #
    # # Second call returns cached version if less than 2 seconds old
    # snap2 = await cache.get()
    #
    # # After actions that modify DOM, invalidate the cache
    # # (some_element stands for any element obtained from find()/query())
    # await click(backend, some_element.bbox)
    # cache.invalidate()  # Next get() will take fresh snapshot

    # -------------------------------------------------------------------------
    # STEP 6: Scrolling to elements
    # -------------------------------------------------------------------------
    #
    # # Scroll down by 500 pixels
    # await scroll(backend, delta_y=500)
    #
    # # Scroll at a specific position (useful for scrollable containers)
    # await scroll(backend, delta_y=300, target=(400, 500))

    # -------------------------------------------------------------------------
    # STEP 7: Advanced element queries
    # -------------------------------------------------------------------------
    #
    # # Find all buttons
    # buttons = query(snap, 'role=button')
    # print(f"Found {len(buttons)} buttons")
    #
    # # Find by partial text match
    # links = query(snap, 'role=link[name*="Learn"]')
    #
    # # Find by exact text
    # submit_btn = find(snap, 'role=button[name="Submit"]')

    # -------------------------------------------------------------------------
    # STEP 8: Error handling
    # -------------------------------------------------------------------------
    #
    # Sentience provides specific exceptions for common errors:
    #
    # from sentience.backends import (
    #     ExtensionNotLoadedError,  # Extension not loaded in browser
    #     SnapshotError,            # Snapshot failed
    #     ActionError,              # Click/type/scroll failed
    # )
    #
    # try:
    #     snap = await snapshot(backend)
    # except ExtensionNotLoadedError as e:
    #     # The error message includes fix suggestions
    #     print(f"Fix: {e}")

    # -------------------------------------------------------------------------
    # CLEANUP
    # -------------------------------------------------------------------------
    #
    # await session.stop()

    # Everything above is reference material; the only live behaviour is the
    # banner below. An empty string entry produces a blank output line.
    rule = "=" * 60
    banner_lines = (
        rule,
        "browser-use + Sentience Integration Example",
        rule,
        "",
        "This example demonstrates the integration pattern.",
        "To run with a real browser, uncomment the code sections above",
        "and install browser-use: pip install browser-use",
        "",
        "Key imports:",
        " from sentience import get_extension_dir, find, query",
        " from sentience.backends import (",
        " BrowserUseAdapter, snapshot, click, type_text, scroll",
        " )",
        "",
    )
    for text in banner_lines:
        print(text)

    # Resolved last so the banner is already on screen if the lookup fails.
    print("Extension path:", get_extension_dir())
188+
189+
190+
async def full_example() -> None:
    """
    Print the steps needed to turn this file into a live browser-use demo.

    This coroutine does not drive a browser itself; it only prints setup
    instructions. The runnable walkthrough is the commented-out code inside
    main(), so the printed steps point the reader there.
    """
    # Kept commented out so the file still imports when browser-use is not
    # installed:
    # from browser_use import BrowserSession, BrowserProfile

    print("To run the full example:")
    print("1. Install browser-use: pip install browser-use")
    # Step 2 points at main(): uncommenting an import in this function alone
    # would not execute anything, and main()'s banner gives the same advice.
    print("2. Uncomment the browser-use import and the code sections in main()")
    print("3. Run: python examples/browser_use_integration.py")
203+
204+
205+
# Script entry point: when the file is executed directly (not imported),
# run the main() coroutine to completion via asyncio.run().
# Only main() is started here; full_example() is not invoked.
if __name__ == "__main__":
206+
asyncio.run(main())

sentience/__init__.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,40 @@
22
Sentience Python SDK - AI Agent Browser Automation
33
"""
44

5+
# Extension helpers (for browser-use integration)
6+
from ._extension_loader import (
7+
get_extension_dir,
8+
get_extension_version,
9+
verify_extension_injected,
10+
verify_extension_injected_async,
11+
verify_extension_version,
12+
verify_extension_version_async,
13+
)
514
from .actions import click, click_rect, press, scroll_to, type_text
615
from .agent import SentienceAgent, SentienceAgentAsync
716
from .agent_config import AgentConfig
817
from .agent_runtime import AgentRuntime
918

19+
# Backend-agnostic actions are imported further below under backend_* aliases (avoids clashing with the .actions names)
20+
# Browser backends (for browser-use integration)
21+
from .backends import (
22+
BrowserBackendV0,
23+
BrowserUseAdapter,
24+
BrowserUseCDPTransport,
25+
CachedSnapshot,
26+
CDPBackendV0,
27+
CDPTransport,
28+
LayoutMetrics,
29+
PlaywrightBackend,
30+
ViewportInfo,
31+
)
32+
from .backends import click as backend_click
33+
from .backends import scroll as backend_scroll
34+
from .backends import scroll_to_element as backend_scroll_to_element
35+
from .backends import snapshot as backend_snapshot
36+
from .backends import type_text as backend_type_text
37+
from .backends import wait_for_stable as backend_wait_for_stable
38+
1039
# Agent Layer (Phase 1 & 2)
1140
from .base_agent import BaseAgent
1241
from .browser import SentienceBrowser
@@ -89,9 +118,33 @@
89118
from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
90119
from .wait import wait_for
91120

92-
__version__ = "0.92.3"
121+
__version__ = "0.93.0"
93122

94123
__all__ = [
124+
# Extension helpers (for browser-use integration)
125+
"get_extension_dir",
126+
"get_extension_version",
127+
"verify_extension_injected",
128+
"verify_extension_injected_async",
129+
"verify_extension_version",
130+
"verify_extension_version_async",
131+
# Browser backends (for browser-use integration)
132+
"BrowserBackendV0",
133+
"CDPTransport",
134+
"CDPBackendV0",
135+
"PlaywrightBackend",
136+
"BrowserUseAdapter",
137+
"BrowserUseCDPTransport",
138+
"ViewportInfo",
139+
"LayoutMetrics",
140+
"backend_snapshot",
141+
"CachedSnapshot",
142+
# Backend-agnostic actions (prefixed to avoid conflicts)
143+
"backend_click",
144+
"backend_type_text",
145+
"backend_scroll",
146+
"backend_scroll_to_element",
147+
"backend_wait_for_stable",
95148
# Core SDK
96149
"SentienceBrowser",
97150
"Snapshot",

0 commit comments

Comments
 (0)