|
| 1 | +""" |
| 2 | +Example: Using Sentience with browser-use for element grounding. |
| 3 | +
|
| 4 | +This example demonstrates how to integrate Sentience's semantic element |
| 5 | +detection with browser-use, enabling accurate click/type/scroll operations |
| 6 | +using Sentience's snapshot-based grounding instead of coordinate estimation. |
| 7 | +
|
| 8 | +Requirements: |
| 9 | + pip install browser-use sentienceapi |
| 10 | +
|
| 11 | +Usage: |
| 12 | + python examples/browser_use_integration.py |
| 13 | +""" |
| 14 | + |
| 15 | +import asyncio |
| 16 | + |
| 17 | +# Sentience imports |
| 18 | +from sentience import find, get_extension_dir, query |
| 19 | +from sentience.backends import ( |
| 20 | + BrowserUseAdapter, |
| 21 | + CachedSnapshot, |
| 22 | + ExtensionNotLoadedError, |
| 23 | + click, |
| 24 | + scroll, |
| 25 | + snapshot, |
| 26 | + type_text, |
| 27 | +) |
| 28 | + |
| 29 | +# browser-use imports (install via: pip install browser-use) |
| 30 | +# from browser_use import BrowserSession, BrowserProfile |
| 31 | + |
| 32 | + |
async def main() -> None:
    """
    Print an overview of the browser-use + Sentience integration pattern.

    The demo scenario is a Google search driven by Sentience grounding, but
    every browser-driving step below is commented out. As written, this
    coroutine only prints a banner, the key imports, and the value returned
    by ``get_extension_dir()``.

    The commented sections sketch the intended workflow:
        1. Launch browser-use with the Sentience extension loaded
        2. Create a Sentience backend adapter
        3. Take snapshots and interact with elements using semantic queries
    """

    # =========================================================================
    # STEP 1: Setup browser-use with Sentience extension
    # =========================================================================
    #
    # The Sentience extension must be loaded for element grounding to work.
    # Use get_extension_dir() to get the path to the bundled extension.
    #
    # Uncomment the following when running with browser-use installed:

    # extension_path = get_extension_dir()
    # print(f"Loading Sentience extension from: {extension_path}")
    #
    # profile = BrowserProfile(
    #     args=[
    #         f"--load-extension={extension_path}",
    #         "--disable-extensions-except=" + extension_path,
    #     ],
    # )
    # session = BrowserSession(browser_profile=profile)
    # await session.start()

    # =========================================================================
    # STEP 2: Create Sentience backend adapter
    # =========================================================================
    #
    # The adapter bridges browser-use's CDP client to Sentience's backend protocol.
    #
    # adapter = BrowserUseAdapter(session)
    # backend = await adapter.create_backend()

    # =========================================================================
    # STEP 3: Navigate and take snapshots
    # =========================================================================
    #
    # await session.navigate("https://www.google.com")
    #
    # # Take a snapshot - this uses the Sentience extension's element detection
    # try:
    #     snap = await snapshot(backend)
    #     print(f"Found {len(snap.elements)} elements")
    # except ExtensionNotLoadedError as e:
    #     print(f"Extension not loaded: {e}")
    #     print("Make sure the browser was launched with --load-extension flag")
    #     return

    # =========================================================================
    # STEP 4: Find and interact with elements using semantic queries
    # =========================================================================
    #
    # Sentience provides powerful element selectors:
    # - Role-based: 'role=textbox', 'role=button'
    # - Name-based: 'role=button[name="Submit"]'
    # - Text-based: 'text=Search'
    #
    # # Find the search input
    # search_input = find(snap, 'role=textbox[name*="Search"]')
    # if search_input:
    #     # Click on the search input (uses center of bounding box)
    #     await click(backend, search_input.bbox)
    #
    #     # Type search query
    #     await type_text(backend, "Sentience AI browser automation")
    #     print("Typed search query")

    # =========================================================================
    # STEP 5: Using cached snapshots for efficiency
    # =========================================================================
    #
    # Taking snapshots has overhead. Use CachedSnapshot to reuse recent snapshots:
    #
    # cache = CachedSnapshot(backend, max_age_ms=2000)
    #
    # # First call takes fresh snapshot
    # snap1 = await cache.get()
    #
    # # Second call returns cached version if less than 2 seconds old
    # snap2 = await cache.get()
    #
    # # After actions that modify DOM, invalidate the cache
    # # (some_element is a placeholder for any element found in a snapshot)
    # await click(backend, some_element.bbox)
    # cache.invalidate()  # Next get() will take fresh snapshot

    # =========================================================================
    # STEP 6: Scrolling to elements
    # =========================================================================
    #
    # # Scroll down by 500 pixels
    # await scroll(backend, delta_y=500)
    #
    # # Scroll at a specific position (useful for scrollable containers)
    # await scroll(backend, delta_y=300, target=(400, 500))

    # =========================================================================
    # STEP 7: Advanced element queries
    # =========================================================================
    #
    # # Find all buttons
    # buttons = query(snap, 'role=button')
    # print(f"Found {len(buttons)} buttons")
    #
    # # Find by partial text match
    # links = query(snap, 'role=link[name*="Learn"]')
    #
    # # Find by exact text
    # submit_btn = find(snap, 'role=button[name="Submit"]')

    # =========================================================================
    # STEP 8: Error handling
    # =========================================================================
    #
    # Sentience provides specific exceptions for common errors:
    #
    # from sentience.backends import (
    #     ExtensionNotLoadedError,  # Extension not loaded in browser
    #     SnapshotError,            # Snapshot failed
    #     ActionError,              # Click/type/scroll failed
    # )
    #
    # try:
    #     snap = await snapshot(backend)
    # except ExtensionNotLoadedError as e:
    #     # The error message includes fix suggestions
    #     print(f"Fix: {e}")

    # =========================================================================
    # CLEANUP
    # =========================================================================
    #
    # await session.stop()

    # Everything above is inert until uncommented; the code below is the only
    # part that runs, and it just prints a summary of the pattern.
    rule = "=" * 60
    overview = (
        rule,
        "browser-use + Sentience Integration Example",
        rule,
        "",
        "This example demonstrates the integration pattern.",
        "To run with a real browser, uncomment the code sections above",
        "and install browser-use: pip install browser-use",
        "",
        "Key imports:",
        " from sentience import get_extension_dir, find, query",
        " from sentience.backends import (",
        " BrowserUseAdapter, snapshot, click, type_text, scroll",
        " )",
        "",
    )
    for text in overview:
        print(text)

    # Printed last so the overview is already on screen if the lookup fails.
    print("Extension path:", get_extension_dir())
| 188 | + |
| 189 | + |
async def full_example() -> None:
    """
    Print the steps needed to turn this file into a runnable browser-use demo.

    This coroutine does not drive a browser itself: the integration code
    lives, commented out, in ``main()``. It only tells the user what to
    install and what to uncomment, and returns ``None``.
    """
    # Kept commented out; uncomment once browser-use is installed.
    # from browser_use import BrowserSession, BrowserProfile

    steps = (
        "To run the full example:",
        "1. Install browser-use: pip install browser-use",
        # The runnable code sits in main(), so point the user there rather
        # than at the single commented import in this function.
        "2. Uncomment the browser-use import and the code sections in main()",
        "3. Run: python examples/browser_use_integration.py",
    )
    for step in steps:
        print(step)
| 203 | + |
| 204 | + |
if __name__ == "__main__":
    # Script entry point: run the async demo to completion on a fresh event loop.
    demo = main()
    asyncio.run(demo)
0 commit comments