Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,74 @@ data_url = screenshot(browser, format="jpeg", quality=85)

</details>

<details>
<summary><h3>🔎 Text Search - Find Elements by Visible Text</h3></summary>

**`find_text_rect(browser, text, case_sensitive=False, whole_word=False, max_results=10)`** - Find text on page and get exact pixel coordinates

Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match.

**Example:**
```python
from sentience import SentienceBrowser, find_text_rect, click_rect

with SentienceBrowser() as browser:
    browser.page.goto("https://example.com")

    # Find "Sign In" button
    result = find_text_rect(browser, "Sign In")
    # Only touch result.results[0] when the search succeeded AND matched something
    if result.status == "success" and result.results:
        first_match = result.results[0]
        print(f"Found at: ({first_match.rect.x}, {first_match.rect.y})")
        print(f"In viewport: {first_match.in_viewport}")

        # Click on the found text (only when it is in the current viewport)
        if first_match.in_viewport:
            click_rect(browser, {
                "x": first_match.rect.x,
                "y": first_match.rect.y,
                "w": first_match.rect.width,
                "h": first_match.rect.height
            })
```

**Advanced Options:**
```python
# Case-sensitive search
result = find_text_rect(browser, "LOGIN", case_sensitive=True)

# Whole word only (won't match "log" as part of "login")
result = find_text_rect(browser, "log", whole_word=True)

# Find multiple matches
result = find_text_rect(browser, "Buy", max_results=10)
for match in result.results:
    # Report only the matches that are inside the current viewport
    if match.in_viewport:
        print(f"Found '{match.text}' at ({match.rect.x}, {match.rect.y})")
        print(f"Context: ...{match.context.before}[{match.text}]{match.context.after}...")
```

**Returns:** `TextRectSearchResult` with:
- **`status`**: "success" or "error"
- **`results`**: List of `TextMatch` objects with:
- `text` - The matched text
- `rect` - Absolute coordinates (with scroll offset)
- `viewport_rect` - Viewport-relative coordinates
- `context` - Surrounding text (before/after)
- `in_viewport` - Whether visible in current viewport

**Use Cases:**
- Find buttons/links by visible text without CSS selectors
- Get exact pixel coordinates for click automation
- Verify text visibility and position on page
- Search dynamic content that changes frequently

**Note:** Does not consume API credits (runs locally in browser)

**See example:** `examples/find_text_demo.py`

</details>

---

## 📋 Reference
Expand Down
100 changes: 100 additions & 0 deletions examples/find_text_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""
Text Search Demo - Using find_text_rect() to locate elements by visible text

This example demonstrates how to:
1. Find text on a webpage and get exact pixel coordinates
2. Use case-sensitive and whole-word matching options
3. Click on found text using click_rect()
4. Handle multiple matches and filter by viewport visibility
"""

from sentience import SentienceBrowser, click_rect, find_text_rect


def main():
    """Run the find_text_rect() demo against https://www.google.com.

    Opens a SentienceBrowser, then walks through five examples:
    a simple text search, finding and clicking the first visible match,
    case-sensitive vs. case-insensitive search, whole-word vs. partial
    search, and printing the viewport size reported by a search result.
    All output goes to stdout; nothing is returned.
    """
    with SentienceBrowser() as browser:
        # Navigate to a search page
        browser.page.goto("https://www.google.com")
        browser.page.wait_for_load_state("networkidle")

        print("\n" + "=" * 60)
        print("Text Search Demo")
        print("=" * 60 + "\n")

        # Example 1: Simple text search
        print("Example 1: Finding 'Google Search' button")
        print("-" * 60)
        result = find_text_rect(browser, "Google Search")

        if result.status == "success" and result.results:
            print(f"✓ Found {result.matches} match(es) for '{result.query}'")
            for i, match in enumerate(result.results[:3]):  # Show first 3
                print(f"\nMatch {i + 1}:")
                print(f" Text: '{match.text}'")
                print(f" Position: ({match.rect.x:.1f}, {match.rect.y:.1f})")
                print(f" Size: {match.rect.width:.1f}x{match.rect.height:.1f} pixels")
                print(f" In viewport: {match.in_viewport}")
                print(f" Context: ...{match.context.before}[{match.text}]{match.context.after}...")
        elif result.status == "success":
            # A successful search with zero matches is not an error; reporting
            # result.error here would print a misleading "Search failed: None".
            print("✗ No matches found for 'Google Search'")
        else:
            print(f"✗ Search failed: {result.error}")

        # Example 2: Find and click search box
        print("\n\nExample 2: Finding and clicking the search box")
        print("-" * 60)
        result = find_text_rect(browser, "Search", max_results=5)

        if result.status == "success" and result.results:
            # Find the first visible match
            for match in result.results:
                if match.in_viewport:
                    print(f"✓ Found visible match: '{match.text}'")
                    print(f" Clicking at ({match.rect.x:.1f}, {match.rect.y:.1f})")

                    # Hand the match's bounding box to click_rect
                    click_result = click_rect(
                        browser,
                        {
                            "x": match.rect.x,
                            "y": match.rect.y,
                            "w": match.rect.width,
                            "h": match.rect.height,
                        },
                    )

                    if click_result.success:
                        print(" ✓ Click successful!")
                    # Only the first visible match is clicked
                    break
            else:
                # Loop finished without break: nothing was in the viewport
                print("✗ No visible match found for 'Search'")
        elif result.status == "success":
            print("✗ No matches found for 'Search'")
        else:
            print(f"✗ Search failed: {result.error}")

        # Example 3: Case-sensitive search
        print("\n\nExample 3: Case-sensitive search for 'GOOGLE'")
        print("-" * 60)
        result_insensitive = find_text_rect(browser, "GOOGLE", case_sensitive=False)
        result_sensitive = find_text_rect(browser, "GOOGLE", case_sensitive=True)

        # `or 0` covers a missing/None match count (e.g. on an error result)
        print(f"Case-insensitive search: {result_insensitive.matches or 0} matches")
        print(f"Case-sensitive search: {result_sensitive.matches or 0} matches")

        # Example 4: Whole word search
        print("\n\nExample 4: Whole word search")
        print("-" * 60)
        result_partial = find_text_rect(browser, "Search", whole_word=False)
        result_whole = find_text_rect(browser, "Search", whole_word=True)

        print(f"Partial word match: {result_partial.matches or 0} matches")
        print(f"Whole word only: {result_whole.matches or 0} matches")

        # Example 5: Get viewport information
        print("\n\nExample 5: Viewport and scroll information")
        print("-" * 60)
        result = find_text_rect(browser, "Google")
        if result.status == "success" and result.viewport:
            print(f"Viewport size: {result.viewport.width}x{result.viewport.height}")
            # Note: scroll position would be available if viewport had scroll_x/scroll_y fields

        print("\n" + "=" * 60)
        print("Demo complete!")
        print("=" * 60 + "\n")


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
13 changes: 13 additions & 0 deletions sentience/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,13 @@
SnapshotFilter,
SnapshotOptions,
StorageState,
TextContext,
TextMatch,
TextRect,
TextRectSearchResult,
TokenStats,
Viewport,
ViewportRect,
WaitResult,
)
from .overlay import clear_overlay, show_overlay
Expand All @@ -51,6 +56,7 @@
from .recorder import Recorder, Trace, TraceStep, record
from .screenshot import screenshot
from .snapshot import snapshot
from .text_search import find_text_rect
from .tracer_factory import SENTIENCE_API_URL, create_tracer
from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink

Expand Down Expand Up @@ -96,6 +102,13 @@
"screenshot",
"show_overlay",
"clear_overlay",
# Text Search
"find_text_rect",
"TextRectSearchResult",
"TextMatch",
"TextRect",
"ViewportRect",
"TextContext",
# Agent Layer (Phase 1 & 2)
"BaseAgent",
"LLMProvider",
Expand Down
1 change: 0 additions & 1 deletion sentience/expect.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""

import time
from typing import Optional, Union

from .browser import SentienceBrowser
from .models import Element
Expand Down
6 changes: 3 additions & 3 deletions sentience/extension/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,13 @@ async function handleScreenshotCapture(_tabId, options = {}) {
async function handleSnapshotProcessing(rawData, options = {}) {
const MAX_ELEMENTS = 10000; // Safety limit to prevent hangs
const startTime = performance.now();

try {
// Safety check: limit element count to prevent hangs
if (!Array.isArray(rawData)) {
throw new Error('rawData must be an array');
}

if (rawData.length > MAX_ELEMENTS) {
console.warn(`[Sentience Background] ⚠️ Large dataset: ${rawData.length} elements. Limiting to ${MAX_ELEMENTS} to prevent hangs.`);
rawData = rawData.slice(0, MAX_ELEMENTS);
Expand Down Expand Up @@ -186,7 +186,7 @@ async function handleSnapshotProcessing(rawData, options = {}) {
// Add timeout protection (18 seconds - less than content.js timeout)
analyzedElements = await Promise.race([
wasmPromise,
new Promise((_, reject) =>
new Promise((_, reject) =>
setTimeout(() => reject(new Error('WASM processing timeout (>18s)')), 18000)
)
]);
Expand Down
2 changes: 1 addition & 1 deletion sentience/extension/content.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ function handleSnapshotRequest(data) {
if (responded) return; // Already responded via timeout
responded = true;
clearTimeout(timeoutId);

const duration = performance.now() - startTime;

// Handle Chrome extension errors (e.g., background script crashed)
Expand Down
Loading