diff --git a/README.md b/README.md index 2631594..b8802a1 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,37 @@ with SentienceBrowser(headless=False) as browser: --- +## 🆕 What's New (2026-01-06) + +### Human-like Typing +Add realistic delays between keystrokes to mimic human typing: +```python +from sentience import type_text + +# Type instantly (default) +type_text(browser, element_id, "Hello World") + +# Type with human-like delay (~10ms between keystrokes) +type_text(browser, element_id, "Hello World", delay_ms=10) +``` + +### Scroll to Element +Scroll an element into view (smooth animation by default): +```python +from sentience import snapshot, find, scroll_to + +snap = snapshot(browser) +button = find(snap, 'role=button text~"Submit"') + +# Scroll element into view with smooth animation +scroll_to(browser, button.id) + +# Scroll instantly, aligning the element with the top of the viewport +scroll_to(browser, button.id, behavior='instant', block='start') +``` + +--- +

💼 Real-World Example: Amazon Shopping Bot

diff --git a/sentience/__init__.py b/sentience/__init__.py index 7b1d4e1..487786d 100644 --- a/sentience/__init__.py +++ b/sentience/__init__.py @@ -2,7 +2,7 @@ Sentience Python SDK - AI Agent Browser Automation """ -from .actions import click, click_rect, press, type_text +from .actions import click, click_rect, press, scroll_to, type_text from .agent import SentienceAgent, SentienceAgentAsync from .agent_config import AgentConfig @@ -90,6 +90,7 @@ "click", "type_text", "press", + "scroll_to", "click_rect", "wait_for", "expect", diff --git a/sentience/actions.py b/sentience/actions.py index b928b00..dbe8e71 100644 --- a/sentience/actions.py +++ b/sentience/actions.py @@ -134,7 +134,11 @@ def click( # noqa: C901 def type_text( - browser: SentienceBrowser, element_id: int, text: str, take_snapshot: bool = False + browser: SentienceBrowser, + element_id: int, + text: str, + take_snapshot: bool = False, + delay_ms: float = 0, ) -> ActionResult: """ Type text into an element (focus then input) @@ -144,9 +148,16 @@ def type_text( element_id: Element ID from snapshot text: Text to type take_snapshot: Whether to take snapshot after action + delay_ms: Delay between keystrokes in milliseconds for human-like typing (default: 0) Returns: ActionResult + + Example: + >>> # Type instantly (default behavior) + >>> type_text(browser, element_id, "Hello World") + >>> # Type with human-like delay (~10ms between keystrokes) + >>> type_text(browser, element_id, "Hello World", delay_ms=10) """ if not browser.page: raise RuntimeError("Browser not started. 
Call browser.start() first.") @@ -177,8 +188,8 @@ def type_text( error={"code": "focus_failed", "reason": "Element not found"}, ) - # Type using Playwright keyboard - browser.page.keyboard.type(text) + # Type using Playwright keyboard with optional delay between keystrokes + browser.page.keyboard.type(text, delay=delay_ms) duration_ms = int((time.time() - start_time) * 1000) url_after = browser.page.url @@ -242,6 +253,94 @@ def press(browser: SentienceBrowser, key: str, take_snapshot: bool = False) -> A ) +def scroll_to( + browser: SentienceBrowser, + element_id: int, + behavior: str = "smooth", + block: str = "center", + take_snapshot: bool = False, +) -> ActionResult: + """ + Scroll an element into view + + Scrolls the page so that the specified element is visible in the viewport. + Uses the element registry to find the element and scrollIntoView() to scroll it. + + Args: + browser: SentienceBrowser instance + element_id: Element ID from snapshot to scroll into view + behavior: Scroll behavior - 'smooth', 'instant', or 'auto' (default: 'smooth') + block: Vertical alignment - 'start', 'center', 'end', or 'nearest' (default: 'center') + take_snapshot: Whether to take snapshot after action + + Returns: + ActionResult + + Example: + >>> snap = snapshot(browser) + >>> button = find(snap, 'role=button[name="Submit"]') + >>> if button: + >>> # Scroll element into view with smooth animation + >>> scroll_to(browser, button.id) + >>> # Scroll instantly to top of viewport + >>> scroll_to(browser, button.id, behavior='instant', block='start') + """ + if not browser.page: + raise RuntimeError("Browser not started. 
Call browser.start() first.") + + start_time = time.time() + url_before = browser.page.url + + # Scroll element into view using the element registry + scrolled = browser.page.evaluate( + """ + (args) => { + const el = window.sentience_registry[args.id]; + if (el && el.scrollIntoView) { + el.scrollIntoView({ + behavior: args.behavior, + block: args.block, + inline: 'nearest' + }); + return true; + } + return false; + } + """, + {"id": element_id, "behavior": behavior, "block": block}, + ) + + if not scrolled: + return ActionResult( + success=False, + duration_ms=int((time.time() - start_time) * 1000), + outcome="error", + error={"code": "scroll_failed", "reason": "Element not found or not scrollable"}, + ) + + # Wait a bit for scroll to complete (especially for smooth scrolling) + wait_time = 500 if behavior == "smooth" else 100 + browser.page.wait_for_timeout(wait_time) + + duration_ms = int((time.time() - start_time) * 1000) + url_after = browser.page.url + url_changed = url_before != url_after + + outcome = "navigated" if url_changed else "dom_updated" + + snapshot_after: Snapshot | None = None + if take_snapshot: + snapshot_after = snapshot(browser) + + return ActionResult( + success=True, + duration_ms=duration_ms, + outcome=outcome, + url_changed=url_changed, + snapshot_after=snapshot_after, + ) + + def _highlight_rect( browser: SentienceBrowser, rect: dict[str, float], duration_sec: float = 2.0 ) -> None: @@ -553,7 +652,11 @@ async def click_async( async def type_text_async( - browser: AsyncSentienceBrowser, element_id: int, text: str, take_snapshot: bool = False + browser: AsyncSentienceBrowser, + element_id: int, + text: str, + take_snapshot: bool = False, + delay_ms: float = 0, ) -> ActionResult: """ Type text into an element (async) @@ -563,9 +666,16 @@ async def type_text_async( element_id: Element ID from snapshot text: Text to type take_snapshot: Whether to take snapshot after action + delay_ms: Delay between keystrokes in milliseconds for human-like 
typing (default: 0) Returns: ActionResult + + Example: + >>> # Type instantly (default behavior) + >>> await type_text_async(browser, element_id, "Hello World") + >>> # Type with human-like delay (~10ms between keystrokes) + >>> await type_text_async(browser, element_id, "Hello World", delay_ms=10) """ if not browser.page: raise RuntimeError("Browser not started. Call await browser.start() first.") @@ -596,8 +706,8 @@ async def type_text_async( error={"code": "focus_failed", "reason": "Element not found"}, ) - # Type using Playwright keyboard - await browser.page.keyboard.type(text) + # Type using Playwright keyboard with optional delay between keystrokes + await browser.page.keyboard.type(text, delay=delay_ms) duration_ms = int((time.time() - start_time) * 1000) url_after = browser.page.url @@ -663,6 +773,94 @@ async def press_async( ) +async def scroll_to_async( + browser: AsyncSentienceBrowser, + element_id: int, + behavior: str = "smooth", + block: str = "center", + take_snapshot: bool = False, +) -> ActionResult: + """ + Scroll an element into view (async) + + Scrolls the page so that the specified element is visible in the viewport. + Uses the element registry to find the element and scrollIntoView() to scroll it. 
+ + Args: + browser: AsyncSentienceBrowser instance + element_id: Element ID from snapshot to scroll into view + behavior: Scroll behavior - 'smooth', 'instant', or 'auto' (default: 'smooth') + block: Vertical alignment - 'start', 'center', 'end', or 'nearest' (default: 'center') + take_snapshot: Whether to take snapshot after action + + Returns: + ActionResult + + Example: + >>> snap = await snapshot_async(browser) + >>> button = find(snap, 'role=button[name="Submit"]') + >>> if button: + >>> # Scroll element into view with smooth animation + >>> await scroll_to_async(browser, button.id) + >>> # Scroll instantly to top of viewport + >>> await scroll_to_async(browser, button.id, behavior='instant', block='start') + """ + if not browser.page: + raise RuntimeError("Browser not started. Call await browser.start() first.") + + start_time = time.time() + url_before = browser.page.url + + # Scroll element into view using the element registry + scrolled = await browser.page.evaluate( + """ + (args) => { + const el = window.sentience_registry[args.id]; + if (el && el.scrollIntoView) { + el.scrollIntoView({ + behavior: args.behavior, + block: args.block, + inline: 'nearest' + }); + return true; + } + return false; + } + """, + {"id": element_id, "behavior": behavior, "block": block}, + ) + + if not scrolled: + return ActionResult( + success=False, + duration_ms=int((time.time() - start_time) * 1000), + outcome="error", + error={"code": "scroll_failed", "reason": "Element not found or not scrollable"}, + ) + + # Wait a bit for scroll to complete (especially for smooth scrolling) + wait_time = 500 if behavior == "smooth" else 100 + await browser.page.wait_for_timeout(wait_time) + + duration_ms = int((time.time() - start_time) * 1000) + url_after = browser.page.url + url_changed = url_before != url_after + + outcome = "navigated" if url_changed else "dom_updated" + + snapshot_after: Snapshot | None = None + if take_snapshot: + snapshot_after = await snapshot_async(browser) + + 
return ActionResult( + success=True, + duration_ms=duration_ms, + outcome=outcome, + url_changed=url_changed, + snapshot_after=snapshot_after, + ) + + async def _highlight_rect_async( browser: AsyncSentienceBrowser, rect: dict[str, float], duration_sec: float = 2.0 ) -> None: diff --git a/sentience/async_api.py b/sentience/async_api.py index 70fec1b..362d947 100644 --- a/sentience/async_api.py +++ b/sentience/async_api.py @@ -23,7 +23,13 @@ # ========== Actions (Phase 1) ========== # Re-export async action functions from actions.py -from sentience.actions import click_async, click_rect_async, press_async, type_text_async +from sentience.actions import ( + click_async, + click_rect_async, + press_async, + scroll_to_async, + type_text_async, +) # ========== Phase 2C: Agent Layer ========== # Re-export async agent classes from agent.py and base_agent.py @@ -76,6 +82,7 @@ "click_async", # Re-exported from actions.py "type_text_async", # Re-exported from actions.py "press_async", # Re-exported from actions.py + "scroll_to_async", # Re-exported from actions.py "click_rect_async", # Re-exported from actions.py # Phase 2A: Core Utilities "wait_for_async", # Re-exported from wait.py diff --git a/sentience/extension/background.js b/sentience/extension/background.js index 2923f55..aff49b0 100644 --- a/sentience/extension/background.js +++ b/sentience/extension/background.js @@ -28,14 +28,14 @@ async function handleSnapshotProcessing(rawData, options = {}) { const startTime = performance.now(); try { if (!Array.isArray(rawData)) throw new Error("rawData must be an array"); - if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), + if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), !wasmReady) throw new Error("WASM module not initialized"); let analyzedElements, prunedRawData; try { const wasmPromise = new Promise((resolve, reject) => { try { let result; - result = options.limit || options.filter ? 
analyze_page_with_options(rawData, options) : analyze_page(rawData), + result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData), resolve(result); } catch (e) { reject(e); @@ -101,4 +101,4 @@ initWASM().catch(err => {}), chrome.runtime.onMessage.addListener((request, send event.preventDefault(); }), self.addEventListener("unhandledrejection", event => { event.preventDefault(); -}); \ No newline at end of file +}); diff --git a/sentience/extension/content.js b/sentience/extension/content.js index e94cde1..9d5b3bf 100644 --- a/sentience/extension/content.js +++ b/sentience/extension/content.js @@ -82,7 +82,7 @@ if (!elements || !Array.isArray(elements)) return; removeOverlay(); const host = document.createElement("div"); - host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", + host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", document.body.appendChild(host); const shadow = host.attachShadow({ mode: "closed" @@ -94,15 +94,15 @@ let color; color = isTarget ? "#FF0000" : isPrimary ? "#0066FF" : "#00FF00"; const importanceRatio = maxImportance > 0 ? importance / maxImportance : .5, borderOpacity = isTarget ? 1 : isPrimary ? .9 : Math.max(.4, .5 + .5 * importanceRatio), fillOpacity = .2 * borderOpacity, borderWidth = isTarget ? 2 : isPrimary ? 
1.5 : Math.max(.5, Math.round(2 * importanceRatio)), hexOpacity = Math.round(255 * fillOpacity).toString(16).padStart(2, "0"), box = document.createElement("div"); - if (box.style.cssText = `\n position: absolute;\n left: ${bbox.x}px;\n top: ${bbox.y}px;\n width: ${bbox.width}px;\n height: ${bbox.height}px;\n border: ${borderWidth}px solid ${color};\n background-color: ${color}${hexOpacity};\n box-sizing: border-box;\n opacity: ${borderOpacity};\n pointer-events: none;\n `, + if (box.style.cssText = `\n position: absolute;\n left: ${bbox.x}px;\n top: ${bbox.y}px;\n width: ${bbox.width}px;\n height: ${bbox.height}px;\n border: ${borderWidth}px solid ${color};\n background-color: ${color}${hexOpacity};\n box-sizing: border-box;\n opacity: ${borderOpacity};\n pointer-events: none;\n `, importance > 0 || isPrimary) { const badge = document.createElement("span"); - badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, + badge.textContent = isPrimary ? 
`⭐${importance}` : `${importance}`, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, box.appendChild(badge); } if (isTarget) { const targetIndicator = document.createElement("span"); - targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", + targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", box.appendChild(targetIndicator); } shadow.appendChild(box); @@ -120,7 +120,7 @@ let overlayTimeout = null; function removeOverlay() { const existing = document.getElementById(OVERLAY_HOST_ID); - existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), + existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), overlayTimeout = null); } -}(); \ No newline at end of file +}(); diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index c62bcab..69c7d36 100644 --- a/sentience/extension/injected_api.js +++ b/sentience/extension/injected_api.js @@ -112,7 +112,7 @@ if (labelEl) { let text = ""; try { - if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), + if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), !text && labelEl.getAttribute) { const ariaLabel = labelEl.getAttribute("aria-label"); ariaLabel && (text = ariaLabel.trim()); @@ -281,7 +281,7 @@ }); const checkStable = () => { const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime; - timeSinceLastChange >= quietPeriod || totalWait >= 
maxWait ? (observer.disconnect(), + timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(), resolve()) : setTimeout(checkStable, 50); }; checkStable(); @@ -301,7 +301,7 @@ }); const checkQuiet = () => { const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime; - timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), + timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), resolve()) : setTimeout(checkQuiet, 50); }; checkQuiet(); @@ -461,8 +461,8 @@ const requestId = `iframe-${idx}-${Date.now()}`, timeout = setTimeout(() => { resolve(null); }, 5e3), listener = event => { - "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), - window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot, + "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), + window.removeEventListener("message", listener), event.data.error ? 
resolve(null) : (event.data.snapshot, resolve({ iframe: iframe, data: event.data.snapshot, @@ -478,7 +478,7 @@ ...options, collectIframes: !0 } - }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), + }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null)); } catch (error) { clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null); @@ -528,7 +528,7 @@ }, 25e3), listener = e => { if ("SENTIENCE_SNAPSHOT_RESULT" === e.data.type && e.data.requestId === requestId) { if (resolved) return; - resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), + resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), e.data.error ? reject(new Error(e.data.error)) : resolve({ elements: e.data.elements, raw_elements: e.data.raw_elements, @@ -545,7 +545,7 @@ options: options }, "*"); } catch (error) { - resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), + resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), reject(new Error(`Failed to send snapshot request: ${error.message}`))); } }); @@ -555,7 +555,7 @@ options.screenshot && (screenshot = await function(options) { return new Promise(resolve => { const requestId = Math.random().toString(36).substring(7), listener = e => { - "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), + "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), resolve(e.data.screenshot)); }; window.addEventListener("message", listener), window.postMessage({ @@ -602,15 +602,15 @@ } if (node.nodeType !== Node.ELEMENT_NODE) return; const tag = node.tagName.toLowerCase(); - if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), - "h3" 
=== tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), - "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), - "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), + if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), + "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), + "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), + "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), "a" === tag) { const href = node.getAttribute("href"); markdown += href ? `](${href})` : "]", insideLink = !1; } - "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), + "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), insideLink || "h1" !== tag && "h2" !== tag && "h3" !== tag && "p" !== tag && "div" !== tag || (markdown += "\n"); }(tempDiv), markdown.replace(/\n{3,}/g, "\n\n").trim(); }(document.body) : function(root) { @@ -623,7 +623,7 @@ const style = window.getComputedStyle(node); if ("none" === style.display || "hidden" === style.visibility) return; const isBlock = "block" === style.display || "flex" === style.display || "P" === node.tagName || "DIV" === node.tagName; - isBlock && (text += " "), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), + isBlock && (text += " "), node.shadowRoot ? 
Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), isBlock && (text += "\n"); } } else text += node.textContent; @@ -722,25 +722,25 @@ } function startRecording(options = {}) { const {highlightColor: highlightColor = "#ff0000", successColor: successColor = "#00ff00", autoDisableTimeout: autoDisableTimeout = 18e5, keyboardShortcut: keyboardShortcut = "Ctrl+Shift+I"} = options; - if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), + if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), () => {}; window.sentience_registry_map = new Map, window.sentience_registry.forEach((el, idx) => { el && window.sentience_registry_map.set(el, idx); }); let highlightBox = document.getElementById("sentience-highlight-box"); - highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", - highlightBox.style.cssText = `\n position: fixed;\n pointer-events: none;\n z-index: 2147483647;\n border: 2px solid ${highlightColor};\n background: rgba(255, 0, 0, 0.1);\n display: none;\n transition: all 0.1s ease;\n box-sizing: border-box;\n `, + highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", + highlightBox.style.cssText = `\n position: fixed;\n pointer-events: none;\n z-index: 2147483647;\n border: 2px solid ${highlightColor};\n background: rgba(255, 0, 0, 0.1);\n display: none;\n transition: all 0.1s ease;\n box-sizing: border-box;\n `, document.body.appendChild(highlightBox)); let recordingIndicator = document.getElementById("sentience-recording-indicator"); - recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", - recordingIndicator.style.cssText = `\n position: fixed;\n top: 
0;\n left: 0;\n right: 0;\n height: 3px;\n background: ${highlightColor};\n z-index: 2147483646;\n pointer-events: none;\n `, + recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", + recordingIndicator.style.cssText = `\n position: fixed;\n top: 0;\n left: 0;\n right: 0;\n height: 3px;\n background: ${highlightColor};\n z-index: 2147483646;\n pointer-events: none;\n `, document.body.appendChild(recordingIndicator)), recordingIndicator.style.display = "block"; const mouseOverHandler = e => { const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; const rect = el.getBoundingClientRect(); - highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", - highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", + highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", + highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", highlightBox.style.height = rect.height + "px"; }, clickHandler = e => { e.preventDefault(), e.stopPropagation(); @@ -817,7 +817,7 @@ debug_snapshot: rawData }, jsonString = JSON.stringify(snippet, null, 2); navigator.clipboard.writeText(jsonString).then(() => { - highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", + highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", setTimeout(() => { highlightBox.style.border = `2px solid ${highlightColor}`, highlightBox.style.background = "rgba(255, 0, 0, 0.1)"; }, 500); @@ -827,15 +827,15 @@ }; let timeoutId = null; const stopRecording = () => { - document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), - 
document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), - timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), + document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), + document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), + timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), window.sentience_registry_map && window.sentience_registry_map.clear(), window.sentience_stopRecording === stopRecording && delete window.sentience_stopRecording; }, keyboardHandler = e => { - (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), + (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), stopRecording()); }; - return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), + return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), document.addEventListener("keydown", keyboardHandler, !0), autoDisableTimeout > 0 && (timeoutId = setTimeout(() => { stopRecording(); }, autoDisableTimeout)), window.sentience_stopRecording = stopRecording, stopRecording; @@ -895,4 +895,4 @@ } }), window.sentience_iframe_handler_setup = !0)); })(); -}(); \ No newline at end of file +}(); diff --git a/sentience/extension/pkg/sentience_core.js b/sentience/extension/pkg/sentience_core.js index ecba479..2696a64 100644 --- a/sentience/extension/pkg/sentience_core.js +++ b/sentience/extension/pkg/sentience_core.js @@ -47,7 +47,7 @@ function getArrayU8FromWasm0(ptr, len) { let cachedDataViewMemory0 = null; function getDataViewMemory0() { - return (null === cachedDataViewMemory0 || !0 === 
cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), + return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), cachedDataViewMemory0; } @@ -58,7 +58,7 @@ function getStringFromWasm0(ptr, len) { let cachedUint8ArrayMemory0 = null; function getUint8ArrayMemory0() { - return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), + return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), cachedUint8ArrayMemory0; } @@ -87,7 +87,7 @@ function isLikeNone(x) { function passStringToWasm0(arg, malloc, realloc) { if (void 0 === realloc) { const buf = cachedTextEncoder.encode(arg), ptr = malloc(buf.length, 1) >>> 0; - return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, + return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, ptr; } let len = arg.length, ptr = malloc(len, 1) >>> 0; @@ -188,7 +188,7 @@ function __wbg_get_imports() { return Number(getObject(arg0)); }, imports.wbg.__wbg___wbindgen_bigint_get_as_i64_6e32f5e6aff02e1d = function(arg0, arg1) { const v = getObject(arg1), ret = "bigint" == typeof v ? v : void 0; - getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? BigInt(0) : ret, !0), + getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? 
BigInt(0) : ret, !0), getDataViewMemory0().setInt32(arg0 + 0, !isLikeNone(ret), !0); }, imports.wbg.__wbg___wbindgen_boolean_get_dea25b33882b895b = function(arg0) { const v = getObject(arg0), ret = "boolean" == typeof v ? v : void 0; @@ -296,7 +296,7 @@ function __wbg_get_imports() { } function __wbg_finalize_init(instance, module) { - return wasm = instance.exports, __wbg_init.__wbindgen_wasm_module = module, cachedDataViewMemory0 = null, + return wasm = instance.exports, __wbg_init.__wbindgen_wasm_module = module, cachedDataViewMemory0 = null, cachedUint8ArrayMemory0 = null, wasm; } @@ -310,7 +310,7 @@ function initSync(module) { async function __wbg_init(module_or_path) { if (void 0 !== wasm) return wasm; - void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), + void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), void 0 === module_or_path && (module_or_path = new URL("sentience_core_bg.wasm", import.meta.url)); const imports = __wbg_get_imports(); ("string" == typeof module_or_path || "function" == typeof Request && module_or_path instanceof Request || "function" == typeof URL && module_or_path instanceof URL) && (module_or_path = fetch(module_or_path)); @@ -320,4 +320,4 @@ async function __wbg_init(module_or_path) { export { initSync }; -export default __wbg_init; \ No newline at end of file +export default __wbg_init; diff --git a/sentience/visual_agent.py b/sentience/visual_agent.py index 27421e6..60c3851 100644 --- a/sentience/visual_agent.py +++ b/sentience/visual_agent.py @@ -43,7 +43,9 @@ ImageFont = None try: - from PIL import Image as PILImage, ImageDraw as PILImageDraw, ImageFont as PILImageFont + from PIL import Image as PILImage + from PIL import ImageDraw as PILImageDraw + from PIL import ImageFont as PILImageFont PIL_AVAILABLE = True except ImportError: diff --git 
a/tests/test_actions.py b/tests/test_actions.py index 104a368..cc64ead 100644 --- a/tests/test_actions.py +++ b/tests/test_actions.py @@ -4,7 +4,17 @@ import pytest -from sentience import BBox, SentienceBrowser, click, click_rect, find, press, snapshot, type_text +from sentience import ( + BBox, + SentienceBrowser, + click, + click_rect, + find, + press, + scroll_to, + snapshot, + type_text, +) def test_click(): @@ -165,3 +175,85 @@ def test_click_js_approach(): assert result.duration_ms > 0 # Navigation may happen, which is expected for links assert result.outcome in ["navigated", "dom_updated"] + + +def test_scroll_to(): + """Test scroll_to action""" + with SentienceBrowser() as browser: + browser.page.goto("https://example.com") + browser.page.wait_for_load_state("networkidle") + + snap = snapshot(browser) + # Find an element to scroll to (typically the last link or element) + elements = [el for el in snap.elements if el.role == "link"] + + if elements: + # Get the last element which might be out of viewport + element = elements[-1] if len(elements) > 1 else elements[0] + result = scroll_to(browser, element.id) + assert result.success is True + assert result.duration_ms > 0 + assert result.outcome in ["navigated", "dom_updated"] + + +def test_scroll_to_instant(): + """Test scroll_to with instant behavior""" + with SentienceBrowser() as browser: + browser.page.goto("https://example.com") + browser.page.wait_for_load_state("networkidle") + + snap = snapshot(browser) + elements = [el for el in snap.elements if el.role == "link"] + + if elements: + element = elements[0] + result = scroll_to(browser, element.id, behavior="instant", block="start") + assert result.success is True + assert result.duration_ms > 0 + + +def test_scroll_to_with_snapshot(): + """Test scroll_to with snapshot after action""" + with SentienceBrowser() as browser: + browser.page.goto("https://example.com") + browser.page.wait_for_load_state("networkidle") + + snap = snapshot(browser) + elements 
= [el for el in snap.elements if el.role == "link"] + + if elements: + element = elements[0] + result = scroll_to(browser, element.id, take_snapshot=True) + assert result.success is True + assert result.snapshot_after is not None + assert result.snapshot_after.status == "success" + + +def test_scroll_to_invalid_element(): + """Test scroll_to with invalid element ID""" + with SentienceBrowser() as browser: + browser.page.goto("https://example.com") + browser.page.wait_for_load_state("networkidle") + + # Try to scroll to non-existent element + result = scroll_to(browser, 99999) + assert result.success is False + assert result.error is not None + assert result.error["code"] == "scroll_failed" + + +def test_type_text_with_delay(): + """Test type_text with human-like delay""" + with SentienceBrowser() as browser: + browser.page.goto("https://example.com") + browser.page.wait_for_load_state("networkidle") + + snap = snapshot(browser) + textbox = find(snap, "role=textbox") + + if textbox: + # Test with 10ms delay between keystrokes + result = type_text(browser, textbox.id, "hello", delay_ms=10) + assert result.success is True + # Duration should be longer due to delays + assert result.duration_ms >= 50 # At least 5 chars * 10ms