From fb57341f23cd1ee1b1999061958f351e811709e9 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Thu, 15 Jan 2026 16:38:25 -0800 Subject: [PATCH] humanized mouse click --- examples/human_cursor_click_demo.py | 41 ++++++ sentience/__init__.py | 2 + sentience/action_executor.py | 2 + sentience/actions.py | 153 +++++++++++++++++++++- sentience/agent.py | 4 + sentience/agent_runtime.py | 2 +- sentience/backends/actions.py | 33 ++++- sentience/cursor_policy.py | 135 +++++++++++++++++++ sentience/extension/background.js | 6 +- sentience/extension/content.js | 18 +-- sentience/extension/injected_api.js | 60 ++++----- sentience/extension/pkg/sentience_core.js | 14 +- sentience/models.py | 4 + tests/test_backends.py | 25 ++++ 14 files changed, 444 insertions(+), 55 deletions(-) create mode 100644 examples/human_cursor_click_demo.py create mode 100644 sentience/cursor_policy.py diff --git a/examples/human_cursor_click_demo.py b/examples/human_cursor_click_demo.py new file mode 100644 index 0000000..d23330b --- /dev/null +++ b/examples/human_cursor_click_demo.py @@ -0,0 +1,41 @@ +""" +Human-like cursor movement demo (Python SDK). + +This example shows how to opt into human-like mouse movement before clicking, +and how to read the returned cursor metadata for tracing/debugging. +""" + +from __future__ import annotations + +from sentience import CursorPolicy, SentienceBrowser, click, find, snapshot + + +def main() -> None: + # NOTE: This uses a real browser via Playwright. 
+ with SentienceBrowser() as browser: + browser.page.goto("https://example.com") + browser.page.wait_for_load_state("networkidle") + + snap = snapshot(browser) + link = find(snap, "role=link") + if not link: + raise RuntimeError("No link found on page") + + policy = CursorPolicy( + mode="human", + steps=18, # more steps => smoother + duration_ms=350, + jitter_px=1.2, + overshoot_px=6.0, + pause_before_click_ms=30, + seed=123, # optional: makes motion deterministic for demos/tests + ) + + result = click(browser, link.id, use_mouse=True, cursor_policy=policy) + print("clicked:", result.success, "outcome:", result.outcome) + print("cursor meta:", result.cursor) + + +if __name__ == "__main__": + main() + diff --git a/sentience/__init__.py b/sentience/__init__.py index 4e5c3b2..ba89e3d 100644 --- a/sentience/__init__.py +++ b/sentience/__init__.py @@ -43,6 +43,7 @@ # Tracing (v0.12.0+) from .cloud_tracing import CloudTraceSink, SentienceLogger from .conversational_agent import ConversationalAgent +from .cursor_policy import CursorPolicy from .expect import expect from .generator import ScriptGenerator, generate from .inspector import Inspector, inspect @@ -172,6 +173,7 @@ "press", "scroll_to", "click_rect", + "CursorPolicy", "wait_for", "expect", "Inspector", diff --git a/sentience/action_executor.py b/sentience/action_executor.py index c95f29b..97b3455 100644 --- a/sentience/action_executor.py +++ b/sentience/action_executor.py @@ -96,6 +96,7 @@ def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]: "element_id": element_id, "outcome": result.outcome, "url_changed": result.url_changed, + "cursor": getattr(result, "cursor", None), } # Parse TYPE(42, "hello world") @@ -170,6 +171,7 @@ async def execute_async(self, action_str: str, snap: Snapshot) -> dict[str, Any] "element_id": element_id, "outcome": result.outcome, "url_changed": result.url_changed, + "cursor": getattr(result, "cursor", None), } # Parse TYPE(42, "hello world") diff --git 
a/sentience/actions.py b/sentience/actions.py index dbe8e71..74d300a 100644 --- a/sentience/actions.py +++ b/sentience/actions.py @@ -4,10 +4,12 @@ Actions v1 - click, type, press """ +import asyncio import time from .browser import AsyncSentienceBrowser, SentienceBrowser from .browser_evaluator import BrowserEvaluator +from .cursor_policy import CursorPolicy, build_human_cursor_path from .models import ActionResult, BBox, Snapshot from .sentience_methods import SentienceMethod from .snapshot import snapshot, snapshot_async @@ -18,6 +20,7 @@ def click( # noqa: C901 element_id: int, use_mouse: bool = True, take_snapshot: bool = False, + cursor_policy: CursorPolicy | None = None, ) -> ActionResult: """ Click an element by ID using hybrid approach (mouse simulation by default) @@ -37,6 +40,7 @@ def click( # noqa: C901 start_time = time.time() url_before = browser.page.url + cursor_meta: dict | None = None if use_mouse: # Hybrid approach: Get element bbox from snapshot, calculate center, use mouse.click() @@ -52,9 +56,49 @@ def click( # noqa: C901 # Calculate center of element bbox center_x = element.bbox.x + element.bbox.width / 2 center_y = element.bbox.y + element.bbox.height / 2 - # Use Playwright's native mouse click for realistic simulation + # Optional: human-like cursor movement (opt-in) try: - browser.page.mouse.click(center_x, center_y) + if cursor_policy is not None and cursor_policy.mode == "human": + # Best-effort cursor state on browser instance + pos = getattr(browser, "_sentience_cursor_pos", None) + if not isinstance(pos, tuple) or len(pos) != 2: + try: + vp = browser.page.viewport_size or {} + pos = ( + float(vp.get("width", 0)) / 2.0, + float(vp.get("height", 0)) / 2.0, + ) + except Exception: + pos = (0.0, 0.0) + + cursor_meta = build_human_cursor_path( + start=(float(pos[0]), float(pos[1])), + target=(float(center_x), float(center_y)), + policy=cursor_policy, + ) + pts = cursor_meta.get("path", []) + steps = int(cursor_meta.get("steps") or max(1, 
len(pts))) + duration_ms = int(cursor_meta.get("duration_ms") or 0) + per_step_s = ( + (duration_ms / max(1, len(pts))) / 1000.0 if duration_ms > 0 else 0.0 + ) + for p in pts: + browser.page.mouse.move(float(p["x"]), float(p["y"])) + if per_step_s > 0: + time.sleep(per_step_s) + pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0) + if pause_ms > 0: + time.sleep(pause_ms / 1000.0) + browser.page.mouse.click(center_x, center_y) + setattr( + browser, "_sentience_cursor_pos", (float(center_x), float(center_y)) + ) + else: + # Default behavior (no regression) + browser.page.mouse.click(center_x, center_y) + setattr( + browser, "_sentience_cursor_pos", (float(center_x), float(center_y)) + ) success = True except Exception: # If navigation happens, mouse.click might fail, but that's OK @@ -122,6 +166,7 @@ def click( # noqa: C901 outcome=outcome, url_changed=url_changed, snapshot_after=snapshot_after, + cursor=cursor_meta, error=( None if success @@ -414,6 +459,7 @@ def click_rect( highlight: bool = True, highlight_duration: float = 2.0, take_snapshot: bool = False, + cursor_policy: CursorPolicy | None = None, ) -> ActionResult: """ Click at the center of a rectangle using Playwright's native mouse simulation. 
@@ -469,6 +515,7 @@ def click_rect( # Calculate center of rectangle center_x = x + w / 2 center_y = y + h / 2 + cursor_meta: dict | None = None # Show highlight before clicking (if enabled) if highlight: @@ -479,7 +526,35 @@ def click_rect( # Use Playwright's native mouse click for realistic simulation # This triggers hover, focus, mousedown, mouseup sequences try: + if cursor_policy is not None and cursor_policy.mode == "human": + pos = getattr(browser, "_sentience_cursor_pos", None) + if not isinstance(pos, tuple) or len(pos) != 2: + try: + vp = browser.page.viewport_size or {} + pos = (float(vp.get("width", 0)) / 2.0, float(vp.get("height", 0)) / 2.0) + except Exception: + pos = (0.0, 0.0) + + cursor_meta = build_human_cursor_path( + start=(float(pos[0]), float(pos[1])), + target=(float(center_x), float(center_y)), + policy=cursor_policy, + ) + pts = cursor_meta.get("path", []) + duration_ms_move = int(cursor_meta.get("duration_ms") or 0) + per_step_s = ( + (duration_ms_move / max(1, len(pts))) / 1000.0 if duration_ms_move > 0 else 0.0 + ) + for p in pts: + browser.page.mouse.move(float(p["x"]), float(p["y"])) + if per_step_s > 0: + time.sleep(per_step_s) + pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0) + if pause_ms > 0: + time.sleep(pause_ms / 1000.0) + browser.page.mouse.click(center_x, center_y) + setattr(browser, "_sentience_cursor_pos", (float(center_x), float(center_y))) success = True except Exception as e: success = False @@ -512,6 +587,7 @@ def click_rect( outcome=outcome, url_changed=url_changed, snapshot_after=snapshot_after, + cursor=cursor_meta, error=( None if success @@ -531,6 +607,7 @@ async def click_async( element_id: int, use_mouse: bool = True, take_snapshot: bool = False, + cursor_policy: CursorPolicy | None = None, ) -> ActionResult: """ Click an element by ID using hybrid approach (async) @@ -549,6 +626,7 @@ async def click_async( start_time = time.time() url_before = browser.page.url + cursor_meta: dict | None = None if 
use_mouse: try: @@ -563,7 +641,44 @@ async def click_async( center_x = element.bbox.x + element.bbox.width / 2 center_y = element.bbox.y + element.bbox.height / 2 try: - await browser.page.mouse.click(center_x, center_y) + if cursor_policy is not None and cursor_policy.mode == "human": + pos = getattr(browser, "_sentience_cursor_pos", None) + if not isinstance(pos, tuple) or len(pos) != 2: + try: + vp = browser.page.viewport_size or {} + pos = ( + float(vp.get("width", 0)) / 2.0, + float(vp.get("height", 0)) / 2.0, + ) + except Exception: + pos = (0.0, 0.0) + + cursor_meta = build_human_cursor_path( + start=(float(pos[0]), float(pos[1])), + target=(float(center_x), float(center_y)), + policy=cursor_policy, + ) + pts = cursor_meta.get("path", []) + duration_ms = int(cursor_meta.get("duration_ms") or 0) + per_step_s = ( + (duration_ms / max(1, len(pts))) / 1000.0 if duration_ms > 0 else 0.0 + ) + for p in pts: + await browser.page.mouse.move(float(p["x"]), float(p["y"])) + if per_step_s > 0: + await asyncio.sleep(per_step_s) + pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0) + if pause_ms > 0: + await asyncio.sleep(pause_ms / 1000.0) + await browser.page.mouse.click(center_x, center_y) + setattr( + browser, "_sentience_cursor_pos", (float(center_x), float(center_y)) + ) + else: + await browser.page.mouse.click(center_x, center_y) + setattr( + browser, "_sentience_cursor_pos", (float(center_x), float(center_y)) + ) success = True except Exception: success = True @@ -640,6 +755,7 @@ async def click_async( outcome=outcome, url_changed=url_changed, snapshot_after=snapshot_after, + cursor=cursor_meta, error=( None if success @@ -922,6 +1038,7 @@ async def click_rect_async( highlight: bool = True, highlight_duration: float = 2.0, take_snapshot: bool = False, + cursor_policy: CursorPolicy | None = None, ) -> ActionResult: """ Click at the center of a rectangle (async) @@ -968,6 +1085,7 @@ async def click_rect_async( # Calculate center of rectangle center_x = x 
+ w / 2 center_y = y + h / 2 + cursor_meta: dict | None = None # Show highlight before clicking if highlight: @@ -976,7 +1094,35 @@ async def click_rect_async( # Use Playwright's native mouse click try: + if cursor_policy is not None and cursor_policy.mode == "human": + pos = getattr(browser, "_sentience_cursor_pos", None) + if not isinstance(pos, tuple) or len(pos) != 2: + try: + vp = browser.page.viewport_size or {} + pos = (float(vp.get("width", 0)) / 2.0, float(vp.get("height", 0)) / 2.0) + except Exception: + pos = (0.0, 0.0) + + cursor_meta = build_human_cursor_path( + start=(float(pos[0]), float(pos[1])), + target=(float(center_x), float(center_y)), + policy=cursor_policy, + ) + pts = cursor_meta.get("path", []) + duration_ms_move = int(cursor_meta.get("duration_ms") or 0) + per_step_s = ( + (duration_ms_move / max(1, len(pts))) / 1000.0 if duration_ms_move > 0 else 0.0 + ) + for p in pts: + await browser.page.mouse.move(float(p["x"]), float(p["y"])) + if per_step_s > 0: + await asyncio.sleep(per_step_s) + pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0) + if pause_ms > 0: + await asyncio.sleep(pause_ms / 1000.0) + await browser.page.mouse.click(center_x, center_y) + setattr(browser, "_sentience_cursor_pos", (float(center_x), float(center_y))) success = True except Exception as e: success = False @@ -1009,6 +1155,7 @@ async def click_rect_async( outcome=outcome, url_changed=url_changed, snapshot_after=snapshot_after, + cursor=cursor_meta, error=( None if success diff --git a/sentience/agent.py b/sentience/agent.py index acd5c34..1907144 100644 --- a/sentience/agent.py +++ b/sentience/agent.py @@ -355,6 +355,7 @@ def act( # noqa: C901 url_changed=result_dict.get("url_changed"), error=result_dict.get("error"), message=result_dict.get("message"), + cursor=result_dict.get("cursor"), ) # Emit action execution trace event if tracer is enabled @@ -391,6 +392,7 @@ def act( # noqa: C901 "post_url": post_url, "elements": elements_data, # Add element data 
for overlay "target_element_id": result.element_id, # Highlight target in red + "cursor": result.cursor, }, step_id=step_id, ) @@ -445,6 +447,8 @@ def act( # noqa: C901 ), "duration_ms": duration_ms, } + if result.cursor is not None: + exec_data["cursor"] = result.cursor # Add optional exec fields if result.element_id is not None: diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index 6532a81..ca5ab7c 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -341,7 +341,7 @@ def assert_done( True if task is complete (assertion passed), False otherwise """ # Convenience wrapper for assert_ with required=True - ok = self.assert_(predicate, label=label, required=True) + ok = self.assert_(predicate, label=label, required=True) if ok: self._task_done = True self._task_done_label = label diff --git a/sentience/backends/actions.py b/sentience/backends/actions.py index 7e48b1f..a0bd168 100644 --- a/sentience/backends/actions.py +++ b/sentience/backends/actions.py @@ -21,6 +21,7 @@ import time from typing import TYPE_CHECKING, Any, Literal +from ..cursor_policy import CursorPolicy, build_human_cursor_path from ..models import ActionResult, BBox, Snapshot if TYPE_CHECKING: @@ -33,6 +34,7 @@ async def click( button: Literal["left", "right", "middle"] = "left", click_count: int = 1, move_first: bool = True, + cursor_policy: CursorPolicy | None = None, ) -> ActionResult: """ Click at coordinates using the backend. 
@@ -61,21 +63,47 @@ async def click( # Resolve coordinates x, y = _resolve_coordinates(target) + cursor_meta: dict | None = None try: # Optional mouse move for hover effects if move_first: - await backend.mouse_move(x, y) - await asyncio.sleep(0.02) # Brief pause for hover + if cursor_policy is not None and cursor_policy.mode == "human": + pos = getattr(backend, "_sentience_cursor_pos", None) + if not isinstance(pos, tuple) or len(pos) != 2: + pos = (float(x), float(y)) + + cursor_meta = build_human_cursor_path( + start=(float(pos[0]), float(pos[1])), + target=(float(x), float(y)), + policy=cursor_policy, + ) + pts = cursor_meta.get("path", []) + duration_ms_move = int(cursor_meta.get("duration_ms") or 0) + per_step_s = ( + (duration_ms_move / max(1, len(pts))) / 1000.0 if duration_ms_move > 0 else 0.0 + ) + for p in pts: + await backend.mouse_move(float(p["x"]), float(p["y"])) + if per_step_s > 0: + await asyncio.sleep(per_step_s) + pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0) + if pause_ms > 0: + await asyncio.sleep(pause_ms / 1000.0) + else: + await backend.mouse_move(x, y) + await asyncio.sleep(0.02) # Brief pause for hover # Perform click await backend.mouse_click(x, y, button=button, click_count=click_count) + setattr(backend, "_sentience_cursor_pos", (float(x), float(y))) duration_ms = int((time.time() - start_time) * 1000) return ActionResult( success=True, duration_ms=duration_ms, outcome="dom_updated", + cursor=cursor_meta, ) except Exception as e: duration_ms = int((time.time() - start_time) * 1000) @@ -84,6 +112,7 @@ async def click( duration_ms=duration_ms, outcome="error", error={"code": "click_failed", "reason": str(e)}, + cursor=cursor_meta, ) diff --git a/sentience/cursor_policy.py b/sentience/cursor_policy.py new file mode 100644 index 0000000..ec9d4b6 --- /dev/null +++ b/sentience/cursor_policy.py @@ -0,0 +1,135 @@ +""" +Human-like cursor movement policy + metadata. + +This is intentionally SDK-local (no snapshot schema changes). 
It is used by actions to: +- generate more realistic mouse movement (multiple moves with easing, optional overshoot/jitter) +- emit trace/debug metadata describing the movement path +""" + +from __future__ import annotations + +import math +import random +from dataclasses import dataclass + + +@dataclass(frozen=True) +class CursorPolicy: + """ + Policy for cursor movement. + + - mode="instant": current behavior (single click without multi-step motion) + - mode="human": move with a curved path + optional jitter/overshoot + """ + + mode: str = "instant" # "instant" | "human" + + # Motion shaping (human mode) + steps: int | None = None + duration_ms: int | None = None + jitter_px: float = 1.0 + overshoot_px: float = 6.0 + pause_before_click_ms: int = 20 + + # Determinism hook for tests/repro + seed: int | None = None + + +def _clamp(v: float, lo: float, hi: float) -> float: + return max(lo, min(hi, v)) + + +def _ease_in_out(t: float) -> float: + # Smoothstep-ish easing + return t * t * (3 - 2 * t) + + +def _bezier(p0: tuple[float, float], p1: tuple[float, float], p2: tuple[float, float], p3: tuple[float, float], t: float) -> tuple[float, float]: + u = 1.0 - t + tt = t * t + uu = u * u + uuu = uu * u + ttt = tt * t + x = uuu * p0[0] + 3 * uu * t * p1[0] + 3 * u * tt * p2[0] + ttt * p3[0] + y = uuu * p0[1] + 3 * uu * t * p1[1] + 3 * u * tt * p2[1] + ttt * p3[1] + return (x, y) + + +def build_human_cursor_path( + *, + start: tuple[float, float], + target: tuple[float, float], + policy: CursorPolicy, +) -> dict: + """ + Build a human-like cursor path and metadata. + + Returns a dict suitable for attaching to ActionResult/trace payloads: + { + "mode": "human", + "from": {"x":..., "y":...}, + "to": {"x":..., "y":...}, + "steps": ..., + "duration_ms": ..., + "pause_before_click_ms": ..., + "jitter_px": ..., + "overshoot_px": ..., + "path": [{"x":..., "y":..., "t":...}, ...] 
+ } + """ + rng = random.Random(policy.seed) + + x0, y0 = start + x1, y1 = target + dx = x1 - x0 + dy = y1 - y0 + dist = math.hypot(dx, dy) + + # Defaults based on distance (bounded) + steps = int(policy.steps if policy.steps is not None else _clamp(10 + dist / 25.0, 12, 40)) + duration_ms = int(policy.duration_ms if policy.duration_ms is not None else _clamp(120 + dist * 0.9, 120, 700)) + + # Control points: offset roughly perpendicular to travel direction + if dist < 1e-6: + dist = 1.0 + ux, uy = dx / dist, dy / dist + px, py = -uy, ux + curve_mag = _clamp(dist / 3.5, 10.0, 140.0) + curve_mag *= rng.uniform(0.5, 1.2) + + c1 = (x0 + dx * 0.25 + px * curve_mag, y0 + dy * 0.25 + py * curve_mag) + c2 = (x0 + dx * 0.75 - px * curve_mag, y0 + dy * 0.75 - py * curve_mag) + + overshoot = float(policy.overshoot_px or 0.0) + overshoot_point = (x1 + ux * overshoot, y1 + uy * overshoot) if overshoot > 0 else (x1, y1) + + pts: list[dict] = [] + for i in range(steps): + t_raw = 0.0 if steps <= 1 else i / (steps - 1) + t = _ease_in_out(t_raw) + bx, by = _bezier((x0, y0), c1, c2, overshoot_point, t) + + # Small jitter, decaying near target + jitter_scale = float(policy.jitter_px) * (1.0 - t_raw) * 0.9 + jx = rng.uniform(-jitter_scale, jitter_scale) + jy = rng.uniform(-jitter_scale, jitter_scale) + + pts.append({"x": bx + jx, "y": by + jy, "t": round(t_raw, 4)}) + + # If we overshot, add a small correction segment back to target. 
+ if overshoot > 0: + pts.append({"x": x1, "y": y1, "t": 1.0}) + + return { + "mode": "human", + "from": {"x": x0, "y": y0}, + "to": {"x": x1, "y": y1}, + "steps": steps, + "duration_ms": duration_ms, + "pause_before_click_ms": int(policy.pause_before_click_ms), + "jitter_px": float(policy.jitter_px), + "overshoot_px": overshoot, + # Keep path bounded for trace size + "path": pts[:64], + } + diff --git a/sentience/extension/background.js b/sentience/extension/background.js index 2923f55..aff49b0 100644 --- a/sentience/extension/background.js +++ b/sentience/extension/background.js @@ -28,14 +28,14 @@ async function handleSnapshotProcessing(rawData, options = {}) { const startTime = performance.now(); try { if (!Array.isArray(rawData)) throw new Error("rawData must be an array"); - if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), + if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), !wasmReady) throw new Error("WASM module not initialized"); let analyzedElements, prunedRawData; try { const wasmPromise = new Promise((resolve, reject) => { try { let result; - result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData), + result = options.limit || options.filter ? 
analyze_page_with_options(rawData, options) : analyze_page(rawData), resolve(result); } catch (e) { reject(e); @@ -101,4 +101,4 @@ initWASM().catch(err => {}), chrome.runtime.onMessage.addListener((request, send event.preventDefault(); }), self.addEventListener("unhandledrejection", event => { event.preventDefault(); -}); \ No newline at end of file +}); diff --git a/sentience/extension/content.js b/sentience/extension/content.js index b65cfb5..97923a2 100644 --- a/sentience/extension/content.js +++ b/sentience/extension/content.js @@ -82,7 +82,7 @@ if (!elements || !Array.isArray(elements)) return; removeOverlay(); const host = document.createElement("div"); - host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", + host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", document.body.appendChild(host); const shadow = host.attachShadow({ mode: "closed" @@ -94,15 +94,15 @@ let color; color = isTarget ? "#FF0000" : isPrimary ? "#0066FF" : "#00FF00"; const importanceRatio = maxImportance > 0 ? importance / maxImportance : .5, borderOpacity = isTarget ? 1 : isPrimary ? .9 : Math.max(.4, .5 + .5 * importanceRatio), fillOpacity = .2 * borderOpacity, borderWidth = isTarget ? 2 : isPrimary ? 
1.5 : Math.max(.5, Math.round(2 * importanceRatio)), hexOpacity = Math.round(255 * fillOpacity).toString(16).padStart(2, "0"), box = document.createElement("div"); - if (box.style.cssText = `\n position: absolute;\n left: ${bbox.x}px;\n top: ${bbox.y}px;\n width: ${bbox.width}px;\n height: ${bbox.height}px;\n border: ${borderWidth}px solid ${color};\n background-color: ${color}${hexOpacity};\n box-sizing: border-box;\n opacity: ${borderOpacity};\n pointer-events: none;\n `, + if (box.style.cssText = `\n position: absolute;\n left: ${bbox.x}px;\n top: ${bbox.y}px;\n width: ${bbox.width}px;\n height: ${bbox.height}px;\n border: ${borderWidth}px solid ${color};\n background-color: ${color}${hexOpacity};\n box-sizing: border-box;\n opacity: ${borderOpacity};\n pointer-events: none;\n `, importance > 0 || isPrimary) { const badge = document.createElement("span"); - badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, + badge.textContent = isPrimary ? 
`⭐${importance}` : `${importance}`, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, box.appendChild(badge); } if (isTarget) { const targetIndicator = document.createElement("span"); - targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", + targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", box.appendChild(targetIndicator); } shadow.appendChild(box); @@ -122,7 +122,7 @@ if (!grids || !Array.isArray(grids)) return; removeOverlay(); const host = document.createElement("div"); - host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", + host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", document.body.appendChild(host); const shadow = host.attachShadow({ mode: "closed" @@ -138,10 +138,10 @@ let labelText = grid.label ? 
`Grid ${grid.grid_id}: ${grid.label}` : `Grid ${grid.grid_id}`; grid.is_dominant && (labelText = `⭐ ${labelText} (dominant)`); const badge = document.createElement("span"); - if (badge.textContent = labelText, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, + if (badge.textContent = labelText, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, box.appendChild(badge), isTarget) { const targetIndicator = document.createElement("span"); - targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", + targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", box.appendChild(targetIndicator); } shadow.appendChild(box); @@ -155,7 +155,7 @@ let overlayTimeout = null; function removeOverlay() { const existing = document.getElementById(OVERLAY_HOST_ID); - existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), + existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), overlayTimeout = null); } -}(); \ No newline at end of file +}(); diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index abaee4b..daca8c4 100644 --- a/sentience/extension/injected_api.js +++ b/sentience/extension/injected_api.js @@ -112,7 +112,7 @@ if (labelEl) { let text = ""; try { - if (text = (labelEl.innerText || "").trim(), !text 
&& labelEl.textContent && (text = labelEl.textContent.trim()), + if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), !text && labelEl.getAttribute) { const ariaLabel = labelEl.getAttribute("aria-label"); ariaLabel && (text = ariaLabel.trim()); @@ -292,7 +292,7 @@ }); const checkStable = () => { const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime; - timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(), + timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(), resolve()) : setTimeout(checkStable, 50); }; checkStable(); @@ -318,7 +318,7 @@ }); const checkQuiet = () => { const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime; - timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), + timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), resolve()) : setTimeout(checkQuiet, 50); }; checkQuiet(); @@ -437,7 +437,7 @@ }(el); let safeValue = null, valueRedacted = null; try { - if (void 0 !== el.value || el.getAttribute && null !== el.getAttribute("value")) if (isPasswordInput) safeValue = null, + if (void 0 !== el.value || el.getAttribute && null !== el.getAttribute("value")) if (isPasswordInput) safeValue = null, valueRedacted = "true"; else { const rawValue = void 0 !== el.value ? String(el.value) : String(el.getAttribute("value")); safeValue = rawValue.length > 200 ? 
rawValue.substring(0, 200) : rawValue, valueRedacted = "false"; @@ -537,8 +537,8 @@ const requestId = `iframe-${idx}-${Date.now()}`, timeout = setTimeout(() => { resolve(null); }, 5e3), listener = event => { - "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), - window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot, + "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), + window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot, resolve({ iframe: iframe, data: event.data.snapshot, @@ -554,7 +554,7 @@ ...options, collectIframes: !0 } - }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), + }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null)); } catch (error) { clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null); @@ -604,7 +604,7 @@ }, 25e3), listener = e => { if ("SENTIENCE_SNAPSHOT_RESULT" === e.data.type && e.data.requestId === requestId) { if (resolved) return; - resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), + resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), e.data.error ? 
reject(new Error(e.data.error)) : resolve({ elements: e.data.elements, raw_elements: e.data.raw_elements, @@ -621,7 +621,7 @@ options: options }, "*"); } catch (error) { - resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), + resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), reject(new Error(`Failed to send snapshot request: ${error.message}`))); } }); @@ -631,7 +631,7 @@ options.screenshot && (screenshot = await function(options) { return new Promise(resolve => { const requestId = Math.random().toString(36).substring(7), listener = e => { - "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), + "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), resolve(e.data.screenshot)); }; window.addEventListener("message", listener), window.postMessage({ @@ -690,15 +690,15 @@ } if (node.nodeType !== Node.ELEMENT_NODE) return; const tag = node.tagName.toLowerCase(); - if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), - "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), - "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), - "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? 
Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), + if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), + "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), + "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), + "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), "a" === tag) { const href = node.getAttribute("href"); markdown += href ? `](${href})` : "]", insideLink = !1; } - "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), + "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), insideLink || "h1" !== tag && "h2" !== tag && "h3" !== tag && "p" !== tag && "div" !== tag || (markdown += "\n"); }(tempDiv), markdown.replace(/\n{3,}/g, "\n\n").trim(); }(document.body) : function(root) { @@ -711,7 +711,7 @@ const style = window.getComputedStyle(node); if ("none" === style.display || "hidden" === style.visibility) return; const isBlock = "block" === style.display || "flex" === style.display || "P" === node.tagName || "DIV" === node.tagName; - isBlock && (text += " "), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), + isBlock && (text += " "), node.shadowRoot ? 
Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), isBlock && (text += "\n"); } } else text += node.textContent; @@ -810,25 +810,25 @@ } function startRecording(options = {}) { const {highlightColor: highlightColor = "#ff0000", successColor: successColor = "#00ff00", autoDisableTimeout: autoDisableTimeout = 18e5, keyboardShortcut: keyboardShortcut = "Ctrl+Shift+I"} = options; - if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), + if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), () => {}; window.sentience_registry_map = new Map, window.sentience_registry.forEach((el, idx) => { el && window.sentience_registry_map.set(el, idx); }); let highlightBox = document.getElementById("sentience-highlight-box"); - highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", - highlightBox.style.cssText = `\n position: fixed;\n pointer-events: none;\n z-index: 2147483647;\n border: 2px solid ${highlightColor};\n background: rgba(255, 0, 0, 0.1);\n display: none;\n transition: all 0.1s ease;\n box-sizing: border-box;\n `, + highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", + highlightBox.style.cssText = `\n position: fixed;\n pointer-events: none;\n z-index: 2147483647;\n border: 2px solid ${highlightColor};\n background: rgba(255, 0, 0, 0.1);\n display: none;\n transition: all 0.1s ease;\n box-sizing: border-box;\n `, document.body.appendChild(highlightBox)); let recordingIndicator = document.getElementById("sentience-recording-indicator"); - recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", - recordingIndicator.style.cssText = `\n position: fixed;\n top: 
0;\n left: 0;\n right: 0;\n height: 3px;\n background: ${highlightColor};\n z-index: 2147483646;\n pointer-events: none;\n `, + recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", + recordingIndicator.style.cssText = `\n position: fixed;\n top: 0;\n left: 0;\n right: 0;\n height: 3px;\n background: ${highlightColor};\n z-index: 2147483646;\n pointer-events: none;\n `, document.body.appendChild(recordingIndicator)), recordingIndicator.style.display = "block"; const mouseOverHandler = e => { const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; const rect = el.getBoundingClientRect(); - highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", - highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", + highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", + highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", highlightBox.style.height = rect.height + "px"; }, clickHandler = e => { e.preventDefault(), e.stopPropagation(); @@ -905,7 +905,7 @@ debug_snapshot: rawData }, jsonString = JSON.stringify(snippet, null, 2); navigator.clipboard.writeText(jsonString).then(() => { - highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", + highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", setTimeout(() => { highlightBox.style.border = `2px solid ${highlightColor}`, highlightBox.style.background = "rgba(255, 0, 0, 0.1)"; }, 500); @@ -915,15 +915,15 @@ }; let timeoutId = null; const stopRecording = () => { - document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), - 
document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), - timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), + document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), + document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), + timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), window.sentience_registry_map && window.sentience_registry_map.clear(), window.sentience_stopRecording === stopRecording && delete window.sentience_stopRecording; }, keyboardHandler = e => { - (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), + (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), stopRecording()); }; - return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), + return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), document.addEventListener("keydown", keyboardHandler, !0), autoDisableTimeout > 0 && (timeoutId = setTimeout(() => { stopRecording(); }, autoDisableTimeout)), window.sentience_stopRecording = stopRecording, stopRecording; @@ -992,4 +992,4 @@ } }), window.sentience_iframe_handler_setup = !0)); })(); -}(); \ No newline at end of file +}(); diff --git a/sentience/extension/pkg/sentience_core.js b/sentience/extension/pkg/sentience_core.js index bb9cae0..c50ad61 100644 --- a/sentience/extension/pkg/sentience_core.js +++ b/sentience/extension/pkg/sentience_core.js @@ -25,7 +25,7 @@ function __wbg_get_imports() { }, __wbg___wbindgen_bigint_get_as_i64_8fcf4ce7f1ca72a2: function(arg0, arg1) { const v = getObject(arg1), ret = "bigint" == typeof v ? 
v : void 0; - getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? BigInt(0) : ret, !0), + getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? BigInt(0) : ret, !0), getDataViewMemory0().setInt32(arg0 + 0, !isLikeNone(ret), !0); }, __wbg___wbindgen_boolean_get_bbbb1c18aa2f5e25: function(arg0) { @@ -224,7 +224,7 @@ function getArrayU8FromWasm0(ptr, len) { let cachedDataViewMemory0 = null; function getDataViewMemory0() { - return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), + return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), cachedDataViewMemory0; } @@ -235,7 +235,7 @@ function getStringFromWasm0(ptr, len) { let cachedUint8ArrayMemory0 = null; function getUint8ArrayMemory0() { - return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), + return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), cachedUint8ArrayMemory0; } @@ -264,7 +264,7 @@ function isLikeNone(x) { function passStringToWasm0(arg, malloc, realloc) { if (void 0 === realloc) { const buf = cachedTextEncoder.encode(arg), ptr = malloc(buf.length, 1) >>> 0; - return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, + return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, ptr; } let len = arg.length, ptr = malloc(len, 1) >>> 0; @@ -319,7 +319,7 @@ const cachedTextEncoder = new TextEncoder; let wasmModule, wasm, 
WASM_VECTOR_LEN = 0; function __wbg_finalize_init(instance, module) { - return wasm = instance.exports, wasmModule = module, cachedDataViewMemory0 = null, + return wasm = instance.exports, wasmModule = module, cachedDataViewMemory0 = null, cachedUint8ArrayMemory0 = null, wasm; } @@ -360,7 +360,7 @@ function initSync(module) { async function __wbg_init(module_or_path) { if (void 0 !== wasm) return wasm; - void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), + void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), void 0 === module_or_path && (module_or_path = new URL("sentience_core_bg.wasm", import.meta.url)); const imports = __wbg_get_imports(); ("string" == typeof module_or_path || "function" == typeof Request && module_or_path instanceof Request || "function" == typeof URL && module_or_path instanceof URL) && (module_or_path = fetch(module_or_path)); @@ -368,4 +368,4 @@ async function __wbg_init(module_or_path) { return __wbg_finalize_init(instance, module); } -export { initSync, __wbg_init as default }; \ No newline at end of file +export { initSync, __wbg_init as default }; diff --git a/sentience/models.py b/sentience/models.py index f2083aa..9b483fa 100644 --- a/sentience/models.py +++ b/sentience/models.py @@ -593,6 +593,8 @@ class ActionResult(BaseModel): url_changed: bool | None = None snapshot_after: Snapshot | None = None error: dict | None = None + # Optional action metadata (e.g., human-like cursor movement path) + cursor: dict[str, Any] | None = None class WaitResult(BaseModel): @@ -675,6 +677,8 @@ class AgentActionResult(BaseModel): url_changed: bool | None = None error: str | None = None message: str | None = None # For FINISH action + # Optional: action metadata (e.g., human-like cursor movement path) + cursor: dict[str, Any] | None = None def __getitem__(self, key): """ 
diff --git a/tests/test_backends.py b/tests/test_backends.py index a9b7d85..4f7da32 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -12,6 +12,7 @@ import pytest +from sentience import CursorPolicy from sentience.backends import ( BrowserBackend, BrowserUseAdapter, @@ -574,6 +575,30 @@ async def test_click_double(self, backend: CDPBackendV0, transport: MockCDPTrans ] assert press_events[0][1]["clickCount"] == 2 + @pytest.mark.asyncio + async def test_click_human_cursor_policy( + self, backend: CDPBackendV0, transport: MockCDPTransport + ) -> None: + """Opt-in: human-like cursor movement should emit multiple mouseMoved events and return cursor metadata.""" + policy = CursorPolicy( + mode="human", + steps=6, + duration_ms=0, + jitter_px=0.0, + overshoot_px=0.0, + pause_before_click_ms=0, + seed=123, + ) + result = await click(backend, (100, 200), cursor_policy=policy) + + assert result.success is True + assert result.cursor is not None + assert result.cursor.get("mode") == "human" + + mouse_events = [c for c in transport.calls if c[0] == "Input.dispatchMouseEvent"] + # Expect more than the default (move, press, release) + assert len(mouse_events) > 3 + @pytest.mark.asyncio async def test_type_text_simple( self, backend: CDPBackendV0, transport: MockCDPTransport