diff --git a/sentience/canonicalization.py b/sentience/canonicalization.py new file mode 100644 index 0000000..cce1107 --- /dev/null +++ b/sentience/canonicalization.py @@ -0,0 +1,207 @@ +""" +Shared canonicalization utilities for snapshot comparison and indexing. + +This module provides consistent normalization functions used by both: +- trace_indexing/indexer.py (for computing stable digests) +- snapshot_diff.py (for computing diff_status labels) + +By sharing these helpers, we ensure consistent behavior: +- Same text normalization (whitespace, case, length) +- Same bbox rounding (2px precision) +- Same change detection thresholds +""" + +from typing import Any + + +def normalize_text(text: str | None, max_len: int = 80) -> str: + """ + Normalize text for canonical comparison. + + Transforms: + - Trims leading/trailing whitespace + - Collapses internal whitespace to single spaces + - Lowercases + - Caps length + + Args: + text: Input text (may be None) + max_len: Maximum length to retain (default: 80) + + Returns: + Normalized text string (empty string if input is None) + + Examples: + >>> normalize_text(" Hello World ") + 'hello world' + >>> normalize_text(None) + '' + """ + if not text: + return "" + # Trim and collapse whitespace + normalized = " ".join(text.split()) + # Lowercase + normalized = normalized.lower() + # Cap length + if len(normalized) > max_len: + normalized = normalized[:max_len] + return normalized + + +def round_bbox(bbox: dict[str, float], precision: int = 2) -> dict[str, int]: + """ + Round bbox coordinates to reduce noise. + + Snaps coordinates to grid of `precision` pixels to ignore + sub-pixel rendering differences. 
+ + Args: + bbox: Bounding box with x, y, width, height + precision: Grid size in pixels (default: 2) + + Returns: + Rounded bbox with integer coordinates + + Examples: + >>> round_bbox({"x": 101, "y": 203, "width": 50, "height": 25}) + {'x': 100, 'y': 204, 'width': 50, 'height': 24} + """ + return { + "x": round(bbox.get("x", 0) / precision) * precision, + "y": round(bbox.get("y", 0) / precision) * precision, + "width": round(bbox.get("width", 0) / precision) * precision, + "height": round(bbox.get("height", 0) / precision) * precision, + } + + +def bbox_equal(bbox1: dict[str, Any], bbox2: dict[str, Any], threshold: float = 5.0) -> bool: + """ + Check if two bboxes are equal within a threshold. + + Args: + bbox1: First bounding box + bbox2: Second bounding box + threshold: Maximum allowed difference in pixels (default: 5.0) + + Returns: + True if all bbox properties differ by at most threshold + + Examples: + >>> bbox_equal({"x": 100, "y": 200, "width": 50, "height": 25}, + ... {"x": 102, "y": 200, "width": 50, "height": 25}) # 2px difference is within the 5px threshold + True + """ + return ( + abs(bbox1.get("x", 0) - bbox2.get("x", 0)) <= threshold + and abs(bbox1.get("y", 0) - bbox2.get("y", 0)) <= threshold + and abs(bbox1.get("width", 0) - bbox2.get("width", 0)) <= threshold + and abs(bbox1.get("height", 0) - bbox2.get("height", 0)) <= threshold + ) + + +def bbox_changed(bbox1: dict[str, Any], bbox2: dict[str, Any], threshold: float = 5.0) -> bool: + """ + Check if two bboxes differ beyond the threshold. + + This is the inverse of bbox_equal, provided for semantic clarity + in diff detection code. 
+ + Args: + bbox1: First bounding box + bbox2: Second bounding box + threshold: Maximum allowed difference in pixels (default: 5.0) + + Returns: + True if any bbox property differs by more than threshold + """ + return not bbox_equal(bbox1, bbox2, threshold) + + +def canonicalize_element(elem: dict[str, Any]) -> dict[str, Any]: + """ + Create canonical representation of an element for comparison/hashing. + + Extracts and normalizes the fields that matter for identity: + - id, role, normalized text, rounded bbox + - is_primary, is_clickable from visual_cues + + Args: + elem: Raw element dictionary + + Returns: + Canonical element dictionary with normalized fields + + Examples: + >>> canonicalize_element({ + ... "id": 1, + ... "role": "button", + ... "text": " Click Me ", + ... "bbox": {"x": 101, "y": 200, "width": 50, "height": 25}, + ... "visual_cues": {"is_primary": True, "is_clickable": True} + ... }) + {'id': 1, 'role': 'button', 'text_norm': 'click me', 'bbox': {'x': 100, 'y': 200, 'width': 50, 'height': 24}, 'is_primary': True, 'is_clickable': True} + """ + # Extract is_primary and is_clickable from visual_cues if present + visual_cues = elem.get("visual_cues", {}) + is_primary = ( + visual_cues.get("is_primary", False) + if isinstance(visual_cues, dict) + else elem.get("is_primary", False) + ) + is_clickable = ( + visual_cues.get("is_clickable", False) + if isinstance(visual_cues, dict) + else elem.get("is_clickable", False) + ) + + return { + "id": elem.get("id"), + "role": elem.get("role", ""), + "text_norm": normalize_text(elem.get("text")), + "bbox": round_bbox(elem.get("bbox", {"x": 0, "y": 0, "width": 0, "height": 0})), + "is_primary": is_primary, + "is_clickable": is_clickable, + } + + +def content_equal(elem1: dict[str, Any], elem2: dict[str, Any]) -> bool: + """ + Check if two elements have equal content (ignoring position). + + Compares normalized text, role, and visual cues. 
+ + Args: + elem1: First element (raw element dict with "text"/"visual_cues") + elem2: Second element (raw element dict with "text"/"visual_cues") + + Returns: + True if content is equal after normalization + """ + # Normalize both elements + c1 = canonicalize_element(elem1) + c2 = canonicalize_element(elem2) + + return ( + c1["role"] == c2["role"] + and c1["text_norm"] == c2["text_norm"] + and c1["is_primary"] == c2["is_primary"] + and c1["is_clickable"] == c2["is_clickable"] + ) + + +def content_changed(elem1: dict[str, Any], elem2: dict[str, Any]) -> bool: + """ + Check if two elements have different content (ignoring position). + + This is the inverse of content_equal, provided for semantic clarity + in diff detection code. + + Args: + elem1: First element + elem2: Second element + + Returns: + True if content differs after normalization + """ + return not content_equal(elem1, elem2) diff --git a/sentience/extension/background.js b/sentience/extension/background.js index 2923f55..aff49b0 100644 --- a/sentience/extension/background.js +++ b/sentience/extension/background.js @@ -28,14 +28,14 @@ async function handleSnapshotProcessing(rawData, options = {}) { const startTime = performance.now(); try { if (!Array.isArray(rawData)) throw new Error("rawData must be an array"); - if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), + if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), !wasmReady) throw new Error("WASM module not initialized"); let analyzedElements, prunedRawData; try { const wasmPromise = new Promise((resolve, reject) => { try { let result; - result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData), + result = options.limit || options.filter ? 
analyze_page_with_options(rawData, options) : analyze_page(rawData), resolve(result); } catch (e) { reject(e); @@ -101,4 +101,4 @@ initWASM().catch(err => {}), chrome.runtime.onMessage.addListener((request, send event.preventDefault(); }), self.addEventListener("unhandledrejection", event => { event.preventDefault(); -}); \ No newline at end of file +}); diff --git a/sentience/extension/content.js b/sentience/extension/content.js index e94cde1..9d5b3bf 100644 --- a/sentience/extension/content.js +++ b/sentience/extension/content.js @@ -82,7 +82,7 @@ if (!elements || !Array.isArray(elements)) return; removeOverlay(); const host = document.createElement("div"); - host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", + host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", document.body.appendChild(host); const shadow = host.attachShadow({ mode: "closed" @@ -94,15 +94,15 @@ let color; color = isTarget ? "#FF0000" : isPrimary ? "#0066FF" : "#00FF00"; const importanceRatio = maxImportance > 0 ? importance / maxImportance : .5, borderOpacity = isTarget ? 1 : isPrimary ? .9 : Math.max(.4, .5 + .5 * importanceRatio), fillOpacity = .2 * borderOpacity, borderWidth = isTarget ? 2 : isPrimary ? 
1.5 : Math.max(.5, Math.round(2 * importanceRatio)), hexOpacity = Math.round(255 * fillOpacity).toString(16).padStart(2, "0"), box = document.createElement("div"); - if (box.style.cssText = `\n position: absolute;\n left: ${bbox.x}px;\n top: ${bbox.y}px;\n width: ${bbox.width}px;\n height: ${bbox.height}px;\n border: ${borderWidth}px solid ${color};\n background-color: ${color}${hexOpacity};\n box-sizing: border-box;\n opacity: ${borderOpacity};\n pointer-events: none;\n `, + if (box.style.cssText = `\n position: absolute;\n left: ${bbox.x}px;\n top: ${bbox.y}px;\n width: ${bbox.width}px;\n height: ${bbox.height}px;\n border: ${borderWidth}px solid ${color};\n background-color: ${color}${hexOpacity};\n box-sizing: border-box;\n opacity: ${borderOpacity};\n pointer-events: none;\n `, importance > 0 || isPrimary) { const badge = document.createElement("span"); - badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, + badge.textContent = isPrimary ? 
`⭐${importance}` : `${importance}`, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, box.appendChild(badge); } if (isTarget) { const targetIndicator = document.createElement("span"); - targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", + targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", box.appendChild(targetIndicator); } shadow.appendChild(box); @@ -120,7 +120,7 @@ let overlayTimeout = null; function removeOverlay() { const existing = document.getElementById(OVERLAY_HOST_ID); - existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), + existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), overlayTimeout = null); } -}(); \ No newline at end of file +}(); diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index c62bcab..69c7d36 100644 --- a/sentience/extension/injected_api.js +++ b/sentience/extension/injected_api.js @@ -112,7 +112,7 @@ if (labelEl) { let text = ""; try { - if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), + if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), !text && labelEl.getAttribute) { const ariaLabel = labelEl.getAttribute("aria-label"); ariaLabel && (text = ariaLabel.trim()); @@ -281,7 +281,7 @@ }); const checkStable = () => { const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime; - timeSinceLastChange >= quietPeriod || totalWait >= 
maxWait ? (observer.disconnect(), + timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(), resolve()) : setTimeout(checkStable, 50); }; checkStable(); @@ -301,7 +301,7 @@ }); const checkQuiet = () => { const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime; - timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), + timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), resolve()) : setTimeout(checkQuiet, 50); }; checkQuiet(); @@ -461,8 +461,8 @@ const requestId = `iframe-${idx}-${Date.now()}`, timeout = setTimeout(() => { resolve(null); }, 5e3), listener = event => { - "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), - window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot, + "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), + window.removeEventListener("message", listener), event.data.error ? 
resolve(null) : (event.data.snapshot, resolve({ iframe: iframe, data: event.data.snapshot, @@ -478,7 +478,7 @@ ...options, collectIframes: !0 } - }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), + }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null)); } catch (error) { clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null); @@ -528,7 +528,7 @@ }, 25e3), listener = e => { if ("SENTIENCE_SNAPSHOT_RESULT" === e.data.type && e.data.requestId === requestId) { if (resolved) return; - resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), + resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), e.data.error ? reject(new Error(e.data.error)) : resolve({ elements: e.data.elements, raw_elements: e.data.raw_elements, @@ -545,7 +545,7 @@ options: options }, "*"); } catch (error) { - resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), + resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), reject(new Error(`Failed to send snapshot request: ${error.message}`))); } }); @@ -555,7 +555,7 @@ options.screenshot && (screenshot = await function(options) { return new Promise(resolve => { const requestId = Math.random().toString(36).substring(7), listener = e => { - "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), + "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), resolve(e.data.screenshot)); }; window.addEventListener("message", listener), window.postMessage({ @@ -602,15 +602,15 @@ } if (node.nodeType !== Node.ELEMENT_NODE) return; const tag = node.tagName.toLowerCase(); - if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), - "h3" 
=== tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), - "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), - "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), + if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), + "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), + "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), + "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), "a" === tag) { const href = node.getAttribute("href"); markdown += href ? `](${href})` : "]", insideLink = !1; } - "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), + "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), insideLink || "h1" !== tag && "h2" !== tag && "h3" !== tag && "p" !== tag && "div" !== tag || (markdown += "\n"); }(tempDiv), markdown.replace(/\n{3,}/g, "\n\n").trim(); }(document.body) : function(root) { @@ -623,7 +623,7 @@ const style = window.getComputedStyle(node); if ("none" === style.display || "hidden" === style.visibility) return; const isBlock = "block" === style.display || "flex" === style.display || "P" === node.tagName || "DIV" === node.tagName; - isBlock && (text += " "), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), + isBlock && (text += " "), node.shadowRoot ? 
Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), isBlock && (text += "\n"); } } else text += node.textContent; @@ -722,25 +722,25 @@ } function startRecording(options = {}) { const {highlightColor: highlightColor = "#ff0000", successColor: successColor = "#00ff00", autoDisableTimeout: autoDisableTimeout = 18e5, keyboardShortcut: keyboardShortcut = "Ctrl+Shift+I"} = options; - if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), + if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), () => {}; window.sentience_registry_map = new Map, window.sentience_registry.forEach((el, idx) => { el && window.sentience_registry_map.set(el, idx); }); let highlightBox = document.getElementById("sentience-highlight-box"); - highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", - highlightBox.style.cssText = `\n position: fixed;\n pointer-events: none;\n z-index: 2147483647;\n border: 2px solid ${highlightColor};\n background: rgba(255, 0, 0, 0.1);\n display: none;\n transition: all 0.1s ease;\n box-sizing: border-box;\n `, + highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", + highlightBox.style.cssText = `\n position: fixed;\n pointer-events: none;\n z-index: 2147483647;\n border: 2px solid ${highlightColor};\n background: rgba(255, 0, 0, 0.1);\n display: none;\n transition: all 0.1s ease;\n box-sizing: border-box;\n `, document.body.appendChild(highlightBox)); let recordingIndicator = document.getElementById("sentience-recording-indicator"); - recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", - recordingIndicator.style.cssText = `\n position: fixed;\n top: 
0;\n left: 0;\n right: 0;\n height: 3px;\n background: ${highlightColor};\n z-index: 2147483646;\n pointer-events: none;\n `, + recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", + recordingIndicator.style.cssText = `\n position: fixed;\n top: 0;\n left: 0;\n right: 0;\n height: 3px;\n background: ${highlightColor};\n z-index: 2147483646;\n pointer-events: none;\n `, document.body.appendChild(recordingIndicator)), recordingIndicator.style.display = "block"; const mouseOverHandler = e => { const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; const rect = el.getBoundingClientRect(); - highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", - highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", + highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", + highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", highlightBox.style.height = rect.height + "px"; }, clickHandler = e => { e.preventDefault(), e.stopPropagation(); @@ -817,7 +817,7 @@ debug_snapshot: rawData }, jsonString = JSON.stringify(snippet, null, 2); navigator.clipboard.writeText(jsonString).then(() => { - highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", + highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", setTimeout(() => { highlightBox.style.border = `2px solid ${highlightColor}`, highlightBox.style.background = "rgba(255, 0, 0, 0.1)"; }, 500); @@ -827,15 +827,15 @@ }; let timeoutId = null; const stopRecording = () => { - document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), - 
document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), - timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), + document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), + document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), + timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), window.sentience_registry_map && window.sentience_registry_map.clear(), window.sentience_stopRecording === stopRecording && delete window.sentience_stopRecording; }, keyboardHandler = e => { - (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), + (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), stopRecording()); }; - return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), + return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), document.addEventListener("keydown", keyboardHandler, !0), autoDisableTimeout > 0 && (timeoutId = setTimeout(() => { stopRecording(); }, autoDisableTimeout)), window.sentience_stopRecording = stopRecording, stopRecording; @@ -895,4 +895,4 @@ } }), window.sentience_iframe_handler_setup = !0)); })(); -}(); \ No newline at end of file +}(); diff --git a/sentience/extension/pkg/sentience_core.js b/sentience/extension/pkg/sentience_core.js index ecba479..2696a64 100644 --- a/sentience/extension/pkg/sentience_core.js +++ b/sentience/extension/pkg/sentience_core.js @@ -47,7 +47,7 @@ function getArrayU8FromWasm0(ptr, len) { let cachedDataViewMemory0 = null; function getDataViewMemory0() { - return (null === cachedDataViewMemory0 || !0 === 
cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), + return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), cachedDataViewMemory0; } @@ -58,7 +58,7 @@ function getStringFromWasm0(ptr, len) { let cachedUint8ArrayMemory0 = null; function getUint8ArrayMemory0() { - return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), + return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), cachedUint8ArrayMemory0; } @@ -87,7 +87,7 @@ function isLikeNone(x) { function passStringToWasm0(arg, malloc, realloc) { if (void 0 === realloc) { const buf = cachedTextEncoder.encode(arg), ptr = malloc(buf.length, 1) >>> 0; - return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, + return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, ptr; } let len = arg.length, ptr = malloc(len, 1) >>> 0; @@ -188,7 +188,7 @@ function __wbg_get_imports() { return Number(getObject(arg0)); }, imports.wbg.__wbg___wbindgen_bigint_get_as_i64_6e32f5e6aff02e1d = function(arg0, arg1) { const v = getObject(arg1), ret = "bigint" == typeof v ? v : void 0; - getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? BigInt(0) : ret, !0), + getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? 
BigInt(0) : ret, !0), getDataViewMemory0().setInt32(arg0 + 0, !isLikeNone(ret), !0); }, imports.wbg.__wbg___wbindgen_boolean_get_dea25b33882b895b = function(arg0) { const v = getObject(arg0), ret = "boolean" == typeof v ? v : void 0; @@ -296,7 +296,7 @@ function __wbg_get_imports() { } function __wbg_finalize_init(instance, module) { - return wasm = instance.exports, __wbg_init.__wbindgen_wasm_module = module, cachedDataViewMemory0 = null, + return wasm = instance.exports, __wbg_init.__wbindgen_wasm_module = module, cachedDataViewMemory0 = null, cachedUint8ArrayMemory0 = null, wasm; } @@ -310,7 +310,7 @@ function initSync(module) { async function __wbg_init(module_or_path) { if (void 0 !== wasm) return wasm; - void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), + void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), void 0 === module_or_path && (module_or_path = new URL("sentience_core_bg.wasm", import.meta.url)); const imports = __wbg_get_imports(); ("string" == typeof module_or_path || "function" == typeof Request && module_or_path instanceof Request || "function" == typeof URL && module_or_path instanceof URL) && (module_or_path = fetch(module_or_path)); @@ -320,4 +320,4 @@ async function __wbg_init(module_or_path) { export { initSync }; -export default __wbg_init; \ No newline at end of file +export default __wbg_init; diff --git a/sentience/snapshot_diff.py b/sentience/snapshot_diff.py index 4464837..e92d37e 100644 --- a/sentience/snapshot_diff.py +++ b/sentience/snapshot_diff.py @@ -2,10 +2,12 @@ Snapshot comparison utilities for diff_status detection. Implements change detection logic for the Diff Overlay feature. 
-""" -from typing import Literal +Uses shared canonicalization helpers from canonicalization.py to ensure +consistent comparison behavior with trace_indexing/indexer.py. +""" +from .canonicalization import bbox_changed, content_changed from .models import Element, Snapshot @@ -18,55 +20,30 @@ class SnapshotDiff: - REMOVED: Element existed in previous but not in current - MODIFIED: Element exists in both but has changed - MOVED: Element exists in both but position changed - """ - - @staticmethod - def _has_bbox_changed(el1: Element, el2: Element, threshold: float = 5.0) -> bool: - """ - Check if element's bounding box has changed significantly. - Args: - el1: First element - el2: Second element - threshold: Position change threshold in pixels (default: 5.0) - - Returns: - True if position or size changed beyond threshold - """ - return ( - abs(el1.bbox.x - el2.bbox.x) > threshold - or abs(el1.bbox.y - el2.bbox.y) > threshold - or abs(el1.bbox.width - el2.bbox.width) > threshold - or abs(el1.bbox.height - el2.bbox.height) > threshold - ) + Uses canonicalized comparisons (normalized text, 5px bbox tolerance) to reduce + noise from insignificant changes like sub-pixel rendering differences + or whitespace variations. + """ @staticmethod - def _has_content_changed(el1: Element, el2: Element) -> bool: - """ - Check if element's content has changed. 
- - Args: - el1: First element - el2: Second element - - Returns: - True if text, role, or visual properties changed - """ - # Compare text content - if el1.text != el2.text: - return True - - # Compare role - if el1.role != el2.role: - return True - - # Compare visual cues - if el1.visual_cues.is_primary != el2.visual_cues.is_primary: - return True - if el1.visual_cues.is_clickable != el2.visual_cues.is_clickable: - return True - - return False + def _element_to_dict(el: Element) -> dict: + """Convert Element model to dict for canonicalization helpers.""" + return { + "id": el.id, + "role": el.role, + "text": el.text, + "bbox": { + "x": el.bbox.x, + "y": el.bbox.y, + "width": el.bbox.width, + "height": el.bbox.height, + }, + "visual_cues": { + "is_primary": el.visual_cues.is_primary, + "is_clickable": el.visual_cues.is_clickable, + }, + } @staticmethod def compute_diff_status( @@ -76,6 +53,10 @@ def compute_diff_status( """ Compare current snapshot with previous and set diff_status on elements. 
+ Uses canonicalized comparisons: + - Text is normalized (trimmed, collapsed whitespace, lowercased, capped at 80 chars) + - Bbox fields are compared with a 5px tolerance to ignore sub-pixel differences + Args: current: Current snapshot previous: Previous snapshot (None if this is the first snapshot) @@ -110,19 +91,23 @@ def compute_diff_status( # Element is new - mark as ADDED el_dict["diff_status"] = "ADDED" else: - # Element existed before - check for changes + # Element existed before - check for changes using canonicalized comparisons prev_el = previous_by_id[el.id] - bbox_changed = SnapshotDiff._has_bbox_changed(el, prev_el) - content_changed = SnapshotDiff._has_content_changed(el, prev_el) + # Convert to dicts for canonicalization helpers + el_data = SnapshotDiff._element_to_dict(el) + prev_el_data = SnapshotDiff._element_to_dict(prev_el) + + has_bbox_changed = bbox_changed(el_data["bbox"], prev_el_data["bbox"]) + has_content_changed = content_changed(el_data, prev_el_data) - if bbox_changed and content_changed: + if has_bbox_changed and has_content_changed: # Both position and content changed - mark as MODIFIED el_dict["diff_status"] = "MODIFIED" - elif bbox_changed: + elif has_bbox_changed: # Only position changed - mark as MOVED el_dict["diff_status"] = "MOVED" - elif content_changed: + elif has_content_changed: # Only content changed - mark as MODIFIED el_dict["diff_status"] = "MODIFIED" else: diff --git a/sentience/trace_indexing/indexer.py b/sentience/trace_indexing/indexer.py index 444086c..b70cc3d 100644 --- a/sentience/trace_indexing/indexer.py +++ b/sentience/trace_indexing/indexer.py @@ -9,6 +9,7 @@ from pathlib import Path from typing import Any, Optional +from ..canonicalization import canonicalize_element from .index_schema import ( ActionInfo, SnapshotInfo, @@ -20,30 +21,6 @@ ) -def _normalize_text(text: str | None, max_len: int = 80) -> str: - """Normalize text for digest: trim, collapse whitespace, lowercase, cap length.""" - if not text: - return "" - # Trim and collapse 
whitespace - normalized = " ".join(text.split()) - # Lowercase - normalized = normalized.lower() - # Cap length - if len(normalized) > max_len: - normalized = normalized[:max_len] - return normalized - - -def _round_bbox(bbox: dict[str, float], precision: int = 2) -> dict[str, int]: - """Round bbox coordinates to reduce noise (default: 2px precision).""" - return { - "x": round(bbox.get("x", 0) / precision) * precision, - "y": round(bbox.get("y", 0) / precision) * precision, - "width": round(bbox.get("width", 0) / precision) * precision, - "height": round(bbox.get("height", 0) / precision) * precision, - } - - def _compute_snapshot_digest(snapshot_data: dict[str, Any]) -> str: """ Compute stable digest of snapshot for diffing. @@ -55,31 +32,8 @@ def _compute_snapshot_digest(snapshot_data: dict[str, Any]) -> str: viewport = snapshot_data.get("viewport", {}) elements = snapshot_data.get("elements", []) - # Canonicalize elements - canonical_elements = [] - for elem in elements: - # Extract is_primary and is_clickable from visual_cues if present - visual_cues = elem.get("visual_cues", {}) - is_primary = ( - visual_cues.get("is_primary", False) - if isinstance(visual_cues, dict) - else elem.get("is_primary", False) - ) - is_clickable = ( - visual_cues.get("is_clickable", False) - if isinstance(visual_cues, dict) - else elem.get("is_clickable", False) - ) - - canonical_elem = { - "id": elem.get("id"), - "role": elem.get("role", ""), - "text_norm": _normalize_text(elem.get("text")), - "bbox": _round_bbox(elem.get("bbox", {"x": 0, "y": 0, "width": 0, "height": 0})), - "is_primary": is_primary, - "is_clickable": is_clickable, - } - canonical_elements.append(canonical_elem) + # Canonicalize elements using shared helper + canonical_elements = [canonicalize_element(elem) for elem in elements] # Sort by element id for determinism canonical_elements.sort(key=lambda e: e.get("id", 0))