SentienceAPI
diff --git a/‎sentience/canonicalization.py‎
Lines changed: 207 additions & 0 deletions b/‎sentience/canonicalization.py‎
Lines changed: 207 additions & 0 deletions
diff --git a/‎sentience/extension/background.js‎
Lines changed: 3 additions & 3 deletions b/‎sentience/extension/background.js‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎sentience/extension/content.js‎
Lines changed: 6 additions & 6 deletions b/‎sentience/extension/content.js‎
Lines changed: 6 additions & 6 deletions
@@ -0,0 +1,207 @@
+"""
+Shared canonicalization utilities for snapshot comparison and indexing.
+
+This module provides consistent normalization functions used by both:
+- trace_indexing/indexer.py (for computing stable digests)
+- snapshot_diff.py (for computing diff_status labels)
+
+By sharing these helpers, we ensure consistent behavior:
+- Same text normalization (whitespace, case, length)
+- Same bbox rounding (2px precision)
+- Same change detection thresholds
+"""
+
+from typing import Any
+
+
+def normalize_text(text: str | None, max_len: int = 80) -> str:
+    """
+    Reduce text to a canonical form suitable for comparison.
+
+    Steps, applied in order:
+    - Split on runs of whitespace and rejoin with single spaces
+      (this also drops leading/trailing whitespace)
+    - Convert to lowercase
+    - Keep at most `max_len` characters
+
+    Args:
+        text: Text to normalize; None and "" both give ""
+        max_len: Maximum number of characters kept (default: 80)
+
+    Returns:
+        The normalized string
+
+    Examples:
+        >>> normalize_text("  Hello   World  ")
+        'hello world'
+        >>> normalize_text(None)
+        ''
+        >>> normalize_text("A  B  C", max_len=3)
+        'a b'
+    """
+    if text:
+        single_spaced = " ".join(text.split())
+        lowered = single_spaced.lower()
+        # Slicing is a no-op when the string is already short enough
+        return lowered[:max_len]
+
+    # None, "" and any other falsy input
+    return ""
+
+
+def round_bbox(bbox: dict[str, float], precision: int = 2) -> dict[str, int]:
+    """
+    Round bbox coordinates to reduce noise.
+
+    Snaps coordinates to grid of `precision` pixels to ignore
+    sub-pixel rendering differences. Exact halves follow Python's round()
+    (the quotient goes to the nearest even integer: 101 -> 100, 203 -> 204).
+
+    Args:
+        bbox: Bounding box with x, y, width, height; a missing or None
+            value is treated as 0
+        precision: Grid size in pixels (default: 2)
+
+    Returns:
+        Rounded bbox with integer coordinates
+
+    Examples:
+        >>> round_bbox({"x": 101, "y": 203, "width": 50, "height": 25})
+        {'x': 100, 'y': 204, 'width': 50, 'height': 24}
+    """
+    return {
+        key: round((bbox.get(key) or 0) / precision) * precision
+        for key in ("x", "y", "width", "height")
+    }
+
+
+def bbox_equal(bbox1: dict[str, Any], bbox2: dict[str, Any], threshold: float = 5.0) -> bool:
+    """
+    Tell whether two bboxes match within a per-field tolerance.
+
+    x, y, width and height are compared one by one; a key missing from
+    either bbox is read as 0.
+
+    Args:
+        bbox1: First bounding box
+        bbox2: Second bounding box
+        threshold: Largest per-field difference, in pixels, that still
+            counts as equal (default: 5.0)
+
+    Returns:
+        True if no field differs by more than threshold
+
+    Examples:
+        >>> bbox_equal({"x": 100, "y": 200, "width": 50, "height": 25},
+        ...            {"x": 102, "y": 200, "width": 50, "height": 25})
+        True
+    """
+    fields = ("x", "y", "width", "height")
+    return all(abs(bbox1.get(f, 0) - bbox2.get(f, 0)) <= threshold for f in fields)
+
+
+def bbox_changed(bbox1: dict[str, Any], bbox2: dict[str, Any], threshold: float = 5.0) -> bool:
+    """
+    Tell whether two bboxes differ by more than the threshold.
+
+    Negation of bbox_equal, named for readability in diff detection code.
+
+    Args:
+        bbox1: First bounding box
+        bbox2: Second bounding box
+        threshold: Tolerance in pixels, forwarded to bbox_equal (default: 5.0)
+
+    Returns:
+        False when bbox_equal reports a match, True otherwise
+    """
+    within_tolerance = bbox_equal(bbox1, bbox2, threshold)
+    return not within_tolerance
+
+
+def canonicalize_element(elem: dict[str, Any]) -> dict[str, Any]:
+    """
+    Create canonical representation of an element for comparison/hashing.
+
+    Extracts and normalizes the fields that matter for identity:
+    - id, role, normalized text, rounded bbox
+    - is_primary, is_clickable from visual_cues, else from the element itself
+
+    Args:
+        elem: Raw element dictionary, or a dict this function returned earlier
+
+    Returns:
+        Canonical element dictionary with normalized fields
+
+    Examples:
+        >>> canonicalize_element({
+        ...     "id": 1,
+        ...     "role": "button",
+        ...     "text": "  Click Me  ",
+        ...     "bbox": {"x": 101, "y": 200, "width": 50, "height": 25},
+        ...     "visual_cues": {"is_primary": True, "is_clickable": True}
+        ... })
+        {'id': 1, 'role': 'button', 'text_norm': 'click me', 'bbox': {'x': 100, 'y': 200, 'width': 50, 'height': 24}, 'is_primary': True, 'is_clickable': True}
+    """
+    cues = elem.get("visual_cues")
+    if not isinstance(cues, dict):
+        cues = {}
+
+    def flag(name: str) -> Any:
+        # visual_cues wins; canonical dicts keep the flag at the top level
+        return cues[name] if name in cues else elem.get(name, False)
+
+    if "text" in elem or "text_norm" not in elem:
+        text_norm = normalize_text(elem.get("text"))
+    else:  # canonical input: text was normalized already, keep it verbatim
+        text_norm = elem["text_norm"] or ""
+
+    return {
+        "id": elem.get("id"),
+        "role": elem.get("role", ""),
+        "text_norm": text_norm,
+        "bbox": round_bbox(elem.get("bbox") or {}),
+        "is_primary": flag("is_primary"),
+        "is_clickable": flag("is_clickable"),
+    }
+
+
+def content_equal(elem1: dict[str, Any], elem2: dict[str, Any]) -> bool:
+    """
+    Tell whether two elements carry the same content, position aside.
+
+    Both elements go through canonicalize_element first; the resulting
+    role, text_norm, is_primary and is_clickable are then compared.
+    id and bbox play no part in the result.
+
+    Args:
+        elem1: First element
+        elem2: Second element
+
+    Returns:
+        True if all four compared fields match after canonicalization,
+        False as soon as one of them differs
+    """
+    compared = ("role", "text_norm", "is_primary", "is_clickable")
+
+    left = canonicalize_element(elem1)
+    right = canonicalize_element(elem2)
+
+    # Checked in this order; stops at the first mismatch
+    return all(left[name] == right[name] for name in compared)
+
+
+def content_changed(elem1: dict[str, Any], elem2: dict[str, Any]) -> bool:
+    """
+    Tell whether two elements differ in content, position aside.
+
+    Negation of content_equal, named for readability in diff detection code.
+
+    Args:
+        elem1: First element
+        elem2: Second element
+
+    Returns:
+        False when content_equal reports a match, True otherwise
+    """
+    same_content = content_equal(elem1, elem2)
+    return not same_content
@@ -28,14 +28,14 @@ async function handleSnapshotProcessing(rawData, options = {}) {
     const startTime = performance.now();
     try {
         if (!Array.isArray(rawData)) throw new Error("rawData must be an array");
-        if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), 
+        if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(),
         !wasmReady) throw new Error("WASM module not initialized");
         let analyzedElements, prunedRawData;
         try {
             const wasmPromise = new Promise((resolve, reject) => {
                 try {
                     let result;
-                    result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData), 
+                    result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData),
                     resolve(result);
                 } catch (e) {
                     reject(e);
@@ -101,4 +101,4 @@ initWASM().catch(err => {}), chrome.runtime.onMessage.addListener((request, send
     event.preventDefault();
 }), self.addEventListener("unhandledrejection", event => {
     event.preventDefault();
-});
+});
@@ -82,7 +82,7 @@
                 if (!elements || !Array.isArray(elements)) return;
                 removeOverlay();
                 const host = document.createElement("div");
-                host.id = OVERLAY_HOST_ID, host.style.cssText = "\n        position: fixed !important;\n        top: 0 !important;\n        left: 0 !important;\n        width: 100vw !important;\n        height: 100vh !important;\n        pointer-events: none !important;\n        z-index: 2147483647 !important;\n        margin: 0 !important;\n        padding: 0 !important;\n    ", 
+                host.id = OVERLAY_HOST_ID, host.style.cssText = "\n        position: fixed !important;\n        top: 0 !important;\n        left: 0 !important;\n        width: 100vw !important;\n        height: 100vh !important;\n        pointer-events: none !important;\n        z-index: 2147483647 !important;\n        margin: 0 !important;\n        padding: 0 !important;\n    ",
                 document.body.appendChild(host);
                 const shadow = host.attachShadow({
                     mode: "closed"
@@ -94,15 +94,15 @@
                     let color;
                     color = isTarget ? "#FF0000" : isPrimary ? "#0066FF" : "#00FF00";
                     const importanceRatio = maxImportance > 0 ? importance / maxImportance : .5, borderOpacity = isTarget ? 1 : isPrimary ? .9 : Math.max(.4, .5 + .5 * importanceRatio), fillOpacity = .2 * borderOpacity, borderWidth = isTarget ? 2 : isPrimary ? 1.5 : Math.max(.5, Math.round(2 * importanceRatio)), hexOpacity = Math.round(255 * fillOpacity).toString(16).padStart(2, "0"), box = document.createElement("div");
-                    if (box.style.cssText = `\n            position: absolute;\n            left: ${bbox.x}px;\n            top: ${bbox.y}px;\n            width: ${bbox.width}px;\n            height: ${bbox.height}px;\n            border: ${borderWidth}px solid ${color};\n            background-color: ${color}${hexOpacity};\n            box-sizing: border-box;\n            opacity: ${borderOpacity};\n            pointer-events: none;\n        `, 
+                    if (box.style.cssText = `\n            position: absolute;\n            left: ${bbox.x}px;\n            top: ${bbox.y}px;\n            width: ${bbox.width}px;\n            height: ${bbox.height}px;\n            border: ${borderWidth}px solid ${color};\n            background-color: ${color}${hexOpacity};\n            box-sizing: border-box;\n            opacity: ${borderOpacity};\n            pointer-events: none;\n        `,
                     importance > 0 || isPrimary) {
                         const badge = document.createElement("span");
-                        badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n                position: absolute;\n                top: -18px;\n                left: 0;\n                background: ${color};\n                color: white;\n                font-size: 11px;\n                font-weight: bold;\n                padding: 2px 6px;\n                font-family: Arial, sans-serif;\n                border-radius: 3px;\n                opacity: 0.95;\n                white-space: nowrap;\n                pointer-events: none;\n            `, 
+                        badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n                position: absolute;\n                top: -18px;\n                left: 0;\n                background: ${color};\n                color: white;\n                font-size: 11px;\n                font-weight: bold;\n                padding: 2px 6px;\n                font-family: Arial, sans-serif;\n                border-radius: 3px;\n                opacity: 0.95;\n                white-space: nowrap;\n                pointer-events: none;\n            `,
                         box.appendChild(badge);
                     }
                     if (isTarget) {
                         const targetIndicator = document.createElement("span");
-                        targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n                position: absolute;\n                top: -18px;\n                right: 0;\n                font-size: 16px;\n                pointer-events: none;\n            ", 
+                        targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n                position: absolute;\n                top: -18px;\n                right: 0;\n                font-size: 16px;\n                pointer-events: none;\n            ",
                         box.appendChild(targetIndicator);
                     }
                     shadow.appendChild(box);
@@ -120,7 +120,7 @@
     let overlayTimeout = null;
     function removeOverlay() {
         const existing = document.getElementById(OVERLAY_HOST_ID);
-        existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), 
+        existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout),
         overlayTimeout = null);
     }
-}();
+}();