From 9dd2eec391b0a5d101672ab29bdb49669e12eb30 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sat, 3 Jan 2026 16:33:10 -0800 Subject: [PATCH] add elements to step_end --- sentience/agent.py | 12 ++ sentience/extension/background.js | 6 +- sentience/extension/content.js | 2 +- sentience/extension/injected_api.js | 214 ++++++++++----------- sentience/llm_interaction_handler.py | 10 +- sentience/trace_event_builder.py | 16 +- sentience/tracer_factory.py | 1 + tests/test_trace_event_builder.py | 275 +++++++++++++++++++++++++++ 8 files changed, 416 insertions(+), 120 deletions(-) create mode 100644 tests/test_trace_event_builder.py diff --git a/sentience/agent.py b/sentience/agent.py index ec8433b..acd5c34 100644 --- a/sentience/agent.py +++ b/sentience/agent.py @@ -486,6 +486,11 @@ def act( # noqa: C901 "signals": verify_signals, } + # Build elements data for pre field (include diff_status from snap_with_diff) + # Use the same format as build_snapshot_event for consistency + snapshot_event_data = TraceEventBuilder.build_snapshot_event(snap_with_diff) + pre_elements = snapshot_event_data.get("elements", []) + # Build complete step_end event step_end_data = TraceEventBuilder.build_step_end_event( step_id=step_id, @@ -498,6 +503,7 @@ def act( # noqa: C901 llm_data=llm_data, exec_data=exec_data, verify_data=verify_data, + pre_elements=pre_elements, ) _safe_tracer_call( @@ -1045,6 +1051,11 @@ async def act( # noqa: C901 "signals": verify_signals, } + # Build elements data for pre field (include diff_status from snap_with_diff) + # Use the same format as build_snapshot_event for consistency + snapshot_event_data = TraceEventBuilder.build_snapshot_event(snap_with_diff) + pre_elements = snapshot_event_data.get("elements", []) + # Build complete step_end event step_end_data = TraceEventBuilder.build_step_end_event( step_id=step_id, @@ -1057,6 +1068,7 @@ async def act( # noqa: C901 llm_data=llm_data, exec_data=exec_data, verify_data=verify_data, + pre_elements=pre_elements, ) _safe_tracer_call( diff --git a/sentience/extension/background.js b/sentience/extension/background.js index 811303f..f359ba6 100644 --- a/sentience/extension/background.js +++ b/sentience/extension/background.js @@ -144,13 +144,13 @@ async function handleScreenshotCapture(_tabId, options = {}) { async function handleSnapshotProcessing(rawData, options = {}) { const MAX_ELEMENTS = 10000; // Safety limit to prevent hangs const startTime = performance.now(); - + try { // Safety check: limit element count to prevent hangs if (!Array.isArray(rawData)) { throw new Error('rawData must be an array'); } - + if (rawData.length > MAX_ELEMENTS) { console.warn(`[Sentience Background] ⚠️ Large dataset: ${rawData.length} elements. Limiting to ${MAX_ELEMENTS} to prevent hangs.`); rawData = rawData.slice(0, MAX_ELEMENTS); @@ -186,7 +186,7 @@ async function handleSnapshotProcessing(rawData, options = {}) { // Add timeout protection (18 seconds - less than content.js timeout) analyzedElements = await Promise.race([ wasmPromise, - new Promise((_, reject) => + new Promise((_, reject) => setTimeout(() => reject(new Error('WASM processing timeout (>18s)')), 18000) ) ]); diff --git a/sentience/extension/content.js b/sentience/extension/content.js index 62ae408..8d3b0d4 100644 --- a/sentience/extension/content.js +++ b/sentience/extension/content.js @@ -92,7 +92,7 @@ function handleSnapshotRequest(data) { if (responded) return; // Already responded via timeout responded = true; clearTimeout(timeoutId); - + const duration = performance.now() - startTime; // Handle Chrome extension errors (e.g., background script crashed) diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index 45c4337..e81c9be 100644 --- a/sentience/extension/injected_api.js +++ b/sentience/extension/injected_api.js @@ -66,10 +66,10 @@ // --- HELPER: Safe Class Name Extractor (Handles SVGAnimatedString) --- function getClassName(el) { if (!el || !el.className) return ''; - + // Handle string (HTML elements) if (typeof el.className === 'string') return el.className; - + // Handle SVGAnimatedString (SVG elements) if (typeof el.className === 'object') { if ('baseVal' in el.className && typeof el.className.baseVal === 'string') { @@ -85,17 +85,17 @@ return ''; } } - + return ''; } // --- HELPER: Paranoid String Converter (Handles SVGAnimatedString) --- function toSafeString(value) { if (value === null || value === undefined) return null; - + // 1. If it's already a primitive string, return it if (typeof value === 'string') return value; - + // 2. Handle SVG objects (SVGAnimatedString, SVGAnimatedNumber, etc.) if (typeof value === 'object') { // Try extracting baseVal (standard SVG property) @@ -114,7 +114,7 @@ return null; } } - + // 3. Last resort cast for primitives try { return String(value); @@ -127,9 +127,9 @@ // For SVG elements, get the fill or stroke color (SVGs use fill/stroke, not backgroundColor) function getSVGColor(el) { if (!el || el.tagName !== 'SVG') return null; - + const style = window.getComputedStyle(el); - + // Try fill first (most common for SVG icons) const fill = style.fill; if (fill && fill !== 'none' && fill !== 'transparent' && fill !== 'rgba(0, 0, 0, 0)') { @@ -144,7 +144,7 @@ return fill; } } - + // Fallback to stroke if fill is not available const stroke = style.stroke; if (stroke && stroke !== 'none' && stroke !== 'transparent' && stroke !== 'rgba(0, 0, 0, 0)') { @@ -158,7 +158,7 @@ return stroke; } } - + return null; } @@ -168,28 +168,28 @@ // This handles rgba(0,0,0,0) and transparent values that browsers commonly return function getEffectiveBackgroundColor(el) { if (!el) return null; - + // For SVG elements, use fill/stroke instead of backgroundColor if (el.tagName === 'SVG') { const svgColor = getSVGColor(el); if (svgColor) return svgColor; } - + let current = el; const maxDepth = 10; // Prevent infinite loops let depth = 0; - + while (current && depth < maxDepth) { const style = window.getComputedStyle(current); - + // For SVG elements in the tree, also check fill/stroke if (current.tagName === 'SVG') { const svgColor = getSVGColor(current); if (svgColor) return svgColor; } - + const bgColor = style.backgroundColor; - + if (bgColor && bgColor !== 'transparent' && bgColor !== 'rgba(0, 0, 0, 0)') { // Check if it's rgba with alpha < 1 (semi-transparent) const rgbaMatch = bgColor.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/); @@ -209,12 +209,12 @@ return bgColor; } } - + // Move up the DOM tree current = current.parentElement; depth++; } - + // Fallback: return null if nothing found return null; } @@ -235,7 +235,7 @@ // Only check for elements that are likely to be occluded (overlays, modals, tooltips) const zIndex = parseInt(style.zIndex, 10); const position = style.position; - + // Skip occlusion check for normal flow elements (vast majority) // Only check for positioned elements or high z-index (likely overlays) if (position === 'static' && (isNaN(zIndex) || zIndex <= 10)) { @@ -308,7 +308,7 @@ }; window.addEventListener('message', listener); - + try { window.postMessage({ type: 'SENTIENCE_SNAPSHOT_REQUEST', @@ -514,7 +514,7 @@ function extractRawElementData(el) { const style = window.getComputedStyle(el); const rect = el.getBoundingClientRect(); - + return { tag: el.tagName, rect: { @@ -548,12 +548,12 @@ // --- HELPER: Generate Unique CSS Selector (for Golden Set) --- function getUniqueSelector(el) { if (!el || !el.tagName) return ''; - + // If element has a unique ID, use it if (el.id) { return `#${el.id}`; } - + // Try data attributes or aria-label for uniqueness for (const attr of el.attributes) { if (attr.name.startsWith('data-') || attr.name === 'aria-label') { @@ -561,21 +561,21 @@ return `${el.tagName.toLowerCase()}[${attr.name}="${value}"]`; } } - + // Build path with classes and nth-child for uniqueness const path = []; let current = el; - + while (current && current !== document.body && current !== document.documentElement) { let selector = current.tagName.toLowerCase(); - + // If current element has ID, use it and stop if (current.id) { selector = `#${current.id}`; path.unshift(selector); break; } - + // Add class if available if (current.className && typeof current.className === 'string') { const classes = current.className.trim().split(/\s+/).filter(c => c); @@ -584,7 +584,7 @@ selector += `.${classes[0]}`; } } - + // Add nth-of-type if needed for uniqueness if (current.parentElement) { const siblings = Array.from(current.parentElement.children); @@ -594,11 +594,11 @@ selector += `:nth-of-type(${index + 1})`; } } - + path.unshift(selector); current = current.parentElement; } - + return path.join(' > ') || el.tagName.toLowerCase(); } @@ -613,7 +613,7 @@ } = options; const startTime = Date.now(); - + return new Promise((resolve) => { // Check if DOM already has enough nodes const nodeCount = document.querySelectorAll('*').length; @@ -623,17 +623,17 @@ const observer = new MutationObserver(() => { lastChange = Date.now(); }); - + observer.observe(document.body, { childList: true, subtree: true, attributes: false }); - + const checkStable = () => { const timeSinceLastChange = Date.now() - lastChange; const totalWait = Date.now() - startTime; - + if (timeSinceLastChange >= quietPeriod) { observer.disconnect(); resolve(); @@ -645,14 +645,14 @@ setTimeout(checkStable, 50); } }; - + checkStable(); } else { // DOM doesn't have enough nodes yet, wait for them const observer = new MutationObserver(() => { const currentCount = document.querySelectorAll('*').length; const totalWait = Date.now() - startTime; - + if (currentCount >= minNodeCount) { observer.disconnect(); // Now wait for quiet period @@ -660,17 +660,17 @@ const quietObserver = new MutationObserver(() => { lastChange = Date.now(); }); - + quietObserver.observe(document.body, { childList: true, subtree: true, attributes: false }); - + const checkQuiet = () => { const timeSinceLastChange = Date.now() - lastChange; const totalWait = Date.now() - startTime; - + if (timeSinceLastChange >= quietPeriod) { quietObserver.disconnect(); resolve(); @@ -682,7 +682,7 @@ setTimeout(checkQuiet, 50); } }; - + checkQuiet(); } else if (totalWait >= maxWait) { observer.disconnect(); @@ -690,13 +690,13 @@ resolve(); } }); - + observer.observe(document.body, { childList: true, subtree: true, attributes: false }); - + // Timeout fallback setTimeout(() => { observer.disconnect(); @@ -710,21 +710,21 @@ // --- HELPER: Collect Iframe Snapshots (Frame Stitching) --- // Recursively collects snapshot data from all child iframes // This enables detection of elements inside iframes (e.g., Stripe forms) - // + // // NOTE: Cross-origin iframes cannot be accessed due to browser security (Same-Origin Policy). // Only same-origin iframes will return snapshot data. Cross-origin iframes will be skipped // with a warning. For cross-origin iframes, users must manually switch frames using // Playwright's page.frame() API. async function collectIframeSnapshots(options = {}) { const iframeData = new Map(); // Map of iframe element -> snapshot data - + // Find all iframe elements in current document const iframes = Array.from(document.querySelectorAll('iframe')); - + if (iframes.length === 0) { return iframeData; } - + console.log(`[SentienceAPI] Found ${iframes.length} iframe(s), requesting snapshots...`); // Request snapshot from each iframe const iframePromises = iframes.map((iframe, idx) => { @@ -737,13 +737,13 @@ return new Promise((resolve) => { const requestId = `iframe-${idx}-${Date.now()}`; - + // 1. EXTENDED TIMEOUT (Handle slow children) const timeout = setTimeout(() => { console.warn(`[SentienceAPI] ⚠️ Iframe ${idx} snapshot TIMEOUT (id: ${requestId})`); resolve(null); }, 5000); // Increased to 5s to handle slow processing - + // 2. ROBUST LISTENER with debugging const listener = (event) => { // Debug: Log all SENTIENCE_IFRAME_SNAPSHOT_RESPONSE messages to see what's happening @@ -753,14 +753,14 @@ // console.log(`[SentienceAPI] Received response for different request: ${event.data.requestId} (expected: ${requestId})`); } } - + // Check if this is the response we're waiting for - if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE' && + if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE' && event.data?.requestId === requestId) { - + clearTimeout(timeout); window.removeEventListener('message', listener); - + if (event.data.error) { console.warn(`[SentienceAPI] Iframe ${idx} returned error:`, event.data.error); resolve(null); @@ -775,9 +775,9 @@ } } }; - + window.addEventListener('message', listener); - + // 3. SEND REQUEST with error handling try { if (iframe.contentWindow) { @@ -785,8 +785,8 @@ iframe.contentWindow.postMessage({ type: 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST', requestId: requestId, - options: { - ...options, + options: { + ...options, collectIframes: true // Enable recursion for nested iframes } }, '*'); // Use '*' for cross-origin, but browser will enforce same-origin policy @@ -804,10 +804,10 @@ } }); }); - + // Wait for all iframe responses const results = await Promise.all(iframePromises); - + // Store iframe data results.forEach((result, idx) => { if (result && result.data && !result.error) { @@ -819,7 +819,7 @@ console.warn(`[SentienceAPI] Iframe ${idx} returned no data (timeout or error)`); } }); - + return iframeData; } @@ -832,7 +832,7 @@ // Security: only respond to snapshot requests from parent frames if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST') { const { requestId, options } = event.data; - + try { // Generate snapshot for this iframe's content // Allow recursive collection - querySelectorAll('iframe') only finds direct children, @@ -840,7 +840,7 @@ // waitForStability: false makes performance better - i.e. don't wait for children frames const snapshotOptions = { ...options, collectIframes: true, waitForStability: options.waitForStability === false ? false : false }; const snapshot = await window.sentience.snapshot(snapshotOptions); - + // Send response back to parent if (event.source && event.source.postMessage) { event.source.postMessage({ @@ -864,7 +864,7 @@ } }); } - + // Setup iframe handler when script loads (only once) if (!window.sentience_iframe_handler_setup) { setupIframeSnapshotHandler(); @@ -880,7 +880,7 @@ if (options.waitForStability !== false) { await waitForStability(options.waitForStability || {}); } - + // Step 1: Collect raw DOM data (Main World - CSP can't block this!) const rawData = []; window.sentience_registry = []; @@ -896,17 +896,17 @@ const textVal = getText(el); const inView = isInViewport(rect); - + // Get computed style once (needed for both occlusion check and data collection) const style = window.getComputedStyle(el); - + // Only check occlusion for elements likely to be occluded (optimized) // This avoids layout thrashing for the vast majority of elements const occluded = inView ? isOccluded(el, rect, style) : false; - + // Get effective background color (traverses DOM to find non-transparent color) const effectiveBgColor = getEffectiveBackgroundColor(el); - + rawData.push({ id: idx, tag: el.tagName.toLowerCase(), @@ -946,26 +946,26 @@ // This allows WASM to process all elements uniformly (no recursion needed) let allRawElements = [...rawData]; // Start with main frame elements let totalIframeElements = 0; - + if (options.collectIframes !== false) { try { console.log(`[SentienceAPI] Starting iframe collection...`); const iframeSnapshots = await collectIframeSnapshots(options); console.log(`[SentienceAPI] Iframe collection complete. Received ${iframeSnapshots.size} snapshot(s)`); - + if (iframeSnapshots.size > 0) { // FLATTEN IMMEDIATELY: Don't nest them. Just append them with coordinate translation. iframeSnapshots.forEach((iframeSnapshot, iframeEl) => { // Debug: Log structure to verify data is correct // console.log(`[SentienceAPI] Processing iframe snapshot:`, iframeSnapshot); - + if (iframeSnapshot && iframeSnapshot.raw_elements) { const rawElementsCount = iframeSnapshot.raw_elements.length; console.log(`[SentienceAPI] Processing ${rawElementsCount} elements from iframe (src: ${iframeEl.src || 'unknown'})`); // Get iframe's bounding rect (offset for coordinate translation) const iframeRect = iframeEl.getBoundingClientRect(); const offset = { x: iframeRect.x, y: iframeRect.y }; - + // Get iframe context for frame switching (Playwright needs this) const iframeSrc = iframeEl.src || iframeEl.getAttribute('src') || ''; let isSameOrigin = false; @@ -975,11 +975,11 @@ } catch (e) { isSameOrigin = false; } - + // Adjust coordinates and add iframe context to each element const adjustedElements = iframeSnapshot.raw_elements.map(el => { const adjusted = { ...el }; - + // Adjust rect coordinates to parent viewport if (adjusted.rect) { adjusted.rect = { @@ -988,22 +988,22 @@ y: adjusted.rect.y + offset.y }; } - + // Add iframe context so agents can switch frames in Playwright adjusted.iframe_context = { src: iframeSrc, is_same_origin: isSameOrigin }; - + return adjusted; }); - + // Append flattened iframe elements to main array allRawElements.push(...adjustedElements); totalIframeElements += adjustedElements.length; } }); - + // console.log(`[SentienceAPI] Merged ${iframeSnapshots.size} iframe(s). Total elements: ${allRawElements.length} (${rawData.length} main + ${totalIframeElements} iframe)`); } } catch (error) { @@ -1016,7 +1016,7 @@ // No recursion needed - everything is already flat console.log(`[SentienceAPI] Sending ${allRawElements.length} total elements to WASM (${rawData.length} main + ${totalIframeElements} iframe)`); const processed = await processSnapshotInBackground(allRawElements, options); - + if (!processed || !processed.elements) { throw new Error('WASM processing returned invalid result'); } @@ -1032,10 +1032,10 @@ const cleanedRawElements = cleanElement(processed.raw_elements); // FIXED: Removed undefined 'totalIframeRawElements' - // FIXED: Logic updated for "Flatten Early" architecture. + // FIXED: Logic updated for "Flatten Early" architecture. // processed.elements ALREADY contains the merged iframe elements, // so we simply use .length. No addition needed. - + const totalCount = cleanedElements.length; const totalRaw = cleanedRawElements.length; const iframeCount = totalIframeElements || 0; @@ -1253,23 +1253,23 @@ autoDisableTimeout = 30 * 60 * 1000, // 30 minutes default keyboardShortcut = 'Ctrl+Shift+I' } = options; - + console.log("🔴 [Sentience] Recording Mode STARTED. Click an element to copy its Ground Truth JSON."); console.log(` Press ${keyboardShortcut} or call stopRecording() to stop.`); - + // Validate registry is populated if (!window.sentience_registry || window.sentience_registry.length === 0) { console.warn("⚠️ Registry empty. Call `await window.sentience.snapshot()` first to populate registry."); alert("Registry empty. Run `await window.sentience.snapshot()` first!"); return () => {}; // Return no-op cleanup function } - + // Create reverse mapping for O(1) lookup (fixes registry lookup bug) window.sentience_registry_map = new Map(); window.sentience_registry.forEach((el, idx) => { if (el) window.sentience_registry_map.set(el, idx); }); - + // Create highlight box overlay let highlightBox = document.getElementById('sentience-highlight-box'); if (!highlightBox) { @@ -1287,7 +1287,7 @@ `; document.body.appendChild(highlightBox); } - + // Create visual indicator (red border on page when recording) let recordingIndicator = document.getElementById('sentience-recording-indicator'); if (!recordingIndicator) { @@ -1306,12 +1306,12 @@ document.body.appendChild(recordingIndicator); } recordingIndicator.style.display = 'block'; - + // Hover handler (visual feedback) const mouseOverHandler = (e) => { const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; - + const rect = el.getBoundingClientRect(); highlightBox.style.display = 'block'; highlightBox.style.top = (rect.top + window.scrollY) + 'px'; @@ -1319,15 +1319,15 @@ highlightBox.style.width = rect.width + 'px'; highlightBox.style.height = rect.height + 'px'; }; - + // Click handler (capture ground truth data) const clickHandler = (e) => { e.preventDefault(); e.stopPropagation(); - + const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; - + // Use Map for reliable O(1) lookup const sentienceId = window.sentience_registry_map.get(el); if (sentienceId === undefined) { @@ -1335,13 +1335,13 @@ alert("Element not in registry. Run `await window.sentience.snapshot()` first!"); return; } - + // Extract raw data (ground truth + raw signals, NOT model outputs) const rawData = extractRawElementData(el); const selector = getUniqueSelector(el); const role = el.getAttribute('role') || el.tagName.toLowerCase(); const text = getText(el); - + // Build golden set JSON (ground truth + raw signals only) const snippet = { task: `Interact with ${text.substring(0, 20)}${text.length > 20 ? '...' : ''}`, @@ -1355,12 +1355,12 @@ }, debug_snapshot: rawData }; - + // Copy to clipboard const jsonString = JSON.stringify(snippet, null, 2); navigator.clipboard.writeText(jsonString).then(() => { console.log("✅ Copied Ground Truth to clipboard:", snippet); - + // Flash green to indicate success highlightBox.style.border = `2px solid ${successColor}`; highlightBox.style.background = 'rgba(0, 255, 0, 0.2)'; @@ -1373,42 +1373,42 @@ alert("Failed to copy to clipboard. Check console for JSON."); }); }; - + // Auto-disable timeout let timeoutId = null; - + // Cleanup function to stop recording (defined before use) const stopRecording = () => { document.removeEventListener('mouseover', mouseOverHandler, true); document.removeEventListener('click', clickHandler, true); document.removeEventListener('keydown', keyboardHandler, true); - + if (timeoutId) { clearTimeout(timeoutId); timeoutId = null; } - + if (highlightBox) { highlightBox.style.display = 'none'; } - + if (recordingIndicator) { recordingIndicator.style.display = 'none'; } - + // Clean up registry map (optional, but good practice) if (window.sentience_registry_map) { window.sentience_registry_map.clear(); } - + // Remove global reference if (window.sentience_stopRecording === stopRecording) { delete window.sentience_stopRecording; } - + console.log("⚪ [Sentience] Recording Mode STOPPED."); }; - + // Keyboard shortcut handler (defined after stopRecording) const keyboardHandler = (e) => { // Ctrl+Shift+I or Cmd+Shift+I @@ -1417,12 +1417,12 @@ stopRecording(); } }; - + // Attach event listeners (use capture phase to intercept early) document.addEventListener('mouseover', mouseOverHandler, true); document.addEventListener('click', clickHandler, true); document.addEventListener('keydown', keyboardHandler, true); - + // Set up auto-disable timeout if (autoDisableTimeout > 0) { timeoutId = setTimeout(() => { @@ -1430,10 +1430,10 @@ stopRecording(); }, autoDisableTimeout); } - + // Store stop function globally for keyboard shortcut access window.sentience_stopRecording = stopRecording; - + return stopRecording; } }; diff --git a/sentience/llm_interaction_handler.py b/sentience/llm_interaction_handler.py index 60c9311..1f8c133 100644 --- a/sentience/llm_interaction_handler.py +++ b/sentience/llm_interaction_handler.py @@ -60,7 +60,7 @@ def build_context(self, snap: Snapshot, goal: str | None = None) -> str: # Format element line with improved readability # Ensure cues is defined before using it in f-string cues_str = f" {{{','.join(cues)}}}" if cues else "" - + # Better text handling - show truncation indicator text_preview = "" if el.text: @@ -68,13 +68,13 @@ def build_context(self, snap: Snapshot, goal: str | None = None) -> str: text_preview = f'"{el.text[:50]}..."' else: text_preview = f'"{el.text}"' - + # Build position and size info x, y = int(el.bbox.x), int(el.bbox.y) width, height = int(el.bbox.width), int(el.bbox.height) position_str = f"@ ({x},{y})" size_str = f"size:{width}x{height}" - + # Build status indicators (only include if relevant) status_parts = [] if not el.in_viewport: @@ -84,10 +84,10 @@ def build_context(self, snap: Snapshot, goal: str | None = None) -> str: if el.diff_status: status_parts.append(f"diff:{el.diff_status}") status_str = f" [{','.join(status_parts)}]" if status_parts else "" - + # Format: [ID] "text" {cues} @ (x,y) size:WxH importance:score [status] lines.append( - f'[{el.id}] <{el.role}> {text_preview}{cues_str} ' + f"[{el.id}] <{el.role}> {text_preview}{cues_str} " f"{position_str} {size_str} importance:{el.importance}{status_str}" ) diff --git a/sentience/trace_event_builder.py b/sentience/trace_event_builder.py index 560865e..d2e5f9f 100644 --- a/sentience/trace_event_builder.py +++ b/sentience/trace_event_builder.py @@ -83,6 +83,7 @@ def build_step_end_event( llm_data: dict[str, Any], exec_data: dict[str, Any], verify_data: dict[str, Any], + pre_elements: list[dict[str, Any]] | None = None, ) -> dict[str, Any]: """ Build step_end trace event data. @@ -98,20 +99,27 @@ def build_step_end_event( llm_data: LLM interaction data exec_data: Action execution data verify_data: Verification data + pre_elements: Optional list of elements from pre-snapshot (with diff_status) Returns: Dictionary with step_end event data """ + pre_data: dict[str, Any] = { + "url": pre_url, + "snapshot_digest": snapshot_digest, + } + + # Add elements to pre field if provided (for diff overlay support) + if pre_elements is not None: + pre_data["elements"] = pre_elements + return { "v": 1, "step_id": step_id, "step_index": step_index, "goal": goal, "attempt": attempt, - "pre": { - "url": pre_url, - "snapshot_digest": snapshot_digest, - }, + "pre": pre_data, "llm": llm_data, "exec": exec_data, "post": { diff --git a/sentience/tracer_factory.py b/sentience/tracer_factory.py index f2e9f57..0ec2f3b 100644 --- a/sentience/tracer_factory.py +++ b/sentience/tracer_factory.py @@ -10,6 +10,7 @@ import os import uuid from pathlib import Path +from typing import Any import requests diff --git a/tests/test_trace_event_builder.py b/tests/test_trace_event_builder.py new file mode 100644 index 0000000..fc0b5d7 --- /dev/null +++ b/tests/test_trace_event_builder.py @@ -0,0 +1,275 @@ +""" +Tests for trace event builder functionality. +""" + +import pytest + +from sentience.models import BBox, Element, Snapshot, Viewport, VisualCues +from sentience.snapshot_diff import SnapshotDiff +from sentience.trace_event_builder import TraceEventBuilder + + +def create_element( + element_id: int, + role: str = "button", + text: str | None = "Test", + x: float = 100.0, + y: float = 100.0, + width: float = 50.0, + height: float = 20.0, + diff_status: str | None = None, +) -> Element: + """Helper to create test elements.""" + return Element( + id=element_id, + role=role, + text=text, + importance=500, + bbox=BBox(x=x, y=y, width=width, height=height), + visual_cues=VisualCues(is_primary=False, is_clickable=True), + diff_status=diff_status, + ) + + +def create_snapshot(elements: list[Element], url: str = "http://example.com") -> Snapshot: + """Helper to create test snapshots.""" + return Snapshot( + status="success", + url=url, + viewport=Viewport(width=1920, height=1080), + elements=elements, + ) + + +def test_build_step_end_event_basic(): + """Test basic step_end event building without elements.""" + llm_data = { + "response_text": "click(123)", + "response_hash": "sha256:abc123", + "usage": {"prompt_tokens": 100, "completion_tokens": 10, "total_tokens": 110}, + } + + exec_data = { + "success": True, + "action": "click", + "outcome": "Clicked element 123", + "duration_ms": 500, + "element_id": 123, + } + + verify_data = { + "passed": True, + "signals": {"url_changed": True}, + } + + result = TraceEventBuilder.build_step_end_event( + step_id="step-1", + step_index=1, + goal="Click the button", + attempt=0, + pre_url="http://example.com/page1", + post_url="http://example.com/page2", + snapshot_digest="sha256:digest123", + llm_data=llm_data, + exec_data=exec_data, + verify_data=verify_data, + ) + + assert result["v"] == 1 + assert result["step_id"] == "step-1" + assert result["step_index"] == 1 + assert result["goal"] == "Click the button" + assert result["attempt"] == 0 + assert result["pre"]["url"] == "http://example.com/page1" + assert result["pre"]["snapshot_digest"] == "sha256:digest123" + assert "elements" not in result["pre"] # No elements provided + assert result["post"]["url"] == "http://example.com/page2" + assert result["llm"] == llm_data + assert result["exec"] == exec_data + assert result["verify"] == verify_data + + +def test_build_step_end_event_with_elements(): + """Test step_end event building with elements array (for diff overlay).""" + # Create snapshot with diff_status + elements = [ + create_element(1, text="Button 1", diff_status="ADDED"), + create_element(2, text="Button 2", diff_status=None), + create_element(3, text="Button 3", diff_status="MODIFIED"), + ] + snapshot = create_snapshot(elements) + + # Build snapshot event to get formatted elements + snapshot_event_data = TraceEventBuilder.build_snapshot_event(snapshot) + pre_elements = snapshot_event_data.get("elements", []) + + llm_data = { + "response_text": "click(1)", + "response_hash": "sha256:abc123", + "usage": {"prompt_tokens": 100, "completion_tokens": 10, "total_tokens": 110}, + } + + exec_data = { + "success": True, + "action": "click", + "outcome": "Clicked element 1", + "duration_ms": 500, + "element_id": 1, + } + + verify_data = { + "passed": True, + "signals": {"url_changed": True}, + } + + result = TraceEventBuilder.build_step_end_event( + step_id="step-1", + step_index=1, + goal="Click the button", + attempt=0, + pre_url="http://example.com/page1", + post_url="http://example.com/page2", + snapshot_digest="sha256:digest123", + llm_data=llm_data, + exec_data=exec_data, + verify_data=verify_data, + pre_elements=pre_elements, + ) + + # Verify elements are included in pre field + assert "elements" in result["pre"] + assert len(result["pre"]["elements"]) == 3 + + # Verify element data structure + el1 = result["pre"]["elements"][0] + assert el1["id"] == 1 + assert el1["role"] == "button" + assert el1["text"] == "Button 1" + assert el1["diff_status"] == "ADDED" + assert "bbox" in el1 + assert "importance" in el1 + assert "importance_score" in el1 + + el2 = result["pre"]["elements"][1] + assert el2["id"] == 2 + assert el2["diff_status"] is None + + el3 = result["pre"]["elements"][2] + assert el3["id"] == 3 + assert el3["diff_status"] == "MODIFIED" + + +def test_build_step_end_event_with_diff_status_integration(): + """Test full integration: compute diff_status, build snapshot event, include in step_end.""" + # Previous snapshot + previous_elements = [ + create_element(1, text="Button 1"), + create_element(2, text="Old Text"), + ] + previous_snapshot = create_snapshot(previous_elements) + + # Current snapshot + current_elements = [ + create_element(1, text="Button 1"), # Unchanged + create_element(2, text="New Text"), # Modified + create_element(3, text="New Button"), # Added + ] + current_snapshot = create_snapshot(current_elements) + + # Compute diff_status + elements_with_diff = SnapshotDiff.compute_diff_status(current_snapshot, previous_snapshot) + + # Create snapshot with diff_status + snapshot_with_diff = Snapshot( + status=current_snapshot.status, + url=current_snapshot.url, + viewport=current_snapshot.viewport, + elements=elements_with_diff, + timestamp=current_snapshot.timestamp, + screenshot=current_snapshot.screenshot, + screenshot_format=current_snapshot.screenshot_format, + error=current_snapshot.error, + ) + + # Build snapshot event to get formatted elements + snapshot_event_data = TraceEventBuilder.build_snapshot_event(snapshot_with_diff) + pre_elements = snapshot_event_data.get("elements", []) + + # Build step_end event + result = TraceEventBuilder.build_step_end_event( + step_id="step-1", + step_index=1, + goal="Click the button", + attempt=0, + pre_url="http://example.com", + post_url="http://example.com", + snapshot_digest="sha256:digest123", + llm_data={"response_text": "click(3)", "response_hash": "sha256:xyz"}, + exec_data={"success": True, "action": "click"}, + verify_data={"passed": True, "signals": {}}, + pre_elements=pre_elements, + ) + + # Verify elements are in step_end event with correct diff_status + assert "elements" in result["pre"] + # Should have 3 elements (1 unchanged, 1 modified, 1 added) + # Note: REMOVED elements are also included in diff computation but not in current snapshot + assert len(result["pre"]["elements"]) == 3 + + # Find elements by ID + elements_by_id = {el["id"]: el for el in result["pre"]["elements"]} + + # Element 1: unchanged (diff_status should be None) + assert elements_by_id[1]["diff_status"] is None + + # Element 2: modified + assert elements_by_id[2]["diff_status"] == "MODIFIED" + + # Element 3: added + assert elements_by_id[3]["diff_status"] == "ADDED" + + +def test_build_snapshot_event_with_importance_score(): + """Test that build_snapshot_event includes importance_score normalization.""" + elements = [ + create_element(1, text="Low importance", diff_status="ADDED"), + create_element(2, text="Medium importance", diff_status=None), + create_element(3, text="High importance", diff_status="MODIFIED"), + ] + # Set different importance values + elements[0].importance = 100 + elements[1].importance = 500 + elements[2].importance = 1000 + + snapshot = create_snapshot(elements) + result = TraceEventBuilder.build_snapshot_event(snapshot) + + # Verify importance_score is normalized to [0, 1] + assert result["elements"][0]["importance_score"] == 0.0 # Min + assert result["elements"][1]["importance_score"] == pytest.approx(0.444, abs=0.01) # Mid + assert result["elements"][2]["importance_score"] == 1.0 # Max + + +def test_build_step_end_event_empty_elements(): + """Test step_end event with empty elements array.""" + snapshot = create_snapshot([]) # No elements + snapshot_event_data = TraceEventBuilder.build_snapshot_event(snapshot) + pre_elements = snapshot_event_data.get("elements", []) + + result = TraceEventBuilder.build_step_end_event( + step_id="step-1", + step_index=1, + goal="Navigate to page", + attempt=0, + pre_url="http://example.com", + post_url="http://example.com", + snapshot_digest="sha256:digest123", + llm_data={"response_text": "navigate", "response_hash": "sha256:xyz"}, + exec_data={"success": True, "action": "navigate"}, + verify_data={"passed": True, "signals": {}}, + pre_elements=pre_elements, + ) + + # Should have elements field but it's empty + assert "elements" in result["pre"] + assert len(result["pre"]["elements"]) == 0