From d19d92ed469b8209719d258a0321d94c52eb7888 Mon Sep 17 00:00:00 2001 From: rcholic Date: Thu, 1 Jan 2026 21:52:49 -0800 Subject: [PATCH 1/2] align trace and indexing file data format --- sentience/agent.py | 215 ++++++++++++- sentience/extension/background.js | 6 +- sentience/extension/content.js | 2 +- sentience/extension/injected_api.js | 214 ++++++------- sentience/schemas/trace_v1.json | 48 ++- sentience/trace_indexing/index_schema.py | 102 +++++- sentience/trace_indexing/indexer.py | 114 ++++++- tests/test_trace_indexing.py | 383 ++++++++++++++++++++++- 8 files changed, 934 insertions(+), 150 deletions(-) diff --git a/sentience/agent.py b/sentience/agent.py index ebdbe10..15ec940 100644 --- a/sentience/agent.py +++ b/sentience/agent.py @@ -4,6 +4,7 @@ """ import asyncio +import hashlib import re import time from typing import TYPE_CHECKING, Any, Optional @@ -95,6 +96,24 @@ def __init__( # Step counter for tracing self._step_count = 0 + def _compute_hash(self, text: str) -> str: + """Compute SHA256 hash of text.""" + return hashlib.sha256(text.encode("utf-8")).hexdigest() + + def _get_element_bbox(self, element_id: int | None, snap: Snapshot) -> dict[str, float] | None: + """Get bounding box for an element from snapshot.""" + if element_id is None: + return None + for el in snap.elements: + if el.id == element_id: + return { + "x": el.bbox.x, + "y": el.bbox.y, + "width": el.bbox.width, + "height": el.bbox.height, + } + return None + def act( # noqa: C901 self, goal: str, @@ -343,15 +362,99 @@ def act( # noqa: C901 # Emit step completion trace event if tracer is enabled if self.tracer: - self.tracer.emit( - "step_end", - { - "success": result.success, - "duration_ms": duration_ms, - "action": result.action, + # Get pre_url from step_start (stored in tracer or use current) + pre_url = snap.url + post_url = self.browser.page.url if self.browser.page else None + + # Compute snapshot digest (simplified - use URL + timestamp) + snapshot_digest = f"sha256:{self._compute_hash(f'{pre_url}{snap.timestamp}')}" + + # Build LLM data + llm_response_text = llm_response.content + llm_response_hash = f"sha256:{self._compute_hash(llm_response_text)}" + llm_data = { + "response_text": llm_response_text, + "response_hash": llm_response_hash, + "usage": { + "prompt_tokens": llm_response.prompt_tokens or 0, + "completion_tokens": llm_response.completion_tokens or 0, + "total_tokens": llm_response.total_tokens or 0, }, - step_id=step_id, + } + + # Build exec data + exec_data = { + "success": result.success, + "action": result.action, + "outcome": result.outcome + or ( + f"Action {result.action} executed successfully" + if result.success + else f"Action {result.action} failed" + ), + "duration_ms": duration_ms, + } + + # Add optional exec fields + if result.element_id is not None: + exec_data["element_id"] = result.element_id + # Add bounding box if element found + bbox = self._get_element_bbox(result.element_id, snap) + if bbox: + exec_data["bounding_box"] = bbox + if result.text is not None: + exec_data["text"] = result.text + if result.key is not None: + exec_data["key"] = result.key + if result.error is not None: + exec_data["error"] = result.error + + # Build verify data (simplified - based on success and url_changed) + verify_passed = result.success and ( + result.url_changed or result.action != "click" ) + verify_signals = { + "url_changed": result.url_changed or False, + } + if result.error: + verify_signals["error"] = result.error + + # Add elements_found array if element was targeted + if 
result.element_id is not None: + bbox = self._get_element_bbox(result.element_id, snap) + if bbox: + verify_signals["elements_found"] = [ + { + "label": f"Element {result.element_id}", + "bounding_box": bbox, + } + ] + + verify_data = { + "passed": verify_passed, + "signals": verify_signals, + } + + # Build complete step_end event + step_end_data = { + "v": 1, + "step_id": step_id, + "step_index": self._step_count, + "goal": goal, + "attempt": attempt, + "pre": { + "url": pre_url, + "snapshot_digest": snapshot_digest, + }, + "llm": llm_data, + "exec": exec_data, + "post": { + "url": post_url, + }, + "verify": verify_data, + } + + self.tracer.emit("step_end", step_end_data, step_id=step_id) return result @@ -1026,15 +1129,99 @@ async def act( # noqa: C901 # Emit step completion trace event if tracer is enabled if self.tracer: - self.tracer.emit( - "step_end", - { - "success": result.success, - "duration_ms": duration_ms, - "action": result.action, + # Get pre_url from step_start (stored in tracer or use current) + pre_url = snap.url + post_url = self.browser.page.url if self.browser.page else None + + # Compute snapshot digest (simplified - use URL + timestamp) + snapshot_digest = f"sha256:{self._compute_hash(f'{pre_url}{snap.timestamp}')}" + + # Build LLM data + llm_response_text = llm_response.content + llm_response_hash = f"sha256:{self._compute_hash(llm_response_text)}" + llm_data = { + "response_text": llm_response_text, + "response_hash": llm_response_hash, + "usage": { + "prompt_tokens": llm_response.prompt_tokens or 0, + "completion_tokens": llm_response.completion_tokens or 0, + "total_tokens": llm_response.total_tokens or 0, }, - step_id=step_id, + } + + # Build exec data + exec_data = { + "success": result.success, + "action": result.action, + "outcome": result.outcome + or ( + f"Action {result.action} executed successfully" + if result.success + else f"Action {result.action} failed" + ), + "duration_ms": duration_ms, + } + + # Add optional exec fields + if result.element_id is not None: + exec_data["element_id"] = result.element_id + # Add bounding box if element found + bbox = self._get_element_bbox(result.element_id, snap) + if bbox: + exec_data["bounding_box"] = bbox + if result.text is not None: + exec_data["text"] = result.text + if result.key is not None: + exec_data["key"] = result.key + if result.error is not None: + exec_data["error"] = result.error + + # Build verify data (simplified - based on success and url_changed) + verify_passed = result.success and ( + result.url_changed or result.action != "click" ) + verify_signals = { + "url_changed": result.url_changed or False, + } + if result.error: + verify_signals["error"] = result.error + + # Add elements_found array if element was targeted + if result.element_id is not None: + bbox = self._get_element_bbox(result.element_id, snap) + if bbox: + verify_signals["elements_found"] = [ + { + "label": f"Element {result.element_id}", + "bounding_box": bbox, + } + ] + + verify_data = { + "passed": verify_passed, + "signals": verify_signals, + } + + # Build complete step_end event + step_end_data = { + "v": 1, + "step_id": step_id, + "step_index": self._step_count, + "goal": goal, + "attempt": attempt, + "pre": { + "url": pre_url, + "snapshot_digest": snapshot_digest, + }, + "llm": llm_data, + "exec": exec_data, + "post": { + "url": post_url, + }, + "verify": verify_data, + } + + self.tracer.emit("step_end", step_end_data, step_id=step_id) return result diff --git a/sentience/extension/background.js 
b/sentience/extension/background.js index 811303f..f359ba6 100644 --- a/sentience/extension/background.js +++ b/sentience/extension/background.js @@ -144,13 +144,13 @@ async function handleScreenshotCapture(_tabId, options = {}) { async function handleSnapshotProcessing(rawData, options = {}) { const MAX_ELEMENTS = 10000; // Safety limit to prevent hangs const startTime = performance.now(); - + try { // Safety check: limit element count to prevent hangs if (!Array.isArray(rawData)) { throw new Error('rawData must be an array'); } - + if (rawData.length > MAX_ELEMENTS) { console.warn(`[Sentience Background] ⚠️ Large dataset: ${rawData.length} elements. Limiting to ${MAX_ELEMENTS} to prevent hangs.`); rawData = rawData.slice(0, MAX_ELEMENTS); @@ -186,7 +186,7 @@ async function handleSnapshotProcessing(rawData, options = {}) { // Add timeout protection (18 seconds - less than content.js timeout) analyzedElements = await Promise.race([ wasmPromise, - new Promise((_, reject) => + new Promise((_, reject) => setTimeout(() => reject(new Error('WASM processing timeout (>18s)')), 18000) ) ]); diff --git a/sentience/extension/content.js b/sentience/extension/content.js index 62ae408..8d3b0d4 100644 --- a/sentience/extension/content.js +++ b/sentience/extension/content.js @@ -92,7 +92,7 @@ function handleSnapshotRequest(data) { if (responded) return; // Already responded via timeout responded = true; clearTimeout(timeoutId); - + const duration = performance.now() - startTime; // Handle Chrome extension errors (e.g., background script crashed) diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index 45c4337..e81c9be 100644 --- a/sentience/extension/injected_api.js +++ b/sentience/extension/injected_api.js @@ -66,10 +66,10 @@ // --- HELPER: Safe Class Name Extractor (Handles SVGAnimatedString) --- function getClassName(el) { if (!el || !el.className) return ''; - + // Handle string (HTML elements) if (typeof el.className === 'string') return el.className; - + // Handle SVGAnimatedString (SVG elements) if (typeof el.className === 'object') { if ('baseVal' in el.className && typeof el.className.baseVal === 'string') { @@ -85,17 +85,17 @@ return ''; } } - + return ''; } // --- HELPER: Paranoid String Converter (Handles SVGAnimatedString) --- function toSafeString(value) { if (value === null || value === undefined) return null; - + // 1. If it's already a primitive string, return it if (typeof value === 'string') return value; - + // 2. Handle SVG objects (SVGAnimatedString, SVGAnimatedNumber, etc.) if (typeof value === 'object') { // Try extracting baseVal (standard SVG property) @@ -114,7 +114,7 @@ return null; } } - + // 3. 
Last resort cast for primitives try { return String(value); @@ -127,9 +127,9 @@ // For SVG elements, get the fill or stroke color (SVGs use fill/stroke, not backgroundColor) function getSVGColor(el) { if (!el || el.tagName !== 'SVG') return null; - + const style = window.getComputedStyle(el); - + // Try fill first (most common for SVG icons) const fill = style.fill; if (fill && fill !== 'none' && fill !== 'transparent' && fill !== 'rgba(0, 0, 0, 0)') { @@ -144,7 +144,7 @@ return fill; } } - + // Fallback to stroke if fill is not available const stroke = style.stroke; if (stroke && stroke !== 'none' && stroke !== 'transparent' && stroke !== 'rgba(0, 0, 0, 0)') { @@ -158,7 +158,7 @@ return stroke; } } - + return null; } @@ -168,28 +168,28 @@ // This handles rgba(0,0,0,0) and transparent values that browsers commonly return function getEffectiveBackgroundColor(el) { if (!el) return null; - + // For SVG elements, use fill/stroke instead of backgroundColor if (el.tagName === 'SVG') { const svgColor = getSVGColor(el); if (svgColor) return svgColor; } - + let current = el; const maxDepth = 10; // Prevent infinite loops let depth = 0; - + while (current && depth < maxDepth) { const style = window.getComputedStyle(current); - + // For SVG elements in the tree, also check fill/stroke if (current.tagName === 'SVG') { const svgColor = getSVGColor(current); if (svgColor) return svgColor; } - + const bgColor = style.backgroundColor; - + if (bgColor && bgColor !== 'transparent' && bgColor !== 'rgba(0, 0, 0, 0)') { // Check if it's rgba with alpha < 1 (semi-transparent) const rgbaMatch = bgColor.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/); @@ -209,12 +209,12 @@ return bgColor; } } - + // Move up the DOM tree current = current.parentElement; depth++; } - + // Fallback: return null if nothing found return null; } @@ -235,7 +235,7 @@ // Only check for elements that are likely to be occluded (overlays, modals, tooltips) const zIndex = parseInt(style.zIndex, 10); const position = style.position; - + // Skip occlusion check for normal flow elements (vast majority) // Only check for positioned elements or high z-index (likely overlays) if (position === 'static' && (isNaN(zIndex) || zIndex <= 10)) { @@ -308,7 +308,7 @@ }; window.addEventListener('message', listener); - + try { window.postMessage({ type: 'SENTIENCE_SNAPSHOT_REQUEST', @@ -514,7 +514,7 @@ function extractRawElementData(el) { const style = window.getComputedStyle(el); const rect = el.getBoundingClientRect(); - + return { tag: el.tagName, rect: { @@ -548,12 +548,12 @@ // --- HELPER: Generate Unique CSS Selector (for Golden Set) --- function getUniqueSelector(el) { if (!el || !el.tagName) return ''; - + // If element has a unique ID, use it if (el.id) { return `#${el.id}`; } - + // Try data attributes or aria-label for uniqueness for (const attr of el.attributes) { if (attr.name.startsWith('data-') || attr.name === 'aria-label') { @@ -561,21 +561,21 @@ return `${el.tagName.toLowerCase()}[${attr.name}="${value}"]`; } } - + // Build path with classes and nth-child for uniqueness const path = []; let current = el; - + while (current && current !== document.body && current !== document.documentElement) { let selector = current.tagName.toLowerCase(); - + // If current element has ID, use it and stop if (current.id) { selector = `#${current.id}`; path.unshift(selector); break; } - + // Add class if available if (current.className && typeof current.className === 'string') { const classes = current.className.trim().split(/\s+/).filter(c => c); 
@@ -584,7 +584,7 @@ selector += `.${classes[0]}`; } } - + // Add nth-of-type if needed for uniqueness if (current.parentElement) { const siblings = Array.from(current.parentElement.children); @@ -594,11 +594,11 @@ selector += `:nth-of-type(${index + 1})`; } } - + path.unshift(selector); current = current.parentElement; } - + return path.join(' > ') || el.tagName.toLowerCase(); } @@ -613,7 +613,7 @@ } = options; const startTime = Date.now(); - + return new Promise((resolve) => { // Check if DOM already has enough nodes const nodeCount = document.querySelectorAll('*').length; @@ -623,17 +623,17 @@ const observer = new MutationObserver(() => { lastChange = Date.now(); }); - + observer.observe(document.body, { childList: true, subtree: true, attributes: false }); - + const checkStable = () => { const timeSinceLastChange = Date.now() - lastChange; const totalWait = Date.now() - startTime; - + if (timeSinceLastChange >= quietPeriod) { observer.disconnect(); resolve(); @@ -645,14 +645,14 @@ setTimeout(checkStable, 50); } }; - + checkStable(); } else { // DOM doesn't have enough nodes yet, wait for them const observer = new MutationObserver(() => { const currentCount = document.querySelectorAll('*').length; const totalWait = Date.now() - startTime; - + if (currentCount >= minNodeCount) { observer.disconnect(); // Now wait for quiet period @@ -660,17 +660,17 @@ const quietObserver = new MutationObserver(() => { lastChange = Date.now(); }); - + quietObserver.observe(document.body, { childList: true, subtree: true, attributes: false }); - + const checkQuiet = () => { const timeSinceLastChange = Date.now() - lastChange; const totalWait = Date.now() - startTime; - + if (timeSinceLastChange >= quietPeriod) { quietObserver.disconnect(); resolve(); @@ -682,7 +682,7 @@ setTimeout(checkQuiet, 50); } }; - + checkQuiet(); } else if (totalWait >= maxWait) { observer.disconnect(); @@ -690,13 +690,13 @@ resolve(); } }); - + observer.observe(document.body, { childList: true, subtree: true, attributes: false }); - + // Timeout fallback setTimeout(() => { observer.disconnect(); @@ -710,21 +710,21 @@ // --- HELPER: Collect Iframe Snapshots (Frame Stitching) --- // Recursively collects snapshot data from all child iframes // This enables detection of elements inside iframes (e.g., Stripe forms) - // + // // NOTE: Cross-origin iframes cannot be accessed due to browser security (Same-Origin Policy). // Only same-origin iframes will return snapshot data. Cross-origin iframes will be skipped // with a warning. For cross-origin iframes, users must manually switch frames using // Playwright's page.frame() API. async function collectIframeSnapshots(options = {}) { const iframeData = new Map(); // Map of iframe element -> snapshot data - + // Find all iframe elements in current document const iframes = Array.from(document.querySelectorAll('iframe')); - + if (iframes.length === 0) { return iframeData; } - + console.log(`[SentienceAPI] Found ${iframes.length} iframe(s), requesting snapshots...`); // Request snapshot from each iframe const iframePromises = iframes.map((iframe, idx) => { @@ -737,13 +737,13 @@ return new Promise((resolve) => { const requestId = `iframe-${idx}-${Date.now()}`; - + // 1. EXTENDED TIMEOUT (Handle slow children) const timeout = setTimeout(() => { console.warn(`[SentienceAPI] ⚠️ Iframe ${idx} snapshot TIMEOUT (id: ${requestId})`); resolve(null); }, 5000); // Increased to 5s to handle slow processing - + // 2. 
ROBUST LISTENER with debugging const listener = (event) => { // Debug: Log all SENTIENCE_IFRAME_SNAPSHOT_RESPONSE messages to see what's happening @@ -753,14 +753,14 @@ // console.log(`[SentienceAPI] Received response for different request: ${event.data.requestId} (expected: ${requestId})`); } } - + // Check if this is the response we're waiting for - if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE' && + if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE' && event.data?.requestId === requestId) { - + clearTimeout(timeout); window.removeEventListener('message', listener); - + if (event.data.error) { console.warn(`[SentienceAPI] Iframe ${idx} returned error:`, event.data.error); resolve(null); @@ -775,9 +775,9 @@ } } }; - + window.addEventListener('message', listener); - + // 3. SEND REQUEST with error handling try { if (iframe.contentWindow) { @@ -785,8 +785,8 @@ iframe.contentWindow.postMessage({ type: 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST', requestId: requestId, - options: { - ...options, + options: { + ...options, collectIframes: true // Enable recursion for nested iframes } }, '*'); // Use '*' for cross-origin, but browser will enforce same-origin policy @@ -804,10 +804,10 @@ } }); }); - + // Wait for all iframe responses const results = await Promise.all(iframePromises); - + // Store iframe data results.forEach((result, idx) => { if (result && result.data && !result.error) { @@ -819,7 +819,7 @@ console.warn(`[SentienceAPI] Iframe ${idx} returned no data (timeout or error)`); } }); - + return iframeData; } @@ -832,7 +832,7 @@ // Security: only respond to snapshot requests from parent frames if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST') { const { requestId, options } = event.data; - + try { // Generate snapshot for this iframe's content // Allow recursive collection - querySelectorAll('iframe') only finds direct children, @@ -840,7 +840,7 @@ // waitForStability: false makes performance better - i.e. don't wait for children frames const snapshotOptions = { ...options, collectIframes: true, waitForStability: options.waitForStability === false ? false : false }; const snapshot = await window.sentience.snapshot(snapshotOptions); - + // Send response back to parent if (event.source && event.source.postMessage) { event.source.postMessage({ @@ -864,7 +864,7 @@ } }); } - + // Setup iframe handler when script loads (only once) if (!window.sentience_iframe_handler_setup) { setupIframeSnapshotHandler(); @@ -880,7 +880,7 @@ if (options.waitForStability !== false) { await waitForStability(options.waitForStability || {}); } - + // Step 1: Collect raw DOM data (Main World - CSP can't block this!) const rawData = []; window.sentience_registry = []; @@ -896,17 +896,17 @@ const textVal = getText(el); const inView = isInViewport(rect); - + // Get computed style once (needed for both occlusion check and data collection) const style = window.getComputedStyle(el); - + // Only check occlusion for elements likely to be occluded (optimized) // This avoids layout thrashing for the vast majority of elements const occluded = inView ? 
isOccluded(el, rect, style) : false; - + // Get effective background color (traverses DOM to find non-transparent color) const effectiveBgColor = getEffectiveBackgroundColor(el); - + rawData.push({ id: idx, tag: el.tagName.toLowerCase(), @@ -946,26 +946,26 @@ // This allows WASM to process all elements uniformly (no recursion needed) let allRawElements = [...rawData]; // Start with main frame elements let totalIframeElements = 0; - + if (options.collectIframes !== false) { try { console.log(`[SentienceAPI] Starting iframe collection...`); const iframeSnapshots = await collectIframeSnapshots(options); console.log(`[SentienceAPI] Iframe collection complete. Received ${iframeSnapshots.size} snapshot(s)`); - + if (iframeSnapshots.size > 0) { // FLATTEN IMMEDIATELY: Don't nest them. Just append them with coordinate translation. iframeSnapshots.forEach((iframeSnapshot, iframeEl) => { // Debug: Log structure to verify data is correct // console.log(`[SentienceAPI] Processing iframe snapshot:`, iframeSnapshot); - + if (iframeSnapshot && iframeSnapshot.raw_elements) { const rawElementsCount = iframeSnapshot.raw_elements.length; console.log(`[SentienceAPI] Processing ${rawElementsCount} elements from iframe (src: ${iframeEl.src || 'unknown'})`); // Get iframe's bounding rect (offset for coordinate translation) const iframeRect = iframeEl.getBoundingClientRect(); const offset = { x: iframeRect.x, y: iframeRect.y }; - + // Get iframe context for frame switching (Playwright needs this) const iframeSrc = iframeEl.src || iframeEl.getAttribute('src') || ''; let isSameOrigin = false; @@ -975,11 +975,11 @@ } catch (e) { isSameOrigin = false; } - + // Adjust coordinates and add iframe context to each element const adjustedElements = iframeSnapshot.raw_elements.map(el => { const adjusted = { ...el }; - + // Adjust rect coordinates to parent viewport if (adjusted.rect) { adjusted.rect = { @@ -988,22 +988,22 @@ y: adjusted.rect.y + offset.y }; } - + // Add iframe context so agents can switch frames in Playwright adjusted.iframe_context = { src: iframeSrc, is_same_origin: isSameOrigin }; - + return adjusted; }); - + // Append flattened iframe elements to main array allRawElements.push(...adjustedElements); totalIframeElements += adjustedElements.length; } }); - + // console.log(`[SentienceAPI] Merged ${iframeSnapshots.size} iframe(s). Total elements: ${allRawElements.length} (${rawData.length} main + ${totalIframeElements} iframe)`); } } catch (error) { @@ -1016,7 +1016,7 @@ // No recursion needed - everything is already flat console.log(`[SentienceAPI] Sending ${allRawElements.length} total elements to WASM (${rawData.length} main + ${totalIframeElements} iframe)`); const processed = await processSnapshotInBackground(allRawElements, options); - + if (!processed || !processed.elements) { throw new Error('WASM processing returned invalid result'); } @@ -1032,10 +1032,10 @@ const cleanedRawElements = cleanElement(processed.raw_elements); // FIXED: Removed undefined 'totalIframeRawElements' - // FIXED: Logic updated for "Flatten Early" architecture. + // FIXED: Logic updated for "Flatten Early" architecture. // processed.elements ALREADY contains the merged iframe elements, // so we simply use .length. No addition needed. 
- + const totalCount = cleanedElements.length; const totalRaw = cleanedRawElements.length; const iframeCount = totalIframeElements || 0; @@ -1253,23 +1253,23 @@ autoDisableTimeout = 30 * 60 * 1000, // 30 minutes default keyboardShortcut = 'Ctrl+Shift+I' } = options; - + console.log("🔴 [Sentience] Recording Mode STARTED. Click an element to copy its Ground Truth JSON."); console.log(` Press ${keyboardShortcut} or call stopRecording() to stop.`); - + // Validate registry is populated if (!window.sentience_registry || window.sentience_registry.length === 0) { console.warn("⚠️ Registry empty. Call `await window.sentience.snapshot()` first to populate registry."); alert("Registry empty. Run `await window.sentience.snapshot()` first!"); return () => {}; // Return no-op cleanup function } - + // Create reverse mapping for O(1) lookup (fixes registry lookup bug) window.sentience_registry_map = new Map(); window.sentience_registry.forEach((el, idx) => { if (el) window.sentience_registry_map.set(el, idx); }); - + // Create highlight box overlay let highlightBox = document.getElementById('sentience-highlight-box'); if (!highlightBox) { @@ -1287,7 +1287,7 @@ `; document.body.appendChild(highlightBox); } - + // Create visual indicator (red border on page when recording) let recordingIndicator = document.getElementById('sentience-recording-indicator'); if (!recordingIndicator) { @@ -1306,12 +1306,12 @@ document.body.appendChild(recordingIndicator); } recordingIndicator.style.display = 'block'; - + // Hover handler (visual feedback) const mouseOverHandler = (e) => { const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; - + const rect = el.getBoundingClientRect(); highlightBox.style.display = 'block'; highlightBox.style.top = (rect.top + window.scrollY) + 'px'; @@ -1319,15 +1319,15 @@ highlightBox.style.width = rect.width + 'px'; highlightBox.style.height = rect.height + 'px'; }; - + // Click handler (capture ground truth data) const clickHandler = (e) => { e.preventDefault(); e.stopPropagation(); - + const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; - + // Use Map for reliable O(1) lookup const sentienceId = window.sentience_registry_map.get(el); if (sentienceId === undefined) { @@ -1335,13 +1335,13 @@ alert("Element not in registry. Run `await window.sentience.snapshot()` first!"); return; } - + // Extract raw data (ground truth + raw signals, NOT model outputs) const rawData = extractRawElementData(el); const selector = getUniqueSelector(el); const role = el.getAttribute('role') || el.tagName.toLowerCase(); const text = getText(el); - + // Build golden set JSON (ground truth + raw signals only) const snippet = { task: `Interact with ${text.substring(0, 20)}${text.length > 20 ? '...' : ''}`, @@ -1355,12 +1355,12 @@ }, debug_snapshot: rawData }; - + // Copy to clipboard const jsonString = JSON.stringify(snippet, null, 2); navigator.clipboard.writeText(jsonString).then(() => { console.log("✅ Copied Ground Truth to clipboard:", snippet); - + // Flash green to indicate success highlightBox.style.border = `2px solid ${successColor}`; highlightBox.style.background = 'rgba(0, 255, 0, 0.2)'; @@ -1373,42 +1373,42 @@ alert("Failed to copy to clipboard. 
Check console for JSON."); }); }; - + // Auto-disable timeout let timeoutId = null; - + // Cleanup function to stop recording (defined before use) const stopRecording = () => { document.removeEventListener('mouseover', mouseOverHandler, true); document.removeEventListener('click', clickHandler, true); document.removeEventListener('keydown', keyboardHandler, true); - + if (timeoutId) { clearTimeout(timeoutId); timeoutId = null; } - + if (highlightBox) { highlightBox.style.display = 'none'; } - + if (recordingIndicator) { recordingIndicator.style.display = 'none'; } - + // Clean up registry map (optional, but good practice) if (window.sentience_registry_map) { window.sentience_registry_map.clear(); } - + // Remove global reference if (window.sentience_stopRecording === stopRecording) { delete window.sentience_stopRecording; } - + console.log("⚪ [Sentience] Recording Mode STOPPED."); }; - + // Keyboard shortcut handler (defined after stopRecording) const keyboardHandler = (e) => { // Ctrl+Shift+I or Cmd+Shift+I @@ -1417,12 +1417,12 @@ stopRecording(); } }; - + // Attach event listeners (use capture phase to intercept early) document.addEventListener('mouseover', mouseOverHandler, true); document.addEventListener('click', clickHandler, true); document.addEventListener('keydown', keyboardHandler, true); - + // Set up auto-disable timeout if (autoDisableTimeout > 0) { timeoutId = setTimeout(() => { @@ -1430,10 +1430,10 @@ stopRecording(); }, autoDisableTimeout); } - + // Store stop function globally for keyboard shortcut access window.sentience_stopRecording = stopRecording; - + return stopRecording; } }; diff --git a/sentience/schemas/trace_v1.json b/sentience/schemas/trace_v1.json index 7935c64..eb27750 100644 --- a/sentience/schemas/trace_v1.json +++ b/sentience/schemas/trace_v1.json @@ -119,7 +119,15 @@ "required": ["response_text", "response_hash"], "properties": { "response_text": {"type": "string"}, - "response_hash": {"type": "string"} + "response_hash": {"type": "string"}, + "usage": { + "type": "object", + "properties": { + "prompt_tokens": {"type": "integer"}, + "completion_tokens": {"type": "integer"}, + "total_tokens": {"type": "integer"} + } + } } }, "action": { @@ -145,7 +153,17 @@ "text": {"type": "string"}, "key": {"type": "string"}, "url_changed": {"type": ["boolean", "null"]}, - "duration_ms": {"type": "integer"} + "duration_ms": {"type": "integer"}, + "error": {"type": ["string", "null"]}, + "bounding_box": { + "type": "object", + "properties": { + "x": {"type": "number"}, + "y": {"type": "number"}, + "width": {"type": "number"}, + "height": {"type": "number"} + } + } } }, "post": { @@ -162,7 +180,31 @@ "properties": { "policy": {"type": "string"}, "passed": {"type": "boolean"}, - "signals": {"type": "object"} + "signals": { + "type": "object", + "properties": { + "url_changed": {"type": "boolean"}, + "error": {"type": ["string", "null"]}, + "elements_found": { + "type": "array", + "items": { + "type": "object", + "properties": { + "label": {"type": "string"}, + "bounding_box": { + "type": "object", + "properties": { + "x": {"type": "number"}, + "y": {"type": "number"}, + "width": {"type": "number"}, + "height": {"type": "number"} + } + } + } + } + } + } + } } }, "recovery": { diff --git a/sentience/trace_indexing/index_schema.py b/sentience/trace_indexing/index_schema.py index f630b1b..b86101c 100644 --- a/sentience/trace_indexing/index_schema.py +++ b/sentience/trace_indexing/index_schema.py @@ -13,6 +13,7 @@ class TraceFileInfo: path: str size_bytes: int sha256: str + 
line_count: int | None = None # Number of lines in the trace file def to_dict(self) -> dict: return asdict(self) @@ -28,6 +29,12 @@ class TraceSummary: step_count: int error_count: int final_url: str | None + status: Literal["success", "failure", "partial", "unknown"] | None = None + agent_name: str | None = None # Agent name from run_start event + duration_ms: int | None = None # Calculated duration in milliseconds + counters: dict[str, int] | None = ( + None # Aggregated counters (snapshot_count, action_count, error_count) + ) def to_dict(self) -> dict: return asdict(self) @@ -78,17 +85,18 @@ class StepIndex: step_index: int step_id: str goal: str | None - status: Literal["ok", "error", "partial"] + status: Literal["success", "failure", "partial", "unknown"] ts_start: str ts_end: str offset_start: int offset_end: int - url_before: str | None - url_after: str | None - snapshot_before: SnapshotInfo - snapshot_after: SnapshotInfo - action: ActionInfo - counters: StepCounters + line_number: int | None = None # Line number for byte-range fetching + url_before: str | None = None + url_after: str | None = None + snapshot_before: SnapshotInfo = field(default_factory=SnapshotInfo) + snapshot_after: SnapshotInfo = field(default_factory=SnapshotInfo) + action: ActionInfo = field(default_factory=ActionInfo) + counters: StepCounters = field(default_factory=StepCounters) def to_dict(self) -> dict: result = asdict(self) @@ -109,3 +117,83 @@ class TraceIndex: def to_dict(self) -> dict: """Convert to dictionary for JSON serialization.""" return asdict(self) + + def to_sentience_studio_dict(self) -> dict: + """ + Convert to SS-compatible format. + + Maps SDK field names to frontend expectations: + - created_at -> generated_at + - first_ts -> start_time + - last_ts -> end_time + - step_index (0-based) -> step (1-based) + - ts_start -> timestamp + - Filters out "unknown" status + """ + from datetime import datetime + + # Calculate duration if not already set + duration_ms = self.summary.duration_ms + if duration_ms is None and self.summary.first_ts and self.summary.last_ts: + try: + start = datetime.fromisoformat(self.summary.first_ts.replace("Z", "+00:00")) + end = datetime.fromisoformat(self.summary.last_ts.replace("Z", "+00:00")) + duration_ms = int((end - start).total_seconds() * 1000) + except (ValueError, AttributeError): + duration_ms = None + + # Aggregate counters if not already set + counters = self.summary.counters + if counters is None: + snapshot_count = sum(step.counters.snapshots for step in self.steps) + action_count = sum(step.counters.actions for step in self.steps) + counters = { + "snapshot_count": snapshot_count, + "action_count": action_count, + "error_count": self.summary.error_count, + } + + return { + "version": self.version, + "run_id": self.run_id, + "generated_at": self.created_at, # Renamed from created_at + "trace_file": { + "path": self.trace_file.path, + "size_bytes": self.trace_file.size_bytes, + "line_count": self.trace_file.line_count, # Added + }, + "summary": { + "agent_name": self.summary.agent_name, # Added + "total_steps": self.summary.step_count, # Renamed from step_count + "status": ( + self.summary.status if self.summary.status != "unknown" else None + ), # Filter out unknown + "start_time": self.summary.first_ts, # Renamed from first_ts + "end_time": self.summary.last_ts, # Renamed from last_ts + "duration_ms": duration_ms, # Added + "counters": counters, # Added + }, + "steps": [ + { + "step": s.step_index + 1, # Convert 0-based to 1-based + "byte_offset": 
s.offset_start, + "line_number": s.line_number, # Added + "timestamp": s.ts_start, # Use start time + "action": { + "type": s.action.type or "", + "goal": s.goal, # Move goal into action + "digest": s.action.args_digest, + }, + "snapshot": ( + { + "url": s.snapshot_after.url, + "digest": s.snapshot_after.digest, + } + if s.snapshot_after.url + else None + ), + "status": s.status if s.status != "unknown" else None, # Filter out unknown + } + for s in self.steps + ], + } diff --git a/sentience/trace_indexing/indexer.py b/sentience/trace_indexing/indexer.py index 52f3fab..d9d5c7e 100644 --- a/sentience/trace_indexing/indexer.py +++ b/sentience/trace_indexing/indexer.py @@ -7,7 +7,7 @@ import os from datetime import datetime, timezone from pathlib import Path -from typing import Any, Dict, List +from typing import Any from .index_schema import ( ActionInfo, @@ -149,15 +149,21 @@ def build_trace_index(trace_path: str) -> TraceIndex: event_count = 0 error_count = 0 final_url = None + run_end_status = None # Track status from run_end event + agent_name = None # Extract from run_start event + line_count = 0 # Track total line count steps_by_id: dict[str, StepIndex] = {} step_order: list[str] = [] # Track order of first appearance - # Stream through file, tracking byte offsets + # Stream through file, tracking byte offsets and line numbers with open(trace_path, "rb") as f: byte_offset = 0 + line_number = 0 # Track line number for each event for line_bytes in f: + line_number += 1 + line_count += 1 line_len = len(line_bytes) try: @@ -182,6 +188,10 @@ def build_trace_index(trace_path: str) -> TraceIndex: if event_type == "error": error_count += 1 + # Extract agent_name from run_start event + if event_type == "run_start": + agent_name = data.get("agent") + # Initialize step if first time seeing this step_id if step_id not in steps_by_id: step_order.append(step_id) @@ -189,11 +199,12 @@ def build_trace_index(trace_path: str) -> TraceIndex: step_index=len(step_order), step_id=step_id, goal=None, - status="partial", + status="failure", # Default to failure (will be updated by step_end event) ts_start=ts, ts_end=ts, offset_start=byte_offset, offset_end=byte_offset + line_len, + line_number=line_number, # Track line number url_before=None, url_after=None, snapshot_before=SnapshotInfo(), @@ -207,6 +218,7 @@ def build_trace_index(trace_path: str) -> TraceIndex: # Update step metadata step.ts_end = ts step.offset_end = byte_offset + line_len + step.line_number = line_number # Update line number on each event step.counters.events += 1 # Handle specific event types @@ -214,7 +226,8 @@ def build_trace_index(trace_path: str) -> TraceIndex: step.goal = data.get("goal") step.url_before = data.get("pre_url") - elif event_type == "snapshot": + elif event_type == "snapshot" or event_type == "snapshot_taken": + # Handle both "snapshot" (current) and "snapshot_taken" (schema) for backward compatibility snapshot_id = data.get("snapshot_id") url = data.get("url") digest = _compute_snapshot_digest(data) @@ -231,7 +244,8 @@ def build_trace_index(trace_path: str) -> TraceIndex: step.counters.snapshots += 1 final_url = url - elif event_type == "action": + elif event_type == "action" or event_type == "action_executed": + # Handle both "action" (current) and "action_executed" (schema) for backward compatibility step.action = ActionInfo( type=data.get("type"), target_element_id=data.get("target_element_id"), @@ -240,18 +254,83 @@ def build_trace_index(trace_path: str) -> TraceIndex: ) step.counters.actions += 1 - elif event_type 
== "llm_response": + elif event_type == "llm_response" or event_type == "llm_called": + # Handle both "llm_response" (current) and "llm_called" (schema) for backward compatibility step.counters.llm_calls += 1 elif event_type == "error": - step.status = "error" + step.status = "failure" elif event_type == "step_end": - if step.status != "error": - step.status = "ok" + # Determine status from step_end event data + # Frontend expects: success, failure, or partial + # Logic: success = exec.success && verify.passed + # partial = exec.success && !verify.passed + # failure = !exec.success + exec_data = data.get("exec", {}) + verify_data = data.get("verify", {}) + + exec_success = exec_data.get("success", False) + verify_passed = verify_data.get("passed", False) + + if exec_success and verify_passed: + step.status = "success" + elif exec_success and not verify_passed: + step.status = "partial" + elif not exec_success: + step.status = "failure" + else: + # Fallback: if step_end exists but no exec/verify data, default to failure + step.status = "failure" + + elif event_type == "run_end": + # Extract status from run_end event + run_end_status = data.get("status") + # Validate status value + if run_end_status not in ["success", "failure", "partial", "unknown"]: + run_end_status = None byte_offset += line_len + # Use run_end status if available, otherwise infer from step statuses + if run_end_status is None: + step_statuses = [step.status for step in steps_by_id.values()] + if step_statuses: + # Infer overall status from step statuses + if all(s == "success" for s in step_statuses): + run_end_status = "success" + elif any(s == "failure" for s in step_statuses): + # If any failure and no successes, it's failure; otherwise partial + if any(s == "success" for s in step_statuses): + run_end_status = "partial" + else: + run_end_status = "failure" + elif any(s == "partial" for s in step_statuses): + run_end_status = "partial" + else: + run_end_status = "failure" # Default to failure instead of unknown + else: + run_end_status = "failure" # Default to failure instead of unknown + + # Calculate duration + duration_ms = None + if first_ts and last_ts: + try: + start = datetime.fromisoformat(first_ts.replace("Z", "+00:00")) + end = datetime.fromisoformat(last_ts.replace("Z", "+00:00")) + duration_ms = int((end - start).total_seconds() * 1000) + except (ValueError, AttributeError): + duration_ms = None + + # Aggregate counters + snapshot_count = sum(step.counters.snapshots for step in steps_by_id.values()) + action_count = sum(step.counters.actions for step in steps_by_id.values()) + counters = { + "snapshot_count": snapshot_count, + "action_count": action_count, + "error_count": error_count, + } + # Build summary summary = TraceSummary( first_ts=first_ts, @@ -260,6 +339,10 @@ def build_trace_index(trace_path: str) -> TraceIndex: step_count=len(steps_by_id), error_count=error_count, final_url=final_url, + status=run_end_status, + agent_name=agent_name, + duration_ms=duration_ms, + counters=counters, ) # Build steps list in order @@ -270,6 +353,7 @@ def build_trace_index(trace_path: str) -> TraceIndex: path=str(trace_path), size_bytes=os.path.getsize(trace_path), sha256=_compute_file_sha256(str(trace_path)), + line_count=line_count, ) # Build final index @@ -285,13 +369,16 @@ def build_trace_index(trace_path: str) -> TraceIndex: return index -def write_trace_index(trace_path: str, index_path: str | None = None) -> str: +def write_trace_index( + trace_path: str, index_path: str | None = None, frontend_format: bool 
= False +) -> str: """ Build index and write to file. Args: trace_path: Path to trace JSONL file index_path: Optional custom path for index file (default: trace_path with .index.json) + frontend_format: If True, write in frontend-compatible format (default: False) Returns: Path to written index file @@ -301,8 +388,11 @@ def write_trace_index(trace_path: str, index_path: str | None = None) -> str: index = build_trace_index(trace_path) - with open(index_path, "w") as f: - json.dump(index.to_dict(), f, indent=2) + with open(index_path, "w", encoding="utf-8") as f: + if frontend_format: + json.dump(index.to_sentience_studio_dict(), f, indent=2) + else: + json.dump(index.to_dict(), f, indent=2) return index_path diff --git a/tests/test_trace_indexing.py b/tests/test_trace_indexing.py index 927a25e..e27f513 100644 --- a/tests/test_trace_indexing.py +++ b/tests/test_trace_indexing.py @@ -62,7 +62,23 @@ def test_single_step_trace(self): "type": "step_end", "ts": "2025-12-29T10:00:02.000Z", "step_id": "step-1", - "data": {}, + "data": { + "v": 1, + "step_id": "step-1", + "step_index": 1, + "goal": "Test goal", + "attempt": 0, + "pre": {"url": "https://example.com", "snapshot_digest": "sha256:test"}, + "llm": {"response_text": "CLICK(42)", "response_hash": "sha256:test"}, + "exec": { + "success": True, + "action": "click", + "outcome": "Action executed", + "duration_ms": 100, + }, + "post": {"url": "https://example.com"}, + "verify": {"passed": True, "signals": {}}, + }, }, ] @@ -81,7 +97,7 @@ def test_single_step_trace(self): assert step.step_id == "step-1" assert step.step_index == 1 assert step.goal == "Test goal" - assert step.status == "ok" + assert step.status == "success" assert step.counters.events == 3 assert step.counters.actions == 1 assert step.offset_start == 0 @@ -457,6 +473,29 @@ def test_error_counting(self): "step_id": "step-1", "data": {"message": "Something failed"}, }, + { + "v": 1, + "type": "step_end", + "ts": "2025-12-29T10:00:02.000Z", + "step_id": "step-1", + "data": { + "v": 1, + "step_id": "step-1", + "step_index": 1, + "goal": "Test goal", + "attempt": 0, + "pre": {"url": "https://example.com", "snapshot_digest": "sha256:test"}, + "llm": {"response_text": "CLICK(42)", "response_hash": "sha256:test"}, + "exec": { + "success": False, + "action": "click", + "outcome": "Action failed", + "duration_ms": 100, + }, + "post": {"url": "https://example.com"}, + "verify": {"passed": False, "signals": {}}, + }, + }, ] with open(trace_path, "w") as f: @@ -466,7 +505,7 @@ def test_error_counting(self): index = build_trace_index(str(trace_path)) assert index.summary.error_count == 1 - assert index.steps[0].status == "error" + assert index.steps[0].status == "failure" def test_llm_call_counting(self): """LLM calls should be counted per step.""" @@ -524,3 +563,341 @@ def test_file_not_found(self): """Should raise FileNotFoundError for non-existent file.""" with pytest.raises(FileNotFoundError): build_trace_index("/nonexistent/trace.jsonl") + + def test_line_number_tracking(self): + """Index should track line numbers for each step.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "line-numbers.jsonl" + + events = [ + { + "v": 1, + "type": "run_start", + "ts": "2025-12-29T10:00:00.000Z", + "data": {"agent": "TestAgent", "llm_model": "gpt-4"}, + }, + { + "v": 1, + "type": "step_start", + "ts": "2025-12-29T10:00:01.000Z", + "step_id": "step-1", + "data": {"goal": "Test goal"}, + }, + { + "v": 1, + "type": "action", + "ts": "2025-12-29T10:00:02.000Z", + 
"step_id": "step-1", + "data": {"type": "CLICK"}, + }, + ] + + with open(trace_path, "w") as f: + for event in events: + f.write(json.dumps(event) + "\n") + + index = build_trace_index(str(trace_path)) + + # run_start creates synthetic step-0 on line 1, step-1 has events on lines 2-3 + assert len(index.steps) >= 2 + # Find step-1 (skip synthetic step-0 from run_start) + step1 = next(s for s in index.steps if s.step_id == "step-1") + # line_number tracks the last event for this step (action on line 3) + assert step1.line_number == 3 + assert index.trace_file.line_count == 3 + + def test_agent_name_extraction(self): + """Index should extract agent name from run_start event.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "agent-name.jsonl" + + events = [ + { + "v": 1, + "type": "run_start", + "ts": "2025-12-29T10:00:00.000Z", + "data": {"agent": "MyTestAgent", "llm_model": "gpt-4"}, + }, + { + "v": 1, + "type": "run_end", + "ts": "2025-12-29T10:00:01.000Z", + "data": {}, + }, + ] + + with open(trace_path, "w") as f: + for event in events: + f.write(json.dumps(event) + "\n") + + index = build_trace_index(str(trace_path)) + + assert index.summary.agent_name == "MyTestAgent" + + def test_duration_calculation(self): + """Index should calculate duration_ms from timestamps.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "duration.jsonl" + + events = [ + { + "v": 1, + "type": "run_start", + "ts": "2025-12-29T10:00:00.000Z", + "data": {}, + }, + { + "v": 1, + "type": "run_end", + "ts": "2025-12-29T10:01:30.000Z", # 90 seconds later + "data": {}, + }, + ] + + with open(trace_path, "w") as f: + for event in events: + f.write(json.dumps(event) + "\n") + + index = build_trace_index(str(trace_path)) + + assert index.summary.duration_ms == 90000 # 90 seconds = 90000ms + + def test_counters_aggregation(self): + """Index should aggregate counters across all steps.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "counters.jsonl" + + events = [ + { + "v": 1, + "type": "step_start", + "ts": "2025-12-29T10:00:00.000Z", + "step_id": "step-1", + "data": {}, + }, + { + "v": 1, + "type": "snapshot", + "ts": "2025-12-29T10:00:01.000Z", + "step_id": "step-1", + "data": {"url": "https://example.com"}, + }, + { + "v": 1, + "type": "action", + "ts": "2025-12-29T10:00:02.000Z", + "step_id": "step-1", + "data": {"type": "CLICK"}, + }, + { + "v": 1, + "type": "step_start", + "ts": "2025-12-29T10:00:03.000Z", + "step_id": "step-2", + "data": {}, + }, + { + "v": 1, + "type": "snapshot", + "ts": "2025-12-29T10:00:04.000Z", + "step_id": "step-2", + "data": {"url": "https://example.com"}, + }, + { + "v": 1, + "type": "action", + "ts": "2025-12-29T10:00:05.000Z", + "step_id": "step-2", + "data": {"type": "TYPE"}, + }, + ] + + with open(trace_path, "w") as f: + for event in events: + f.write(json.dumps(event) + "\n") + + index = build_trace_index(str(trace_path)) + + assert index.summary.counters is not None + assert index.summary.counters["snapshot_count"] == 2 + assert index.summary.counters["action_count"] == 2 + assert index.summary.counters["error_count"] == 0 + + def test_status_defaults_to_failure(self): + """Steps without step_end should default to 'failure' status.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "default-status.jsonl" + + events = [ + { + "v": 1, + "type": "step_start", + "ts": "2025-12-29T10:00:00.000Z", + "step_id": "step-1", + "data": {}, + }, + # No step_end event - 
should default to failure + ] + + with open(trace_path, "w") as f: + for event in events: + f.write(json.dumps(event) + "\n") + + index = build_trace_index(str(trace_path)) + + assert index.steps[0].status == "failure" + + def test_to_frontend_dict(self): + """to_frontend_dict should produce frontend-compatible format.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "frontend-format.jsonl" + + events = [ + { + "v": 1, + "type": "run_start", + "ts": "2025-12-29T10:00:00.000Z", + "data": {"agent": "TestAgent"}, + }, + { + "v": 1, + "type": "step_start", + "ts": "2025-12-29T10:00:01.000Z", + "step_id": "step-1", + "data": {"goal": "Test goal"}, + }, + { + "v": 1, + "type": "step_end", + "ts": "2025-12-29T10:00:02.000Z", + "step_id": "step-1", + "data": { + "v": 1, + "step_id": "step-1", + "step_index": 1, + "goal": "Test goal", + "attempt": 0, + "pre": {"snapshot_digest": "sha256:test"}, + "llm": {"response_text": "CLICK(42)", "response_hash": "sha256:test"}, + "exec": { + "success": True, + "action": "click", + "outcome": "Action executed", + "duration_ms": 100, + }, + "post": {"url": "https://example.com"}, + "verify": {"passed": True, "signals": {}}, + }, + }, + { + "v": 1, + "type": "run_end", + "ts": "2025-12-29T10:00:03.000Z", + "data": {"steps": 1, "status": "success"}, + }, + ] + + with open(trace_path, "w") as f: + for event in events: + f.write(json.dumps(event) + "\n") + + index = build_trace_index(str(trace_path)) + frontend_dict = index.to_sentience_studio_dict() + + # Check field name mappings + assert "generated_at" in frontend_dict # Renamed from created_at + assert "trace_file" in frontend_dict + assert frontend_dict["trace_file"]["line_count"] == 4 + assert "summary" in frontend_dict + assert frontend_dict["summary"]["agent_name"] == "TestAgent" + # Includes synthetic step-0 from run_start, so total_steps is 2 + assert frontend_dict["summary"]["total_steps"] >= 1 # Renamed from step_count + assert frontend_dict["summary"]["start_time"] is not None # Renamed from first_ts + assert frontend_dict["summary"]["end_time"] is not None # Renamed from last_ts + assert frontend_dict["summary"]["duration_ms"] > 0 + assert "counters" in frontend_dict["summary"] + assert "steps" in frontend_dict + # Find step-1 (skip synthetic step-0 from run_start) + step1_dict = next( + s for s in frontend_dict["steps"] if s.get("action", {}).get("goal") == "Test goal" + ) + assert step1_dict["step"] >= 1 # Converted from 0-based to 1-based + assert step1_dict["line_number"] is not None + assert step1_dict["status"] == "success" + assert "action" in step1_dict + assert step1_dict["action"]["goal"] == "Test goal" # Goal moved into action + + def test_write_trace_index_frontend_format(self): + """write_trace_index with frontend_format=True should use frontend format.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "test.jsonl" + + events = [ + { + "v": 1, + "type": "run_start", + "ts": "2025-12-29T10:00:00.000Z", + "data": {"agent": "TestAgent"}, + }, + ] + + with open(trace_path, "w") as f: + for event in events: + f.write(json.dumps(event) + "\n") + + index_path = write_trace_index(str(trace_path), frontend_format=True) + + with open(index_path) as f: + index_data = json.load(f) + + # Check frontend format fields + assert "generated_at" in index_data # Frontend format + assert "summary" in index_data + assert "agent_name" in index_data["summary"] + + def test_event_type_backward_compatibility(self): + """Indexer should handle both old 
and new event type names.""" + with tempfile.TemporaryDirectory() as tmpdir: + trace_path = Path(tmpdir) / "event-types.jsonl" + + events = [ + { + "v": 1, + "type": "step_start", + "ts": "2025-12-29T10:00:00.000Z", + "step_id": "step-1", + "data": {}, + }, + { + "v": 1, + "type": "snapshot_taken", # New schema name + "ts": "2025-12-29T10:00:01.000Z", + "step_id": "step-1", + "data": {"url": "https://example.com"}, + }, + { + "v": 1, + "type": "action_executed", # New schema name + "ts": "2025-12-29T10:00:02.000Z", + "step_id": "step-1", + "data": {"type": "CLICK"}, + }, + { + "v": 1, + "type": "llm_called", # New schema name + "ts": "2025-12-29T10:00:03.000Z", + "step_id": "step-1", + "data": {}, + }, + ] + + with open(trace_path, "w") as f: + for event in events: + f.write(json.dumps(event) + "\n") + + index = build_trace_index(str(trace_path)) + + # Should process all events correctly + assert index.steps[0].counters.snapshots == 1 + assert index.steps[0].counters.actions == 1 + assert index.steps[0].counters.llm_calls == 1 From 06ca7f71cce07ebdf7a03f503fdc6f6965580537 Mon Sep 17 00:00:00 2001 From: rcholic Date: Thu, 1 Jan 2026 22:19:07 -0800 Subject: [PATCH 2/2] fix windows tests --- tests/test_smart_selector.py | 6 +++--- tests/test_video_recording.py | 4 ++-- tests/test_wait.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_smart_selector.py b/tests/test_smart_selector.py index b59be70..bf72316 100644 --- a/tests/test_smart_selector.py +++ b/tests/test_smart_selector.py @@ -9,7 +9,7 @@ def test_smart_selector_inference(): """Test that recorder infers selectors automatically""" with SentienceBrowser() as browser: browser.page.goto("https://example.com") - browser.page.wait_for_load_state("networkidle") + browser.page.wait_for_load_state("networkidle", timeout=30000) # Take snapshot to get element snap = snapshot(browser) @@ -31,7 +31,7 @@ def test_smart_selector_with_text(): """Test selector inference for elements with text""" with SentienceBrowser() as browser: browser.page.goto("https://example.com") - browser.page.wait_for_load_state("networkidle") + browser.page.wait_for_load_state("networkidle", timeout=30000) snap = snapshot(browser) # Find element with text @@ -55,7 +55,7 @@ def test_smart_selector_validation(): """Test that inferred selectors are validated""" with SentienceBrowser() as browser: browser.page.goto("https://example.com") - browser.page.wait_for_load_state("networkidle") + browser.page.wait_for_load_state("networkidle", timeout=30000) snap = snapshot(browser) if len(snap.elements) > 0: diff --git a/tests/test_video_recording.py b/tests/test_video_recording.py index d1400ce..7db966b 100644 --- a/tests/test_video_recording.py +++ b/tests/test_video_recording.py @@ -117,7 +117,7 @@ def test_no_video_recording_when_disabled(): try: browser.page.goto("https://example.com") - browser.page.wait_for_load_state("networkidle", timeout=10000) + browser.page.wait_for_load_state("networkidle", timeout=30000) video_path = browser.close() @@ -188,7 +188,7 @@ def test_video_recording_multiple_sessions(): try: browser.page.goto("https://example.com") - browser.page.wait_for_load_state("networkidle", timeout=10000) + browser.page.wait_for_load_state("networkidle", timeout=30000) output_path = video_dir / f"video_{i}.webm" video_path = browser.close(output_path=str(output_path)) diff --git a/tests/test_wait.py b/tests/test_wait.py index 7150708..2a5e462 100644 --- a/tests/test_wait.py +++ b/tests/test_wait.py @@ -10,7 +10,7 @@ def 
test_wait_for(): # Auto-detect headless mode (True in CI, False locally) with SentienceBrowser() as browser: browser.page.goto("https://example.com") - browser.page.wait_for_load_state("networkidle", timeout=10000) + browser.page.wait_for_load_state("networkidle", timeout=30000) result = wait_for(browser, "role=link", timeout=5.0) assert result.found is True @@ -23,7 +23,7 @@ def test_wait_for_timeout(): """Test wait_for timeout""" with SentienceBrowser() as browser: browser.page.goto("https://example.com") - browser.page.wait_for_load_state("networkidle", timeout=10000) + browser.page.wait_for_load_state("networkidle", timeout=30000) # Wait for non-existent element result = wait_for(browser, "role=button text~'NonExistentButton'", timeout=1.0) @@ -35,7 +35,7 @@ def test_expect_to_exist(): """Test expect().to_exist()""" with SentienceBrowser() as browser: browser.page.goto("https://example.com") - browser.page.wait_for_load_state("networkidle", timeout=10000) + browser.page.wait_for_load_state("networkidle", timeout=30000) element = expect(browser, "role=link").to_exist(timeout=5.0) assert element is not None @@ -46,7 +46,7 @@ def test_expect_to_be_visible(): """Test expect().to_be_visible()""" with SentienceBrowser() as browser: browser.page.goto("https://example.com") - browser.page.wait_for_load_state("networkidle", timeout=10000) + browser.page.wait_for_load_state("networkidle", timeout=30000) element = expect(browser, "role=link").to_be_visible(timeout=5.0) assert element is not None
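
Usage sketch for the trace-indexing API added in this patch series. This is a minimal, hedged example: the import path is assumed from the file layout (sentience/trace_indexing/indexer.py) and the trace file path is hypothetical; build_trace_index, write_trace_index(..., frontend_format=True), TraceSummary.status / duration_ms, and TraceIndex.to_sentience_studio_dict() are taken directly from the diff above.

    from sentience.trace_indexing.indexer import build_trace_index, write_trace_index

    # Build an in-memory index from a JSONL trace (hypothetical path) and
    # inspect the summary fields introduced by this patch.
    index = build_trace_index("runs/run-123.trace.jsonl")
    print(index.summary.status, index.summary.duration_ms, index.summary.agent_name)

    # Write the index next to the trace. With frontend_format=True the file is
    # serialized via TraceIndex.to_sentience_studio_dict() (generated_at,
    # total_steps, start_time/end_time, 1-based step numbers) instead of the
    # raw dataclass layout.
    index_path = write_trace_index("runs/run-123.trace.jsonl", frontend_format=True)
    print(index_path)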