From 7c200891b87546df2268f360883021a61b196554 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 28 Dec 2025 17:43:47 -0800 Subject: [PATCH 1/2] file size reporting --- pyproject.toml | 2 +- sentience/__init__.py | 5 +- sentience/cloud_tracing.py | 90 ++++++++++- sentience/extension/background.js | 6 +- sentience/extension/content.js | 2 +- sentience/extension/injected_api.js | 214 ++++++++++++------------- sentience/tracer_factory.py | 12 +- tests/test_file_size_tracking.py | 233 ++++++++++++++++++++++++++++ 8 files changed, 446 insertions(+), 118 deletions(-) create mode 100644 tests/test_file_size_tracking.py diff --git a/pyproject.toml b/pyproject.toml index 6478a18..390418d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "sentienceapi" -version = "0.90.3" +version = "0.90.5" description = "Python SDK for Sentience AI Agent Browser Automation" readme = "README.md" requires-python = ">=3.11" diff --git a/sentience/__init__.py b/sentience/__init__.py index 12304d1..bb82cf8 100644 --- a/sentience/__init__.py +++ b/sentience/__init__.py @@ -11,7 +11,7 @@ from .browser import SentienceBrowser # Tracing (v0.12.0+) -from .cloud_tracing import CloudTraceSink +from .cloud_tracing import CloudTraceSink, SentienceLogger from .conversational_agent import ConversationalAgent from .expect import expect @@ -64,7 +64,7 @@ ) from .wait import wait_for -__version__ = "0.90.3" +__version__ = "0.90.5" __all__ = [ # Core SDK @@ -123,6 +123,7 @@ "TraceSink", "JsonlTraceSink", "CloudTraceSink", + "SentienceLogger", "TraceEvent", "create_tracer", "SENTIENCE_API_URL", diff --git a/sentience/cloud_tracing.py b/sentience/cloud_tracing.py index 9d778af..984f48f 100644 --- a/sentience/cloud_tracing.py +++ b/sentience/cloud_tracing.py @@ -10,13 +10,29 @@ import threading from collections.abc import Callable from pathlib import Path -from typing import Any +from typing import Any, Protocol import requests from sentience.tracing import TraceSink +class SentienceLogger(Protocol): + """Protocol for optional logger interface.""" + + def info(self, message: str) -> None: + """Log info message.""" + ... + + def warning(self, message: str) -> None: + """Log warning message.""" + ... + + def error(self, message: str) -> None: + """Log error message.""" + ... + + class CloudTraceSink(TraceSink): """ Enterprise Cloud Sink: "Local Write, Batch Upload" pattern. @@ -51,7 +67,14 @@ class CloudTraceSink(TraceSink): >>> tracer.close(blocking=False) # Returns immediately """ - def __init__(self, upload_url: str, run_id: str): + def __init__( + self, + upload_url: str, + run_id: str, + api_key: str | None = None, + api_url: str | None = None, + logger: SentienceLogger | None = None, + ): """ Initialize cloud trace sink. @@ -59,9 +82,15 @@ def __init__(self, upload_url: str, run_id: str): upload_url: Pre-signed PUT URL from Sentience API (e.g., "https://sentience.nyc3.digitaloceanspaces.com/...") run_id: Unique identifier for this agent run (used for persistent cache) + api_key: Sentience API key for calling /v1/traces/complete + api_url: Sentience API base URL (default: https://api.sentienceapi.com) + logger: Optional logger instance for logging file sizes and errors """ self.upload_url = upload_url self.run_id = run_id + self.api_key = api_key + self.api_url = api_url or "https://api.sentienceapi.com" + self.logger = logger # Use persistent cache directory instead of temp file # This ensures traces survive process crashes @@ -74,6 +103,10 @@ def __init__(self, upload_url: str, run_id: str): self._closed = False self._upload_successful = False + # File size tracking (NEW) + self.trace_file_size_bytes = 0 + self.screenshot_total_size_bytes = 0 + def emit(self, event: dict[str, Any]) -> None: """ Write event to local persistent file (Fast, non-blocking). @@ -140,6 +173,18 @@ def _do_upload(self, on_progress: Callable[[int, int], None] | None = None) -> N compressed_data = gzip.compress(trace_data) compressed_size = len(compressed_data) + # Measure trace file size (NEW) + self.trace_file_size_bytes = compressed_size + + # Log file sizes if logger is provided (NEW) + if self.logger: + self.logger.info( + f"Trace file size: {self.trace_file_size_bytes / 1024 / 1024:.2f} MB" + ) + self.logger.info( + f"Screenshot total: {self.screenshot_total_size_bytes / 1024 / 1024:.2f} MB" + ) + # Report progress: start if on_progress: on_progress(0, compressed_size) @@ -165,6 +210,9 @@ def _do_upload(self, on_progress: Callable[[int, int], None] | None = None) -> N if on_progress: on_progress(compressed_size, compressed_size) + # Call /v1/traces/complete to report file sizes (NEW) + self._complete_trace() + # Delete file only on successful upload if os.path.exists(self._path): try: @@ -183,6 +231,44 @@ def _do_upload(self, on_progress: Callable[[int, int], None] | None = None) -> N print(f" Local trace preserved at: {self._path}") # Don't raise - preserve trace locally even if upload fails + def _complete_trace(self) -> None: + """ + Call /v1/traces/complete to report file sizes to gateway. + + This is a best-effort call - failures are logged but don't affect upload success. + """ + if not self.api_key: + # No API key - skip complete call + return + + try: + response = requests.post( + f"{self.api_url}/v1/traces/complete", + headers={"Authorization": f"Bearer {self.api_key}"}, + json={ + "run_id": self.run_id, + "stats": { + "trace_file_size_bytes": self.trace_file_size_bytes, + "screenshot_total_size_bytes": self.screenshot_total_size_bytes, + }, + }, + timeout=10, + ) + + if response.status_code == 200: + if self.logger: + self.logger.info("Trace completion reported to gateway") + else: + if self.logger: + self.logger.warning( + f"Failed to report trace completion: HTTP {response.status_code}" + ) + + except Exception as e: + # Best-effort - log but don't fail + if self.logger: + self.logger.warning(f"Error reporting trace completion: {e}") + def __enter__(self): """Context manager support.""" return self diff --git a/sentience/extension/background.js b/sentience/extension/background.js index 811303f..f359ba6 100644 --- a/sentience/extension/background.js +++ b/sentience/extension/background.js @@ -144,13 +144,13 @@ async function handleScreenshotCapture(_tabId, options = {}) { async function handleSnapshotProcessing(rawData, options = {}) { const MAX_ELEMENTS = 10000; // Safety limit to prevent hangs const startTime = performance.now(); - + try { // Safety check: limit element count to prevent hangs if (!Array.isArray(rawData)) { throw new Error('rawData must be an array'); } - + if (rawData.length > MAX_ELEMENTS) { console.warn(`[Sentience Background] ⚠️ Large dataset: ${rawData.length} elements. Limiting to ${MAX_ELEMENTS} to prevent hangs.`); rawData = rawData.slice(0, MAX_ELEMENTS); @@ -186,7 +186,7 @@ async function handleSnapshotProcessing(rawData, options = {}) { // Add timeout protection (18 seconds - less than content.js timeout) analyzedElements = await Promise.race([ wasmPromise, - new Promise((_, reject) => + new Promise((_, reject) => setTimeout(() => reject(new Error('WASM processing timeout (>18s)')), 18000) ) ]); diff --git a/sentience/extension/content.js b/sentience/extension/content.js index 62ae408..8d3b0d4 100644 --- a/sentience/extension/content.js +++ b/sentience/extension/content.js @@ -92,7 +92,7 @@ function handleSnapshotRequest(data) { if (responded) return; // Already responded via timeout responded = true; clearTimeout(timeoutId); - + const duration = performance.now() - startTime; // Handle Chrome extension errors (e.g., background script crashed) diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index 712542d..bd827c8 100644 --- a/sentience/extension/injected_api.js +++ b/sentience/extension/injected_api.js @@ -66,10 +66,10 @@ // --- HELPER: Safe Class Name Extractor (Handles SVGAnimatedString) --- function getClassName(el) { if (!el || !el.className) return ''; - + // Handle string (HTML elements) if (typeof el.className === 'string') return el.className; - + // Handle SVGAnimatedString (SVG elements) if (typeof el.className === 'object') { if ('baseVal' in el.className && typeof el.className.baseVal === 'string') { @@ -85,17 +85,17 @@ return ''; } } - + return ''; } // --- HELPER: Paranoid String Converter (Handles SVGAnimatedString) --- function toSafeString(value) { if (value === null || value === undefined) return null; - + // 1. If it's already a primitive string, return it if (typeof value === 'string') return value; - + // 2. Handle SVG objects (SVGAnimatedString, SVGAnimatedNumber, etc.) if (typeof value === 'object') { // Try extracting baseVal (standard SVG property) @@ -114,7 +114,7 @@ return null; } } - + // 3. Last resort cast for primitives try { return String(value); @@ -127,9 +127,9 @@ // For SVG elements, get the fill or stroke color (SVGs use fill/stroke, not backgroundColor) function getSVGColor(el) { if (!el || el.tagName !== 'SVG') return null; - + const style = window.getComputedStyle(el); - + // Try fill first (most common for SVG icons) const fill = style.fill; if (fill && fill !== 'none' && fill !== 'transparent' && fill !== 'rgba(0, 0, 0, 0)') { @@ -144,7 +144,7 @@ return fill; } } - + // Fallback to stroke if fill is not available const stroke = style.stroke; if (stroke && stroke !== 'none' && stroke !== 'transparent' && stroke !== 'rgba(0, 0, 0, 0)') { @@ -158,7 +158,7 @@ return stroke; } } - + return null; } @@ -168,28 +168,28 @@ // This handles rgba(0,0,0,0) and transparent values that browsers commonly return function getEffectiveBackgroundColor(el) { if (!el) return null; - + // For SVG elements, use fill/stroke instead of backgroundColor if (el.tagName === 'SVG') { const svgColor = getSVGColor(el); if (svgColor) return svgColor; } - + let current = el; const maxDepth = 10; // Prevent infinite loops let depth = 0; - + while (current && depth < maxDepth) { const style = window.getComputedStyle(current); - + // For SVG elements in the tree, also check fill/stroke if (current.tagName === 'SVG') { const svgColor = getSVGColor(current); if (svgColor) return svgColor; } - + const bgColor = style.backgroundColor; - + if (bgColor && bgColor !== 'transparent' && bgColor !== 'rgba(0, 0, 0, 0)') { // Check if it's rgba with alpha < 1 (semi-transparent) const rgbaMatch = bgColor.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/); @@ -209,12 +209,12 @@ return bgColor; } } - + // Move up the DOM tree current = current.parentElement; depth++; } - + // Fallback: return null if nothing found return null; } @@ -235,7 +235,7 @@ // Only check for elements that are likely to be occluded (overlays, modals, tooltips) const zIndex = parseInt(style.zIndex, 10); const position = style.position; - + // Skip occlusion check for normal flow elements (vast majority) // Only check for positioned elements or high z-index (likely overlays) if (position === 'static' && (isNaN(zIndex) || zIndex <= 10)) { @@ -308,7 +308,7 @@ }; window.addEventListener('message', listener); - + try { window.postMessage({ type: 'SENTIENCE_SNAPSHOT_REQUEST', @@ -514,7 +514,7 @@ function extractRawElementData(el) { const style = window.getComputedStyle(el); const rect = el.getBoundingClientRect(); - + return { tag: el.tagName, rect: { @@ -548,12 +548,12 @@ // --- HELPER: Generate Unique CSS Selector (for Golden Set) --- function getUniqueSelector(el) { if (!el || !el.tagName) return ''; - + // If element has a unique ID, use it if (el.id) { return `#${el.id}`; } - + // Try data attributes or aria-label for uniqueness for (const attr of el.attributes) { if (attr.name.startsWith('data-') || attr.name === 'aria-label') { @@ -561,21 +561,21 @@ return `${el.tagName.toLowerCase()}[${attr.name}="${value}"]`; } } - + // Build path with classes and nth-child for uniqueness const path = []; let current = el; - + while (current && current !== document.body && current !== document.documentElement) { let selector = current.tagName.toLowerCase(); - + // If current element has ID, use it and stop if (current.id) { selector = `#${current.id}`; path.unshift(selector); break; } - + // Add class if available if (current.className && typeof current.className === 'string') { const classes = current.className.trim().split(/\s+/).filter(c => c); @@ -584,7 +584,7 @@ selector += `.${classes[0]}`; } } - + // Add nth-of-type if needed for uniqueness if (current.parentElement) { const siblings = Array.from(current.parentElement.children); @@ -594,11 +594,11 @@ selector += `:nth-of-type(${index + 1})`; } } - + path.unshift(selector); current = current.parentElement; } - + return path.join(' > ') || el.tagName.toLowerCase(); } @@ -613,7 +613,7 @@ } = options; const startTime = Date.now(); - + return new Promise((resolve) => { // Check if DOM already has enough nodes const nodeCount = document.querySelectorAll('*').length; @@ -623,17 +623,17 @@ const observer = new MutationObserver(() => { lastChange = Date.now(); }); - + observer.observe(document.body, { childList: true, subtree: true, attributes: false }); - + const checkStable = () => { const timeSinceLastChange = Date.now() - lastChange; const totalWait = Date.now() - startTime; - + if (timeSinceLastChange >= quietPeriod) { observer.disconnect(); resolve(); @@ -645,14 +645,14 @@ setTimeout(checkStable, 50); } }; - + checkStable(); } else { // DOM doesn't have enough nodes yet, wait for them const observer = new MutationObserver(() => { const currentCount = document.querySelectorAll('*').length; const totalWait = Date.now() - startTime; - + if (currentCount >= minNodeCount) { observer.disconnect(); // Now wait for quiet period @@ -660,17 +660,17 @@ const quietObserver = new MutationObserver(() => { lastChange = Date.now(); }); - + quietObserver.observe(document.body, { childList: true, subtree: true, attributes: false }); - + const checkQuiet = () => { const timeSinceLastChange = Date.now() - lastChange; const totalWait = Date.now() - startTime; - + if (timeSinceLastChange >= quietPeriod) { quietObserver.disconnect(); resolve(); @@ -682,7 +682,7 @@ setTimeout(checkQuiet, 50); } }; - + checkQuiet(); } else if (totalWait >= maxWait) { observer.disconnect(); @@ -690,13 +690,13 @@ resolve(); } }); - + observer.observe(document.body, { childList: true, subtree: true, attributes: false }); - + // Timeout fallback setTimeout(() => { observer.disconnect(); @@ -710,21 +710,21 @@ // --- HELPER: Collect Iframe Snapshots (Frame Stitching) --- // Recursively collects snapshot data from all child iframes // This enables detection of elements inside iframes (e.g., Stripe forms) - // + // // NOTE: Cross-origin iframes cannot be accessed due to browser security (Same-Origin Policy). // Only same-origin iframes will return snapshot data. Cross-origin iframes will be skipped // with a warning. For cross-origin iframes, users must manually switch frames using // Playwright's page.frame() API. async function collectIframeSnapshots(options = {}) { const iframeData = new Map(); // Map of iframe element -> snapshot data - + // Find all iframe elements in current document const iframes = Array.from(document.querySelectorAll('iframe')); - + if (iframes.length === 0) { return iframeData; } - + console.log(`[SentienceAPI] Found ${iframes.length} iframe(s), requesting snapshots...`); // Request snapshot from each iframe const iframePromises = iframes.map((iframe, idx) => { @@ -737,13 +737,13 @@ return new Promise((resolve) => { const requestId = `iframe-${idx}-${Date.now()}`; - + // 1. EXTENDED TIMEOUT (Handle slow children) const timeout = setTimeout(() => { console.warn(`[SentienceAPI] ⚠️ Iframe ${idx} snapshot TIMEOUT (id: ${requestId})`); resolve(null); }, 5000); // Increased to 5s to handle slow processing - + // 2. ROBUST LISTENER with debugging const listener = (event) => { // Debug: Log all SENTIENCE_IFRAME_SNAPSHOT_RESPONSE messages to see what's happening @@ -753,14 +753,14 @@ // console.log(`[SentienceAPI] Received response for different request: ${event.data.requestId} (expected: ${requestId})`); } } - + // Check if this is the response we're waiting for - if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE' && + if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE' && event.data?.requestId === requestId) { - + clearTimeout(timeout); window.removeEventListener('message', listener); - + if (event.data.error) { console.warn(`[SentienceAPI] Iframe ${idx} returned error:`, event.data.error); resolve(null); @@ -775,9 +775,9 @@ } } }; - + window.addEventListener('message', listener); - + // 3. SEND REQUEST with error handling try { if (iframe.contentWindow) { @@ -785,8 +785,8 @@ iframe.contentWindow.postMessage({ type: 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST', requestId: requestId, - options: { - ...options, + options: { + ...options, collectIframes: true // Enable recursion for nested iframes } }, '*'); // Use '*' for cross-origin, but browser will enforce same-origin policy @@ -804,10 +804,10 @@ } }); }); - + // Wait for all iframe responses const results = await Promise.all(iframePromises); - + // Store iframe data results.forEach((result, idx) => { if (result && result.data && !result.error) { @@ -819,7 +819,7 @@ console.warn(`[SentienceAPI] Iframe ${idx} returned no data (timeout or error)`); } }); - + return iframeData; } @@ -832,7 +832,7 @@ // Security: only respond to snapshot requests from parent frames if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST') { const { requestId, options } = event.data; - + try { // Generate snapshot for this iframe's content // Allow recursive collection - querySelectorAll('iframe') only finds direct children, @@ -840,7 +840,7 @@ // waitForStability: false makes performance better - i.e. don't wait for children frames const snapshotOptions = { ...options, collectIframes: true, waitForStability: options.waitForStability === false ? false : false }; const snapshot = await window.sentience.snapshot(snapshotOptions); - + // Send response back to parent if (event.source && event.source.postMessage) { event.source.postMessage({ @@ -864,7 +864,7 @@ } }); } - + // Setup iframe handler when script loads (only once) if (!window.sentience_iframe_handler_setup) { setupIframeSnapshotHandler(); @@ -880,7 +880,7 @@ if (options.waitForStability !== false) { await waitForStability(options.waitForStability || {}); } - + // Step 1: Collect raw DOM data (Main World - CSP can't block this!) const rawData = []; window.sentience_registry = []; @@ -896,17 +896,17 @@ const textVal = getText(el); const inView = isInViewport(rect); - + // Get computed style once (needed for both occlusion check and data collection) const style = window.getComputedStyle(el); - + // Only check occlusion for elements likely to be occluded (optimized) // This avoids layout thrashing for the vast majority of elements const occluded = inView ? isOccluded(el, rect, style) : false; - + // Get effective background color (traverses DOM to find non-transparent color) const effectiveBgColor = getEffectiveBackgroundColor(el); - + rawData.push({ id: idx, tag: el.tagName.toLowerCase(), @@ -943,26 +943,26 @@ // This allows WASM to process all elements uniformly (no recursion needed) let allRawElements = [...rawData]; // Start with main frame elements let totalIframeElements = 0; - + if (options.collectIframes !== false) { try { console.log(`[SentienceAPI] Starting iframe collection...`); const iframeSnapshots = await collectIframeSnapshots(options); console.log(`[SentienceAPI] Iframe collection complete. Received ${iframeSnapshots.size} snapshot(s)`); - + if (iframeSnapshots.size > 0) { // FLATTEN IMMEDIATELY: Don't nest them. Just append them with coordinate translation. iframeSnapshots.forEach((iframeSnapshot, iframeEl) => { // Debug: Log structure to verify data is correct // console.log(`[SentienceAPI] Processing iframe snapshot:`, iframeSnapshot); - + if (iframeSnapshot && iframeSnapshot.raw_elements) { const rawElementsCount = iframeSnapshot.raw_elements.length; console.log(`[SentienceAPI] Processing ${rawElementsCount} elements from iframe (src: ${iframeEl.src || 'unknown'})`); // Get iframe's bounding rect (offset for coordinate translation) const iframeRect = iframeEl.getBoundingClientRect(); const offset = { x: iframeRect.x, y: iframeRect.y }; - + // Get iframe context for frame switching (Playwright needs this) const iframeSrc = iframeEl.src || iframeEl.getAttribute('src') || ''; let isSameOrigin = false; @@ -972,11 +972,11 @@ } catch (e) { isSameOrigin = false; } - + // Adjust coordinates and add iframe context to each element const adjustedElements = iframeSnapshot.raw_elements.map(el => { const adjusted = { ...el }; - + // Adjust rect coordinates to parent viewport if (adjusted.rect) { adjusted.rect = { @@ -985,22 +985,22 @@ y: adjusted.rect.y + offset.y }; } - + // Add iframe context so agents can switch frames in Playwright adjusted.iframe_context = { src: iframeSrc, is_same_origin: isSameOrigin }; - + return adjusted; }); - + // Append flattened iframe elements to main array allRawElements.push(...adjustedElements); totalIframeElements += adjustedElements.length; } }); - + // console.log(`[SentienceAPI] Merged ${iframeSnapshots.size} iframe(s). Total elements: ${allRawElements.length} (${rawData.length} main + ${totalIframeElements} iframe)`); } } catch (error) { @@ -1013,7 +1013,7 @@ // No recursion needed - everything is already flat console.log(`[SentienceAPI] Sending ${allRawElements.length} total elements to WASM (${rawData.length} main + ${totalIframeElements} iframe)`); const processed = await processSnapshotInBackground(allRawElements, options); - + if (!processed || !processed.elements) { throw new Error('WASM processing returned invalid result'); } @@ -1029,10 +1029,10 @@ const cleanedRawElements = cleanElement(processed.raw_elements); // FIXED: Removed undefined 'totalIframeRawElements' - // FIXED: Logic updated for "Flatten Early" architecture. + // FIXED: Logic updated for "Flatten Early" architecture. // processed.elements ALREADY contains the merged iframe elements, // so we simply use .length. No addition needed. - + const totalCount = cleanedElements.length; const totalRaw = cleanedRawElements.length; const iframeCount = totalIframeElements || 0; @@ -1250,23 +1250,23 @@ autoDisableTimeout = 30 * 60 * 1000, // 30 minutes default keyboardShortcut = 'Ctrl+Shift+I' } = options; - + console.log("🔴 [Sentience] Recording Mode STARTED. Click an element to copy its Ground Truth JSON."); console.log(` Press ${keyboardShortcut} or call stopRecording() to stop.`); - + // Validate registry is populated if (!window.sentience_registry || window.sentience_registry.length === 0) { console.warn("⚠️ Registry empty. Call `await window.sentience.snapshot()` first to populate registry."); alert("Registry empty. Run `await window.sentience.snapshot()` first!"); return () => {}; // Return no-op cleanup function } - + // Create reverse mapping for O(1) lookup (fixes registry lookup bug) window.sentience_registry_map = new Map(); window.sentience_registry.forEach((el, idx) => { if (el) window.sentience_registry_map.set(el, idx); }); - + // Create highlight box overlay let highlightBox = document.getElementById('sentience-highlight-box'); if (!highlightBox) { @@ -1284,7 +1284,7 @@ `; document.body.appendChild(highlightBox); } - + // Create visual indicator (red border on page when recording) let recordingIndicator = document.getElementById('sentience-recording-indicator'); if (!recordingIndicator) { @@ -1303,12 +1303,12 @@ document.body.appendChild(recordingIndicator); } recordingIndicator.style.display = 'block'; - + // Hover handler (visual feedback) const mouseOverHandler = (e) => { const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; - + const rect = el.getBoundingClientRect(); highlightBox.style.display = 'block'; highlightBox.style.top = (rect.top + window.scrollY) + 'px'; @@ -1316,15 +1316,15 @@ highlightBox.style.width = rect.width + 'px'; highlightBox.style.height = rect.height + 'px'; }; - + // Click handler (capture ground truth data) const clickHandler = (e) => { e.preventDefault(); e.stopPropagation(); - + const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; - + // Use Map for reliable O(1) lookup const sentienceId = window.sentience_registry_map.get(el); if (sentienceId === undefined) { @@ -1332,13 +1332,13 @@ alert("Element not in registry. Run `await window.sentience.snapshot()` first!"); return; } - + // Extract raw data (ground truth + raw signals, NOT model outputs) const rawData = extractRawElementData(el); const selector = getUniqueSelector(el); const role = el.getAttribute('role') || el.tagName.toLowerCase(); const text = getText(el); - + // Build golden set JSON (ground truth + raw signals only) const snippet = { task: `Interact with ${text.substring(0, 20)}${text.length > 20 ? '...' : ''}`, @@ -1352,12 +1352,12 @@ }, debug_snapshot: rawData }; - + // Copy to clipboard const jsonString = JSON.stringify(snippet, null, 2); navigator.clipboard.writeText(jsonString).then(() => { console.log("✅ Copied Ground Truth to clipboard:", snippet); - + // Flash green to indicate success highlightBox.style.border = `2px solid ${successColor}`; highlightBox.style.background = 'rgba(0, 255, 0, 0.2)'; @@ -1370,42 +1370,42 @@ alert("Failed to copy to clipboard. Check console for JSON."); }); }; - + // Auto-disable timeout let timeoutId = null; - + // Cleanup function to stop recording (defined before use) const stopRecording = () => { document.removeEventListener('mouseover', mouseOverHandler, true); document.removeEventListener('click', clickHandler, true); document.removeEventListener('keydown', keyboardHandler, true); - + if (timeoutId) { clearTimeout(timeoutId); timeoutId = null; } - + if (highlightBox) { highlightBox.style.display = 'none'; } - + if (recordingIndicator) { recordingIndicator.style.display = 'none'; } - + // Clean up registry map (optional, but good practice) if (window.sentience_registry_map) { window.sentience_registry_map.clear(); } - + // Remove global reference if (window.sentience_stopRecording === stopRecording) { delete window.sentience_stopRecording; } - + console.log("⚪ [Sentience] Recording Mode STOPPED."); }; - + // Keyboard shortcut handler (defined after stopRecording) const keyboardHandler = (e) => { // Ctrl+Shift+I or Cmd+Shift+I @@ -1414,12 +1414,12 @@ stopRecording(); } }; - + // Attach event listeners (use capture phase to intercept early) document.addEventListener('mouseover', mouseOverHandler, true); document.addEventListener('click', clickHandler, true); document.addEventListener('keydown', keyboardHandler, true); - + // Set up auto-disable timeout if (autoDisableTimeout > 0) { timeoutId = setTimeout(() => { @@ -1427,10 +1427,10 @@ stopRecording(); }, autoDisableTimeout); } - + // Store stop function globally for keyboard shortcut access window.sentience_stopRecording = stopRecording; - + return stopRecording; } }; diff --git a/sentience/tracer_factory.py b/sentience/tracer_factory.py index 04ffb1a..2353f1c 100644 --- a/sentience/tracer_factory.py +++ b/sentience/tracer_factory.py @@ -11,7 +11,7 @@ import requests -from sentience.cloud_tracing import CloudTraceSink +from sentience.cloud_tracing import CloudTraceSink, SentienceLogger from sentience.tracing import JsonlTraceSink, Tracer # Sentience API base URL (constant) @@ -22,6 +22,7 @@ def create_tracer( api_key: str | None = None, run_id: str | None = None, api_url: str | None = None, + logger: SentienceLogger | None = None, ) -> Tracer: """ Create tracer with automatic tier detection. @@ -36,6 +37,7 @@ def create_tracer( - Pro/Enterprise: Valid API key run_id: Unique identifier for this agent run. If not provided, generates UUID. api_url: Sentience API base URL (default: https://api.sentienceapi.com) + logger: Optional logger instance for logging file sizes and errors Returns: Tracer configured with appropriate sink @@ -83,7 +85,13 @@ def create_tracer( print("☁️ [Sentience] Cloud tracing enabled (Pro tier)") return Tracer( run_id=run_id, - sink=CloudTraceSink(upload_url=upload_url, run_id=run_id), + sink=CloudTraceSink( + upload_url=upload_url, + run_id=run_id, + api_key=api_key, + api_url=api_url, + logger=logger, + ), ) else: print("⚠️ [Sentience] Cloud init response missing upload_url") diff --git a/tests/test_file_size_tracking.py b/tests/test_file_size_tracking.py new file mode 100644 index 0000000..a1cbc86 --- /dev/null +++ b/tests/test_file_size_tracking.py @@ -0,0 +1,233 @@ +""" +Tests for file size tracking and /v1/traces/complete functionality. + +Tests the Phase 5 SDK changes for enforcing storage quota. +""" + +import gzip +import json +import os +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from sentience.cloud_tracing import CloudTraceSink, SentienceLogger +from sentience.tracer_factory import create_tracer +from sentience.tracing import Tracer + + +class TestFileSizeTracking: + """Test file size tracking in CloudTraceSink.""" + + def test_cloud_sink_tracks_trace_file_size(self, tmp_path): + """Test that CloudTraceSink measures compressed trace file size.""" + # Create mock logger + mock_logger = Mock(spec=SentienceLogger) + + # Create mock upload URL + upload_url = "https://example.com/upload" + + # Create CloudTraceSink with logger + sink = CloudTraceSink( + upload_url=upload_url, + run_id="test-run", + api_key="sk_test_key", + api_url="https://api.example.com", + logger=mock_logger, + ) + + # Verify logger is set + assert sink.logger == mock_logger + + # Verify file size tracking fields exist + assert hasattr(sink, "trace_file_size_bytes") + assert hasattr(sink, "screenshot_total_size_bytes") + assert sink.trace_file_size_bytes == 0 + assert sink.screenshot_total_size_bytes == 0 + + def test_cloud_sink_without_logger(self): + """Test that CloudTraceSink works without a logger (backward compatibility).""" + upload_url = "https://example.com/upload" + + # Create CloudTraceSink without logger (should not fail) + sink = CloudTraceSink( + upload_url=upload_url, + run_id="test-run", + ) + + assert sink.logger is None + assert sink.trace_file_size_bytes == 0 + assert sink.screenshot_total_size_bytes == 0 + + @patch("sentience.cloud_tracing.requests") + def test_cloud_sink_logs_file_sizes(self, mock_requests): + """Test that CloudTraceSink logs file sizes when logger is provided.""" + # Create mock logger + mock_logger = Mock(spec=SentienceLogger) + + # Mock successful upload + mock_response = Mock() + mock_response.status_code = 200 + mock_requests.put.return_value = mock_response + mock_requests.post.return_value = mock_response + + upload_url = "https://example.com/upload" + + # Create CloudTraceSink with logger + sink = CloudTraceSink( + upload_url=upload_url, + run_id="test-run-size", + api_key="sk_test_key", + logger=mock_logger, + ) + + # Emit some events + sink.emit({"type": "test", "data": "test"}) + + # Close to trigger upload + sink.close() + + # Verify logger.info was called with file size information + info_calls = [str(call) for call in mock_logger.info.call_args_list] + assert any("Trace file size:" in call for call in info_calls) + assert any("Screenshot total:" in call for call in info_calls) + + @patch("sentience.cloud_tracing.requests") + def test_complete_trace_called_after_upload(self, mock_requests): + """Test that /v1/traces/complete is called after successful upload.""" + # Mock successful upload and complete + mock_put_response = Mock() + mock_put_response.status_code = 200 + + mock_post_response = Mock() + mock_post_response.status_code = 200 + + mock_requests.put.return_value = mock_put_response + mock_requests.post.return_value = mock_post_response + + upload_url = "https://example.com/upload" + api_url = "https://api.example.com" + + # Create CloudTraceSink with API key + sink = CloudTraceSink( + upload_url=upload_url, + run_id="test-complete", + api_key="sk_test_key", + api_url=api_url, + ) + + # Emit event and close + sink.emit({"type": "test"}) + sink.close() + + # Verify /v1/traces/complete was called + post_calls = mock_requests.post.call_args_list + assert len(post_calls) > 0 + + # Find the complete trace call + complete_call = None + for call in post_calls: + args, kwargs = call + if "/v1/traces/complete" in args[0]: + complete_call = call + break + + assert complete_call is not None, "Expected /v1/traces/complete to be called" + + # Verify the payload + args, kwargs = complete_call + payload = kwargs.get("json") or json.loads(kwargs.get("data", "{}")) + assert "run_id" in payload + assert payload["run_id"] == "test-complete" + assert "stats" in payload + assert "trace_file_size_bytes" in payload["stats"] + assert "screenshot_total_size_bytes" in payload["stats"] + + @patch("sentience.cloud_tracing.requests") + def test_complete_trace_not_called_without_api_key(self, mock_requests): + """Test that /v1/traces/complete is not called without API key.""" + # Mock successful upload + mock_response = Mock() + mock_response.status_code = 200 + mock_requests.put.return_value = mock_response + + upload_url = "https://example.com/upload" + + # Create CloudTraceSink WITHOUT API key + sink = CloudTraceSink( + upload_url=upload_url, + run_id="test-no-key", + ) + + # Emit event and close + sink.emit({"type": "test"}) + sink.close() + + # Verify POST was NOT called + assert mock_requests.post.call_count == 0 + + @patch("sentience.tracer_factory.requests") + def test_create_tracer_passes_logger_to_cloud_sink(self, mock_requests): + """Test that create_tracer passes logger to CloudTraceSink.""" + # Mock successful API response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"upload_url": "https://example.com/upload"} + mock_requests.post.return_value = mock_response + + # Create mock logger + mock_logger = Mock(spec=SentienceLogger) + + # Create tracer with logger + with patch("sentience.tracer_factory._recover_orphaned_traces"): + tracer = create_tracer( + api_key="sk_test_key", + run_id="test-logger", + logger=mock_logger, + ) + + # Verify tracer was created + assert isinstance(tracer, Tracer) + + # Verify sink has logger + assert hasattr(tracer.sink, "logger") + assert tracer.sink.logger == mock_logger + + +class TestBackwardCompatibility: + """Test that existing code continues to work.""" + + @patch("sentience.tracer_factory.requests") + def test_create_tracer_without_logger_still_works(self, mock_requests): + """Test that create_tracer works without logger parameter (backward compat).""" + # Mock successful API response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"upload_url": "https://example.com/upload"} + mock_requests.post.return_value = mock_response + + # Create tracer WITHOUT logger (old API) + with patch("sentience.tracer_factory._recover_orphaned_traces"): + tracer = create_tracer( + api_key="sk_test_key", + run_id="test-compat", + ) + + # Should still work + assert isinstance(tracer, Tracer) + + def test_cloud_sink_backward_compatible_signature(self): + """Test that CloudTraceSink can be created with old 2-parameter signature.""" + # Old signature: CloudTraceSink(upload_url, run_id) + sink = CloudTraceSink( + upload_url="https://example.com/upload", + run_id="test-old-api", + ) + + # Should work fine + assert sink.upload_url == "https://example.com/upload" + assert sink.run_id == "test-old-api" + assert sink.logger is None # No logger + assert sink.api_key is None # No API key From 663e3458bb1e13d389c7174004add1aca2fbeb18 Mon Sep 17 00:00:00 2001 From: rcholic Date: Sun, 28 Dec 2025 17:46:18 -0800 Subject: [PATCH 2/2] file size reporting --- sentience/generator.py | 4 ---- sentience/llm_provider.py | 1 - sentience/query.py | 6 +++--- sentience/screenshot.py | 2 +- sentience/wait.py | 1 - tests/test_snapshot.py | 2 -- tests/test_wait.py | 4 ---- 7 files changed, 4 insertions(+), 16 deletions(-) diff --git a/sentience/generator.py b/sentience/generator.py index 01fd12a..ecb5a6a 100644 --- a/sentience/generator.py +++ b/sentience/generator.py @@ -2,10 +2,6 @@ Script Generator - converts trace into executable code """ -import json -from typing import List, Optional - -from .query import find from .recorder import Trace, TraceStep diff --git a/sentience/llm_provider.py b/sentience/llm_provider.py index 1c4200f..a333e26 100644 --- a/sentience/llm_provider.py +++ b/sentience/llm_provider.py @@ -5,7 +5,6 @@ from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Any, Dict, Optional @dataclass diff --git a/sentience/query.py b/sentience/query.py index 141fee5..ed43b6c 100644 --- a/sentience/query.py +++ b/sentience/query.py @@ -3,12 +3,12 @@ """ import re -from typing import Any, Dict, List, Optional, Union +from typing import Any from .models import Element, Snapshot -def parse_selector(selector: str) -> dict[str, Any]: +def parse_selector(selector: str) -> dict[str, Any]: # noqa: C901 """ Parse string DSL selector into structured query @@ -136,7 +136,7 @@ def parse_selector(selector: str) -> dict[str, Any]: return query -def match_element(element: Element, query: dict[str, Any]) -> bool: +def match_element(element: Element, query: dict[str, Any]) -> bool: # noqa: C901 """Check if element matches query criteria""" # Role exact match diff --git a/sentience/screenshot.py b/sentience/screenshot.py index b5ce7fe..9414d95 100644 --- a/sentience/screenshot.py +++ b/sentience/screenshot.py @@ -2,7 +2,7 @@ Screenshot functionality - standalone screenshot capture """ -from typing import Any, Dict, Literal, Optional +from typing import Any, Literal from .browser import SentienceBrowser diff --git a/sentience/wait.py b/sentience/wait.py index 3b458eb..5b7e099 100644 --- a/sentience/wait.py +++ b/sentience/wait.py @@ -3,7 +3,6 @@ """ import time -from typing import Optional, Union from .browser import SentienceBrowser from .models import WaitResult diff --git a/tests/test_snapshot.py b/tests/test_snapshot.py index a4001d1..30b045d 100644 --- a/tests/test_snapshot.py +++ b/tests/test_snapshot.py @@ -5,8 +5,6 @@ import pytest from sentience import SentienceBrowser, snapshot -from sentience.models import Snapshot - @pytest.mark.requires_extension def test_snapshot_basic(): diff --git a/tests/test_wait.py b/tests/test_wait.py index 9600b27..05a3ab2 100644 --- a/tests/test_wait.py +++ b/tests/test_wait.py @@ -2,10 +2,6 @@ Tests for wait functionality """ -import os - -import pytest - from sentience import SentienceBrowser, expect, wait_for