From 7c200891b87546df2268f360883021a61b196554 Mon Sep 17 00:00:00 2001
From: rcholic <ivytony@gmail.com>
Date: Sun, 28 Dec 2025 17:43:47 -0800
Subject: [PATCH 1/2] file size reporting

---
 pyproject.toml                      |   2 +-
 sentience/__init__.py               |   5 +-
 sentience/cloud_tracing.py          |  90 ++++++++++-
 sentience/extension/background.js   |   6 +-
 sentience/extension/content.js      |   2 +-
 sentience/extension/injected_api.js | 214 ++++++++++++-------------
 sentience/tracer_factory.py         |  12 +-
 tests/test_file_size_tracking.py    | 233 ++++++++++++++++++++++++++++
 8 files changed, 446 insertions(+), 118 deletions(-)
 create mode 100644 tests/test_file_size_tracking.py

diff --git a/pyproject.toml b/pyproject.toml
index 6478a18..390418d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "sentienceapi"
-version = "0.90.3"
+version = "0.90.5"
 description = "Python SDK for Sentience AI Agent Browser Automation"
 readme = "README.md"
 requires-python = ">=3.11"
diff --git a/sentience/__init__.py b/sentience/__init__.py
index 12304d1..bb82cf8 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -11,7 +11,7 @@
 from .browser import SentienceBrowser
 
 # Tracing (v0.12.0+)
-from .cloud_tracing import CloudTraceSink
+from .cloud_tracing import CloudTraceSink, SentienceLogger
 from .conversational_agent import ConversationalAgent
 from .expect import expect
 
@@ -64,7 +64,7 @@
 )
 from .wait import wait_for
 
-__version__ = "0.90.3"
+__version__ = "0.90.5"
 
 __all__ = [
     # Core SDK
@@ -123,6 +123,7 @@
     "TraceSink",
     "JsonlTraceSink",
     "CloudTraceSink",
+    "SentienceLogger",
     "TraceEvent",
     "create_tracer",
     "SENTIENCE_API_URL",
diff --git a/sentience/cloud_tracing.py b/sentience/cloud_tracing.py
index 9d778af..984f48f 100644
--- a/sentience/cloud_tracing.py
+++ b/sentience/cloud_tracing.py
@@ -10,13 +10,29 @@
 import threading
 from collections.abc import Callable
 from pathlib import Path
-from typing import Any
+from typing import Any, Protocol
 
 import requests
 
 from sentience.tracing import TraceSink
 
 
+class SentienceLogger(Protocol):
+    """Structural logger interface: any object exposing info/warning/error(message: str)."""
+
+    def info(self, message: str) -> None:
+        """Log an informational message."""
+        ...
+
+    def warning(self, message: str) -> None:
+        """Log a warning message."""
+        ...
+
+    def error(self, message: str) -> None:
+        """Log an error message."""
+        ...
+
+
 class CloudTraceSink(TraceSink):
     """
     Enterprise Cloud Sink: "Local Write, Batch Upload" pattern.
@@ -51,7 +67,14 @@ class CloudTraceSink(TraceSink):
         >>> tracer.close(blocking=False)  # Returns immediately
     """
 
-    def __init__(self, upload_url: str, run_id: str):
+    def __init__(
+        self,
+        upload_url: str,
+        run_id: str,
+        api_key: str | None = None,
+        api_url: str | None = None,
+        logger: SentienceLogger | None = None,
+    ):
         """
         Initialize cloud trace sink.
 
@@ -59,9 +82,15 @@ def __init__(self, upload_url: str, run_id: str):
             upload_url: Pre-signed PUT URL from Sentience API
                         (e.g., "https://sentience.nyc3.digitaloceanspaces.com/...")
             run_id: Unique identifier for this agent run (used for persistent cache)
+            api_key: Sentience API key for calling /v1/traces/complete
+            api_url: Sentience API base URL (default: https://api.sentienceapi.com)
+            logger: Optional logger instance for logging file sizes and errors
         """
         self.upload_url = upload_url
         self.run_id = run_id
+        self.api_key = api_key
+        self.api_url = api_url or "https://api.sentienceapi.com"
+        self.logger = logger
 
         # Use persistent cache directory instead of temp file
         # This ensures traces survive process crashes
@@ -74,6 +103,10 @@ def __init__(self, upload_url: str, run_id: str):
         self._closed = False
         self._upload_successful = False
 
+        # File size tracking (NEW)
+        self.trace_file_size_bytes = 0
+        self.screenshot_total_size_bytes = 0
+
     def emit(self, event: dict[str, Any]) -> None:
         """
         Write event to local persistent file (Fast, non-blocking).
@@ -140,6 +173,18 @@ def _do_upload(self, on_progress: Callable[[int, int], None] | None = None) -> N
             compressed_data = gzip.compress(trace_data)
             compressed_size = len(compressed_data)
 
+            # Measure trace file size (NEW)
+            self.trace_file_size_bytes = compressed_size
+
+            # Log file sizes if logger is provided (NEW)
+            if self.logger:
+                self.logger.info(
+                    f"Trace file size: {self.trace_file_size_bytes / 1024 / 1024:.2f} MB"
+                )
+                self.logger.info(
+                    f"Screenshot total: {self.screenshot_total_size_bytes / 1024 / 1024:.2f} MB"
+                )
+
             # Report progress: start
             if on_progress:
                 on_progress(0, compressed_size)
@@ -165,6 +210,9 @@ def _do_upload(self, on_progress: Callable[[int, int], None] | None = None) -> N
                 if on_progress:
                     on_progress(compressed_size, compressed_size)
 
+                # Call /v1/traces/complete to report file sizes (NEW)
+                self._complete_trace()
+
                 # Delete file only on successful upload
                 if os.path.exists(self._path):
                     try:
@@ -183,6 +231,44 @@ def _do_upload(self, on_progress: Callable[[int, int], None] | None = None) -> N
             print(f"   Local trace preserved at: {self._path}")
             # Don't raise - preserve trace locally even if upload fails
 
+    def _complete_trace(self) -> None:
+        """
+        Report trace and screenshot sizes to the gateway (POST <api_url>/v1/traces/complete).
+
+        Best-effort: skipped when no API key is set; a non-200 response or any exception is
+        only logged as a warning (and only if a logger was provided) - nothing is raised.
+        """
+        if not self.api_key:
+            return
+
+        try:
+            # Strip trailing "/" so a base URL like "https://host/" does not produce "//v1/...".
+            endpoint = f"{self.api_url.rstrip('/')}/v1/traces/complete"
+            response = requests.post(
+                endpoint,
+                headers={"Authorization": f"Bearer {self.api_key}"},
+                json={
+                    "run_id": self.run_id,
+                    "stats": {
+                        "trace_file_size_bytes": self.trace_file_size_bytes,
+                        "screenshot_total_size_bytes": self.screenshot_total_size_bytes,
+                    },
+                },
+                timeout=10,  # seconds; bounds how long this reporting call can block
+            )
+
+            if response.status_code == 200:
+                if self.logger:
+                    self.logger.info("Trace completion reported to gateway")
+            elif self.logger:
+                self.logger.warning(
+                    f"Failed to report trace completion: HTTP {response.status_code}"
+                )
+        except Exception as e:
+            # Best-effort - log but don't fail
+            if self.logger:
+                self.logger.warning(f"Error reporting trace completion: {e}")
+
     def __enter__(self):
         """Context manager support."""
         return self
diff --git a/sentience/extension/background.js b/sentience/extension/background.js
index 811303f..f359ba6 100644
--- a/sentience/extension/background.js
+++ b/sentience/extension/background.js
@@ -144,13 +144,13 @@ async function handleScreenshotCapture(_tabId, options = {}) {
 async function handleSnapshotProcessing(rawData, options = {}) {
     const MAX_ELEMENTS = 10000; // Safety limit to prevent hangs
     const startTime = performance.now();
-    
+
     try {
         // Safety check: limit element count to prevent hangs
         if (!Array.isArray(rawData)) {
             throw new Error('rawData must be an array');
         }
-        
+
         if (rawData.length > MAX_ELEMENTS) {
             console.warn(`[Sentience Background] ⚠️ Large dataset: ${rawData.length} elements. Limiting to ${MAX_ELEMENTS} to prevent hangs.`);
             rawData = rawData.slice(0, MAX_ELEMENTS);
@@ -186,7 +186,7 @@ async function handleSnapshotProcessing(rawData, options = {}) {
             // Add timeout protection (18 seconds - less than content.js timeout)
             analyzedElements = await Promise.race([
                 wasmPromise,
-                new Promise((_, reject) => 
+                new Promise((_, reject) =>
                     setTimeout(() => reject(new Error('WASM processing timeout (>18s)')), 18000)
                 )
             ]);
diff --git a/sentience/extension/content.js b/sentience/extension/content.js
index 62ae408..8d3b0d4 100644
--- a/sentience/extension/content.js
+++ b/sentience/extension/content.js
@@ -92,7 +92,7 @@ function handleSnapshotRequest(data) {
                 if (responded) return; // Already responded via timeout
                 responded = true;
                 clearTimeout(timeoutId);
-                
+
                 const duration = performance.now() - startTime;
 
                 // Handle Chrome extension errors (e.g., background script crashed)
diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js
index 712542d..bd827c8 100644
--- a/sentience/extension/injected_api.js
+++ b/sentience/extension/injected_api.js
@@ -66,10 +66,10 @@
     // --- HELPER: Safe Class Name Extractor (Handles SVGAnimatedString) ---
     function getClassName(el) {
         if (!el || !el.className) return '';
-        
+
         // Handle string (HTML elements)
         if (typeof el.className === 'string') return el.className;
-        
+
         // Handle SVGAnimatedString (SVG elements)
         if (typeof el.className === 'object') {
             if ('baseVal' in el.className && typeof el.className.baseVal === 'string') {
@@ -85,17 +85,17 @@
                 return '';
             }
         }
-        
+
         return '';
     }
 
     // --- HELPER: Paranoid String Converter (Handles SVGAnimatedString) ---
     function toSafeString(value) {
         if (value === null || value === undefined) return null;
-        
+
         // 1. If it's already a primitive string, return it
         if (typeof value === 'string') return value;
-        
+
         // 2. Handle SVG objects (SVGAnimatedString, SVGAnimatedNumber, etc.)
         if (typeof value === 'object') {
             // Try extracting baseVal (standard SVG property)
@@ -114,7 +114,7 @@
                 return null;
             }
         }
-        
+
         // 3. Last resort cast for primitives
         try {
             return String(value);
@@ -127,9 +127,9 @@
     // For SVG elements, get the fill or stroke color (SVGs use fill/stroke, not backgroundColor)
     function getSVGColor(el) {
         if (!el || el.tagName !== 'SVG') return null;
-        
+
         const style = window.getComputedStyle(el);
-        
+
         // Try fill first (most common for SVG icons)
         const fill = style.fill;
         if (fill && fill !== 'none' && fill !== 'transparent' && fill !== 'rgba(0, 0, 0, 0)') {
@@ -144,7 +144,7 @@
                 return fill;
             }
         }
-        
+
         // Fallback to stroke if fill is not available
         const stroke = style.stroke;
         if (stroke && stroke !== 'none' && stroke !== 'transparent' && stroke !== 'rgba(0, 0, 0, 0)') {
@@ -158,7 +158,7 @@
                 return stroke;
             }
         }
-        
+
         return null;
     }
 
@@ -168,28 +168,28 @@
     // This handles rgba(0,0,0,0) and transparent values that browsers commonly return
     function getEffectiveBackgroundColor(el) {
         if (!el) return null;
-        
+
         // For SVG elements, use fill/stroke instead of backgroundColor
         if (el.tagName === 'SVG') {
             const svgColor = getSVGColor(el);
             if (svgColor) return svgColor;
         }
-        
+
         let current = el;
         const maxDepth = 10; // Prevent infinite loops
         let depth = 0;
-        
+
         while (current && depth < maxDepth) {
             const style = window.getComputedStyle(current);
-            
+
             // For SVG elements in the tree, also check fill/stroke
             if (current.tagName === 'SVG') {
                 const svgColor = getSVGColor(current);
                 if (svgColor) return svgColor;
             }
-            
+
             const bgColor = style.backgroundColor;
-            
+
             if (bgColor && bgColor !== 'transparent' && bgColor !== 'rgba(0, 0, 0, 0)') {
                 // Check if it's rgba with alpha < 1 (semi-transparent)
                 const rgbaMatch = bgColor.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/);
@@ -209,12 +209,12 @@
                     return bgColor;
                 }
             }
-            
+
             // Move up the DOM tree
             current = current.parentElement;
             depth++;
         }
-        
+
         // Fallback: return null if nothing found
         return null;
     }
@@ -235,7 +235,7 @@
         // Only check for elements that are likely to be occluded (overlays, modals, tooltips)
         const zIndex = parseInt(style.zIndex, 10);
         const position = style.position;
-        
+
         // Skip occlusion check for normal flow elements (vast majority)
         // Only check for positioned elements or high z-index (likely overlays)
         if (position === 'static' && (isNaN(zIndex) || zIndex <= 10)) {
@@ -308,7 +308,7 @@
             };
 
             window.addEventListener('message', listener);
-            
+
             try {
                 window.postMessage({
                     type: 'SENTIENCE_SNAPSHOT_REQUEST',
@@ -514,7 +514,7 @@
     function extractRawElementData(el) {
         const style = window.getComputedStyle(el);
         const rect = el.getBoundingClientRect();
-        
+
         return {
             tag: el.tagName,
             rect: {
@@ -548,12 +548,12 @@
     // --- HELPER: Generate Unique CSS Selector (for Golden Set) ---
     function getUniqueSelector(el) {
         if (!el || !el.tagName) return '';
-        
+
         // If element has a unique ID, use it
         if (el.id) {
             return `#${el.id}`;
         }
-        
+
         // Try data attributes or aria-label for uniqueness
         for (const attr of el.attributes) {
             if (attr.name.startsWith('data-') || attr.name === 'aria-label') {
@@ -561,21 +561,21 @@
                 return `${el.tagName.toLowerCase()}[${attr.name}="${value}"]`;
             }
         }
-        
+
         // Build path with classes and nth-child for uniqueness
         const path = [];
         let current = el;
-        
+
         while (current && current !== document.body && current !== document.documentElement) {
             let selector = current.tagName.toLowerCase();
-            
+
             // If current element has ID, use it and stop
             if (current.id) {
                 selector = `#${current.id}`;
                 path.unshift(selector);
                 break;
             }
-            
+
             // Add class if available
             if (current.className && typeof current.className === 'string') {
                 const classes = current.className.trim().split(/\s+/).filter(c => c);
@@ -584,7 +584,7 @@
                     selector += `.${classes[0]}`;
                 }
             }
-            
+
             // Add nth-of-type if needed for uniqueness
             if (current.parentElement) {
                 const siblings = Array.from(current.parentElement.children);
@@ -594,11 +594,11 @@
                     selector += `:nth-of-type(${index + 1})`;
                 }
             }
-            
+
             path.unshift(selector);
             current = current.parentElement;
         }
-        
+
         return path.join(' > ') || el.tagName.toLowerCase();
     }
 
@@ -613,7 +613,7 @@
         } = options;
 
         const startTime = Date.now();
-        
+
         return new Promise((resolve) => {
             // Check if DOM already has enough nodes
             const nodeCount = document.querySelectorAll('*').length;
@@ -623,17 +623,17 @@
                 const observer = new MutationObserver(() => {
                     lastChange = Date.now();
                 });
-                
+
                 observer.observe(document.body, {
                     childList: true,
                     subtree: true,
                     attributes: false
                 });
-                
+
                 const checkStable = () => {
                     const timeSinceLastChange = Date.now() - lastChange;
                     const totalWait = Date.now() - startTime;
-                    
+
                     if (timeSinceLastChange >= quietPeriod) {
                         observer.disconnect();
                         resolve();
@@ -645,14 +645,14 @@
                         setTimeout(checkStable, 50);
                     }
                 };
-                
+
                 checkStable();
             } else {
                 // DOM doesn't have enough nodes yet, wait for them
                 const observer = new MutationObserver(() => {
                     const currentCount = document.querySelectorAll('*').length;
                     const totalWait = Date.now() - startTime;
-                    
+
                     if (currentCount >= minNodeCount) {
                         observer.disconnect();
                         // Now wait for quiet period
@@ -660,17 +660,17 @@
                         const quietObserver = new MutationObserver(() => {
                             lastChange = Date.now();
                         });
-                        
+
                         quietObserver.observe(document.body, {
                             childList: true,
                             subtree: true,
                             attributes: false
                         });
-                        
+
                         const checkQuiet = () => {
                             const timeSinceLastChange = Date.now() - lastChange;
                             const totalWait = Date.now() - startTime;
-                            
+
                             if (timeSinceLastChange >= quietPeriod) {
                                 quietObserver.disconnect();
                                 resolve();
@@ -682,7 +682,7 @@
                                 setTimeout(checkQuiet, 50);
                             }
                         };
-                        
+
                         checkQuiet();
                     } else if (totalWait >= maxWait) {
                         observer.disconnect();
@@ -690,13 +690,13 @@
                         resolve();
                     }
                 });
-                
+
                 observer.observe(document.body, {
                     childList: true,
                     subtree: true,
                     attributes: false
                 });
-                
+
                 // Timeout fallback
                 setTimeout(() => {
                     observer.disconnect();
@@ -710,21 +710,21 @@
     // --- HELPER: Collect Iframe Snapshots (Frame Stitching) ---
     // Recursively collects snapshot data from all child iframes
     // This enables detection of elements inside iframes (e.g., Stripe forms)
-    // 
+    //
     // NOTE: Cross-origin iframes cannot be accessed due to browser security (Same-Origin Policy).
     // Only same-origin iframes will return snapshot data. Cross-origin iframes will be skipped
     // with a warning. For cross-origin iframes, users must manually switch frames using
     // Playwright's page.frame() API.
     async function collectIframeSnapshots(options = {}) {
         const iframeData = new Map(); // Map of iframe element -> snapshot data
-        
+
         // Find all iframe elements in current document
         const iframes = Array.from(document.querySelectorAll('iframe'));
-        
+
         if (iframes.length === 0) {
             return iframeData;
         }
-        
+
         console.log(`[SentienceAPI] Found ${iframes.length} iframe(s), requesting snapshots...`);
         // Request snapshot from each iframe
         const iframePromises = iframes.map((iframe, idx) => {
@@ -737,13 +737,13 @@
 
             return new Promise((resolve) => {
                 const requestId = `iframe-${idx}-${Date.now()}`;
-                
+
                 // 1. EXTENDED TIMEOUT (Handle slow children)
                 const timeout = setTimeout(() => {
                     console.warn(`[SentienceAPI] ⚠️ Iframe ${idx} snapshot TIMEOUT (id: ${requestId})`);
                     resolve(null);
                 }, 5000); // Increased to 5s to handle slow processing
-                
+
                 // 2. ROBUST LISTENER with debugging
                 const listener = (event) => {
                     // Debug: Log all SENTIENCE_IFRAME_SNAPSHOT_RESPONSE messages to see what's happening
@@ -753,14 +753,14 @@
                             // console.log(`[SentienceAPI] Received response for different request: ${event.data.requestId} (expected: ${requestId})`);
                         }
                     }
-                    
+
                     // Check if this is the response we're waiting for
-                    if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE' && 
+                    if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE' &&
                         event.data?.requestId === requestId) {
-                        
+
                         clearTimeout(timeout);
                         window.removeEventListener('message', listener);
-                        
+
                         if (event.data.error) {
                             console.warn(`[SentienceAPI] Iframe ${idx} returned error:`, event.data.error);
                             resolve(null);
@@ -775,9 +775,9 @@
                         }
                     }
                 };
-                
+
                 window.addEventListener('message', listener);
-                
+
                 // 3. SEND REQUEST with error handling
                 try {
                     if (iframe.contentWindow) {
@@ -785,8 +785,8 @@
                         iframe.contentWindow.postMessage({
                             type: 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST',
                             requestId: requestId,
-                            options: { 
-                                ...options, 
+                            options: {
+                                ...options,
                                 collectIframes: true // Enable recursion for nested iframes
                             }
                         }, '*'); // Use '*' for cross-origin, but browser will enforce same-origin policy
@@ -804,10 +804,10 @@
                 }
             });
         });
-        
+
         // Wait for all iframe responses
         const results = await Promise.all(iframePromises);
-        
+
         // Store iframe data
         results.forEach((result, idx) => {
             if (result && result.data && !result.error) {
@@ -819,7 +819,7 @@
                 console.warn(`[SentienceAPI] Iframe ${idx} returned no data (timeout or error)`);
             }
         });
-        
+
         return iframeData;
     }
 
@@ -832,7 +832,7 @@
             // Security: only respond to snapshot requests from parent frames
             if (event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST') {
                 const { requestId, options } = event.data;
-                
+
                 try {
                     // Generate snapshot for this iframe's content
                     // Allow recursive collection - querySelectorAll('iframe') only finds direct children,
@@ -840,7 +840,7 @@
                     // waitForStability: false makes performance better - i.e. don't wait for children frames
                     const snapshotOptions = { ...options, collectIframes: true, waitForStability: options.waitForStability === false ? false : false };
                     const snapshot = await window.sentience.snapshot(snapshotOptions);
-                    
+
                     // Send response back to parent
                     if (event.source && event.source.postMessage) {
                         event.source.postMessage({
@@ -864,7 +864,7 @@
             }
         });
     }
-    
+
     // Setup iframe handler when script loads (only once)
     if (!window.sentience_iframe_handler_setup) {
         setupIframeSnapshotHandler();
@@ -880,7 +880,7 @@
                 if (options.waitForStability !== false) {
                     await waitForStability(options.waitForStability || {});
                 }
-                
+
                 // Step 1: Collect raw DOM data (Main World - CSP can't block this!)
                 const rawData = [];
                 window.sentience_registry = [];
@@ -896,17 +896,17 @@
 
                     const textVal = getText(el);
                     const inView = isInViewport(rect);
-                    
+
                     // Get computed style once (needed for both occlusion check and data collection)
                     const style = window.getComputedStyle(el);
-                    
+
                     // Only check occlusion for elements likely to be occluded (optimized)
                     // This avoids layout thrashing for the vast majority of elements
                     const occluded = inView ? isOccluded(el, rect, style) : false;
-                    
+
                     // Get effective background color (traverses DOM to find non-transparent color)
                     const effectiveBgColor = getEffectiveBackgroundColor(el);
-                    
+
                     rawData.push({
                         id: idx,
                         tag: el.tagName.toLowerCase(),
@@ -943,26 +943,26 @@
                 // This allows WASM to process all elements uniformly (no recursion needed)
                 let allRawElements = [...rawData]; // Start with main frame elements
                 let totalIframeElements = 0;
-                
+
                 if (options.collectIframes !== false) {
                     try {
                         console.log(`[SentienceAPI] Starting iframe collection...`);
                         const iframeSnapshots = await collectIframeSnapshots(options);
                         console.log(`[SentienceAPI] Iframe collection complete. Received ${iframeSnapshots.size} snapshot(s)`);
-                        
+
                         if (iframeSnapshots.size > 0) {
                             // FLATTEN IMMEDIATELY: Don't nest them. Just append them with coordinate translation.
                             iframeSnapshots.forEach((iframeSnapshot, iframeEl) => {
                                 // Debug: Log structure to verify data is correct
                                 // console.log(`[SentienceAPI] Processing iframe snapshot:`, iframeSnapshot);
-                                
+
                                 if (iframeSnapshot && iframeSnapshot.raw_elements) {
                                     const rawElementsCount = iframeSnapshot.raw_elements.length;
                                     console.log(`[SentienceAPI] Processing ${rawElementsCount} elements from iframe (src: ${iframeEl.src || 'unknown'})`);
                                     // Get iframe's bounding rect (offset for coordinate translation)
                                     const iframeRect = iframeEl.getBoundingClientRect();
                                     const offset = { x: iframeRect.x, y: iframeRect.y };
-                                    
+
                                     // Get iframe context for frame switching (Playwright needs this)
                                     const iframeSrc = iframeEl.src || iframeEl.getAttribute('src') || '';
                                     let isSameOrigin = false;
@@ -972,11 +972,11 @@
                                     } catch (e) {
                                         isSameOrigin = false;
                                     }
-                                    
+
                                     // Adjust coordinates and add iframe context to each element
                                     const adjustedElements = iframeSnapshot.raw_elements.map(el => {
                                         const adjusted = { ...el };
-                                        
+
                                         // Adjust rect coordinates to parent viewport
                                         if (adjusted.rect) {
                                             adjusted.rect = {
@@ -985,22 +985,22 @@
                                                 y: adjusted.rect.y + offset.y
                                             };
                                         }
-                                        
+
                                         // Add iframe context so agents can switch frames in Playwright
                                         adjusted.iframe_context = {
                                             src: iframeSrc,
                                             is_same_origin: isSameOrigin
                                         };
-                                        
+
                                         return adjusted;
                                     });
-                                    
+
                                     // Append flattened iframe elements to main array
                                     allRawElements.push(...adjustedElements);
                                     totalIframeElements += adjustedElements.length;
                                 }
                             });
-                            
+
                             // console.log(`[SentienceAPI] Merged ${iframeSnapshots.size} iframe(s). Total elements: ${allRawElements.length} (${rawData.length} main + ${totalIframeElements} iframe)`);
                         }
                     } catch (error) {
@@ -1013,7 +1013,7 @@
                 // No recursion needed - everything is already flat
                 console.log(`[SentienceAPI] Sending ${allRawElements.length} total elements to WASM (${rawData.length} main + ${totalIframeElements} iframe)`);
                 const processed = await processSnapshotInBackground(allRawElements, options);
-                
+
                 if (!processed || !processed.elements) {
                     throw new Error('WASM processing returned invalid result');
                 }
@@ -1029,10 +1029,10 @@
                 const cleanedRawElements = cleanElement(processed.raw_elements);
 
                 // FIXED: Removed undefined 'totalIframeRawElements'
-                // FIXED: Logic updated for "Flatten Early" architecture. 
+                // FIXED: Logic updated for "Flatten Early" architecture.
                 // processed.elements ALREADY contains the merged iframe elements,
                 // so we simply use .length. No addition needed.
-                
+
                 const totalCount = cleanedElements.length;
                 const totalRaw = cleanedRawElements.length;
                 const iframeCount = totalIframeElements || 0;
@@ -1250,23 +1250,23 @@
                 autoDisableTimeout = 30 * 60 * 1000, // 30 minutes default
                 keyboardShortcut = 'Ctrl+Shift+I'
             } = options;
-            
+
             console.log("🔴 [Sentience] Recording Mode STARTED. Click an element to copy its Ground Truth JSON.");
             console.log(`   Press ${keyboardShortcut} or call stopRecording() to stop.`);
-            
+
             // Validate registry is populated
             if (!window.sentience_registry || window.sentience_registry.length === 0) {
                 console.warn("⚠️ Registry empty. Call `await window.sentience.snapshot()` first to populate registry.");
                 alert("Registry empty. Run `await window.sentience.snapshot()` first!");
                 return () => {}; // Return no-op cleanup function
             }
-            
+
             // Create reverse mapping for O(1) lookup (fixes registry lookup bug)
             window.sentience_registry_map = new Map();
             window.sentience_registry.forEach((el, idx) => {
                 if (el) window.sentience_registry_map.set(el, idx);
             });
-            
+
             // Create highlight box overlay
             let highlightBox = document.getElementById('sentience-highlight-box');
             if (!highlightBox) {
@@ -1284,7 +1284,7 @@
                 `;
                 document.body.appendChild(highlightBox);
             }
-            
+
             // Create visual indicator (red border on page when recording)
             let recordingIndicator = document.getElementById('sentience-recording-indicator');
             if (!recordingIndicator) {
@@ -1303,12 +1303,12 @@
                 document.body.appendChild(recordingIndicator);
             }
             recordingIndicator.style.display = 'block';
-            
+
             // Hover handler (visual feedback)
             const mouseOverHandler = (e) => {
                 const el = e.target;
                 if (!el || el === highlightBox || el === recordingIndicator) return;
-                
+
                 const rect = el.getBoundingClientRect();
                 highlightBox.style.display = 'block';
                 highlightBox.style.top = (rect.top + window.scrollY) + 'px';
@@ -1316,15 +1316,15 @@
                 highlightBox.style.width = rect.width + 'px';
                 highlightBox.style.height = rect.height + 'px';
             };
-            
+
             // Click handler (capture ground truth data)
             const clickHandler = (e) => {
                 e.preventDefault();
                 e.stopPropagation();
-                
+
                 const el = e.target;
                 if (!el || el === highlightBox || el === recordingIndicator) return;
-                
+
                 // Use Map for reliable O(1) lookup
                 const sentienceId = window.sentience_registry_map.get(el);
                 if (sentienceId === undefined) {
@@ -1332,13 +1332,13 @@
                     alert("Element not in registry. Run `await window.sentience.snapshot()` first!");
                     return;
                 }
-                
+
                 // Extract raw data (ground truth + raw signals, NOT model outputs)
                 const rawData = extractRawElementData(el);
                 const selector = getUniqueSelector(el);
                 const role = el.getAttribute('role') || el.tagName.toLowerCase();
                 const text = getText(el);
-                
+
                 // Build golden set JSON (ground truth + raw signals only)
                 const snippet = {
                     task: `Interact with ${text.substring(0, 20)}${text.length > 20 ? '...' : ''}`,
@@ -1352,12 +1352,12 @@
                     },
                     debug_snapshot: rawData
                 };
-                
+
                 // Copy to clipboard
                 const jsonString = JSON.stringify(snippet, null, 2);
                 navigator.clipboard.writeText(jsonString).then(() => {
                     console.log("✅ Copied Ground Truth to clipboard:", snippet);
-                    
+
                     // Flash green to indicate success
                     highlightBox.style.border = `2px solid ${successColor}`;
                     highlightBox.style.background = 'rgba(0, 255, 0, 0.2)';
@@ -1370,42 +1370,42 @@
                     alert("Failed to copy to clipboard. Check console for JSON.");
                 });
             };
-            
+
             // Auto-disable timeout
             let timeoutId = null;
-            
+
             // Cleanup function to stop recording (defined before use)
             const stopRecording = () => {
                 document.removeEventListener('mouseover', mouseOverHandler, true);
                 document.removeEventListener('click', clickHandler, true);
                 document.removeEventListener('keydown', keyboardHandler, true);
-                
+
                 if (timeoutId) {
                     clearTimeout(timeoutId);
                     timeoutId = null;
                 }
-                
+
                 if (highlightBox) {
                     highlightBox.style.display = 'none';
                 }
-                
+
                 if (recordingIndicator) {
                     recordingIndicator.style.display = 'none';
                 }
-                
+
                 // Clean up registry map (optional, but good practice)
                 if (window.sentience_registry_map) {
                     window.sentience_registry_map.clear();
                 }
-                
+
                 // Remove global reference
                 if (window.sentience_stopRecording === stopRecording) {
                     delete window.sentience_stopRecording;
                 }
-                
+
                 console.log("⚪ [Sentience] Recording Mode STOPPED.");
             };
-            
+
             // Keyboard shortcut handler (defined after stopRecording)
             const keyboardHandler = (e) => {
                 // Ctrl+Shift+I or Cmd+Shift+I
@@ -1414,12 +1414,12 @@
                     stopRecording();
                 }
             };
-            
+
             // Attach event listeners (use capture phase to intercept early)
             document.addEventListener('mouseover', mouseOverHandler, true);
             document.addEventListener('click', clickHandler, true);
             document.addEventListener('keydown', keyboardHandler, true);
-            
+
             // Set up auto-disable timeout
             if (autoDisableTimeout > 0) {
                 timeoutId = setTimeout(() => {
@@ -1427,10 +1427,10 @@
                     stopRecording();
                 }, autoDisableTimeout);
             }
-            
+
             // Store stop function globally for keyboard shortcut access
             window.sentience_stopRecording = stopRecording;
-            
+
             return stopRecording;
         }
     };
diff --git a/sentience/tracer_factory.py b/sentience/tracer_factory.py
index 04ffb1a..2353f1c 100644
--- a/sentience/tracer_factory.py
+++ b/sentience/tracer_factory.py
@@ -11,7 +11,7 @@
 
 import requests
 
-from sentience.cloud_tracing import CloudTraceSink
+from sentience.cloud_tracing import CloudTraceSink, SentienceLogger
 from sentience.tracing import JsonlTraceSink, Tracer
 
 # Sentience API base URL (constant)
@@ -22,6 +22,7 @@ def create_tracer(
     api_key: str | None = None,
     run_id: str | None = None,
     api_url: str | None = None,
+    logger: SentienceLogger | None = None,
 ) -> Tracer:
     """
     Create tracer with automatic tier detection.
@@ -36,6 +37,7 @@ def create_tracer(
                  - Pro/Enterprise: Valid API key
         run_id: Unique identifier for this agent run. If not provided, generates UUID.
         api_url: Sentience API base URL (default: https://api.sentienceapi.com)
+        logger: Optional logger instance for logging file sizes and errors
 
     Returns:
         Tracer configured with appropriate sink
@@ -83,7 +85,13 @@ def create_tracer(
                     print("☁️  [Sentience] Cloud tracing enabled (Pro tier)")
                     return Tracer(
                         run_id=run_id,
-                        sink=CloudTraceSink(upload_url=upload_url, run_id=run_id),
+                        sink=CloudTraceSink(
+                            upload_url=upload_url,
+                            run_id=run_id,
+                            api_key=api_key,
+                            api_url=api_url,
+                            logger=logger,
+                        ),
                     )
                 else:
                     print("⚠️  [Sentience] Cloud init response missing upload_url")
diff --git a/tests/test_file_size_tracking.py b/tests/test_file_size_tracking.py
new file mode 100644
index 0000000..a1cbc86
--- /dev/null
+++ b/tests/test_file_size_tracking.py
@@ -0,0 +1,233 @@
+"""
+Tests for file size tracking and /v1/traces/complete functionality.
+
+Tests the Phase 5 SDK changes for enforcing storage quota.
+"""
+
+import gzip
+import json
+import os
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, Mock, patch
+
+import pytest
+
+from sentience.cloud_tracing import CloudTraceSink, SentienceLogger
+from sentience.tracer_factory import create_tracer
+from sentience.tracing import Tracer
+
+
+class TestFileSizeTracking:
+    """Test CloudTraceSink size counters, size logging and /v1/traces/complete."""
+
+    def test_cloud_sink_tracks_trace_file_size(self, tmp_path):
+        """Test that CloudTraceSink stores the logger and starts with zeroed size counters."""
+        # Create mock logger
+        mock_logger = Mock(spec=SentienceLogger)
+
+        # Placeholder upload URL
+        upload_url = "https://example.com/upload"
+
+        # Create CloudTraceSink with logger
+        sink = CloudTraceSink(
+            upload_url=upload_url,
+            run_id="test-run",
+            api_key="sk_test_key",
+            api_url="https://api.example.com",
+            logger=mock_logger,
+        )
+
+        # Verify logger is set
+        assert sink.logger == mock_logger
+
+        # Verify file size tracking fields exist and start at zero
+        assert hasattr(sink, "trace_file_size_bytes")
+        assert hasattr(sink, "screenshot_total_size_bytes")
+        assert sink.trace_file_size_bytes == 0
+        assert sink.screenshot_total_size_bytes == 0
+
+    def test_cloud_sink_without_logger(self):
+        """Test that CloudTraceSink works without a logger (backward compatibility)."""
+        upload_url = "https://example.com/upload"
+
+        # Create CloudTraceSink without logger (should not fail)
+        sink = CloudTraceSink(
+            upload_url=upload_url,
+            run_id="test-run",
+        )
+
+        assert sink.logger is None
+        assert sink.trace_file_size_bytes == 0
+        assert sink.screenshot_total_size_bytes == 0
+
+    @patch("sentience.cloud_tracing.requests")
+    def test_cloud_sink_logs_file_sizes(self, mock_requests):
+        """Test that CloudTraceSink logs file sizes when logger is provided."""
+        # Create mock logger
+        mock_logger = Mock(spec=SentienceLogger)
+
+        # Mock successful responses for both the PUT and POST calls
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_requests.put.return_value = mock_response
+        mock_requests.post.return_value = mock_response
+
+        upload_url = "https://example.com/upload"
+
+        # Create CloudTraceSink with logger
+        sink = CloudTraceSink(
+            upload_url=upload_url,
+            run_id="test-run-size",
+            api_key="sk_test_key",
+            logger=mock_logger,
+        )
+
+        # Emit a single event
+        sink.emit({"type": "test", "data": "test"})
+
+        # Close to trigger upload
+        sink.close()
+
+        # Verify logger.info was called with both size messages
+        info_calls = [str(call) for call in mock_logger.info.call_args_list]
+        assert any("Trace file size:" in call for call in info_calls)
+        assert any("Screenshot total:" in call for call in info_calls)
+
+    @patch("sentience.cloud_tracing.requests")
+    def test_complete_trace_called_after_upload(self, mock_requests):
+        """Test that /v1/traces/complete is called after successful upload."""
+        # Mock successful upload (PUT) and complete (POST) responses
+        mock_put_response = Mock()
+        mock_put_response.status_code = 200
+
+        mock_post_response = Mock()
+        mock_post_response.status_code = 200
+
+        mock_requests.put.return_value = mock_put_response
+        mock_requests.post.return_value = mock_post_response
+
+        upload_url = "https://example.com/upload"
+        api_url = "https://api.example.com"
+
+        # Create CloudTraceSink with API key
+        sink = CloudTraceSink(
+            upload_url=upload_url,
+            run_id="test-complete",
+            api_key="sk_test_key",
+            api_url=api_url,
+        )
+
+        # Emit event and close
+        sink.emit({"type": "test"})
+        sink.close()
+
+        # Verify at least one POST was issued
+        post_calls = mock_requests.post.call_args_list
+        assert len(post_calls) > 0
+
+        # Find the complete trace call (URL is expected as the first positional argument)
+        complete_call = None
+        for call in post_calls:
+            args, kwargs = call
+            if "/v1/traces/complete" in args[0]:
+                complete_call = call
+                break
+
+        assert complete_call is not None, "Expected /v1/traces/complete to be called"
+
+        # Verify the payload (sent either as json= or as a serialized data= body)
+        args, kwargs = complete_call
+        payload = kwargs.get("json") or json.loads(kwargs.get("data", "{}"))
+        assert "run_id" in payload
+        assert payload["run_id"] == "test-complete"
+        assert "stats" in payload
+        assert "trace_file_size_bytes" in payload["stats"]
+        assert "screenshot_total_size_bytes" in payload["stats"]
+
+    @patch("sentience.cloud_tracing.requests")
+    def test_complete_trace_not_called_without_api_key(self, mock_requests):
+        """Test that /v1/traces/complete is not called without API key."""
+        # Mock successful upload (PUT only)
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_requests.put.return_value = mock_response
+
+        upload_url = "https://example.com/upload"
+
+        # Create CloudTraceSink WITHOUT API key
+        sink = CloudTraceSink(
+            upload_url=upload_url,
+            run_id="test-no-key",
+        )
+
+        # Emit event and close
+        sink.emit({"type": "test"})
+        sink.close()
+
+        # Verify POST was NOT called
+        assert mock_requests.post.call_count == 0
+
+    @patch("sentience.tracer_factory.requests")
+    def test_create_tracer_passes_logger_to_cloud_sink(self, mock_requests):
+        """Test that create_tracer passes logger to CloudTraceSink."""
+        # Mock successful API response carrying an upload_url
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"upload_url": "https://example.com/upload"}
+        mock_requests.post.return_value = mock_response
+
+        # Create mock logger
+        mock_logger = Mock(spec=SentienceLogger)
+
+        # Create tracer with logger
+        with patch("sentience.tracer_factory._recover_orphaned_traces"):
+            tracer = create_tracer(
+                api_key="sk_test_key",
+                run_id="test-logger",
+                logger=mock_logger,
+            )
+
+        # Verify tracer was created
+        assert isinstance(tracer, Tracer)
+
+        # Verify sink has logger
+        assert hasattr(tracer.sink, "logger")
+        assert tracer.sink.logger == mock_logger
+
+
+class TestBackwardCompatibility:
+    """Test that call sites written before the logger/api_key parameters keep working."""
+
+    @patch("sentience.tracer_factory.requests")
+    def test_create_tracer_without_logger_still_works(self, mock_requests):
+        """Test that create_tracer works without logger parameter (backward compat)."""
+        # Mock successful API response carrying an upload_url
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"upload_url": "https://example.com/upload"}
+        mock_requests.post.return_value = mock_response
+
+        # Create tracer WITHOUT logger (old API)
+        with patch("sentience.tracer_factory._recover_orphaned_traces"):
+            tracer = create_tracer(
+                api_key="sk_test_key",
+                run_id="test-compat",
+            )
+
+        # Should still return a Tracer
+        assert isinstance(tracer, Tracer)
+
+    def test_cloud_sink_backward_compatible_signature(self):
+        """Test that CloudTraceSink can be created with old 2-parameter signature."""
+        # Old signature: CloudTraceSink(upload_url, run_id)
+        sink = CloudTraceSink(
+            upload_url="https://example.com/upload",
+            run_id="test-old-api",
+        )
+
+        # Given arguments are stored; the new optional ones default to None
+        assert sink.upload_url == "https://example.com/upload"
+        assert sink.run_id == "test-old-api"
+        assert sink.logger is None  # No logger
+        assert sink.api_key is None  # No API key

From 663e3458bb1e13d389c7174004add1aca2fbeb18 Mon Sep 17 00:00:00 2001
From: rcholic <ivytony@gmail.com>
Date: Sun, 28 Dec 2025 17:46:18 -0800
Subject: [PATCH 2/2] file size reporting

---
 sentience/generator.py    | 4 ----
 sentience/llm_provider.py | 1 -
 sentience/query.py        | 6 +++---
 sentience/screenshot.py   | 2 +-
 sentience/wait.py         | 1 -
 tests/test_snapshot.py    | 2 --
 tests/test_wait.py        | 4 ----
 7 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/sentience/generator.py b/sentience/generator.py
index 01fd12a..ecb5a6a 100644
--- a/sentience/generator.py
+++ b/sentience/generator.py
@@ -2,10 +2,6 @@
 Script Generator - converts trace into executable code
 """
 
-import json
-from typing import List, Optional
-
-from .query import find
 from .recorder import Trace, TraceStep
 
 
diff --git a/sentience/llm_provider.py b/sentience/llm_provider.py
index 1c4200f..a333e26 100644
--- a/sentience/llm_provider.py
+++ b/sentience/llm_provider.py
@@ -5,7 +5,6 @@
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Any, Dict, Optional
 
 
 @dataclass
diff --git a/sentience/query.py b/sentience/query.py
index 141fee5..ed43b6c 100644
--- a/sentience/query.py
+++ b/sentience/query.py
@@ -3,12 +3,12 @@
 """
 
 import re
-from typing import Any, Dict, List, Optional, Union
+from typing import Any
 
 from .models import Element, Snapshot
 
 
-def parse_selector(selector: str) -> dict[str, Any]:
+def parse_selector(selector: str) -> dict[str, Any]:  # noqa: C901
     """
     Parse string DSL selector into structured query
 
@@ -136,7 +136,7 @@ def parse_selector(selector: str) -> dict[str, Any]:
     return query
 
 
-def match_element(element: Element, query: dict[str, Any]) -> bool:
+def match_element(element: Element, query: dict[str, Any]) -> bool:  # noqa: C901
     """Check if element matches query criteria"""
 
     # Role exact match
diff --git a/sentience/screenshot.py b/sentience/screenshot.py
index b5ce7fe..9414d95 100644
--- a/sentience/screenshot.py
+++ b/sentience/screenshot.py
@@ -2,7 +2,7 @@
 Screenshot functionality - standalone screenshot capture
 """
 
-from typing import Any, Dict, Literal, Optional
+from typing import Any, Literal
 
 from .browser import SentienceBrowser
 
diff --git a/sentience/wait.py b/sentience/wait.py
index 3b458eb..5b7e099 100644
--- a/sentience/wait.py
+++ b/sentience/wait.py
@@ -3,7 +3,6 @@
 """
 
 import time
-from typing import Optional, Union
 
 from .browser import SentienceBrowser
 from .models import WaitResult
diff --git a/tests/test_snapshot.py b/tests/test_snapshot.py
index a4001d1..30b045d 100644
--- a/tests/test_snapshot.py
+++ b/tests/test_snapshot.py
@@ -5,8 +5,6 @@
 import pytest
 
 from sentience import SentienceBrowser, snapshot
-from sentience.models import Snapshot
-
 
 @pytest.mark.requires_extension
 def test_snapshot_basic():
diff --git a/tests/test_wait.py b/tests/test_wait.py
index 9600b27..05a3ab2 100644
--- a/tests/test_wait.py
+++ b/tests/test_wait.py
@@ -2,10 +2,6 @@
 Tests for wait functionality
 """
 
-import os
-
-import pytest
-
 from sentience import SentienceBrowser, expect, wait_for