From a2ca0ffba8112ee0d06a9cbea070ab7385be1de4 Mon Sep 17 00:00:00 2001 From: rcholic Date: Thu, 25 Dec 2025 05:06:02 +0000 Subject: [PATCH] chore: sync extension files from sentience-chrome v2.0.0 --- sentience/extension/background.js | 211 ++++++--- sentience/extension/content.js | 79 +++- sentience/extension/injected_api.js | 405 ++++++++---------- sentience/extension/manifest.json | 8 +- .../extension/pkg/sentience_core_bg.wasm | Bin 101498 -> 101498 bytes sentience/extension/release.json | 62 +-- 6 files changed, 451 insertions(+), 314 deletions(-) diff --git a/sentience/extension/background.js b/sentience/extension/background.js index bb5ad6f..c108aed 100644 --- a/sentience/extension/background.js +++ b/sentience/extension/background.js @@ -1,63 +1,170 @@ -// background.js - Service Worker for screenshot capture -// Chrome extensions can only capture screenshots from the background script -// Listen for screenshot requests from content script +// background.js - Service Worker with WASM (CSP-Immune!) +// This runs in an isolated environment, completely immune to page CSP policies + +// ✅ STATIC IMPORTS at top level - Required for Service Workers! +// Dynamic import() is FORBIDDEN in ServiceWorkerGlobalScope +import init, { analyze_page, analyze_page_with_options, prune_for_api } from './pkg/sentience_core.js'; + +console.log('[Sentience Background] Initializing...'); + +// Global WASM initialization state +let wasmReady = false; +let wasmInitPromise = null; + +/** + * Initialize WASM module - called once on service worker startup + * Uses static imports (not dynamic import()) which is required for Service Workers + */ +async function initWASM() { + if (wasmReady) return; + if (wasmInitPromise) return wasmInitPromise; + + wasmInitPromise = (async () => { + try { + console.log('[Sentience Background] Loading WASM module...'); + + // Define the js_click_element function that WASM expects + // In Service Workers, use 'globalThis' instead of 'window' + // In background context, we can't actually click, so we log a warning + globalThis.js_click_element = (_id) => { + console.warn('[Sentience Background] js_click_element called in background (ignored)'); + }; + + // Initialize WASM - this calls the init() function from the static import + // The init() function handles fetching and instantiating the .wasm file + await init(); + + wasmReady = true; + console.log('[Sentience Background] ✓ WASM ready!'); + console.log('[Sentience Background] Available functions: analyze_page, analyze_page_with_options, prune_for_api'); + } catch (error) { + console.error('[Sentience Background] WASM initialization failed:', error); + throw error; + } + })(); + + return wasmInitPromise; +} + +// Initialize WASM on service worker startup +initWASM().catch(err => { + console.error('[Sentience Background] Failed to initialize WASM:', err); +}); + +/** + * Message handler for all extension communication + */ chrome.runtime.onMessage.addListener((request, sender, sendResponse) => { - if (request.action === 'captureScreenshot') { - handleScreenshotCapture(sender.tab.id, request.options) - .then(screenshot => { - sendResponse({ success: true, screenshot }); - }) - .catch(error => { - console.error('[Sentience] Screenshot capture failed:', error); - sendResponse({ - success: false, - error: error.message || 'Screenshot capture failed' - }); - }); + // Handle screenshot requests (existing functionality) + if (request.action === 'captureScreenshot') { + handleScreenshotCapture(sender.tab.id, request.options) + .then(screenshot => { + sendResponse({ success: true, screenshot }); + }) + .catch(error => { + console.error('[Sentience Background] Screenshot capture failed:', error); + sendResponse({ + success: false, + error: error.message || 'Screenshot capture failed' + }); + }); + return true; // Async response + } - // Return true to indicate we'll send response asynchronously - return true; - } + // Handle WASM processing requests (NEW!) + if (request.action === 'processSnapshot') { + handleSnapshotProcessing(request.rawData, request.options) + .then(result => { + sendResponse({ success: true, result }); + }) + .catch(error => { + console.error('[Sentience Background] Snapshot processing failed:', error); + sendResponse({ + success: false, + error: error.message || 'Snapshot processing failed' + }); + }); + return true; // Async response + } + + // Unknown action + console.warn('[Sentience Background] Unknown action:', request.action); + sendResponse({ success: false, error: 'Unknown action' }); + return false; }); /** - * Capture screenshot of the active tab - * @param {number} tabId - Tab ID to capture - * @param {Object} options - Screenshot options - * @returns {Promise} Base64-encoded PNG data URL + * Handle screenshot capture (existing functionality) */ -async function handleScreenshotCapture(tabId, options = {}) { - try { - const { - format = 'png', // 'png' or 'jpeg' - quality = 90 // JPEG quality (0-100), ignored for PNG - } = options; - - // Capture visible tab as data URL - const dataUrl = await chrome.tabs.captureVisibleTab(null, { - format: format, - quality: quality - }); - - console.log(`[Sentience] Screenshot captured: ${format}, size: ${dataUrl.length} bytes`); - - return dataUrl; - } catch (error) { - console.error('[Sentience] Screenshot error:', error); - throw new Error(`Failed to capture screenshot: ${error.message}`); - } +async function handleScreenshotCapture(_tabId, options = {}) { + try { + const { + format = 'png', + quality = 90 + } = options; + + const dataUrl = await chrome.tabs.captureVisibleTab(null, { + format: format, + quality: quality + }); + + console.log(`[Sentience Background] Screenshot captured: ${format}, size: ${dataUrl.length} bytes`); + return dataUrl; + } catch (error) { + console.error('[Sentience Background] Screenshot error:', error); + throw new Error(`Failed to capture screenshot: ${error.message}`); + } } /** - * Optional: Add viewport-specific capture (requires additional setup) - * This would allow capturing specific regions, not just visible area + * Handle snapshot processing with WASM (NEW!) + * This is where the magic happens - completely CSP-immune! + * + * @param {Array} rawData - Raw element data from injected_api.js + * @param {Object} options - Snapshot options (limit, filter, etc.) + * @returns {Promise} Processed snapshot result */ -async function captureRegion(tabId, region) { - // For region capture, you'd need to: - // 1. Capture full visible tab - // 2. Use Canvas API to crop to region - // 3. Return cropped image - - // Not implemented in this basic version - throw new Error('Region capture not yet implemented'); +async function handleSnapshotProcessing(rawData, options = {}) { + try { + // Ensure WASM is initialized + await initWASM(); + if (!wasmReady) { + throw new Error('WASM module not initialized'); + } + + console.log(`[Sentience Background] Processing ${rawData.length} elements with options:`, options); + + // Run WASM processing using the imported functions directly + let analyzedElements; + try { + if (options.limit || options.filter) { + analyzedElements = analyze_page_with_options(rawData, options); + } else { + analyzedElements = analyze_page(rawData); + } + } catch (e) { + throw new Error(`WASM analyze_page failed: ${e.message}`); + } + + // Prune elements for API (prevents 413 errors on large sites) + let prunedRawData; + try { + prunedRawData = prune_for_api(rawData); + } catch (e) { + console.warn('[Sentience Background] prune_for_api failed, using original data:', e); + prunedRawData = rawData; + } + + console.log(`[Sentience Background] ✓ Processed: ${analyzedElements.length} analyzed, ${prunedRawData.length} pruned`); + + return { + elements: analyzedElements, + raw_elements: prunedRawData + }; + } catch (error) { + console.error('[Sentience Background] Processing error:', error); + throw error; + } } + +console.log('[Sentience Background] Service worker ready'); diff --git a/sentience/extension/content.js b/sentience/extension/content.js index de24fa5..cf6ba37 100644 --- a/sentience/extension/content.js +++ b/sentience/extension/content.js @@ -1,22 +1,83 @@ -// content.js - ISOLATED WORLD -console.log('[Sentience] Bridge loaded.'); +// content.js - ISOLATED WORLD (Bridge between Main World and Background) +console.log('[Sentience Bridge] Loaded.'); -// 1. Pass Extension ID to Main World (So WASM knows where to load from) +// 1. Pass Extension ID to Main World (So API knows where to find resources) document.documentElement.dataset.sentienceExtensionId = chrome.runtime.id; -// 2. Proxy for Screenshots (The only thing Isolated World needs to do) +// 2. Message Router - Handles all communication between page and background window.addEventListener('message', (event) => { // Security check: only accept messages from same window - if (event.source !== window || event.data.type !== 'SENTIENCE_SCREENSHOT_REQUEST') return; + if (event.source !== window) return; + // Route different message types + switch (event.data.type) { + case 'SENTIENCE_SCREENSHOT_REQUEST': + handleScreenshotRequest(event.data); + break; + + case 'SENTIENCE_SNAPSHOT_REQUEST': + handleSnapshotRequest(event.data); + break; + + default: + // Ignore unknown message types + break; + } +}); + +/** + * Handle screenshot requests (existing functionality) + */ +function handleScreenshotRequest(data) { chrome.runtime.sendMessage( - { action: 'captureScreenshot', options: event.data.options }, + { action: 'captureScreenshot', options: data.options }, (response) => { window.postMessage({ type: 'SENTIENCE_SCREENSHOT_RESULT', - requestId: event.data.requestId, - screenshot: response?.success ? response.screenshot : null + requestId: data.requestId, + screenshot: response?.success ? response.screenshot : null, + error: response?.error }, '*'); } ); -}); \ No newline at end of file +} + +/** + * Handle snapshot processing requests (NEW!) + * Sends raw DOM data to background worker for WASM processing + */ +function handleSnapshotRequest(data) { + const startTime = performance.now(); + + chrome.runtime.sendMessage( + { + action: 'processSnapshot', + rawData: data.rawData, + options: data.options + }, + (response) => { + const duration = performance.now() - startTime; + + if (response?.success) { + console.log(`[Sentience Bridge] ✓ WASM processing complete in ${duration.toFixed(1)}ms`); + window.postMessage({ + type: 'SENTIENCE_SNAPSHOT_RESULT', + requestId: data.requestId, + elements: response.result.elements, + raw_elements: response.result.raw_elements, + duration: duration + }, '*'); + } else { + console.error('[Sentience Bridge] WASM processing failed:', response?.error); + window.postMessage({ + type: 'SENTIENCE_SNAPSHOT_RESULT', + requestId: data.requestId, + error: response?.error || 'Processing failed', + duration: duration + }, '*'); + } + } + ); +} + +console.log('[Sentience Bridge] Ready - Extension ID:', chrome.runtime.id); diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index 941478e..80b59f6 100644 --- a/sentience/extension/injected_api.js +++ b/sentience/extension/injected_api.js @@ -1,49 +1,52 @@ -// injected_api.js - MAIN WORLD +// injected_api.js - MAIN WORLD (NO WASM! CSP-Resistant!) +// This script ONLY collects raw DOM data and sends it to background for processing (async () => { - // 1. Get Extension ID (Wait for content.js to set it) + console.log('[SentienceAPI] Initializing (CSP-Resistant Mode)...'); + + // Wait for Extension ID from content.js const getExtensionId = () => document.documentElement.dataset.sentienceExtensionId; let extId = getExtensionId(); - - // Safety poller for async loading race conditions + if (!extId) { await new Promise(resolve => { const check = setInterval(() => { extId = getExtensionId(); if (extId) { clearInterval(check); resolve(); } }, 50); + setTimeout(() => resolve(), 5000); // Max 5s wait }); } - const EXT_URL = `chrome-extension://${extId}/`; - console.log('[SentienceAPI.com] Initializing from:', EXT_URL); + if (!extId) { + console.error('[SentienceAPI] Failed to get extension ID'); + return; + } + + console.log('[SentienceAPI] Extension ID:', extId); + // Registry for click actions (still needed for click() function) window.sentience_registry = []; - let wasmModule = null; // --- HELPER: Deep Walker with Native Filter --- function getAllElements(root = document) { const elements = []; - // FILTER: Skip Script, Style, Comments, Metadata tags during traversal - // This prevents collecting them in the first place, saving memory and CPU const filter = { acceptNode: function(node) { // Skip metadata and script/style tags if (['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'LINK', 'HEAD'].includes(node.tagName)) { return NodeFilter.FILTER_REJECT; } - // Skip deep SVG children (keep root only, unless you need path data) - // This reduces noise from complex SVG graphics while preserving icon containers + // Skip deep SVG children if (node.parentNode && node.parentNode.tagName === 'SVG' && node.tagName !== 'SVG') { return NodeFilter.FILTER_REJECT; } return NodeFilter.FILTER_ACCEPT; } }; - + const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT, filter); while(walker.nextNode()) { const node = walker.currentNode; - // Pre-check: Don't even process empty/detached nodes if (node.isConnected) { elements.push(node); if (node.shadowRoot) elements.push(...getAllElements(node.shadowRoot)); @@ -61,25 +64,19 @@ } // --- HELPER: Safe Class Name Extractor --- - // Fixes the SVGAnimatedString error by ensuring we always get a primitive string function getClassName(el) { if (typeof el.className === 'string') return el.className; - // Handle SVGAnimatedString (has baseVal and animVal) if (el.className && typeof el.className.baseVal === 'string') return el.className.baseVal; return ''; } // --- HELPER: Safe String Converter --- - // Converts any value (including SVGAnimatedString) to a plain string or null - // This prevents WASM deserialization errors on SVG elements function toSafeString(value) { if (value === null || value === undefined) return null; if (typeof value === 'string') return value; - // Handle SVGAnimatedString (has baseVal property) if (value && typeof value === 'object' && 'baseVal' in value) { return typeof value.baseVal === 'string' ? value.baseVal : null; } - // Convert other types to string try { return String(value); } catch (e) { @@ -87,7 +84,7 @@ } } - // --- HELPER: Viewport Check (NEW) --- + // --- HELPER: Viewport Check --- function isInViewport(rect) { return ( rect.top < window.innerHeight && rect.bottom > 0 && @@ -95,19 +92,16 @@ ); } - // --- HELPER: Occlusion Check (NEW) --- + // --- HELPER: Occlusion Check --- function isOccluded(el, rect) { - // Fast center-point check const cx = rect.x + rect.width / 2; const cy = rect.y + rect.height / 2; - - // If point is off-screen, elementFromPoint returns null, assume NOT occluded for safety + if (cx < 0 || cx > window.innerWidth || cy < 0 || cy > window.innerHeight) return false; const topEl = document.elementFromPoint(cx, cy); if (!topEl) return false; - - // It's visible if the top element is us, or contains us, or we contain it + return !(el === topEl || el.contains(topEl) || topEl.contains(el)); } @@ -123,45 +117,73 @@ }; window.addEventListener('message', listener); window.postMessage({ type: 'SENTIENCE_SCREENSHOT_REQUEST', requestId, options }, '*'); + setTimeout(() => { + window.removeEventListener('message', listener); + resolve(null); + }, 10000); // 10s timeout }); } - // --- HELPER: Get Raw HTML for Turndown/External Processing --- - // Returns cleaned HTML that can be processed by Turndown or other Node.js libraries + // --- HELPER: Snapshot Processing Bridge (NEW!) --- + function processSnapshotInBackground(rawData, options) { + return new Promise((resolve, reject) => { + const requestId = Math.random().toString(36).substring(7); + const timeout = setTimeout(() => { + window.removeEventListener('message', listener); + reject(new Error('WASM processing timeout')); + }, 15000); // 15s timeout + + const listener = (e) => { + if (e.data.type === 'SENTIENCE_SNAPSHOT_RESULT' && e.data.requestId === requestId) { + clearTimeout(timeout); + window.removeEventListener('message', listener); + + if (e.data.error) { + reject(new Error(e.data.error)); + } else { + resolve({ + elements: e.data.elements, + raw_elements: e.data.raw_elements, + duration: e.data.duration + }); + } + } + }; + + window.addEventListener('message', listener); + window.postMessage({ + type: 'SENTIENCE_SNAPSHOT_REQUEST', + requestId, + rawData, + options + }, '*'); + }); + } + + // --- HELPER: Raw HTML Extractor (unchanged) --- function getRawHTML(root) { const sourceRoot = root || document.body; const clone = sourceRoot.cloneNode(true); - - // Remove unwanted elements by tag name (simple and reliable) + const unwantedTags = ['nav', 'footer', 'header', 'script', 'style', 'noscript', 'iframe', 'svg']; unwantedTags.forEach(tag => { const elements = clone.querySelectorAll(tag); elements.forEach(el => { - if (el.parentNode) { - el.parentNode.removeChild(el); - } + if (el.parentNode) el.parentNode.removeChild(el); }); }); - // Remove invisible elements from original DOM and find matching ones in clone - // We'll use a simple approach: mark elements in original, then remove from clone + // Remove invisible elements const invisibleSelectors = []; - const walker = document.createTreeWalker( - sourceRoot, - NodeFilter.SHOW_ELEMENT, - null, - false - ); - + const walker = document.createTreeWalker(sourceRoot, NodeFilter.SHOW_ELEMENT, null, false); let node; while (node = walker.nextNode()) { const tag = node.tagName.toLowerCase(); if (tag === 'head' || tag === 'title') continue; - + const style = window.getComputedStyle(node); if (style.display === 'none' || style.visibility === 'hidden' || (node.offsetWidth === 0 && node.offsetHeight === 0)) { - // Build a selector for this element let selector = tag; if (node.id) { selector = `#${node.id}`; @@ -175,30 +197,25 @@ } } - // Remove invisible elements from clone (if we can find them) invisibleSelectors.forEach(selector => { try { const elements = clone.querySelectorAll(selector); elements.forEach(el => { - if (el.parentNode) { - el.parentNode.removeChild(el); - } + if (el.parentNode) el.parentNode.removeChild(el); }); } catch (e) { // Invalid selector, skip } }); - // Resolve relative URLs in links and images + // Resolve relative URLs const links = clone.querySelectorAll('a[href]'); links.forEach(link => { const href = link.getAttribute('href'); if (href && !href.startsWith('http://') && !href.startsWith('https://') && !href.startsWith('#')) { try { link.setAttribute('href', new URL(href, document.baseURI).href); - } catch (e) { - // Keep original href if URL parsing fails - } + } catch (e) {} } }); @@ -208,32 +225,24 @@ if (src && !src.startsWith('http://') && !src.startsWith('https://') && !src.startsWith('data:')) { try { img.setAttribute('src', new URL(src, document.baseURI).href); - } catch (e) { - // Keep original src if URL parsing fails - } + } catch (e) {} } }); return clone.innerHTML; } - // --- HELPER: Simple Markdown Converter (Lightweight) --- - // Uses getRawHTML() and then converts to markdown for consistency + // --- HELPER: Markdown Converter (unchanged) --- function convertToMarkdown(root) { - // Get cleaned HTML first const rawHTML = getRawHTML(root); - - // Create a temporary container to parse the HTML const tempDiv = document.createElement('div'); tempDiv.innerHTML = rawHTML; - + let markdown = ''; - let insideLink = false; // Track if we're inside an tag + let insideLink = false; function walk(node) { if (node.nodeType === Node.TEXT_NODE) { - // Keep minimal whitespace to prevent words merging - // Strip newlines inside text nodes to prevent broken links const text = node.textContent.replace(/[\r\n]+/g, ' ').replace(/\s+/g, ' '); if (text.trim()) markdown += text; return; @@ -248,13 +257,12 @@ if (tag === 'h2') markdown += '\n## '; if (tag === 'h3') markdown += '\n### '; if (tag === 'li') markdown += '\n- '; - // IMPORTANT: Don't add newlines for block elements when inside a link if (!insideLink && (tag === 'p' || tag === 'div' || tag === 'br')) markdown += '\n'; if (tag === 'strong' || tag === 'b') markdown += '**'; if (tag === 'em' || tag === 'i') markdown += '_'; if (tag === 'a') { markdown += '['; - insideLink = true; // Mark that we're entering a link + insideLink = true; } // Children @@ -266,25 +274,21 @@ // Suffix if (tag === 'a') { - // Get absolute URL from href attribute (already resolved in getRawHTML) const href = node.getAttribute('href'); if (href) markdown += `](${href})`; else markdown += ']'; - insideLink = false; // Mark that we're exiting the link + insideLink = false; } if (tag === 'strong' || tag === 'b') markdown += '**'; if (tag === 'em' || tag === 'i') markdown += '_'; - // IMPORTANT: Don't add newlines for block elements when inside a link (suffix section too) if (!insideLink && (tag === 'h1' || tag === 'h2' || tag === 'h3' || tag === 'p' || tag === 'div')) markdown += '\n'; } walk(tempDiv); - - // Cleanup: remove excessive newlines return markdown.replace(/\n{3,}/g, '\n\n').trim(); } - // --- HELPER: Raw Text Extractor --- + // --- HELPER: Text Extractor (unchanged) --- function convertToText(root) { let text = ''; function walk(node) { @@ -294,22 +298,20 @@ } if (node.nodeType === Node.ELEMENT_NODE) { const tag = node.tagName.toLowerCase(); - // Skip nav/footer/header/script/style/noscript/iframe/svg if (['nav', 'footer', 'header', 'script', 'style', 'noscript', 'iframe', 'svg'].includes(tag)) return; const style = window.getComputedStyle(node); if (style.display === 'none' || style.visibility === 'hidden') return; - - // Block level elements get a newline + const isBlock = style.display === 'block' || style.display === 'flex' || node.tagName === 'P' || node.tagName === 'DIV'; if (isBlock) text += ' '; - + if (node.shadowRoot) { Array.from(node.shadowRoot.childNodes).forEach(walk); } else { node.childNodes.forEach(walk); } - + if (isBlock) text += '\n'; } } @@ -317,170 +319,127 @@ return text.replace(/\n{3,}/g, '\n\n').trim(); } - // Load WASM - try { - const wasmUrl = EXT_URL + 'pkg/sentience_core.js'; - const module = await import(wasmUrl); - const imports = { - env: { - js_click_element: (id) => { - const el = window.sentience_registry[id]; - if (el) { el.click(); el.focus(); } + // --- HELPER: Clean null/undefined fields --- + function cleanElement(obj) { + if (Array.isArray(obj)) { + return obj.map(cleanElement); + } + if (obj !== null && typeof obj === 'object') { + const cleaned = {}; + for (const [key, value] of Object.entries(obj)) { + if (value !== null && value !== undefined) { + if (typeof value === 'object') { + const deepClean = cleanElement(value); + if (Object.keys(deepClean).length > 0) { + cleaned[key] = deepClean; + } + } else { + cleaned[key] = value; + } } } - }; - await module.default(undefined, imports); - wasmModule = module; - - // Verify functions are available - if (!wasmModule.analyze_page) { - console.error('[SentienceAPI.com] available'); - } else { - console.log('[SentienceAPI.com] ✓ Ready!'); - console.log('[SentienceAPI.com] Available functions:', Object.keys(wasmModule).filter(k => k.startsWith('analyze'))); + return cleaned; } - } catch (e) { - console.error('[SentienceAPI.com] Extension Load Failed:', e); + return obj; } - // REMOVED: Headless detection - no longer needed (license system removed) - // --- GLOBAL API --- window.sentience = { - // 1. Geometry snapshot (existing) + // 1. Geometry snapshot (NEW ARCHITECTURE - No WASM in Main World!) snapshot: async (options = {}) => { - if (!wasmModule) return { error: "WASM not ready" }; - - const rawData = []; - // Remove textMap as we include text in rawData - window.sentience_registry = []; - - const nodes = getAllElements(); - - nodes.forEach((el, idx) => { - if (!el.getBoundingClientRect) return; - const rect = el.getBoundingClientRect(); - if (rect.width < 5 || rect.height < 5) return; - - window.sentience_registry[idx] = el; - - // Calculate properties for Fat Payload - const textVal = getText(el); - const inView = isInViewport(rect); - // Only check occlusion if visible (Optimization) - const occluded = inView ? isOccluded(el, rect) : false; - - const style = window.getComputedStyle(el); - rawData.push({ - id: idx, - tag: el.tagName.toLowerCase(), - rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height }, - styles: { - display: toSafeString(style.display), - visibility: toSafeString(style.visibility), - opacity: toSafeString(style.opacity), - z_index: toSafeString(style.zIndex || "auto"), - bg_color: toSafeString(style.backgroundColor), - color: toSafeString(style.color), - cursor: toSafeString(style.cursor), - font_weight: toSafeString(style.fontWeight), - font_size: toSafeString(style.fontSize) - }, - attributes: { - role: toSafeString(el.getAttribute('role')), - type_: toSafeString(el.getAttribute('type')), - aria_label: toSafeString(el.getAttribute('aria-label')), - // Handle both regular href and SVGAnimatedString href - href: toSafeString(el.href), - class: toSafeString(getClassName(el)) - }, - // Pass to WASM - ensure text is also a safe string - text: toSafeString(textVal), - in_viewport: inView, - is_occluded: occluded + try { + // Step 1: Collect raw DOM data (Main World - CSP can't block this!) + const rawData = []; + window.sentience_registry = []; + + const nodes = getAllElements(); + + nodes.forEach((el, idx) => { + if (!el.getBoundingClientRect) return; + const rect = el.getBoundingClientRect(); + if (rect.width < 5 || rect.height < 5) return; + + window.sentience_registry[idx] = el; + + const textVal = getText(el); + const inView = isInViewport(rect); + const occluded = inView ? isOccluded(el, rect) : false; + + const style = window.getComputedStyle(el); + rawData.push({ + id: idx, + tag: el.tagName.toLowerCase(), + rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height }, + styles: { + display: toSafeString(style.display), + visibility: toSafeString(style.visibility), + opacity: toSafeString(style.opacity), + z_index: toSafeString(style.zIndex || "auto"), + bg_color: toSafeString(style.backgroundColor), + color: toSafeString(style.color), + cursor: toSafeString(style.cursor), + font_weight: toSafeString(style.fontWeight), + font_size: toSafeString(style.fontSize) + }, + attributes: { + role: toSafeString(el.getAttribute('role')), + type_: toSafeString(el.getAttribute('type')), + aria_label: toSafeString(el.getAttribute('aria-label')), + href: toSafeString(el.href), + class: toSafeString(getClassName(el)) + }, + text: toSafeString(textVal), + in_viewport: inView, + is_occluded: occluded + }); }); - }); - let result; - try { - if (options.limit || options.filter) { - result = wasmModule.analyze_page_with_options(rawData, options); - } else { - result = wasmModule.analyze_page(rawData); - } - } catch (e) { - return { status: "error", error: e.message }; - } + console.log(`[SentienceAPI] Collected ${rawData.length} elements, sending to background for WASM processing...`); - // Hydration step removed - // Capture Screenshot - let screenshot = null; - if (options.screenshot) { - screenshot = await captureScreenshot(options.screenshot); - } + // Step 2: Send to background worker for WASM processing (CSP-immune!) + const processed = await processSnapshotInBackground(rawData, options); - // C. Clean up null/undefined fields to save tokens - const cleanElement = (obj) => { - if (Array.isArray(obj)) { - return obj.map(cleanElement); + // Step 3: Capture screenshot if requested + let screenshot = null; + if (options.screenshot) { + screenshot = await captureScreenshot(options.screenshot); } - if (obj !== null && typeof obj === 'object') { - const cleaned = {}; - for (const [key, value] of Object.entries(obj)) { - // Explicitly skip null AND undefined - if (value !== null && value !== undefined) { - // Recursively clean objects - if (typeof value === 'object') { - const deepClean = cleanElement(value); - // Only keep object if it's not empty (optional optimization) - if (Object.keys(deepClean).length > 0) { - cleaned[key] = deepClean; - } - } else { - cleaned[key] = value; - } - } - } - return cleaned; - } - return obj; - }; - - const cleanedElements = cleanElement(result); - // DEBUG: Check rawData before pruning - // console.log(`[DEBUG] rawData length BEFORE pruning: ${rawData.length}`); - // Prune raw elements using WASM before sending to API - // This prevents 413 errors on large sites (Amazon: 5000+ -> ~200-400) - const prunedRawData = wasmModule.prune_for_api(rawData); - - // Clean up null/undefined fields in raw_elements as well - const cleanedRawElements = cleanElement(prunedRawData); - - return { - status: "success", - url: window.location.href, - elements: cleanedElements, - raw_elements: cleanedRawElements, // Send cleaned pruned data to prevent 413 errors - screenshot: screenshot - }; + // Step 4: Clean and return + const cleanedElements = cleanElement(processed.elements); + const cleanedRawElements = cleanElement(processed.raw_elements); + + console.log(`[SentienceAPI] ✓ Complete: ${cleanedElements.length} elements, ${cleanedRawElements.length} raw (WASM took ${processed.duration?.toFixed(1)}ms)`); + + return { + status: "success", + url: window.location.href, + elements: cleanedElements, + raw_elements: cleanedRawElements, + screenshot: screenshot + }; + } catch (error) { + console.error('[SentienceAPI] snapshot() failed:', error); + return { + status: "error", + error: error.message || 'Unknown error' + }; + } }, - // 2. Read Content (New) + + // 2. Read Content (unchanged) read: (options = {}) => { - const format = options.format || 'raw'; // 'raw', 'text', or 'markdown' + const format = options.format || 'raw'; let content; - + if (format === 'raw') { - // Return raw HTML suitable for Turndown or other Node.js libraries content = getRawHTML(document.body); } else if (format === 'markdown') { - // Return lightweight markdown conversion content = convertToMarkdown(document.body); } else { - // Default to text content = convertToText(document.body); } - + return { status: "success", url: window.location.href, @@ -490,11 +449,17 @@ }; }, - // 3. Action + // 3. Click Action (unchanged) click: (id) => { const el = window.sentience_registry[id]; - if (el) { el.click(); el.focus(); return true; } + if (el) { + el.click(); + el.focus(); + return true; + } return false; } }; -})(); \ No newline at end of file + + console.log('[SentienceAPI] ✓ Ready! (CSP-Resistant - WASM runs in background)'); +})(); diff --git a/sentience/extension/manifest.json b/sentience/extension/manifest.json index a27f969..5227103 100644 --- a/sentience/extension/manifest.json +++ b/sentience/extension/manifest.json @@ -1,10 +1,14 @@ { "manifest_version": 3, "name": "Sentience Semantic Visual Grounding Extractor", - "version": "1.0.9", + "version": "2.0.0", "description": "Extract semantic visual grounding data from web pages", "permissions": ["activeTab", "scripting"], "host_permissions": [""], + "background": { + "service_worker": "background.js", + "type": "module" + }, "web_accessible_resources": [ { "resources": ["pkg/*"], @@ -27,4 +31,4 @@ "content_security_policy": { "extension_pages": "script-src 'self' 'wasm-unsafe-eval'; object-src 'self'" } -} \ No newline at end of file +} diff --git a/sentience/extension/pkg/sentience_core_bg.wasm b/sentience/extension/pkg/sentience_core_bg.wasm index bf178637ff5231c64b9737bd4ad5fa7094603655..4f94996ec56d31ad14fbe9409a349776e5ae4e83 100644 GIT binary patch delta 1062 zcmZ9IZ){Ul7{>S9Q`fB(xEs@zis5dcTPKq)n_EyJAp(o6_r}t#2npC3wnEVgV=>IA zP*?;F!Rc{(8=N1^A{z-H;|y=q#0AD*HJA{y8TddD*ve2CEd0p`!f7||2fsY$_de%+ z-Z$rr#nZ>)=?7cv+_b;UKeQL{oA$6`e?qLtSg7P(W&f}n>=yf%-DKBUf{n0iY?KYN znC*Agh8v29_c}aEQK1-i9GQ>9eBbMPu~7t8 z<9vD5R6a+y^Mt+zgWPZUF(^8WgABVx_0q1%_?|xqwctImtM;sdVP3oX-IPw_iPhB; z6s)V6pyL*m)j#$+EXoc)_F~~T#}i^SY)r#Gx#L;>R%|h%Vo%HqT;=<>S8sJ+eA<0RUf$Fi`WpFipr-EA4bHSrTopNt+p@Le?;#Oyr`xXitsk(B20L!CZc z;txA3a7k2jb+~arW_4`fp1uHv_(y%ONc}OTxBKQ{NR%F1lr8_FsXyxG+wath9itZ@ z3Pv@E@NFqWBQfyA9UX0={O*=aw6&V>;Pb;5@s%B|=KC3V!A|d}n1xpQLPa}$I|aX) z#aRfa(~B+?(~(^0)aAmHIfoRdrYx{U)CY<@)yS+gXPoA^(1cwy#f2H>Nf$~d;cHs| z9NsopK95Zfhe&2p0a{QZTmJK+nbF^6m`gvGp@6nZGM{+~F$F)E-&CMPLAUwGT=Yz$ zUn&ux_DbZN_bRc&M)D0$DI;G}zYdLdCW|ec4j8g%(UC>>eQ;86B Q3^X>ZjfOomb0?NP0*c;iw*UYD delta 1009 zcmY+AdrTBp6vqALS_XD2E~4(T#W>i941!tec8T$kG!fCVGwlkjk3<0@tYW$<5~EE? zT_rV58cV#w+4x9fh(c3n(F>haqefAq(V+1U@R8V9UyDyf(S*e6?1G#0pYQ&@@7!~4 z?imUv422Wg>t%jRPUYXrI)5$)qz`1VEODNc@t8eh57`UW$0Fu+ZpB ztP#(oEPm3WN|~y7XgM_(fAH3YzhRy5f5pJNwNzf~mHEcP6?nj}7KZuKLMN~DhVh7B z*3!5~3-ff%$s^io+~YpohkK$)Z)eymiWjvcql3RGtH%bhv;3-rK3=}++nA>D$g1KI z@>i6M(AhY-E8k~LaT*tRpM?TThC9TIfS!gU<{OiEX=pw+ibEk6@R+x4xAUjlibiPZ z_F-B(;$3yc!zA|A4W-~Oe)F_b9Bxbm?uet5o{U!Bd$1Tmk$s2(5$fbRP?3#o7V#b;Lapo?z^aPzEFc^YZJ@5~N zJfJ`nyi~v+^q2FX>90zjflpPv=p%w=wgJ`+~TPWFq4om86SOtkc3WSe;#rrv>BIYqGJplFTgCiz5sUGQ-Ik< z$^xt#O@oQ$;gPoPCi6q-lpUSXs+%))(1E3@L?iqj9-1|W9XruN-&3d z{Fs7X`r3~S%JyR(SkQlmZ2|vvE2X zr}Hr~>$YC%F2yAET0H29Upf<~3-Rg=vXmhgSu{I(WKysU-#|4^mLbVP*Q#Kn7E_E! w6|5O9$D)eL%%Ym=HNk+YkAKGEcbMB|LI3~& diff --git a/sentience/extension/release.json b/sentience/extension/release.json index bb86577..e2b79b5 100644 --- a/sentience/extension/release.json +++ b/sentience/extension/release.json @@ -1,9 +1,9 @@ { - "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272366797", - "assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272366797/assets", - "upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272366797/assets{?name,label}", - "html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v1.0.9", - "id": 272366797, + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272763044", + "assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272763044/assets", + "upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272763044/assets{?name,label}", + "html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v2.0.0", + "id": 272763044, "author": { "login": "github-actions[bot]", "id": 41898282, @@ -25,21 +25,21 @@ "user_view_type": "public", "site_admin": false }, - "node_id": "RE_kwDOQshiJ84QO_zN", - "tag_name": "v1.0.9", + "node_id": "RE_kwDOQshiJ84QQgik", + "tag_name": "v2.0.0", "target_commitish": "main", - "name": "Release v1.0.9", + "name": "Release v2.0.0", "draft": false, "immutable": false, "prerelease": false, - "created_at": "2025-12-23T03:09:03Z", - "updated_at": "2025-12-23T03:10:45Z", - "published_at": "2025-12-23T03:10:45Z", + "created_at": "2025-12-25T04:37:21Z", + "updated_at": "2025-12-25T04:55:17Z", + "published_at": "2025-12-25T04:53:47Z", "assets": [ { - "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/331995822", - "id": 331995822, - "node_id": "RA_kwDOQshiJ84Tydqu", + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/332739489", + "id": 332739489, + "node_id": "RA_kwDOQshiJ84T1TOh", "name": "extension-files.tar.gz", "label": "", "uploader": { @@ -65,17 +65,17 @@ }, "content_type": "application/gzip", "state": "uploaded", - "size": 63205, - "digest": "sha256:69ea28d2e30b9a168608be87c00ee0c678ed2893c90e6220727b885ac6c99e17", - "download_count": 3, - "created_at": "2025-12-23T03:10:45Z", - "updated_at": "2025-12-23T03:10:45Z", - "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v1.0.9/extension-files.tar.gz" + "size": 63242, + "digest": "sha256:415804bc3771b8a3a7bf4a42fafe1c4249219e8609e28d91468069b267bca2f1", + "download_count": 0, + "created_at": "2025-12-25T04:55:17Z", + "updated_at": "2025-12-25T04:55:17Z", + "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.0.0/extension-files.tar.gz" }, { - "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/331995821", - "id": 331995821, - "node_id": "RA_kwDOQshiJ84Tydqt", + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/332739488", + "id": 332739488, + "node_id": "RA_kwDOQshiJ84T1TOg", "name": "extension-package.zip", "label": "", "uploader": { @@ -101,16 +101,16 @@ }, "content_type": "application/zip", "state": "uploaded", - "size": 65639, - "digest": "sha256:cd3339d905ac5453d2652b6a8c3bd8f68a66bd45a2e6660aa2824a38e56d5f31", + "size": 65809, + "digest": "sha256:1c1f020e271e8f77948b968d38de0c9ccc4a9000fa9b196c91a539606fff4200", "download_count": 0, - "created_at": "2025-12-23T03:10:45Z", - "updated_at": "2025-12-23T03:10:45Z", - "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v1.0.9/extension-package.zip" + "created_at": "2025-12-25T04:55:17Z", + "updated_at": "2025-12-25T04:55:17Z", + "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.0.0/extension-package.zip" } ], - "tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v1.0.9", - "zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v1.0.9", - "body": "## What's Changed\n* svg string fix by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/14\n\n\n**Full Changelog**: https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/compare/v0.10.4...v1.0.9", + "tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v2.0.0", + "zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v2.0.0", + "body": "## What's Changed\n* fix CSP strict sites by moving to background thread by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/15\n\n\n**Full Changelog**: https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/compare/v1.0.9...v2.0.0", "mentions_count": 1 }