diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index 8f6eb15..55352e2 100644 --- a/sentience/extension/injected_api.js +++ b/sentience/extension/injected_api.js @@ -20,14 +20,34 @@ window.sentience_registry = []; let wasmModule = null; - // --- HELPER: Deep Walker --- + // --- HELPER: Deep Walker with Native Filter --- function getAllElements(root = document) { const elements = []; - const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT); + // FILTER: Skip Script, Style, Comments, Metadata tags during traversal + // This prevents collecting them in the first place, saving memory and CPU + const filter = { + acceptNode: function(node) { + // Skip metadata and script/style tags + if (['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'LINK', 'HEAD'].includes(node.tagName)) { + return NodeFilter.FILTER_REJECT; + } + // Skip deep SVG children (keep root only, unless you need path data) + // This reduces noise from complex SVG graphics while preserving icon containers + if (node.parentNode && node.parentNode.tagName === 'SVG' && node.tagName !== 'SVG') { + return NodeFilter.FILTER_REJECT; + } + return NodeFilter.FILTER_ACCEPT; + } + }; + + const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT, filter); while(walker.nextNode()) { const node = walker.currentNode; - elements.push(node); - if (node.shadowRoot) elements.push(...getAllElements(node.shadowRoot)); + // Pre-check: Don't even process empty/detached nodes + if (node.isConnected) { + elements.push(node); + if (node.shadowRoot) elements.push(...getAllElements(node.shadowRoot)); + } } return elements; } @@ -40,6 +60,15 @@ return (el.innerText || '').replace(/\s+/g, ' ').trim().substring(0, 100); } + // --- HELPER: Safe Class Name Extractor --- + // Fixes the SVGAnimatedString error by ensuring we always get a primitive string + function getClassName(el) { + if (typeof el.className === 'string') return el.className; + // Handle SVGAnimatedString (has baseVal and animVal) + if (el.className && typeof el.className.baseVal === 'string') return el.className.baseVal; + return ''; + } + // --- HELPER: Viewport Check (NEW) --- function isInViewport(rect) { return ( @@ -287,13 +316,13 @@ // Verify functions are available if (!wasmModule.analyze_page) { - console.error('[SentienceAPI.com] WASM functions not available'); + console.error('[SentienceAPI.com] available'); } else { - console.log('[SentienceAPI.com] ✓ API Ready!'); + console.log('[SentienceAPI.com] ✓ Ready!'); console.log('[SentienceAPI.com] Available functions:', Object.keys(wasmModule).filter(k => k.startsWith('analyze'))); } } catch (e) { - console.error('[SentienceAPI.com] WASM Load Failed:', e); + console.error('[SentienceAPI.com] Extension Load Failed:', e); } // REMOVED: Headless detection - no longer needed (license system removed) @@ -332,19 +361,20 @@ display: style.display, visibility: style.visibility, opacity: style.opacity, - z_index: style.zIndex || "0", + z_index: String(style.zIndex || "auto"), // Force string conversion bg_color: style.backgroundColor, color: style.color, cursor: style.cursor, - font_weight: style.fontWeight, + font_weight: String(style.fontWeight), // Force string conversion font_size: style.fontSize }, attributes: { role: el.getAttribute('role'), type_: el.getAttribute('type'), aria_label: el.getAttribute('aria-label'), - href: el.href, - class: el.className + // Convert SVGAnimatedString to string for SVG elements + href: el.href?.baseVal || el.href || null, + class: getClassName(el) || null }, // Pass to WASM text: textVal || null, @@ -353,10 +383,6 @@ }); }); - // FREE TIER: No license checks - extension provides basic geometry data - // Pro/Enterprise tiers will be handled server-side (future work) - - // 1. Get Geometry from WASM let result; try { if (options.limit || options.filter) { @@ -368,24 +394,33 @@ return { status: "error", error: e.message }; } - // Hydration step removed as WASM now returns populated structs - + // Hydration step removed // Capture Screenshot let screenshot = null; if (options.screenshot) { screenshot = await captureScreenshot(options.screenshot); } - // C. Clean up null/undefined fields to save tokens (Your existing cleaner) + // C. Clean up null/undefined fields to save tokens const cleanElement = (obj) => { if (Array.isArray(obj)) { return obj.map(cleanElement); - } else if (obj !== null && typeof obj === 'object') { + } + if (obj !== null && typeof obj === 'object') { const cleaned = {}; for (const [key, value] of Object.entries(obj)) { - // Keep boolean false for critical flags if desired, or remove to match Rust defaults + // Explicitly skip null AND undefined if (value !== null && value !== undefined) { - cleaned[key] = cleanElement(value); + // Recursively clean objects + if (typeof value === 'object') { + const deepClean = cleanElement(value); + // Only keep object if it's not empty (optional optimization) + if (Object.keys(deepClean).length > 0) { + cleaned[key] = deepClean; + } + } else { + cleaned[key] = value; + } } } return cleaned; @@ -395,11 +430,20 @@ const cleanedElements = cleanElement(result); + // DEBUG: Check rawData before pruning + // console.log(`[DEBUG] rawData length BEFORE pruning: ${rawData.length}`); + // Prune raw elements using WASM before sending to API + // This prevents 413 errors on large sites (Amazon: 5000+ -> ~200-400) + const prunedRawData = wasmModule.prune_for_api(rawData); + + // Clean up null/undefined fields in raw_elements as well + const cleanedRawElements = cleanElement(prunedRawData); + return { status: "success", url: window.location.href, elements: cleanedElements, - raw_elements: rawData, // Include raw data for server-side processing (safe to expose - no proprietary value) + raw_elements: cleanedRawElements, // Send cleaned pruned data to prevent 413 errors screenshot: screenshot }; }, diff --git a/sentience/extension/pkg/sentience_core.d.ts b/sentience/extension/pkg/sentience_core.d.ts index 017160d..e280c26 100644 --- a/sentience/extension/pkg/sentience_core.d.ts +++ b/sentience/extension/pkg/sentience_core.d.ts @@ -7,6 +7,14 @@ export function analyze_page_with_options(val: any, options: any): any; export function decide_and_act(_raw_elements: any): void; +/** + * Prune raw elements before sending to API + * This is a "dumb" filter that reduces payload size without leaking proprietary IP + * Filters out: tiny elements, invisible elements, non-interactive wrapper divs + * Amazon: 5000-6000 elements -> ~200-400 elements (~95% reduction) + */ +export function prune_for_api(val: any): any; + export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module; export interface InitOutput { @@ -14,6 +22,7 @@ export interface InitOutput { readonly analyze_page: (a: number) => number; readonly analyze_page_with_options: (a: number, b: number) => number; readonly decide_and_act: (a: number) => void; + readonly prune_for_api: (a: number) => number; readonly __wbindgen_export: (a: number, b: number) => number; readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number; readonly __wbindgen_export3: (a: number) => void; diff --git a/sentience/extension/pkg/sentience_core.js b/sentience/extension/pkg/sentience_core.js index bb44be7..b232d13 100644 --- a/sentience/extension/pkg/sentience_core.js +++ b/sentience/extension/pkg/sentience_core.js @@ -223,6 +223,19 @@ export function decide_and_act(_raw_elements) { wasm.decide_and_act(addHeapObject(_raw_elements)); } +/** + * Prune raw elements before sending to API + * This is a "dumb" filter that reduces payload size without leaking proprietary IP + * Filters out: tiny elements, invisible elements, non-interactive wrapper divs + * Amazon: 5000-6000 elements -> ~200-400 elements (~95% reduction) + * @param {any} val + * @returns {any} + */ +export function prune_for_api(val) { + const ret = wasm.prune_for_api(addHeapObject(val)); + return takeObject(ret); +} + const EXPECTED_RESPONSE_TYPES = new Set(['basic', 'cors', 'default']); async function __wbg_load(module, imports) { @@ -338,6 +351,9 @@ function __wbg_get_imports() { const ret = getObject(arg0).done; return ret; }; + imports.wbg.__wbg_error_7bc7d576a6aaf855 = function(arg0) { + console.error(getObject(arg0)); + }; imports.wbg.__wbg_get_6b7bd52aca3f9671 = function(arg0, arg1) { const ret = getObject(arg0)[arg1 >>> 0]; return addHeapObject(ret); diff --git a/sentience/extension/pkg/sentience_core_bg.wasm b/sentience/extension/pkg/sentience_core_bg.wasm index ea9c9cf..778e818 100644 Binary files a/sentience/extension/pkg/sentience_core_bg.wasm and b/sentience/extension/pkg/sentience_core_bg.wasm differ diff --git a/sentience/extension/pkg/sentience_core_bg.wasm.d.ts b/sentience/extension/pkg/sentience_core_bg.wasm.d.ts index 3544143..dccf049 100644 --- a/sentience/extension/pkg/sentience_core_bg.wasm.d.ts +++ b/sentience/extension/pkg/sentience_core_bg.wasm.d.ts @@ -4,6 +4,7 @@ export const memory: WebAssembly.Memory; export const analyze_page: (a: number) => number; export const analyze_page_with_options: (a: number, b: number) => number; export const decide_and_act: (a: number) => void; +export const prune_for_api: (a: number) => number; export const __wbindgen_export: (a: number, b: number) => number; export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number; export const __wbindgen_export3: (a: number) => void; diff --git a/sentience/extension/release.json b/sentience/extension/release.json index 1cc6f95..bcba583 100644 --- a/sentience/extension/release.json +++ b/sentience/extension/release.json @@ -1,9 +1,9 @@ { - "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272122068", - "assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272122068/assets", - "upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272122068/assets{?name,label}", - "html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v0.10.3", - "id": 272122068, + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272300094", + "assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272300094/assets", + "upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272300094/assets{?name,label}", + "html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v0.10.4", + "id": 272300094, "author": { "login": "github-actions[bot]", "id": 41898282, @@ -25,21 +25,21 @@ "user_view_type": "public", "site_admin": false }, - "node_id": "RE_kwDOQshiJ84QOEDU", - "tag_name": "v0.10.3", + "node_id": "RE_kwDOQshiJ84QOvg-", + "tag_name": "v0.10.4", "target_commitish": "main", - "name": "Release v0.10.3", + "name": "Release v0.10.4", "draft": false, "immutable": false, "prerelease": false, - "created_at": "2025-12-22T07:06:41Z", - "updated_at": "2025-12-22T07:07:53Z", - "published_at": "2025-12-22T07:07:53Z", + "created_at": "2025-12-22T19:42:33Z", + "updated_at": "2025-12-22T19:43:39Z", + "published_at": "2025-12-22T19:43:38Z", "assets": [ { - "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/331630535", - "id": 331630535, - "node_id": "RA_kwDOQshiJ84TxEfH", + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/331850955", + "id": 331850955, + "node_id": "RA_kwDOQshiJ84Tx6TL", "name": "extension-files.tar.gz", "label": "", "uploader": { @@ -65,17 +65,17 @@ }, "content_type": "application/gzip", "state": "uploaded", - "size": 61175, - "digest": "sha256:21ef5d067ffba9a1dd93630c03b39efd57dabb3d1598543c7e3519ff9d3bf387", + "size": 63026, + "digest": "sha256:f1f888c2b98e15c4433cee3a45e0109bbcef0dec0b4eef9d889156c45382f1ca", "download_count": 0, - "created_at": "2025-12-22T07:07:53Z", - "updated_at": "2025-12-22T07:07:53Z", - "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v0.10.3/extension-files.tar.gz" + "created_at": "2025-12-22T19:43:39Z", + "updated_at": "2025-12-22T19:43:39Z", + "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v0.10.4/extension-files.tar.gz" }, { - "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/331630534", - "id": 331630534, - "node_id": "RA_kwDOQshiJ84TxEfG", + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/331850954", + "id": 331850954, + "node_id": "RA_kwDOQshiJ84Tx6TK", "name": "extension-package.zip", "label": "", "uploader": { @@ -101,16 +101,16 @@ }, "content_type": "application/zip", "state": "uploaded", - "size": 63347, - "digest": "sha256:78ca5a87ce41e249a66de63b3fa34aa34268920abe590e412ba6036ec93ceb2f", + "size": 65474, + "digest": "sha256:798aa7b8a37ea110d25310ce62c4796245124a48202fa01ca7909d2fb13b07ee", "download_count": 0, - "created_at": "2025-12-22T07:07:53Z", - "updated_at": "2025-12-22T07:07:53Z", - "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v0.10.3/extension-package.zip" + "created_at": "2025-12-22T19:43:39Z", + "updated_at": "2025-12-22T19:43:39Z", + "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v0.10.4/extension-package.zip" } ], - "tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v0.10.3", - "zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v0.10.3", - "body": "## What's Changed\n* release fix by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/8\n* transferred by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/9\n* fix build and release by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/10\n* verify loop by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/11\n* more robust by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/12\n\n\n**Full Changelog**: https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/compare/vv0.9.0...v0.10.3", + "tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v0.10.4", + "zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v0.10.4", + "body": "## What's Changed\n* prune raw element by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/13\n\n\n**Full Changelog**: https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/compare/v0.10.3...v0.10.4", "mentions_count": 1 } diff --git a/tests/test_generator.py b/tests/test_generator.py index b226abe..dd6bb43 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -6,7 +6,7 @@ import tempfile import os from sentience import SentienceBrowser, record -from sentience.recorder import Trace +from sentience.recorder import Trace, TraceStep from sentience.generator import ScriptGenerator, generate @@ -111,15 +111,18 @@ def test_generator_without_selector(): browser.page.goto("https://example.com") browser.page.wait_for_load_state("networkidle") - with record(browser) as rec: - rec.record_click(1) # No selector - - # Explicitly remove selector to test the no-selector case - # (The recorder automatically infers selectors, so we need to clear it) - if rec.trace.steps: - rec.trace.steps[-1].selector = None - - generator = ScriptGenerator(rec.trace) + # Create a trace manually with a step that has no selector + # (The recorder automatically infers selectors, so we create the step directly) + trace = Trace("https://example.com") + step = TraceStep( + ts=0, + type="click", + element_id=1, + selector=None # Explicitly no selector + ) + trace.add_step(step) + + generator = ScriptGenerator(trace) code = generator.generate_python() # Should include TODO comment for missing selector