Skip to content

Commit 9839580

Browse files
authored
Merge pull request #40 from SentienceAPI/sync-extension-v0.10.4
Sync Extension: v0.10.4
2 parents 4fdedad + 954e18d commit 9839580

File tree

7 files changed

+135
-62
lines changed

7 files changed

+135
-62
lines changed

sentience/extension/injected_api.js

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,34 @@
2020
window.sentience_registry = [];
2121
let wasmModule = null;
2222

23-
// --- HELPER: Deep Walker ---
23+
// --- HELPER: Deep Walker with Native Filter ---
2424
function getAllElements(root = document) {
2525
const elements = [];
26-
const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
26+
// FILTER: Skip SCRIPT, STYLE, NOSCRIPT, META, LINK and HEAD elements during traversal
27+
// This prevents collecting them in the first place, saving memory and CPU
28+
const filter = {
29+
acceptNode: function(node) {
30+
// Skip metadata and script/style tags
31+
if (['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'LINK', 'HEAD'].includes(node.tagName)) {
32+
return NodeFilter.FILTER_REJECT;
33+
}
34+
// Skip deep SVG children (keep root <svg> only, unless you need path data)
35+
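// This reduces noise from complex SVG graphics while preserving icon containers. NOTE(review): SVG-namespace elements report a lowercase tagName ('svg') in HTML documents - confirm the 'SVG' comparison below actually matches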
36+
if (node.parentNode && node.parentNode.tagName === 'SVG' && node.tagName !== 'SVG') {
37+
return NodeFilter.FILTER_REJECT;
38+
}
39+
return NodeFilter.FILTER_ACCEPT;
40+
}
41+
};
42+
43+
const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT, filter);
2744
while(walker.nextNode()) {
2845
const node = walker.currentNode;
29-
elements.push(node);
30-
if (node.shadowRoot) elements.push(...getAllElements(node.shadowRoot));
46+
// Pre-check: skip nodes that are no longer connected to the document
47+
if (node.isConnected) {
48+
elements.push(node);
49+
if (node.shadowRoot) elements.push(...getAllElements(node.shadowRoot));
50+
}
3151
}
3252
return elements;
3353
}
@@ -40,6 +60,15 @@
4060
return (el.innerText || '').replace(/\s+/g, ' ').trim().substring(0, 100);
4161
}
4262

63+
// --- HELPER: Safe Class Name Extractor ---
64+
// Fixes the SVGAnimatedString error by ensuring we always get a primitive string
65+
function getClassName(el) {
66+
if (typeof el.className === 'string') return el.className;
67+
// Handle SVGAnimatedString (has baseVal and animVal)
68+
if (el.className && typeof el.className.baseVal === 'string') return el.className.baseVal;
69+
return '';
70+
}
71+
4372
// --- HELPER: Viewport Check (NEW) ---
4473
function isInViewport(rect) {
4574
return (
@@ -287,13 +316,13 @@
287316

288317
// Verify functions are available
289318
if (!wasmModule.analyze_page) {
290-
console.error('[SentienceAPI.com] WASM functions not available');
319+
console.error('[SentienceAPI.com] available');
291320
} else {
292-
console.log('[SentienceAPI.com] ✓ API Ready!');
321+
console.log('[SentienceAPI.com] ✓ Ready!');
293322
console.log('[SentienceAPI.com] Available functions:', Object.keys(wasmModule).filter(k => k.startsWith('analyze')));
294323
}
295324
} catch (e) {
296-
console.error('[SentienceAPI.com] WASM Load Failed:', e);
325+
console.error('[SentienceAPI.com] Extension Load Failed:', e);
297326
}
298327

299328
// REMOVED: Headless detection - no longer needed (license system removed)
@@ -332,19 +361,20 @@
332361
display: style.display,
333362
visibility: style.visibility,
334363
opacity: style.opacity,
335-
z_index: style.zIndex || "0",
364+
z_index: String(style.zIndex || "auto"), // Force string conversion
336365
bg_color: style.backgroundColor,
337366
color: style.color,
338367
cursor: style.cursor,
339-
font_weight: style.fontWeight,
368+
font_weight: String(style.fontWeight), // Force string conversion
340369
font_size: style.fontSize
341370
},
342371
attributes: {
343372
role: el.getAttribute('role'),
344373
type_: el.getAttribute('type'),
345374
aria_label: el.getAttribute('aria-label'),
346-
href: el.href,
347-
class: el.className
375+
// Convert SVGAnimatedString to string for SVG elements
376+
href: el.href?.baseVal || el.href || null,
377+
class: getClassName(el) || null
348378
},
349379
// Pass to WASM
350380
text: textVal || null,
@@ -353,10 +383,6 @@
353383
});
354384
});
355385

356-
// FREE TIER: No license checks - extension provides basic geometry data
357-
// Pro/Enterprise tiers will be handled server-side (future work)
358-
359-
// 1. Get Geometry from WASM
360386
let result;
361387
try {
362388
if (options.limit || options.filter) {
@@ -368,24 +394,33 @@
368394
return { status: "error", error: e.message };
369395
}
370396

371-
// Hydration step removed as WASM now returns populated structs
372-
397+
// Hydration step removed
373398
// Capture Screenshot
374399
let screenshot = null;
375400
if (options.screenshot) {
376401
screenshot = await captureScreenshot(options.screenshot);
377402
}
378403

379-
// C. Clean up null/undefined fields to save tokens (Your existing cleaner)
404+
// C. Clean up null/undefined fields to save tokens
380405
const cleanElement = (obj) => {
381406
if (Array.isArray(obj)) {
382407
return obj.map(cleanElement);
383-
} else if (obj !== null && typeof obj === 'object') {
408+
}
409+
if (obj !== null && typeof obj === 'object') {
384410
const cleaned = {};
385411
for (const [key, value] of Object.entries(obj)) {
386-
// Keep boolean false for critical flags if desired, or remove to match Rust defaults
412+
// Explicitly skip null AND undefined
387413
if (value !== null && value !== undefined) {
388-
cleaned[key] = cleanElement(value);
414+
// Recursively clean objects
415+
if (typeof value === 'object') {
416+
const deepClean = cleanElement(value);
417+
// Only keep the cleaned object or array if it is non-empty (optional optimization)
418+
if (Object.keys(deepClean).length > 0) {
419+
cleaned[key] = deepClean;
420+
}
421+
} else {
422+
cleaned[key] = value;
423+
}
389424
}
390425
}
391426
return cleaned;
@@ -395,11 +430,20 @@
395430

396431
const cleanedElements = cleanElement(result);
397432

433+
// DEBUG: Check rawData before pruning
434+
// console.log(`[DEBUG] rawData length BEFORE pruning: ${rawData.length}`);
435+
// Prune raw elements using WASM before sending to API
436+
// This prevents 413 errors on large sites (Amazon: 5000+ -> ~200-400)
437+
const prunedRawData = wasmModule.prune_for_api(rawData);
438+
439+
// Clean up null/undefined fields in raw_elements as well
440+
const cleanedRawElements = cleanElement(prunedRawData);
441+
398442
return {
399443
status: "success",
400444
url: window.location.href,
401445
elements: cleanedElements,
402-
raw_elements: rawData, // Include raw data for server-side processing (safe to expose - no proprietary value)
446+
raw_elements: cleanedRawElements, // Send cleaned pruned data to prevent 413 errors
403447
screenshot: screenshot
404448
};
405449
},

sentience/extension/pkg/sentience_core.d.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,22 @@ export function analyze_page_with_options(val: any, options: any): any;
77

88
export function decide_and_act(_raw_elements: any): void;
99

10+
/**
11+
* Prune raw elements before sending to API
12+
* This is a "dumb" filter that reduces payload size without leaking proprietary IP
13+
* Filters out: tiny elements, invisible elements, non-interactive wrapper divs
14+
* Amazon: 5000-6000 elements -> ~200-400 elements (~95% reduction)
15+
*/
16+
export function prune_for_api(val: any): any;
17+
1018
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
1119

1220
export interface InitOutput {
1321
readonly memory: WebAssembly.Memory;
1422
readonly analyze_page: (a: number) => number;
1523
readonly analyze_page_with_options: (a: number, b: number) => number;
1624
readonly decide_and_act: (a: number) => void;
25+
readonly prune_for_api: (a: number) => number;
1726
readonly __wbindgen_export: (a: number, b: number) => number;
1827
readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
1928
readonly __wbindgen_export3: (a: number) => void;

sentience/extension/pkg/sentience_core.js

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,19 @@ export function decide_and_act(_raw_elements) {
223223
wasm.decide_and_act(addHeapObject(_raw_elements));
224224
}
225225

226+
/**
227+
* Prune raw elements before sending to API
228+
* This is a "dumb" filter that reduces payload size without leaking proprietary IP
229+
* Filters out: tiny elements, invisible elements, non-interactive wrapper divs
230+
* Amazon: 5000-6000 elements -> ~200-400 elements (~95% reduction)
231+
* @param {any} val
232+
* @returns {any}
233+
*/
234+
export function prune_for_api(val) {
235+
const ret = wasm.prune_for_api(addHeapObject(val));
236+
return takeObject(ret);
237+
}
238+
226239
const EXPECTED_RESPONSE_TYPES = new Set(['basic', 'cors', 'default']);
227240

228241
async function __wbg_load(module, imports) {
@@ -338,6 +351,9 @@ function __wbg_get_imports() {
338351
const ret = getObject(arg0).done;
339352
return ret;
340353
};
354+
imports.wbg.__wbg_error_7bc7d576a6aaf855 = function(arg0) {
355+
console.error(getObject(arg0));
356+
};
341357
imports.wbg.__wbg_get_6b7bd52aca3f9671 = function(arg0, arg1) {
342358
const ret = getObject(arg0)[arg1 >>> 0];
343359
return addHeapObject(ret);
3.34 KB
Binary file not shown.

sentience/extension/pkg/sentience_core_bg.wasm.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ export const memory: WebAssembly.Memory;
44
export const analyze_page: (a: number) => number;
55
export const analyze_page_with_options: (a: number, b: number) => number;
66
export const decide_and_act: (a: number) => void;
7+
export const prune_for_api: (a: number) => number;
78
export const __wbindgen_export: (a: number, b: number) => number;
89
export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number;
910
export const __wbindgen_export3: (a: number) => void;

sentience/extension/release.json

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
{
2-
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272122068",
3-
"assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272122068/assets",
4-
"upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272122068/assets{?name,label}",
5-
"html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v0.10.3",
6-
"id": 272122068,
2+
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272300094",
3+
"assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272300094/assets",
4+
"upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/272300094/assets{?name,label}",
5+
"html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v0.10.4",
6+
"id": 272300094,
77
"author": {
88
"login": "github-actions[bot]",
99
"id": 41898282,
@@ -25,21 +25,21 @@
2525
"user_view_type": "public",
2626
"site_admin": false
2727
},
28-
"node_id": "RE_kwDOQshiJ84QOEDU",
29-
"tag_name": "v0.10.3",
28+
"node_id": "RE_kwDOQshiJ84QOvg-",
29+
"tag_name": "v0.10.4",
3030
"target_commitish": "main",
31-
"name": "Release v0.10.3",
31+
"name": "Release v0.10.4",
3232
"draft": false,
3333
"immutable": false,
3434
"prerelease": false,
35-
"created_at": "2025-12-22T07:06:41Z",
36-
"updated_at": "2025-12-22T07:07:53Z",
37-
"published_at": "2025-12-22T07:07:53Z",
35+
"created_at": "2025-12-22T19:42:33Z",
36+
"updated_at": "2025-12-22T19:43:39Z",
37+
"published_at": "2025-12-22T19:43:38Z",
3838
"assets": [
3939
{
40-
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/331630535",
41-
"id": 331630535,
42-
"node_id": "RA_kwDOQshiJ84TxEfH",
40+
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/331850955",
41+
"id": 331850955,
42+
"node_id": "RA_kwDOQshiJ84Tx6TL",
4343
"name": "extension-files.tar.gz",
4444
"label": "",
4545
"uploader": {
@@ -65,17 +65,17 @@
6565
},
6666
"content_type": "application/gzip",
6767
"state": "uploaded",
68-
"size": 61175,
69-
"digest": "sha256:21ef5d067ffba9a1dd93630c03b39efd57dabb3d1598543c7e3519ff9d3bf387",
68+
"size": 63026,
69+
"digest": "sha256:f1f888c2b98e15c4433cee3a45e0109bbcef0dec0b4eef9d889156c45382f1ca",
7070
"download_count": 0,
71-
"created_at": "2025-12-22T07:07:53Z",
72-
"updated_at": "2025-12-22T07:07:53Z",
73-
"browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v0.10.3/extension-files.tar.gz"
71+
"created_at": "2025-12-22T19:43:39Z",
72+
"updated_at": "2025-12-22T19:43:39Z",
73+
"browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v0.10.4/extension-files.tar.gz"
7474
},
7575
{
76-
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/331630534",
77-
"id": 331630534,
78-
"node_id": "RA_kwDOQshiJ84TxEfG",
76+
"url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/331850954",
77+
"id": 331850954,
78+
"node_id": "RA_kwDOQshiJ84Tx6TK",
7979
"name": "extension-package.zip",
8080
"label": "",
8181
"uploader": {
@@ -101,16 +101,16 @@
101101
},
102102
"content_type": "application/zip",
103103
"state": "uploaded",
104-
"size": 63347,
105-
"digest": "sha256:78ca5a87ce41e249a66de63b3fa34aa34268920abe590e412ba6036ec93ceb2f",
104+
"size": 65474,
105+
"digest": "sha256:798aa7b8a37ea110d25310ce62c4796245124a48202fa01ca7909d2fb13b07ee",
106106
"download_count": 0,
107-
"created_at": "2025-12-22T07:07:53Z",
108-
"updated_at": "2025-12-22T07:07:53Z",
109-
"browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v0.10.3/extension-package.zip"
107+
"created_at": "2025-12-22T19:43:39Z",
108+
"updated_at": "2025-12-22T19:43:39Z",
109+
"browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v0.10.4/extension-package.zip"
110110
}
111111
],
112-
"tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v0.10.3",
113-
"zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v0.10.3",
114-
"body": "## What's Changed\n* release fix by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/8\n* transferred by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/9\n* fix build and release by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/10\n* verify loop by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/11\n* more robust by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/12\n\n\n**Full Changelog**: https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/compare/vv0.9.0...v0.10.3",
112+
"tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v0.10.4",
113+
"zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v0.10.4",
114+
"body": "## What's Changed\n* prune raw element by @rcholic in https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/pull/13\n\n\n**Full Changelog**: https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/compare/v0.10.3...v0.10.4",
115115
"mentions_count": 1
116116
}

tests/test_generator.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import tempfile
77
import os
88
from sentience import SentienceBrowser, record
9-
from sentience.recorder import Trace
9+
from sentience.recorder import Trace, TraceStep
1010
from sentience.generator import ScriptGenerator, generate
1111

1212

@@ -111,15 +111,18 @@ def test_generator_without_selector():
111111
browser.page.goto("https://example.com")
112112
browser.page.wait_for_load_state("networkidle")
113113

114-
with record(browser) as rec:
115-
rec.record_click(1) # No selector
116-
117-
# Explicitly remove selector to test the no-selector case
118-
# (The recorder automatically infers selectors, so we need to clear it)
119-
if rec.trace.steps:
120-
rec.trace.steps[-1].selector = None
121-
122-
generator = ScriptGenerator(rec.trace)
114+
# Create a trace manually with a step that has no selector
115+
# (The recorder automatically infers selectors, so we create the step directly)
116+
trace = Trace("https://example.com")
117+
step = TraceStep(
118+
ts=0,
119+
type="click",
120+
element_id=1,
121+
selector=None # Explicitly no selector
122+
)
123+
trace.add_step(step)
124+
125+
generator = ScriptGenerator(trace)
123126
code = generator.generate_python()
124127

125128
# Should include TODO comment for missing selector

0 commit comments

Comments
 (0)