|
20 | 20 | window.sentience_registry = []; |
21 | 21 | let wasmModule = null; |
22 | 22 |
|
23 | | - // --- HELPER: Deep Walker --- |
| 23 | + // --- HELPER: Deep Walker with Native Filter --- |
/**
 * Collect every connected element under `root`, descending into open shadow
 * roots, while skipping non-visual tags and the children of <svg> elements.
 *
 * @param {Node} [root=document] - Node whose descendants are walked.
 * @returns {Element[]} Accepted elements in document order; the contents of a
 *   host's shadow root follow immediately after the host itself.
 */
function getAllElements(root = document) {
    // Tags that never contribute visible content. Built once per call rather
    // than once per visited node.
    const skippedTags = new Set(['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'LINK', 'HEAD']);

    // tagName is upper-case for HTML elements in HTML documents but keeps its
    // authored case for SVG-namespace elements ("svg", "path"), so normalise
    // before comparing. Nodes without a tagName (e.g. a ShadowRoot) yield ''.
    const tagOf = (node) =>
        (node && typeof node.tagName === 'string' ? node.tagName.toUpperCase() : '');

    const filter = {
        acceptNode(node) {
            const tag = tagOf(node);
            // FILTER_REJECT drops the node AND its whole subtree.
            if (skippedTags.has(tag)) {
                return NodeFilter.FILTER_REJECT;
            }
            // Keep the <svg> container itself (and directly nested <svg>),
            // but reject its other children to avoid path/shape noise.
            if (tag !== 'SVG' && tagOf(node.parentNode) === 'SVG') {
                return NodeFilter.FILTER_REJECT;
            }
            return NodeFilter.FILTER_ACCEPT;
        },
    };

    const elements = [];
    const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT, filter);
    while (walker.nextNode()) {
        const node = walker.currentNode;
        // Ignore nodes that are not attached to a document.
        if (!node.isConnected) {
            continue;
        }
        elements.push(node);
        if (node.shadowRoot) {
            // Append one by one: spreading a very large array into push()
            // can exceed the engine's argument-count limit.
            for (const inner of getAllElements(node.shadowRoot)) {
                elements.push(inner);
            }
        }
    }
    return elements;
}
|
40 | 60 | return (el.innerText || '').replace(/\s+/g, ' ').trim().substring(0, 100); |
41 | 61 | } |
42 | 62 |
|
| 63 | + // --- HELPER: Safe Class Name Extractor --- |
| 64 | + // Fixes the SVGAnimatedString error by ensuring we always get a primitive string |
/**
 * Return an element's class list as a primitive string.
 *
 * HTML elements expose `className` as a string, whereas SVG elements expose
 * an SVGAnimatedString object whose `baseVal` holds the text.
 *
 * @param {Element|null|undefined} el - Element to inspect.
 * @returns {string} The class string, or '' when `el` is nullish or has no
 *   usable class name.
 */
function getClassName(el) {
    const cls = el?.className;
    if (typeof cls === 'string') {
        return cls;
    }
    // SVGAnimatedString carries baseVal and animVal; baseVal is the authored value.
    if (cls && typeof cls.baseVal === 'string') {
        return cls.baseVal;
    }
    return '';
}
| 71 | + |
43 | 72 | // --- HELPER: Viewport Check (NEW) --- |
44 | 73 | function isInViewport(rect) { |
45 | 74 | return ( |
|
287 | 316 |
|
288 | 317 | // Verify functions are available |
289 | 318 | if (!wasmModule.analyze_page) { |
290 | | - console.error('[SentienceAPI.com] WASM functions not available'); |
| 319 | + console.error('[SentienceAPI.com] available'); |
291 | 320 | } else { |
292 | | - console.log('[SentienceAPI.com] ✓ API Ready!'); |
| 321 | + console.log('[SentienceAPI.com] ✓ Ready!'); |
293 | 322 | console.log('[SentienceAPI.com] Available functions:', Object.keys(wasmModule).filter(k => k.startsWith('analyze'))); |
294 | 323 | } |
295 | 324 | } catch (e) { |
296 | | - console.error('[SentienceAPI.com] WASM Load Failed:', e); |
| 325 | + console.error('[SentienceAPI.com] Extension Load Failed:', e); |
297 | 326 | } |
298 | 327 |
|
299 | 328 | // REMOVED: Headless detection - no longer needed (license system removed) |
|
332 | 361 | display: style.display, |
333 | 362 | visibility: style.visibility, |
334 | 363 | opacity: style.opacity, |
335 | | - z_index: style.zIndex || "0", |
| 364 | + z_index: String(style.zIndex || "auto"), // Force string conversion |
336 | 365 | bg_color: style.backgroundColor, |
337 | 366 | color: style.color, |
338 | 367 | cursor: style.cursor, |
339 | | - font_weight: style.fontWeight, |
| 368 | + font_weight: String(style.fontWeight), // Force string conversion |
340 | 369 | font_size: style.fontSize |
341 | 370 | }, |
342 | 371 | attributes: { |
343 | 372 | role: el.getAttribute('role'), |
344 | 373 | type_: el.getAttribute('type'), |
345 | 374 | aria_label: el.getAttribute('aria-label'), |
346 | | - href: el.href, |
347 | | - class: el.className |
| 375 | + // Convert SVGAnimatedString to string for SVG elements |
| 376 | + href: el.href?.baseVal || el.href || null, |
| 377 | + class: getClassName(el) || null |
348 | 378 | }, |
349 | 379 | // Pass to WASM |
350 | 380 | text: textVal || null, |
|
353 | 383 | }); |
354 | 384 | }); |
355 | 385 |
|
356 | | - // FREE TIER: No license checks - extension provides basic geometry data |
357 | | - // Pro/Enterprise tiers will be handled server-side (future work) |
358 | | - |
359 | | - // 1. Get Geometry from WASM |
360 | 386 | let result; |
361 | 387 | try { |
362 | 388 | if (options.limit || options.filter) { |
|
368 | 394 | return { status: "error", error: e.message }; |
369 | 395 | } |
370 | 396 |
|
371 | | - // Hydration step removed as WASM now returns populated structs |
372 | | - |
| 397 | + // Hydration step removed |
373 | 398 | // Capture Screenshot |
374 | 399 | let screenshot = null; |
375 | 400 | if (options.screenshot) { |
376 | 401 | screenshot = await captureScreenshot(options.screenshot); |
377 | 402 | } |
378 | 403 |
|
379 | | - // C. Clean up null/undefined fields to save tokens (Your existing cleaner) |
| 404 | + // C. Clean up null/undefined fields to save tokens |
380 | 405 | const cleanElement = (obj) => { |
381 | 406 | if (Array.isArray(obj)) { |
382 | 407 | return obj.map(cleanElement); |
383 | | - } else if (obj !== null && typeof obj === 'object') { |
| 408 | + } |
| 409 | + if (obj !== null && typeof obj === 'object') { |
384 | 410 | const cleaned = {}; |
385 | 411 | for (const [key, value] of Object.entries(obj)) { |
386 | | - // Keep boolean false for critical flags if desired, or remove to match Rust defaults |
| 412 | + // Explicitly skip null AND undefined |
387 | 413 | if (value !== null && value !== undefined) { |
388 | | - cleaned[key] = cleanElement(value); |
| 414 | + // Recursively clean objects |
| 415 | + if (typeof value === 'object') { |
| 416 | + const deepClean = cleanElement(value); |
| 417 | + // Only keep object if it's not empty (optional optimization) |
| 418 | + if (Object.keys(deepClean).length > 0) { |
| 419 | + cleaned[key] = deepClean; |
| 420 | + } |
| 421 | + } else { |
| 422 | + cleaned[key] = value; |
| 423 | + } |
389 | 424 | } |
390 | 425 | } |
391 | 426 | return cleaned; |
|
395 | 430 |
|
396 | 431 | const cleanedElements = cleanElement(result); |
397 | 432 |
|
| 433 | + // DEBUG: Check rawData before pruning |
| 434 | + // console.log(`[DEBUG] rawData length BEFORE pruning: ${rawData.length}`); |
| 435 | + // Prune raw elements using WASM before sending to API |
| 436 | + // This prevents 413 errors on large sites (Amazon: 5000+ -> ~200-400) |
| 437 | + const prunedRawData = wasmModule.prune_for_api(rawData); |
| 438 | + |
| 439 | + // Clean up null/undefined fields in raw_elements as well |
| 440 | + const cleanedRawElements = cleanElement(prunedRawData); |
| 441 | + |
398 | 442 | return { |
399 | 443 | status: "success", |
400 | 444 | url: window.location.href, |
401 | 445 | elements: cleanedElements, |
402 | | - raw_elements: rawData, // Include raw data for server-side processing (safe to expose - no proprietary value) |
| 446 | + raw_elements: cleanedRawElements, // Send cleaned pruned data to prevent 413 errors |
403 | 447 | screenshot: screenshot |
404 | 448 | }; |
405 | 449 | }, |
|
0 commit comments