diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index 0778094..9230b8e 100644 --- a/sentience/extension/injected_api.js +++ b/sentience/extension/injected_api.js @@ -10,6 +10,165 @@ } return elements; } + const CAPTCHA_TEXT_KEYWORDS = [ "verify you are human", "captcha", "human verification", "unusual traffic", "are you a robot", "security check", "prove you are human", "bot detection", "automated access" ], CAPTCHA_URL_HINTS = [ "captcha", "challenge", "verify" ], CAPTCHA_IFRAME_HINTS = { + recaptcha: [ "recaptcha", "google.com/recaptcha" ], + hcaptcha: [ "hcaptcha.com" ], + turnstile: [ "challenges.cloudflare.com", "turnstile" ], + arkose: [ "arkoselabs.com", "funcaptcha.com", "client-api.arkoselabs.com" ], + awswaf: [ "amazonaws.com/captcha", "awswaf.com" ] + }, CAPTCHA_SCRIPT_HINTS = { + recaptcha: [ "recaptcha" ], + hcaptcha: [ "hcaptcha" ], + turnstile: [ "turnstile", "challenges.cloudflare.com" ], + arkose: [ "arkoselabs", "funcaptcha" ], + awswaf: [ "captcha.awswaf", "awswaf-captcha" ] + }, CAPTCHA_CONTAINER_SELECTORS = [ { + selector: ".g-recaptcha", + provider: "recaptcha" + }, { + selector: "#g-recaptcha", + provider: "recaptcha" + }, { + selector: "[data-sitekey]", + provider: "unknown" + }, { + selector: 'iframe[title*="recaptcha" i]', + provider: "recaptcha" + }, { + selector: ".h-captcha", + provider: "hcaptcha" + }, { + selector: "#h-captcha", + provider: "hcaptcha" + }, { + selector: 'iframe[title*="hcaptcha" i]', + provider: "hcaptcha" + }, { + selector: ".cf-turnstile", + provider: "turnstile" + }, { + selector: "[data-cf-turnstile-sitekey]", + provider: "turnstile" + }, { + selector: 'iframe[src*="challenges.cloudflare.com"]', + provider: "turnstile" + }, { + selector: "#FunCaptcha", + provider: "arkose" + }, { + selector: ".funcaptcha", + provider: "arkose" + }, { + selector: "[data-arkose-public-key]", + provider: "arkose" + }, { + selector: 'iframe[src*="arkoselabs"]', + provider: "arkose" + }, { + selector: "#captcha-container", + provider: "awswaf" + }, { + selector: "[data-awswaf-captcha]", + provider: "awswaf" + }, { + selector: 'iframe[title*="captcha" i]', + provider: "unknown" + } ]; + function addEvidence(list, value) { + value && (list.length >= 5 || list.push(value)); + } + function truncateText(text, maxLen) { + return text ? text.length <= maxLen ? text : text.slice(0, maxLen) : ""; + } + function matchHints(value, hints) { + const lower = String(value || "").toLowerCase(); + return !!lower && hints.some(hint => lower.includes(hint)); + } + function detectCaptcha() { + const evidence = { + text_hits: [], + selector_hits: [], + iframe_src_hits: [], + url_hits: [] + }; + let hasIframeHit = !1, hasContainerHit = !1, hasScriptHit = !1, hasKeywordHit = !1, hasUrlHit = !1; + const providerSignals = { + recaptcha: 0, + hcaptcha: 0, + turnstile: 0, + arkose: 0, + awswaf: 0 + }; + try { + const iframes = document.querySelectorAll("iframe"); + for (const iframe of iframes) { + const src = iframe.getAttribute("src") || "", title = iframe.getAttribute("title") || ""; + if (src) for (const [provider, hints] of Object.entries(CAPTCHA_IFRAME_HINTS)) matchHints(src, hints) && (hasIframeHit = !0, + providerSignals[provider] += 1, addEvidence(evidence.iframe_src_hits, truncateText(src, 120))); + if (title && matchHints(title, [ "captcha", "recaptcha" ]) && (hasContainerHit = !0, + addEvidence(evidence.selector_hits, 'iframe[title*="captcha"]')), evidence.iframe_src_hits.length >= 5) break; + } + } catch (e) {} + try { + const scripts = document.querySelectorAll("script[src]"); + for (const script of scripts) { + const src = script.getAttribute("src") || ""; + if (src) { + for (const [provider, hints] of Object.entries(CAPTCHA_SCRIPT_HINTS)) matchHints(src, hints) && (hasScriptHit = !0, + providerSignals[provider] += 1, addEvidence(evidence.selector_hits, `script[src*="${hints[0]}"]`)); + if (evidence.selector_hits.length >= 5) break; + } + } + } catch (e) {} + for (const {selector: selector, provider: provider} of CAPTCHA_CONTAINER_SELECTORS) try { + document.querySelector(selector) && (hasContainerHit = !0, addEvidence(evidence.selector_hits, selector), + "unknown" !== provider && (providerSignals[provider] += 1)); + } catch (e) {} + const textSnippet = function() { + try { + const candidates = document.querySelectorAll("h1, h2, h3, h4, p, label, button, form, div, span"); + let combined = "", count = 0; + for (const node of candidates) { + if (count >= 30 || combined.length >= 2e3) break; + if (!node || "string" != typeof node.innerText) continue; + if (!node.offsetWidth && !node.offsetHeight && !node.getClientRects().length) continue; + const text = node.innerText.replace(/\s+/g, " ").trim(); + text && (combined += `${text} `, count += 1); + } + if (combined = combined.trim(), combined) return truncateText(combined, 2e3); + } catch (e) {} + try { + let bodyText = document.body?.innerText || ""; + return !bodyText && document.body?.textContent && (bodyText = document.body.textContent), + truncateText(bodyText.replace(/\s+/g, " ").trim(), 2e3); + } catch (e) { + return ""; + } + }(); + if (textSnippet) { + const lowerText = textSnippet.toLowerCase(); + for (const keyword of CAPTCHA_TEXT_KEYWORDS) lowerText.includes(keyword) && (hasKeywordHit = !0, + addEvidence(evidence.text_hits, keyword)); + } + try { + const lowerUrl = (window.location?.href || "").toLowerCase(); + for (const hint of CAPTCHA_URL_HINTS) lowerUrl.includes(hint) && (hasUrlHit = !0, + addEvidence(evidence.url_hits, hint)); + } catch (e) {} + let confidence = 0; + hasIframeHit && (confidence += .7), hasContainerHit && (confidence += .5), hasScriptHit && (confidence += .5), + hasKeywordHit && (confidence += .3), hasUrlHit && (confidence += .2), confidence = Math.min(1, confidence), + hasIframeHit && (confidence = Math.max(confidence, .8)), !hasKeywordHit || hasIframeHit || hasContainerHit || hasScriptHit || hasUrlHit || (confidence = Math.min(confidence, .4)); + const detected = confidence >= .7; + let providerHint = null; + return providerSignals.recaptcha > 0 ? providerHint = "recaptcha" : providerSignals.hcaptcha > 0 ? providerHint = "hcaptcha" : providerSignals.turnstile > 0 ? providerHint = "turnstile" : providerSignals.arkose > 0 ? providerHint = "arkose" : providerSignals.awswaf > 0 ? providerHint = "awswaf" : detected && (providerHint = "unknown"), + { + detected: detected, + provider_hint: providerHint, + confidence: confidence, + evidence: evidence + }; + } const DEFAULT_INFERENCE_CONFIG = { allowedTags: [ "label", "span", "div" ], allowedRoles: [], @@ -691,7 +850,8 @@ ready_state: document.readyState || null, quiet_ms: quietMs, node_count: nodeCount - } + }, + captcha: detectCaptcha() }; } catch (e) {} return { diff --git a/sentience/extension/manifest.json b/sentience/extension/manifest.json index 1be2d29..a2d123d 100644 --- a/sentience/extension/manifest.json +++ b/sentience/extension/manifest.json @@ -1,7 +1,7 @@ { "manifest_version": 3, "name": "Sentience Semantic Visual Grounding Extractor", - "version": "2.6.0", + "version": "2.7.0", "description": "Extract semantic visual grounding data from web pages", "permissions": ["activeTab", "scripting"], "host_permissions": [""], diff --git a/sentience/extension/release.json b/sentience/extension/release.json index a40fa61..b21a0a7 100644 --- a/sentience/extension/release.json +++ b/sentience/extension/release.json @@ -1,9 +1,9 @@ { - "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/277691523", - "assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/277691523/assets", - "upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/277691523/assets{?name,label}", - "html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v2.6.0", - "id": 277691523, + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/277802850", + "assets_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/277802850/assets", + "upload_url": "https://uploads.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/277802850/assets{?name,label}", + "html_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/tag/v2.7.0", + "id": 277802850, "author": { "login": "rcholic", "id": 135060, @@ -25,21 +25,21 @@ "user_view_type": "public", "site_admin": false }, - "node_id": "RE_kwDOQshiJ84QjTyD", - "tag_name": "v2.6.0", + "node_id": "RE_kwDOQshiJ84Qju9i", + "tag_name": "v2.7.0", "target_commitish": "main", - "name": "Release v2.6.0", + "name": "Release v2.7.0", "draft": false, "immutable": false, "prerelease": false, - "created_at": "2026-01-18T08:03:53Z", - "updated_at": "2026-01-18T08:05:04Z", - "published_at": "2026-01-18T08:04:24Z", + "created_at": "2026-01-19T05:08:35Z", + "updated_at": "2026-01-19T05:09:41Z", + "published_at": "2026-01-19T05:09:31Z", "assets": [ { - "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/342221644", - "id": 342221644, - "node_id": "RA_kwDOQshiJ84UZeNM", + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/342599449", + "id": 342599449, + "node_id": "RA_kwDOQshiJ84Ua6cZ", "name": "extension-files.tar.gz", "label": "", "uploader": { @@ -65,17 +65,17 @@ }, "content_type": "application/gzip", "state": "uploaded", - "size": 77212, - "digest": "sha256:82d79bfb011dbdf6ecd81b30689da13ee2045ac3df93534bee4b3c9ae3717670", - "download_count": 2, - "created_at": "2026-01-18T08:05:04Z", - "updated_at": "2026-01-18T08:05:04Z", - "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.6.0/extension-files.tar.gz" + "size": 79222, + "digest": "sha256:e0cad96e20e539d62a4777f1c5baedb1c8bd02cd26a8e38f7c1b9c804325f068", + "download_count": 0, + "created_at": "2026-01-19T05:09:41Z", + "updated_at": "2026-01-19T05:09:41Z", + "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.7.0/extension-files.tar.gz" }, { - "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/342221645", - "id": 342221645, - "node_id": "RA_kwDOQshiJ84UZeNN", + "url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/assets/342599448", + "id": 342599448, + "node_id": "RA_kwDOQshiJ84Ua6cY", "name": "extension-package.zip", "label": "", "uploader": { @@ -101,15 +101,15 @@ }, "content_type": "application/zip", "state": "uploaded", - "size": 79031, - "digest": "sha256:1f8fe71aeedf7143064afafadc19d83577932d125b66de3eb70e1f302173d172", + "size": 80692, + "digest": "sha256:83de4c4c54f401fc6404c27ef01a9205cf8e3566e81f62a7bc08f7103a2b0cc5", "download_count": 0, - "created_at": "2026-01-18T08:05:04Z", - "updated_at": "2026-01-18T08:05:04Z", - "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.6.0/extension-package.zip" + "created_at": "2026-01-19T05:09:41Z", + "updated_at": "2026-01-19T05:09:41Z", + "browser_download_url": "https://github.com/SentienceAPI/Sentience-Geometry-Chrome-Extension/releases/download/v2.7.0/extension-package.zip" } ], - "tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v2.6.0", - "zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v2.6.0", + "tarball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/tarball/v2.7.0", + "zipball_url": "https://api.github.com/repos/SentienceAPI/Sentience-Geometry-Chrome-Extension/zipball/v2.7.0", "body": "" }