tweak SDK methods from webbench

SentienceDEV · SentienceDEV · commit 0a8fa39d9a3c · 2026-01-24T20:47:17.000-08:00
diff --git a/examples/debug_read_async_empty_content.py b/examples/debug_read_async_empty_content.py
@@ -0,0 +1,57 @@
+"""
+Debug helper: investigate empty `read_async()` results.
+
+This script is meant to reproduce situations where the extension-backed `read_async()`
+returns `status="success"` but `content` is empty / near-empty (e.g. length=1).
+
+It prints the final `ReadResult` for:
+- a stable control page (example.com)
+- an AceHardware product details page (often triggers the empty-read symptom)
+
+Tip (run from the repository root):
+  SENTIENCE_DEBUG_READ=1 python examples/debug_read_async_empty_content.py
+
+Set HEADLESS=0 (or HEADLESS=false, any letter case) to pass headless=False to the browser.
+"""
+
+import asyncio
+import os
+
+from sentience.async_api import AsyncSentienceBrowser, read_async
+
+
+ACE_PDP_URL = "https://www.acehardware.com/departments/tools/power-tools/combo-power-tool-sets/2026525"
+
+
+async def dump_read(browser: AsyncSentienceBrowser, url: str) -> None:
+    """Navigate to `url`, then print the markdown and raw `read_async()` results with a short content preview."""
+    print(f"\n=== URL: {url} ===")
+    await browser.goto(url, wait_until="domcontentloaded")
+
+    res_md = await read_async(browser, output_format="markdown", enhance_markdown=True)
+    print(
+        f"[markdown] status={res_md.status!r} length={res_md.length} url={res_md.url!r} error={res_md.error!r}"
+    )
+    print(res_md.content[:400].strip() or "<empty>")
+
+    res_raw = await read_async(browser, output_format="raw")
+    print(
+        f"[raw]     status={res_raw.status!r} length={res_raw.length} url={res_raw.url!r} error={res_raw.error!r}"
+    )
+    print(res_raw.content[:200].strip() or "<empty>")
+
+
+async def main() -> None:
+    """Open one browser session and run `dump_read` on the control page and the AceHardware page."""
+    api_key = os.environ.get("SENTIENCE_API_KEY")
+    # Compare case-insensitively so HEADLESS=FALSE / False / false all disable headless mode.
+    headless = os.environ.get("HEADLESS", "1").strip().lower() not in {"0", "false"}
+
+    async with AsyncSentienceBrowser(api_key=api_key, headless=headless) as browser:
+        await dump_read(browser, "https://example.com")
+        await dump_read(browser, ACE_PDP_URL)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+
diff --git a/sentience/__init__.py b/sentience/__init__.py
@@ -107,7 +107,7 @@
 from .ordinal import OrdinalIntent, boost_ordinal_elements, detect_ordinal_intent, select_by_ordinal
 from .overlay import clear_overlay, show_overlay
 from .query import find, query
-from .read import extract, extract_async, read
+from .read import extract, extract_async, read, read_best_effort
 from .recorder import Recorder, Trace, TraceStep, record
 from .runtime_agent import RuntimeAgent, RuntimeStep, StepVerification
 from .screenshot import screenshot
@@ -220,6 +220,7 @@
     "ScriptGenerator",
     "generate",
     "read",
+    "read_best_effort",
     "screenshot",
     "show_overlay",
     "clear_overlay",
diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
@@ -445,6 +445,29 @@ def _is_captcha_detected(self, snapshot: Snapshot) -> bool:
         captcha = getattr(snapshot.diagnostics, "captcha", None) if snapshot.diagnostics else None
         if not captcha or not getattr(captcha, "detected", False):
             return False
+        # IMPORTANT: Many sites load CAPTCHA libraries proactively. We only want to
+        # block execution when there's evidence it's actually *present/active*.
+        # If we block on low-signal detections (e.g. just a recaptcha script tag),
+        # interactive runs will “do nothing” and time out.
+        evidence = getattr(captcha, "evidence", None)
+        if evidence is not None:
+            def _list(name: str) -> list[str]:
+                try:
+                    v = getattr(evidence, name, None)
+                except Exception:
+                    v = None
+                if v is None and isinstance(evidence, dict):
+                    v = evidence.get(name)
+                if not v:
+                    return []
+                return [str(x) for x in v if x is not None]
+
+            iframe_hits = _list("iframe_src_hits")
+            url_hits = _list("url_hits")
+            text_hits = _list("text_hits")
+            # If we only saw selector/script hints, treat as non-blocking.
+            if not iframe_hits and not url_hits and not text_hits:
+                return False
         confidence = getattr(captcha, "confidence", 0.0)
         return confidence >= self._captcha_options.min_confidence
 
diff --git a/sentience/async_api.py b/sentience/async_api.py
@@ -52,8 +52,8 @@
 from sentience.query import find, query
 
 # ========== Phase 2B: Supporting Utilities ==========
-# Re-export async read function from read.py
-from sentience.read import read_async
+# Re-export async read functions from read.py
+from sentience.read import read_async, read_best_effort_async
 
 # ========== Phase 2D: Developer Tools ==========
 # Re-export async recorder and inspector from their modules
@@ -90,6 +90,7 @@
     "find_text_rect_async",  # Re-exported from text_search.py
     # Phase 2B: Supporting Utilities
     "read_async",  # Re-exported from read.py
+    "read_best_effort_async",  # Re-exported from read.py
     "show_overlay_async",  # Re-exported from overlay.py
     "clear_overlay_async",  # Re-exported from overlay.py
     "expect_async",  # Re-exported from expect.py
diff --git a/sentience/backends/actions.py b/sentience/backends/actions.py
@@ -121,6 +121,7 @@ async def type_text(
     text: str,
     target: BBox | dict[str, float] | tuple[float, float] | None = None,
     clear_first: bool = False,
+    delay_ms: float | None = None,
 ) -> ActionResult:
     """
     Type text, optionally clicking a target first.
@@ -159,8 +160,8 @@ async def type_text(
             await backend.eval("document.execCommand('selectAll')")
             await asyncio.sleep(0.02)
 
-        # Type the text
-        await backend.type_text(text)
+        # Type the text (optional human-like delay)
+        await backend.type_text(text, delay_ms=delay_ms)
 
         duration_ms = int((time.time() - start_time) * 1000)
         return ActionResult(
diff --git a/sentience/backends/cdp_backend.py b/sentience/backends/cdp_backend.py
@@ -342,8 +342,10 @@ async def wheel(
             },
         )
 
-    async def type_text(self, text: str) -> None:
+    async def type_text(self, text: str, delay_ms: float | None = None) -> None:
         """Type text using keyboard input."""
+        # Preserve historical default (~10ms) unless caller overrides.
+        per_char_delay_s = 0.01 if delay_ms is None else max(0.0, float(delay_ms) / 1000.0)
         for char in text:
             # Key down
             await self._transport.send(
@@ -372,8 +374,9 @@ async def type_text(self, text: str) -> None:
                 },
             )
 
-            # Small delay between characters
-            await asyncio.sleep(0.01)
+            # Delay between characters (human-like typing when requested)
+            if per_char_delay_s:
+                await asyncio.sleep(per_char_delay_s)
 
     async def wait_ready_state(
         self,
diff --git a/sentience/backends/playwright_backend.py b/sentience/backends/playwright_backend.py
@@ -315,9 +315,10 @@ async def wheel(
 
         await self._page.mouse.wheel(0, delta_y)
 
-    async def type_text(self, text: str) -> None:
+    async def type_text(self, text: str, delay_ms: float | None = None) -> None:
         """Type text using keyboard input."""
-        await self._page.keyboard.type(text)
+        delay = 0 if delay_ms is None else max(0, float(delay_ms))
+        await self._page.keyboard.type(text, delay=delay)
 
     async def wait_ready_state(
         self,
diff --git a/sentience/backends/protocol.py b/sentience/backends/protocol.py
@@ -188,14 +188,16 @@ async def wheel(
         """
         ...
 
-    async def type_text(self, text: str) -> None:
+    async def type_text(self, text: str, delay_ms: float | None = None) -> None:
         """
         Type text using keyboard input.
 
         Uses CDP Input.dispatchKeyEvent for each character.
 
         Args:
             text: Text to type
+            delay_ms: Optional delay between keystrokes in milliseconds.
+                      If None, backend default behavior is used.
         """
         ...
 
diff --git a/sentience/llm_provider.py b/sentience/llm_provider.py
@@ -343,6 +343,14 @@ def __init__(
         base_url: str = "https://api.deepinfra.com/v1/openai",
     ):
         api_key = get_api_key_from_env(["DEEPINFRA_TOKEN", "DEEPINFRA_API_KEY"], api_key)
+        # IMPORTANT: If we pass api_key=None to the OpenAI SDK client, it may
+        # implicitly fall back to OPENAI_API_KEY from the environment.
+        # That leads to confusing 401s against DeepInfra with an OpenAI key.
+        if not api_key:
+            raise RuntimeError(
+                "DeepInfra API key is missing. Set DEEPINFRA_API_KEY (or DEEPINFRA_TOKEN), "
+                "or pass api_key=... to DeepInfraProvider."
+            )
         super().__init__(api_key=api_key, model=model, base_url=base_url)
 
 
diff --git a/sentience/read.py b/sentience/read.py
diff --git a/tests/test_read.py b/tests/test_read.py