Skip to content

Commit 0a8fa39

Browse files
author
SentienceDEV
committed
tweak SDK methods from webbench
1 parent 1c6d1db commit 0a8fa39

File tree

11 files changed

+419
-38
lines changed

11 files changed

+419
-38
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
"""
2+
Debug helper: investigate empty `read_async()` results.
3+
4+
This script is meant to reproduce situations where the extension-backed `read_async()`
5+
returns `status="success"` but `content` is empty / near-empty (e.g. length=1).
6+
7+
It prints the final `ReadResult` for:
8+
- a stable control page (example.com)
9+
- an AceHardware product details page (often triggers the empty-read symptom)
10+
11+
Tip:
12+
SENTIENCE_DEBUG_READ=1 python sdk-python/examples/debug_read_async_empty_content.py
13+
"""
14+
15+
import asyncio
16+
import os
17+
18+
from sentience.async_api import AsyncSentienceBrowser, read_async
19+
20+
21+
ACE_PDP_URL = "https://www.acehardware.com/departments/tools/power-tools/combo-power-tool-sets/2026525"
22+
23+
24+
async def dump_read(browser: AsyncSentienceBrowser, url: str) -> None:
    """Navigate to ``url`` and print what ``read_async()`` returns for it.

    The page is read twice, first as enhanced markdown and then as raw
    output. For each read a one-line summary (status, length, url, error) is
    printed, followed by a short preview of the content, or ``<empty>`` when
    the preview is blank after stripping whitespace.

    Args:
        browser: Browser session used for navigation and for both reads.
        url: Address to load before reading.
    """
    print(f"\n=== URL: {url} ===")
    await browser.goto(url, wait_until="domcontentloaded")

    # Markdown read (with enhancement); preview is capped at 400 characters.
    markdown = await read_async(browser, output_format="markdown", enhance_markdown=True)
    markdown_summary = (
        f"[markdown] status={markdown.status!r} length={markdown.length} url={markdown.url!r} error={markdown.error!r}"
    )
    print(markdown_summary)
    markdown_preview = markdown.content[:400].strip()
    print(markdown_preview if markdown_preview else "<empty>")

    # Raw read; preview is capped at 200 characters.
    raw = await read_async(browser, output_format="raw")
    raw_summary = (
        f"[raw] status={raw.status!r} length={raw.length} url={raw.url!r} error={raw.error!r}"
    )
    print(raw_summary)
    raw_preview = raw.content[:200].strip()
    print(raw_preview if raw_preview else "<empty>")
39+
40+
41+
async def main() -> None:
    """Run ``dump_read`` against the control page and the AceHardware page.

    Environment variables:
        SENTIENCE_API_KEY: Passed to ``AsyncSentienceBrowser`` as ``api_key``
            (``None`` when unset).
        HEADLESS: Defaults to ``"1"``. The browser runs headless unless the
            stripped value is ``0`` or ``false`` in any letter case.
    """
    api_key = os.environ.get("SENTIENCE_API_KEY")

    # Compare case-insensitively so HEADLESS=FALSE behaves like
    # HEADLESS=false / HEADLESS=False instead of silently staying headless.
    headless_flag = os.environ.get("HEADLESS", "1").strip().lower()
    headless = headless_flag not in {"0", "false"}

    async with AsyncSentienceBrowser(api_key=api_key, headless=headless) as browser:
        # Control page first, then the page that tends to show the empty read.
        for url in ("https://example.com", ACE_PDP_URL):
            await dump_read(browser, url)
48+
49+
50+
if __name__ == "__main__":
51+
asyncio.run(main())
52+

sentience/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@
107107
from .ordinal import OrdinalIntent, boost_ordinal_elements, detect_ordinal_intent, select_by_ordinal
108108
from .overlay import clear_overlay, show_overlay
109109
from .query import find, query
110-
from .read import extract, extract_async, read
110+
from .read import extract, extract_async, read, read_best_effort
111111
from .recorder import Recorder, Trace, TraceStep, record
112112
from .runtime_agent import RuntimeAgent, RuntimeStep, StepVerification
113113
from .screenshot import screenshot
@@ -220,6 +220,7 @@
220220
"ScriptGenerator",
221221
"generate",
222222
"read",
223+
"read_best_effort",
223224
"screenshot",
224225
"show_overlay",
225226
"clear_overlay",

sentience/agent_runtime.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,29 @@ def _is_captcha_detected(self, snapshot: Snapshot) -> bool:
445445
captcha = getattr(snapshot.diagnostics, "captcha", None) if snapshot.diagnostics else None
446446
if not captcha or not getattr(captcha, "detected", False):
447447
return False
448+
# IMPORTANT: Many sites load CAPTCHA libraries proactively. We only want to
449+
# block execution when there's evidence it's actually *present/active*.
450+
# If we block on low-signal detections (e.g. just a recaptcha script tag),
451+
# interactive runs will “do nothing” and time out.
452+
evidence = getattr(captcha, "evidence", None)
453+
if evidence is not None:
454+
def _list(name: str) -> list[str]:
    """Return the entries of evidence field ``name`` as a list of strings.

    The field is looked up as an attribute of ``evidence`` first; if that
    yields ``None`` and ``evidence`` is a dict, the key ``name`` is used
    instead. A missing or empty field gives ``[]``. ``None`` entries are
    dropped and every other entry is converted with ``str``.
    """
    try:
        raw = getattr(evidence, name, None)
    except Exception:
        # Attribute access itself failed; treat the field as absent.
        raw = None
    # Dict-shaped evidence: fall back to a key lookup.
    if raw is None and isinstance(evidence, dict):
        raw = evidence.get(name)
    return [str(item) for item in raw if item is not None] if raw else []
464+
465+
iframe_hits = _list("iframe_src_hits")
466+
url_hits = _list("url_hits")
467+
text_hits = _list("text_hits")
468+
# If we only saw selector/script hints, treat as non-blocking.
469+
if not iframe_hits and not url_hits and not text_hits:
470+
return False
448471
confidence = getattr(captcha, "confidence", 0.0)
449472
return confidence >= self._captcha_options.min_confidence
450473

sentience/async_api.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@
5252
from sentience.query import find, query
5353

5454
# ========== Phase 2B: Supporting Utilities ==========
55-
# Re-export async read function from read.py
56-
from sentience.read import read_async
55+
# Re-export async read functions from read.py
56+
from sentience.read import read_async, read_best_effort_async
5757

5858
# ========== Phase 2D: Developer Tools ==========
5959
# Re-export async recorder and inspector from their modules
@@ -90,6 +90,7 @@
9090
"find_text_rect_async", # Re-exported from text_search.py
9191
# Phase 2B: Supporting Utilities
9292
"read_async", # Re-exported from read.py
93+
"read_best_effort_async", # Re-exported from read.py
9394
"show_overlay_async", # Re-exported from overlay.py
9495
"clear_overlay_async", # Re-exported from overlay.py
9596
"expect_async", # Re-exported from expect.py

sentience/backends/actions.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ async def type_text(
121121
text: str,
122122
target: BBox | dict[str, float] | tuple[float, float] | None = None,
123123
clear_first: bool = False,
124+
delay_ms: float | None = None,
124125
) -> ActionResult:
125126
"""
126127
Type text, optionally clicking a target first.
@@ -159,8 +160,8 @@ async def type_text(
159160
await backend.eval("document.execCommand('selectAll')")
160161
await asyncio.sleep(0.02)
161162

162-
# Type the text
163-
await backend.type_text(text)
163+
# Type the text (optional human-like delay)
164+
await backend.type_text(text, delay_ms=delay_ms)
164165

165166
duration_ms = int((time.time() - start_time) * 1000)
166167
return ActionResult(

sentience/backends/cdp_backend.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,10 @@ async def wheel(
342342
},
343343
)
344344

345-
async def type_text(self, text: str) -> None:
345+
async def type_text(self, text: str, delay_ms: float | None = None) -> None:
346346
"""Type text using keyboard input."""
347+
# Preserve historical default (~10ms) unless caller overrides.
348+
per_char_delay_s = 0.01 if delay_ms is None else max(0.0, float(delay_ms) / 1000.0)
347349
for char in text:
348350
# Key down
349351
await self._transport.send(
@@ -372,8 +374,9 @@ async def type_text(self, text: str) -> None:
372374
},
373375
)
374376

375-
# Small delay between characters
376-
await asyncio.sleep(0.01)
377+
# Delay between characters (human-like typing when requested)
378+
if per_char_delay_s:
379+
await asyncio.sleep(per_char_delay_s)
377380

378381
async def wait_ready_state(
379382
self,

sentience/backends/playwright_backend.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,9 +315,10 @@ async def wheel(
315315

316316
await self._page.mouse.wheel(0, delta_y)
317317

318-
async def type_text(self, text: str) -> None:
318+
async def type_text(self, text: str, delay_ms: float | None = None) -> None:
319319
"""Type text using keyboard input."""
320-
await self._page.keyboard.type(text)
320+
delay = 0 if delay_ms is None else max(0, float(delay_ms))
321+
await self._page.keyboard.type(text, delay=delay)
321322

322323
async def wait_ready_state(
323324
self,

sentience/backends/protocol.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,14 +188,16 @@ async def wheel(
188188
"""
189189
...
190190

191-
async def type_text(self, text: str) -> None:
191+
async def type_text(self, text: str, delay_ms: float | None = None) -> None:
192192
"""
193193
Type text using keyboard input.
194194
195195
Uses CDP Input.dispatchKeyEvent for each character.
196196
197197
Args:
198198
text: Text to type
199+
delay_ms: Optional delay between keystrokes in milliseconds.
200+
If None, backend default behavior is used.
199201
"""
200202
...
201203

sentience/llm_provider.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,14 @@ def __init__(
343343
base_url: str = "https://api.deepinfra.com/v1/openai",
344344
):
345345
api_key = get_api_key_from_env(["DEEPINFRA_TOKEN", "DEEPINFRA_API_KEY"], api_key)
346+
# IMPORTANT: If we pass api_key=None to the OpenAI SDK client, it may
347+
# implicitly fall back to OPENAI_API_KEY from the environment.
348+
# That leads to confusing 401s against DeepInfra with an OpenAI key.
349+
if not api_key:
350+
raise RuntimeError(
351+
"DeepInfra API key is missing. Set DEEPINFRA_API_KEY (or DEEPINFRA_TOKEN), "
352+
"or pass api_key=... to DeepInfraProvider."
353+
)
346354
super().__init__(api_key=api_key, model=model, base_url=base_url)
347355

348356

0 commit comments

Comments
 (0)