Skip to content

Commit 534f0f9

Browse files
authored
Merge pull request #193 from SentienceAPI/tweaking_fixes3
tweak SDK methods from webbench
2 parents 1c6d1db + 9b4b49b commit 534f0f9

File tree

11 files changed

+368
-39
lines changed

11 files changed

+368
-39
lines changed

sentience/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@
107107
from .ordinal import OrdinalIntent, boost_ordinal_elements, detect_ordinal_intent, select_by_ordinal
108108
from .overlay import clear_overlay, show_overlay
109109
from .query import find, query
110-
from .read import extract, extract_async, read
110+
from .read import extract, extract_async, read, read_best_effort
111111
from .recorder import Recorder, Trace, TraceStep, record
112112
from .runtime_agent import RuntimeAgent, RuntimeStep, StepVerification
113113
from .screenshot import screenshot
@@ -220,6 +220,7 @@
220220
"ScriptGenerator",
221221
"generate",
222222
"read",
223+
"read_best_effort",
223224
"screenshot",
224225
"show_overlay",
225226
"clear_overlay",

sentience/agent_runtime.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -445,6 +445,29 @@ def _is_captcha_detected(self, snapshot: Snapshot) -> bool:
445445
captcha = getattr(snapshot.diagnostics, "captcha", None) if snapshot.diagnostics else None
446446
if not captcha or not getattr(captcha, "detected", False):
447447
return False
448+
# IMPORTANT: Many sites load CAPTCHA libraries proactively. We only want to
449+
# block execution when there's evidence it's actually *present/active*.
450+
# If we block on low-signal detections (e.g. just a recaptcha script tag),
451+
# interactive runs will “do nothing” and time out.
452+
evidence = getattr(captcha, "evidence", None)
453+
if evidence is not None:
454+
def _list(name: str) -> list[str]:
455+
try:
456+
v = getattr(evidence, name, None)
457+
except Exception:
458+
v = None
459+
if v is None and isinstance(evidence, dict):
460+
v = evidence.get(name)
461+
if not v:
462+
return []
463+
return [str(x) for x in v if x is not None]
464+
465+
iframe_hits = _list("iframe_src_hits")
466+
url_hits = _list("url_hits")
467+
text_hits = _list("text_hits")
468+
# If we only saw selector/script hints, treat as non-blocking.
469+
if not iframe_hits and not url_hits and not text_hits:
470+
return False
448471
confidence = getattr(captcha, "confidence", 0.0)
449472
return confidence >= self._captcha_options.min_confidence
450473

sentience/async_api.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,8 @@
5252
from sentience.query import find, query
5353

5454
# ========== Phase 2B: Supporting Utilities ==========
55-
# Re-export async read function from read.py
56-
from sentience.read import read_async
55+
# Re-export async read functions from read.py
56+
from sentience.read import read_async, read_best_effort_async
5757

5858
# ========== Phase 2D: Developer Tools ==========
5959
# Re-export async recorder and inspector from their modules
@@ -90,6 +90,7 @@
9090
"find_text_rect_async", # Re-exported from text_search.py
9191
# Phase 2B: Supporting Utilities
9292
"read_async", # Re-exported from read.py
93+
"read_best_effort_async", # Re-exported from read.py
9394
"show_overlay_async", # Re-exported from overlay.py
9495
"clear_overlay_async", # Re-exported from overlay.py
9596
"expect_async", # Re-exported from expect.py

sentience/backends/actions.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,7 @@ async def type_text(
121121
text: str,
122122
target: BBox | dict[str, float] | tuple[float, float] | None = None,
123123
clear_first: bool = False,
124+
delay_ms: float | None = None,
124125
) -> ActionResult:
125126
"""
126127
Type text, optionally clicking a target first.
@@ -159,8 +160,8 @@ async def type_text(
159160
await backend.eval("document.execCommand('selectAll')")
160161
await asyncio.sleep(0.02)
161162

162-
# Type the text
163-
await backend.type_text(text)
163+
# Type the text (optional human-like delay)
164+
await backend.type_text(text, delay_ms=delay_ms)
164165

165166
duration_ms = int((time.time() - start_time) * 1000)
166167
return ActionResult(

sentience/backends/cdp_backend.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -342,8 +342,10 @@ async def wheel(
342342
},
343343
)
344344

345-
async def type_text(self, text: str) -> None:
345+
async def type_text(self, text: str, delay_ms: float | None = None) -> None:
346346
"""Type text using keyboard input."""
347+
# Preserve historical default (~10ms) unless caller overrides.
348+
per_char_delay_s = 0.01 if delay_ms is None else max(0.0, float(delay_ms) / 1000.0)
347349
for char in text:
348350
# Key down
349351
await self._transport.send(
@@ -372,8 +374,9 @@ async def type_text(self, text: str) -> None:
372374
},
373375
)
374376

375-
# Small delay between characters
376-
await asyncio.sleep(0.01)
377+
# Delay between characters (human-like typing when requested)
378+
if per_char_delay_s:
379+
await asyncio.sleep(per_char_delay_s)
377380

378381
async def wait_ready_state(
379382
self,

sentience/backends/playwright_backend.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -315,9 +315,10 @@ async def wheel(
315315

316316
await self._page.mouse.wheel(0, delta_y)
317317

318-
async def type_text(self, text: str) -> None:
318+
async def type_text(self, text: str, delay_ms: float | None = None) -> None:
319319
"""Type text using keyboard input."""
320-
await self._page.keyboard.type(text)
320+
delay = 0 if delay_ms is None else max(0, float(delay_ms))
321+
await self._page.keyboard.type(text, delay=delay)
321322

322323
async def wait_ready_state(
323324
self,

sentience/backends/protocol.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -188,14 +188,16 @@ async def wheel(
188188
"""
189189
...
190190

191-
async def type_text(self, text: str) -> None:
191+
async def type_text(self, text: str, delay_ms: float | None = None) -> None:
192192
"""
193193
Type text using keyboard input.
194194
195195
Uses CDP Input.dispatchKeyEvent for each character.
196196
197197
Args:
198198
text: Text to type
199+
delay_ms: Optional delay between keystrokes in milliseconds.
200+
If None, backend default behavior is used.
199201
"""
200202
...
201203

sentience/llm_provider.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -343,6 +343,14 @@ def __init__(
343343
base_url: str = "https://api.deepinfra.com/v1/openai",
344344
):
345345
api_key = get_api_key_from_env(["DEEPINFRA_TOKEN", "DEEPINFRA_API_KEY"], api_key)
346+
# IMPORTANT: If we pass api_key=None to the OpenAI SDK client, it may
347+
# implicitly fall back to OPENAI_API_KEY from the environment.
348+
# That leads to confusing 401s against DeepInfra with an OpenAI key.
349+
if not api_key:
350+
raise RuntimeError(
351+
"DeepInfra API key is missing. Set DEEPINFRA_API_KEY (or DEEPINFRA_TOKEN), "
352+
"or pass api_key=... to DeepInfraProvider."
353+
)
346354
super().__init__(api_key=api_key, model=model, base_url=base_url)
347355

348356

0 commit comments

Comments
 (0)