Skip to content

Commit eadb2c6

Browse files
authored
Merge pull request #166 from SentienceAPI/p7_captcha
P7 captcha
2 parents de7d9bc + 9576f5e commit eadb2c6

File tree

8 files changed

+548
-2
lines changed

8 files changed

+548
-2
lines changed

README.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Use `AgentRuntime` to add Jest-style assertions to your agent loops. Verify brow
3030

3131
```python
3232
import asyncio
33-
from sentience import AsyncSentienceBrowser, AgentRuntime
33+
from sentience import AsyncSentienceBrowser, AgentRuntime, CaptchaOptions, HumanHandoffSolver
3434
from sentience.verification import (
3535
url_contains,
3636
exists,
@@ -80,6 +80,11 @@ async def main():
8080
).eventually(timeout_s=10.0, poll_s=0.25, min_confidence=0.7, max_snapshot_attempts=3)
8181
print("eventually() result:", ok)
8282

83+
# CAPTCHA handling (detection + handoff + verify)
84+
runtime.set_captcha_options(
85+
CaptchaOptions(policy="callback", handler=HumanHandoffSolver())
86+
)
87+
8388
# Check task completion
8489
if runtime.assert_done(exists("text~'Example'"), label="task_complete"):
8590
print("✅ Task completed!")
@@ -89,6 +94,26 @@ async def main():
8994
asyncio.run(main())
9095
```
9196

97+
#### CAPTCHA strategies (Batteries Included)
98+
99+
```python
100+
from sentience import CaptchaOptions, ExternalSolver, HumanHandoffSolver, VisionSolver
101+
102+
# Human-in-loop
103+
runtime.set_captcha_options(CaptchaOptions(policy="callback", handler=HumanHandoffSolver()))
104+
105+
# Vision verification only
106+
runtime.set_captcha_options(CaptchaOptions(policy="callback", handler=VisionSolver()))
107+
108+
# External system/webhook
109+
runtime.set_captcha_options(
110+
CaptchaOptions(
111+
policy="callback",
112+
handler=ExternalSolver(lambda ctx: notify_webhook(ctx)),
113+
)
114+
)
115+
```
116+
92117
### Failure Artifact Buffer (Phase 1)
93118

94119
Capture a short ring buffer of screenshots and persist them when a required assertion fails.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import asyncio
2+
import os
3+
4+
from sentience import (
5+
AgentRuntime,
6+
AsyncSentienceBrowser,
7+
CaptchaOptions,
8+
ExternalSolver,
9+
HumanHandoffSolver,
10+
VisionSolver,
11+
)
12+
from sentience.tracing import JsonlTraceSink, Tracer
13+
14+
15+
async def notify_webhook(ctx) -> None:
    """Demo external-resolver hook: report the CAPTCHA context on stdout.

    Sentience itself contains no solver logic. Replace the ``print`` below with
    your own client / queue / webhook call.
    """
    notice = f"[captcha] external resolver notified: url={ctx.url} run_id={ctx.run_id}"
    print(notice)
19+
20+
21+
async def main() -> None:
    """Configure each built-in CAPTCHA strategy in turn, then take one snapshot.

    Every ``set_captcha_options`` call replaces the previously stored options, so
    only the last configuration (the external resolver) is in effect when the
    snapshot is taken. The three calls are shown together purely as a menu.
    """
    trace_sink = JsonlTraceSink("trace.jsonl")
    tracer = Tracer(run_id="captcha-demo", sink=trace_sink)

    async with AsyncSentienceBrowser() as browser:
        page = await browser.new_page()
        runtime = await AgentRuntime.from_sentience_browser(
            browser=browser,
            page=page,
            tracer=tracer,
        )

        # Option 1: Human-in-loop
        human_handoff = CaptchaOptions(policy="callback", handler=HumanHandoffSolver())
        runtime.set_captcha_options(human_handoff)

        # Option 2: Vision-only verification (no actions)
        vision_only = CaptchaOptions(policy="callback", handler=VisionSolver())
        runtime.set_captcha_options(vision_only)

        # Option 3: External resolver orchestration
        external = CaptchaOptions(
            policy="callback",
            handler=ExternalSolver(lambda ctx: notify_webhook(ctx)),
        )
        runtime.set_captcha_options(external)

        # The target page can be overridden through the environment.
        target_url = os.environ.get("CAPTCHA_TEST_URL", "https://example.com")
        await page.goto(target_url)
        runtime.begin_step("Captcha-aware snapshot")
        await runtime.snapshot()


if __name__ == "__main__":
    asyncio.run(main())

sentience/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
# Agent Layer (Phase 1 & 2)
4040
from .base_agent import BaseAgent
4141
from .browser import SentienceBrowser
42+
from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
43+
from .captcha_strategies import ExternalSolver, HumanHandoffSolver, VisionSolver
4244

4345
# Tracing (v0.12.0+)
4446
from .cloud_tracing import CloudTraceSink, SentienceLogger

sentience/agent_runtime.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
from dataclasses import dataclass
7171
from typing import TYPE_CHECKING, Any
7272

73+
from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
7374
from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions
7475
from .models import Snapshot, SnapshotOptions
7576
from .verification import AssertContext, AssertOutcome, Predicate
@@ -153,6 +154,10 @@ def __init__(
153154
self._task_done: bool = False
154155
self._task_done_label: str | None = None
155156

157+
# CAPTCHA handling (optional, disabled by default)
158+
self._captcha_options: CaptchaOptions | None = None
159+
self._captcha_retry_count: int = 0
160+
156161
@classmethod
157162
async def from_sentience_browser(
158163
cls,
@@ -248,13 +253,132 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
248253
from .backends.snapshot import snapshot as backend_snapshot
249254

250255
# Merge default options with call-specific kwargs
256+
skip_captcha_handling = bool(kwargs.pop("_skip_captcha_handling", False))
251257
options_dict = self._snapshot_options.model_dump(exclude_none=True)
252258
options_dict.update(kwargs)
253259
options = SnapshotOptions(**options_dict)
254260

255261
self.last_snapshot = await backend_snapshot(self.backend, options=options)
262+
if not skip_captcha_handling:
263+
await self._handle_captcha_if_needed(self.last_snapshot, source="gateway")
256264
return self.last_snapshot
257265

266+
def set_captcha_options(self, options: CaptchaOptions) -> None:
    """
    Install ``options`` as the active CAPTCHA configuration.

    CAPTCHA handling stays disabled until this is called. Each call also restarts
    the new-session retry counter from zero.
    """
    self._captcha_retry_count = 0
    self._captcha_options = options
272+
273+
def _is_captcha_detected(self, snapshot: Snapshot) -> bool:
    """
    Report whether ``snapshot`` carries a CAPTCHA detection worth acting on.

    Returns ``False`` when CAPTCHA handling is not configured, when the snapshot
    has no diagnostics / no captcha entry, or when the entry is not flagged as
    detected. Otherwise returns whether the reported confidence reaches
    ``min_confidence`` from the configured options.
    """
    options = self._captcha_options
    if not options:
        return False

    diagnostics = snapshot.diagnostics
    captcha = getattr(diagnostics, "captcha", None) if diagnostics else None
    if not captcha or not getattr(captcha, "detected", False):
        return False

    # A confidence attribute that exists but is None would make `>=` raise
    # TypeError on every snapshot; treat it like a missing value (0.0).
    confidence = getattr(captcha, "confidence", None)
    if confidence is None:
        confidence = 0.0
    return confidence >= options.min_confidence
281+
282+
def _build_captcha_context(self, snapshot: Snapshot, source: str) -> CaptchaContext:
    """
    Bundle the snapshot's CAPTCHA diagnostics with run/step identifiers.

    The result is what gets passed to the configured CAPTCHA handler. The
    ``captcha`` field is ``None`` when the diagnostics object has no ``captcha``
    attribute.
    """
    diagnostics = snapshot.diagnostics
    detected = getattr(diagnostics, "captcha", None)
    context = CaptchaContext(
        run_id=self.tracer.run_id,
        step_index=self.step_index,
        url=snapshot.url,
        source=source,  # type: ignore[arg-type]
        captcha=detected,
    )
    return context
291+
292+
def _emit_captcha_event(self, reason_code: str, details: dict[str, Any] | None = None) -> None:
    """
    Emit a CAPTCHA trace event as a ``"verification"`` record on the tracer.

    ``reason_code`` is used both as the event label and as the first entry of the
    details mapping. Entries from ``details`` are merged in afterwards, so a
    ``"reason_code"`` key in ``details`` overrides the argument.
    """
    merged = {"reason_code": reason_code}
    if details:
        merged.update(details)
    self.tracer.emit(
        "verification",
        data={
            "kind": "captcha",
            "passed": False,
            "label": reason_code,
            "details": merged,
        },
        step_id=self.step_id,
    )
300+
301+
async def _handle_captcha_if_needed(self, snapshot: Snapshot, source: str) -> None:
    """
    Run the configured CAPTCHA policy when ``snapshot`` reports a CAPTCHA.

    Does nothing when CAPTCHA handling is not configured or no CAPTCHA is
    detected. Otherwise emits a ``captcha_detected`` event, obtains a resolution
    (from the handler for ``policy="callback"``, or an ``abort`` resolution for
    any other policy) and applies it.

    The handler may be synchronous or asynchronous: its result is awaited only
    when it is awaitable, which is what the ``CaptchaHandler`` type allows.

    Raises:
        CaptchaHandlingError: with reason ``captcha_handler_error`` when the
            callback policy has no handler or the handler raises; other reasons
            can be raised while applying the resolution.
    """
    # Local import keeps this method self-contained; the module may not import
    # ``inspect`` at the top level.
    import inspect

    options = self._captcha_options
    if not options:
        return
    if not self._is_captcha_detected(snapshot):
        return

    captcha = getattr(snapshot.diagnostics, "captcha", None)
    self._emit_captcha_event(
        "captcha_detected",
        # Use model_dump() when the diagnostics object offers it, otherwise
        # record the object as-is.
        {"captcha": getattr(captcha, "model_dump", lambda: captcha)()},
    )

    resolution: CaptchaResolution
    if options.policy == "callback":
        handler = options.handler
        if not handler:
            self._emit_captcha_event("captcha_handler_error")
            raise CaptchaHandlingError(
                "captcha_handler_error",
                'Captcha handler is required for policy="callback".',
            )
        try:
            outcome = handler(self._build_captcha_context(snapshot, source))
            # Sync handlers return the resolution directly; awaiting it would
            # raise TypeError and be misreported as a handler failure.
            if inspect.isawaitable(outcome):
                outcome = await outcome
            resolution = outcome
        except Exception as exc:  # pragma: no cover - defensive
            self._emit_captcha_event("captcha_handler_error", {"error": str(exc)})
            raise CaptchaHandlingError(
                "captcha_handler_error", "Captcha handler failed."
            ) from exc
    else:
        resolution = CaptchaResolution(action="abort")

    await self._apply_captcha_resolution(resolution, snapshot, source)
334+
335+
async def _apply_captcha_resolution(
    self,
    resolution: CaptchaResolution,
    snapshot: Snapshot,
    source: str,
) -> None:
    """
    Carry out the action chosen for a detected CAPTCHA.

    * ``abort`` - emit ``captcha_policy_abort`` and raise.
    * ``retry_new_session`` - count the retry, enforce
      ``max_retries_new_session``, then await the ``reset_session`` callback.
    * ``wait_until_cleared`` - poll until the CAPTCHA disappears, using the
      resolution's ``timeout_ms`` / ``poll_ms`` when set (truthy) and the
      configured defaults otherwise, then emit ``captcha_resumed``.

    Any other action is rejected explicitly instead of being ignored; silently
    returning would let the run continue as if the CAPTCHA had been handled.

    Raises:
        CaptchaHandlingError: on abort, exhausted retries, a missing
            ``reset_session`` callback, a wait timeout, or an unsupported action.
    """
    action = resolution.action

    if action == "abort":
        self._emit_captcha_event("captcha_policy_abort", {"message": resolution.message})
        raise CaptchaHandlingError(
            "captcha_policy_abort",
            resolution.message or "Captcha detected. Aborting per policy.",
        )

    if action == "retry_new_session":
        self._captcha_retry_count += 1
        self._emit_captcha_event("captcha_retry_new_session")
        if self._captcha_retry_count > self._captcha_options.max_retries_new_session:
            self._emit_captcha_event("captcha_retry_exhausted")
            raise CaptchaHandlingError(
                "captcha_retry_exhausted",
                "Captcha retry_new_session exhausted.",
            )
        if not self._captcha_options.reset_session:
            raise CaptchaHandlingError(
                "captcha_retry_new_session",
                "reset_session callback is required for retry_new_session.",
            )
        await self._captcha_options.reset_session()
        return

    if action == "wait_until_cleared":
        # `or` means a resolution value of 0/None falls back to the configured default.
        timeout_ms = resolution.timeout_ms or self._captcha_options.timeout_ms
        poll_ms = resolution.poll_ms or self._captcha_options.poll_ms
        await self._wait_until_cleared(timeout_ms=timeout_ms, poll_ms=poll_ms, source=source)
        self._emit_captcha_event("captcha_resumed")
        return

    # Unknown action: the handler returned something the runtime cannot act on.
    problem = f"Unsupported captcha action: {action!r}"
    self._emit_captcha_event("captcha_handler_error", {"error": problem})
    raise CaptchaHandlingError("captcha_handler_error", problem)
370+
371+
async def _wait_until_cleared(self, *, timeout_ms: int, poll_ms: int, source: str) -> None:
    """
    Poll snapshots until no CAPTCHA is detected or the timeout elapses.

    Each iteration sleeps ``poll_ms`` first, then takes a snapshot with CAPTCHA
    handling skipped (so the poll cannot recurse into handling). When the CAPTCHA
    is gone a ``captcha_cleared`` event is emitted and the method returns.

    Raises:
        CaptchaHandlingError: with reason ``captcha_wait_timeout`` when the
            CAPTCHA is still detected once ``timeout_ms`` has passed.
    """
    # Monotonic clock: wall-clock adjustments (NTP, manual changes) must not
    # stretch or shrink the timeout.
    deadline = time.monotonic() + timeout_ms / 1000.0
    poll_s = poll_ms / 1000.0
    while time.monotonic() <= deadline:
        await asyncio.sleep(poll_s)
        snap = await self.snapshot(_skip_captcha_handling=True)
        if not self._is_captcha_detected(snap):
            self._emit_captcha_event("captcha_cleared", {"source": source})
            return
    self._emit_captcha_event("captcha_wait_timeout", {"timeout_ms": timeout_ms})
    raise CaptchaHandlingError("captcha_wait_timeout", "Captcha wait_until_cleared timed out.")
381+
258382
async def enable_failure_artifacts(
259383
self,
260384
options: FailureArtifactsOptions | None = None,

sentience/captcha.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from __future__ import annotations
2+
3+
from dataclasses import dataclass
4+
from typing import Awaitable, Callable, Literal, Optional
5+
6+
from .models import CaptchaDiagnostics
7+
8+
# Runtime policy for a detected CAPTCHA: "callback" asks the configured handler
# for a resolution; any other policy resolves to an "abort" action.
CaptchaPolicy = Literal["abort", "callback"]
# Actions a resolution may request from the runtime.
CaptchaAction = Literal["abort", "retry_new_session", "wait_until_cleared"]
# Label for the component reporting the CAPTCHA (AgentRuntime.snapshot passes "gateway").
CaptchaSource = Literal["extension", "gateway", "runtime"]
11+
12+
13+
@dataclass
class CaptchaContext:
    """Information handed to a CAPTCHA handler when a CAPTCHA is detected."""

    # Run identifier taken from the tracer.
    run_id: str
    # Runtime step index at the time of detection.
    step_index: int
    # URL of the snapshot in which the CAPTCHA was detected.
    url: str
    # Which component reported the detection.
    source: CaptchaSource
    # CAPTCHA diagnostics taken from the snapshot.
    captcha: CaptchaDiagnostics
    # Optional artifact / session pointers and free-form metadata.
    # NOTE(review): no code visible alongside this class populates these fields;
    # presumably callers or later integrations fill them in - confirm.
    screenshot_path: Optional[str] = None
    frames_dir: Optional[str] = None
    snapshot_path: Optional[str] = None
    live_session_url: Optional[str] = None
    meta: Optional[dict[str, str]] = None
25+
26+
27+
@dataclass
class CaptchaResolution:
    """Decision returned by a CAPTCHA handler for the runtime to carry out."""

    # What the runtime should do next.
    action: CaptchaAction
    # Human-readable note; used as the error text when the action is "abort".
    message: Optional[str] = None
    # Who is expected to deal with the CAPTCHA (informational).
    handled_by: Optional[Literal["human", "customer_system", "unknown"]] = None
    # Per-resolution overrides for "wait_until_cleared"; when unset (or 0) the
    # runtime falls back to CaptchaOptions.timeout_ms / CaptchaOptions.poll_ms.
    timeout_ms: Optional[int] = None
    poll_ms: Optional[int] = None
34+
35+
36+
# Handler signature: receives a CaptchaContext and returns a CaptchaResolution,
# either directly (sync handler) or as an awaitable (async handler).
CaptchaHandler = Callable[[CaptchaContext], CaptchaResolution | Awaitable[CaptchaResolution]]
37+
38+
39+
@dataclass
class CaptchaOptions:
    """Configuration for how the runtime reacts to detected CAPTCHAs."""

    # "abort" (default) stops on detection; "callback" delegates to `handler`.
    policy: CaptchaPolicy = "abort"
    # Detections with a confidence below this threshold are ignored.
    min_confidence: float = 0.7
    # Default wait budget and polling interval for "wait_until_cleared", in milliseconds.
    timeout_ms: int = 120_000
    poll_ms: int = 1_000
    # How many "retry_new_session" resolutions are accepted before giving up.
    max_retries_new_session: int = 1
    # Required when policy == "callback".
    handler: Optional[CaptchaHandler] = None
    # Async callback awaited for "retry_new_session"; required for that action.
    reset_session: Optional[Callable[[], Awaitable[None]]] = None
48+
49+
50+
class CaptchaHandlingError(RuntimeError):
    """Raised when CAPTCHA handling cannot proceed.

    ``reason_code`` carries a machine-readable failure category; the
    human-readable message is available through ``str(exc)``.
    """

    def __init__(self, reason_code: str, message: str) -> None:
        self.reason_code = reason_code
        super().__init__(message)

sentience/captcha_strategies.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
from __future__ import annotations
2+
3+
import inspect
4+
from typing import Callable
5+
6+
from .captcha import CaptchaContext, CaptchaHandler, CaptchaResolution
7+
8+
9+
def HumanHandoffSolver(
    *,
    message: str | None = None,
    handled_by: str | None = "human",
    timeout_ms: int | None = None,
    poll_ms: int | None = None,
) -> CaptchaHandler:
    """Build a handler that hands the CAPTCHA to a person and waits for it to clear.

    The returned async handler ignores the context it receives and always
    answers with a ``wait_until_cleared`` resolution carrying the given
    ``handled_by``, ``timeout_ms`` and ``poll_ms``. A falsy ``message`` is
    replaced by a default prompt.
    """
    prompt = message or "Solve CAPTCHA in the live session, then resume."

    async def _await_human(_ctx: CaptchaContext) -> CaptchaResolution:
        return CaptchaResolution(
            action="wait_until_cleared",
            message=prompt,
            handled_by=handled_by,
            timeout_ms=timeout_ms,
            poll_ms=poll_ms,
        )

    return _await_human
26+
27+
28+
def VisionSolver(
    *,
    message: str | None = None,
    handled_by: str | None = "customer_system",
    timeout_ms: int | None = None,
    poll_ms: int | None = None,
) -> CaptchaHandler:
    """Build a handler that takes no action and only waits for the CAPTCHA to clear.

    The returned async handler ignores the context it receives and always
    answers with a ``wait_until_cleared`` resolution carrying the given
    ``handled_by``, ``timeout_ms`` and ``poll_ms``. A falsy ``message`` is
    replaced by a default note.
    """
    note = message or "Waiting for CAPTCHA to clear (vision verification)."

    async def _await_clearance(_ctx: CaptchaContext) -> CaptchaResolution:
        return CaptchaResolution(
            action="wait_until_cleared",
            message=note,
            handled_by=handled_by,
            timeout_ms=timeout_ms,
            poll_ms=poll_ms,
        )

    return _await_clearance
45+
46+
47+
def ExternalSolver(
    resolver: Callable[[CaptchaContext], None | bool | dict],
    *,
    message: str | None = None,
    handled_by: str | None = "customer_system",
    timeout_ms: int | None = None,
    poll_ms: int | None = None,
) -> CaptchaHandler:
    """Build a handler that notifies an external system, then waits for clearance.

    The returned async handler calls ``resolver`` with the CAPTCHA context. If
    the call yields an awaitable (e.g. ``resolver`` is a coroutine function) it
    is awaited. The resolver's result is otherwise ignored, and the handler
    always answers with a ``wait_until_cleared`` resolution. A falsy ``message``
    is replaced by a default note.
    """
    note = message or "External solver invoked; waiting for clearance."

    async def _notify_then_wait(ctx: CaptchaContext) -> CaptchaResolution:
        pending = resolver(ctx)
        if inspect.isawaitable(pending):
            await pending
        return CaptchaResolution(
            action="wait_until_cleared",
            message=note,
            handled_by=handled_by,
            timeout_ms=timeout_ms,
            poll_ms=poll_ms,
        )

    return _notify_then_wait

0 commit comments

Comments
 (0)