Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Use `AgentRuntime` to add Jest-style assertions to your agent loops. Verify brow

```python
import asyncio
from sentience import AsyncSentienceBrowser, AgentRuntime
from sentience import AsyncSentienceBrowser, AgentRuntime, CaptchaOptions, HumanHandoffSolver
from sentience.verification import (
url_contains,
exists,
Expand Down Expand Up @@ -80,6 +80,11 @@ async def main():
).eventually(timeout_s=10.0, poll_s=0.25, min_confidence=0.7, max_snapshot_attempts=3)
print("eventually() result:", ok)

# CAPTCHA handling (detection + handoff + verify)
runtime.set_captcha_options(
CaptchaOptions(policy="callback", handler=HumanHandoffSolver())
)

# Check task completion
if runtime.assert_done(exists("text~'Example'"), label="task_complete"):
print("✅ Task completed!")
Expand All @@ -89,6 +94,26 @@ async def main():
asyncio.run(main())
```

#### CAPTCHA strategies (Batteries Included)

```python
from sentience import CaptchaOptions, ExternalSolver, HumanHandoffSolver, VisionSolver

# Human-in-loop
runtime.set_captcha_options(CaptchaOptions(policy="callback", handler=HumanHandoffSolver()))

# Vision verification only
runtime.set_captcha_options(CaptchaOptions(policy="callback", handler=VisionSolver()))

# External system/webhook (notify_webhook is your own sync or async callable)
runtime.set_captcha_options(
CaptchaOptions(
policy="callback",
handler=ExternalSolver(lambda ctx: notify_webhook(ctx)),
)
)
```

### Failure Artifact Buffer (Phase 1)

Capture a short ring buffer of screenshots and persist them when a required assertion fails.
Expand Down
53 changes: 53 additions & 0 deletions examples/agent_runtime_captcha_strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import asyncio
import os

from sentience import (
AgentRuntime,
AsyncSentienceBrowser,
CaptchaOptions,
ExternalSolver,
HumanHandoffSolver,
VisionSolver,
)
from sentience.tracing import JsonlTraceSink, Tracer


async def notify_webhook(ctx) -> None:
    """Example external-resolver hook: announce the CAPTCHA context.

    Sentience itself ships no solver logic. Replace the ``print`` below with
    your own client, queue, or webhook call.

    Args:
        ctx: CAPTCHA context object; only its ``url`` and ``run_id``
            attributes are read here.
    """
    notice = f"[captcha] external resolver notified: url={ctx.url} run_id={ctx.run_id}"
    print(notice)


async def main() -> None:
    """Demonstrate the three bundled CAPTCHA strategies, then take one snapshot.

    A tracer writing to ``trace.jsonl`` is attached to the runtime. The page
    to visit comes from the ``CAPTCHA_TEST_URL`` environment variable and
    falls back to ``https://example.com``.
    """
    trace_sink = JsonlTraceSink("trace.jsonl")
    tracer = Tracer(run_id="captcha-demo", sink=trace_sink)

    async with AsyncSentienceBrowser() as browser:
        page = await browser.new_page()
        runtime = await AgentRuntime.from_sentience_browser(
            browser=browser,
            page=page,
            tracer=tracer,
        )

        # The options are configured three times purely to show each strategy;
        # the call made last is the configuration used by the snapshot below.

        # Option 1: Human-in-loop
        human_handoff = CaptchaOptions(policy="callback", handler=HumanHandoffSolver())
        runtime.set_captcha_options(human_handoff)

        # Option 2: Vision-only verification (no actions)
        vision_only = CaptchaOptions(policy="callback", handler=VisionSolver())
        runtime.set_captcha_options(vision_only)

        # Option 3: External resolver orchestration
        external = CaptchaOptions(
            policy="callback",
            handler=ExternalSolver(lambda ctx: notify_webhook(ctx)),
        )
        runtime.set_captcha_options(external)

        target_url = os.environ.get("CAPTCHA_TEST_URL", "https://example.com")
        await page.goto(target_url)
        runtime.begin_step("Captcha-aware snapshot")
        await runtime.snapshot()


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())
2 changes: 2 additions & 0 deletions sentience/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
# Agent Layer (Phase 1 & 2)
from .base_agent import BaseAgent
from .browser import SentienceBrowser
from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
from .captcha_strategies import ExternalSolver, HumanHandoffSolver, VisionSolver

# Tracing (v0.12.0+)
from .cloud_tracing import CloudTraceSink, SentienceLogger
Expand Down
126 changes: 125 additions & 1 deletion sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions
from .models import Snapshot, SnapshotOptions
from .verification import AssertContext, AssertOutcome, Predicate
Expand Down Expand Up @@ -153,6 +154,10 @@ def __init__(
self._task_done: bool = False
self._task_done_label: str | None = None

# CAPTCHA handling (optional, disabled by default)
self._captcha_options: CaptchaOptions | None = None
self._captcha_retry_count: int = 0

@classmethod
async def from_sentience_browser(
cls,
Expand Down Expand Up @@ -248,13 +253,132 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
from .backends.snapshot import snapshot as backend_snapshot

# Merge default options with call-specific kwargs
skip_captcha_handling = bool(kwargs.pop("_skip_captcha_handling", False))
options_dict = self._snapshot_options.model_dump(exclude_none=True)
options_dict.update(kwargs)
options = SnapshotOptions(**options_dict)

self.last_snapshot = await backend_snapshot(self.backend, options=options)
if not skip_captcha_handling:
await self._handle_captcha_if_needed(self.last_snapshot, source="gateway")
return self.last_snapshot

def set_captcha_options(self, options: CaptchaOptions) -> None:
    """Install the CAPTCHA handling configuration for this runtime.

    CAPTCHA handling does nothing until options are set. Installing options
    also resets the ``retry_new_session`` attempt counter to zero.

    Args:
        options: Policy, thresholds and callbacks to use from now on.
    """
    # Retry accounting starts afresh for the newly installed options.
    self._captcha_retry_count = 0
    self._captcha_options = options

def _is_captcha_detected(self, snapshot: Snapshot) -> bool:
    """Report whether ``snapshot`` carries a CAPTCHA detection worth acting on.

    Args:
        snapshot: Snapshot whose ``diagnostics.captcha`` entry is inspected.

    Returns:
        True only when CAPTCHA options are configured, the snapshot's
        diagnostics include a ``captcha`` entry flagged ``detected``, and its
        confidence reaches the configured ``min_confidence``; False otherwise.
    """
    options = self._captcha_options
    if not options:
        return False

    diagnostics = snapshot.diagnostics
    captcha = getattr(diagnostics, "captcha", None) if diagnostics else None
    if not captcha or not getattr(captcha, "detected", False):
        return False

    # A detection that reports no confidence (attribute missing or None) is
    # scored as 0.0 instead of letting ``None >= float`` raise TypeError.
    confidence = getattr(captcha, "confidence", None)
    if confidence is None:
        confidence = 0.0
    return confidence >= options.min_confidence

def _build_captcha_context(self, snapshot: Snapshot, source: str) -> CaptchaContext:
    """Assemble the ``CaptchaContext`` that is handed to a CAPTCHA handler.

    Args:
        snapshot: Snapshot in which the CAPTCHA was detected; supplies the URL
            and the ``captcha`` diagnostics (``None`` when absent).
        source: Label for where the detection was reported from.

    Returns:
        A context carrying the tracer's run id, the current step index, the
        snapshot URL, ``source`` and the captcha diagnostics.
    """
    detected_captcha = getattr(snapshot.diagnostics, "captcha", None)
    context_fields = {
        "run_id": self.tracer.run_id,
        "step_index": self.step_index,
        "url": snapshot.url,
        "source": source,
        "captcha": detected_captcha,
    }
    return CaptchaContext(**context_fields)  # type: ignore[arg-type]

def _emit_captcha_event(self, reason_code: str, details: dict[str, Any] | None = None) -> None:
    """Record a CAPTCHA lifecycle event on the tracer.

    The event is emitted as a ``"verification"`` trace entry of kind
    ``"captcha"`` for the current step, with ``passed`` always ``False``.

    Args:
        reason_code: Event identifier; used both as the entry's ``label`` and
            as ``details["reason_code"]``.
        details: Optional extra key/value pairs merged into ``details``. A
            ``"reason_code"`` key supplied here overrides ``reason_code``.
    """
    event_details: dict[str, Any] = {"reason_code": reason_code}
    if details:
        event_details.update(details)

    self.tracer.emit(
        "verification",
        data={
            "kind": "captcha",
            "passed": False,
            "label": reason_code,
            "details": event_details,
        },
        step_id=self.step_id,
    )

async def _handle_captcha_if_needed(self, snapshot: Snapshot, source: str) -> None:
    """Run the configured CAPTCHA policy when ``snapshot`` shows a CAPTCHA.

    Does nothing when no options are configured or no CAPTCHA is detected.
    Otherwise emits a ``captcha_detected`` event, obtains a resolution
    (from the handler for ``policy="callback"``, or an ``abort`` resolution
    for any other policy) and applies it.

    The handler may be a plain function or a coroutine function: its result
    is awaited only when it is awaitable.

    Args:
        snapshot: The snapshot that was just taken.
        source: Label for where the detection was reported from.

    Raises:
        CaptchaHandlingError: With reason ``captcha_handler_error`` when the
            callback policy has no handler or the handler raises; other
            reasons may be raised while applying the resolution.
    """
    import inspect  # local import: only needed to accept sync handlers

    if not self._captcha_options:
        return
    if not self._is_captcha_detected(snapshot):
        return

    captcha = getattr(snapshot.diagnostics, "captcha", None)
    self._emit_captcha_event(
        "captcha_detected",
        # Serialize via model_dump() when available, else pass the raw object.
        {"captcha": getattr(captcha, "model_dump", lambda: captcha)()},
    )

    resolution: CaptchaResolution
    if self._captcha_options.policy == "callback":
        handler = self._captcha_options.handler
        if not handler:
            self._emit_captcha_event("captcha_handler_error")
            raise CaptchaHandlingError(
                "captcha_handler_error",
                'Captcha handler is required for policy="callback".',
            )
        try:
            outcome = handler(self._build_captcha_context(snapshot, source))
            # Sync handlers return the resolution directly; async handlers
            # return an awaitable that must be awaited to obtain it.
            resolution = await outcome if inspect.isawaitable(outcome) else outcome
        except Exception as exc:  # pragma: no cover - defensive
            self._emit_captcha_event("captcha_handler_error", {"error": str(exc)})
            raise CaptchaHandlingError(
                "captcha_handler_error", "Captcha handler failed."
            ) from exc
    else:
        resolution = CaptchaResolution(action="abort")

    await self._apply_captcha_resolution(resolution, snapshot, source)

async def _apply_captcha_resolution(
    self,
    resolution: CaptchaResolution,
    snapshot: Snapshot,
    source: str,
) -> None:
    """Carry out the action requested by a CAPTCHA resolution.

    * ``abort``: emit ``captcha_policy_abort`` and raise.
    * ``retry_new_session``: count the attempt, then await the configured
      ``reset_session`` callback; raises once ``max_retries_new_session`` is
      exceeded or when no ``reset_session`` callback is configured.
    * ``wait_until_cleared``: poll until the CAPTCHA is gone, then emit
      ``captcha_resumed``. Per-resolution ``timeout_ms`` / ``poll_ms``
      override the configured defaults whenever they are not ``None``, so an
      explicit ``0`` is honoured rather than replaced by the default.

    Args:
        resolution: The decision returned by the policy or handler.
        snapshot: Snapshot in which the CAPTCHA was detected (not read here).
        source: Label for where the detection was reported from.

    Raises:
        CaptchaHandlingError: For ``abort``, for exhausted or unconfigured
            ``retry_new_session``, or when waiting times out.
    """
    if resolution.action == "abort":
        self._emit_captcha_event("captcha_policy_abort", {"message": resolution.message})
        raise CaptchaHandlingError(
            "captcha_policy_abort",
            resolution.message or "Captcha detected. Aborting per policy.",
        )

    if resolution.action == "retry_new_session":
        self._captcha_retry_count += 1
        self._emit_captcha_event("captcha_retry_new_session")
        if self._captcha_retry_count > self._captcha_options.max_retries_new_session:
            self._emit_captcha_event("captcha_retry_exhausted")
            raise CaptchaHandlingError(
                "captcha_retry_exhausted",
                "Captcha retry_new_session exhausted.",
            )
        if not self._captcha_options.reset_session:
            raise CaptchaHandlingError(
                "captcha_retry_new_session",
                "reset_session callback is required for retry_new_session.",
            )
        await self._captcha_options.reset_session()
        return

    if resolution.action == "wait_until_cleared":
        # ``is not None`` instead of ``or``: 0 is a legitimate explicit value
        # and must not silently fall back to the configured default.
        timeout_ms = (
            resolution.timeout_ms
            if resolution.timeout_ms is not None
            else self._captcha_options.timeout_ms
        )
        poll_ms = (
            resolution.poll_ms
            if resolution.poll_ms is not None
            else self._captcha_options.poll_ms
        )
        await self._wait_until_cleared(timeout_ms=timeout_ms, poll_ms=poll_ms, source=source)
        self._emit_captcha_event("captcha_resumed")

async def _wait_until_cleared(self, *, timeout_ms: int, poll_ms: int, source: str) -> None:
    """Poll fresh snapshots until no CAPTCHA is detected or the timeout passes.

    Each iteration sleeps ``poll_ms`` and then takes a new snapshot. On
    success a ``captcha_cleared`` event is emitted; on timeout a
    ``captcha_wait_timeout`` event is emitted and an error is raised.

    Args:
        timeout_ms: Maximum time to keep polling, in milliseconds.
        poll_ms: Delay before each snapshot, in milliseconds.
        source: Detection source label, recorded on the ``captcha_cleared``
            event.

    Raises:
        CaptchaHandlingError: With reason ``captcha_wait_timeout`` when the
            CAPTCHA is still detected once the deadline has passed.
    """
    # Monotonic clock: wall-clock adjustments (NTP sync, manual changes) must
    # neither cut the wait short nor extend it.
    deadline = time.monotonic() + timeout_ms / 1000.0
    while time.monotonic() <= deadline:
        await asyncio.sleep(poll_ms / 1000.0)
        # Skip CAPTCHA handling on this snapshot so polling does not re-enter
        # the handler for the CAPTCHA we are already waiting on.
        snap = await self.snapshot(_skip_captcha_handling=True)
        if not self._is_captcha_detected(snap):
            self._emit_captcha_event("captcha_cleared", {"source": source})
            return
    self._emit_captcha_event("captcha_wait_timeout", {"timeout_ms": timeout_ms})
    raise CaptchaHandlingError("captcha_wait_timeout", "Captcha wait_until_cleared timed out.")

async def enable_failure_artifacts(
self,
options: FailureArtifactsOptions | None = None,
Expand Down Expand Up @@ -455,7 +579,7 @@ def assert_done(
True if task is complete (assertion passed), False otherwise
"""
# Convenience wrapper for assert_ with required=True
ok = self.assertTrue(predicate, label=label, required=True)
ok = self.assert_(predicate, label=label, required=True)
if ok:
self._task_done = True
self._task_done_label = label
Expand Down
53 changes: 53 additions & 0 deletions sentience/captcha.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Awaitable, Callable, Literal, Optional

from .models import CaptchaDiagnostics

# How the runtime reacts to a detected CAPTCHA: "callback" asks the configured
# handler for a resolution; "abort" is the default policy.
CaptchaPolicy = Literal["abort", "callback"]
# Actions a CaptchaResolution may request.
CaptchaAction = Literal["abort", "retry_new_session", "wait_until_cleared"]
# Label for where a detection was reported from.
CaptchaSource = Literal["extension", "gateway", "runtime"]


@dataclass
class CaptchaContext:
    """Information about a detected CAPTCHA, passed to a ``CaptchaHandler``."""

    # Identifier of the run in which the CAPTCHA was detected.
    run_id: str
    # Index of the step that was active at detection time.
    step_index: int
    # URL associated with the detection.
    url: str
    # Where the detection was reported from.
    source: CaptchaSource
    # CAPTCHA diagnostics describing the detection.
    captcha: CaptchaDiagnostics
    # Optional extras, all defaulting to None.
    # NOTE(review): presumably artifact locations and a live-session link for
    # human handoff; confirm against the code that populates them.
    screenshot_path: Optional[str] = None
    frames_dir: Optional[str] = None
    snapshot_path: Optional[str] = None
    live_session_url: Optional[str] = None
    meta: Optional[dict[str, str]] = None


@dataclass
class CaptchaResolution:
    """Decision describing what should happen after a CAPTCHA is detected."""

    # What to do next: abort, retry in a new session, or wait for clearance.
    action: CaptchaAction
    # Optional human-readable note accompanying the decision.
    message: Optional[str] = None
    # Optional label for who or what is dealing with the CAPTCHA.
    handled_by: Optional[Literal["human", "customer_system", "unknown"]] = None
    # Optional per-resolution timing values, in milliseconds.
    timeout_ms: Optional[int] = None
    poll_ms: Optional[int] = None


# A handler receives the CaptchaContext and returns a CaptchaResolution, either
# directly (sync) or as an awaitable (async).
# NOTE(review): this alias is evaluated at import time, and `Class | Awaitable[...]`
# needs Python 3.10+ — confirm the minimum supported version.
CaptchaHandler = Callable[[CaptchaContext], CaptchaResolution | Awaitable[CaptchaResolution]]


@dataclass
class CaptchaOptions:
    """Configuration for CAPTCHA handling."""

    # Reaction to a detected CAPTCHA; the default is to abort.
    policy: CaptchaPolicy = "abort"
    # Minimum detection confidence for a CAPTCHA to be acted on.
    min_confidence: float = 0.7
    # Default wait timeout and polling interval, in milliseconds.
    timeout_ms: int = 120_000
    poll_ms: int = 1_000
    # Maximum number of "retry_new_session" attempts.
    max_retries_new_session: int = 1
    # Callable that decides the resolution (used with policy="callback").
    handler: Optional[CaptchaHandler] = None
    # Async callback that resets the session for "retry_new_session".
    reset_session: Optional[Callable[[], Awaitable[None]]] = None


class CaptchaHandlingError(RuntimeError):
    """Raised when CAPTCHA handling stops a run.

    Attributes:
        reason_code: Machine-readable identifier of the failure, kept
            alongside the human-readable message.
    """

    def __init__(self, reason_code: str, message: str) -> None:
        """Store ``reason_code`` and pass ``message`` to ``RuntimeError``."""
        self.reason_code = reason_code
        super().__init__(message)
67 changes: 67 additions & 0 deletions sentience/captcha_strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from __future__ import annotations

import inspect
from typing import Callable

from .captcha import CaptchaContext, CaptchaHandler, CaptchaResolution


def HumanHandoffSolver(
    *,
    message: str | None = None,
    handled_by: str | None = "human",
    timeout_ms: int | None = None,
    poll_ms: int | None = None,
) -> CaptchaHandler:
    """Build a handler that hands the CAPTCHA to a person and waits.

    The returned async handler ignores its context and always answers with a
    ``wait_until_cleared`` resolution.

    Args:
        message: Note attached to the resolution; a default prompt is used
            when omitted or empty.
        handled_by: Label recorded on the resolution.
        timeout_ms: Optional timeout carried on the resolution.
        poll_ms: Optional poll interval carried on the resolution.

    Returns:
        An async CAPTCHA handler.
    """
    resolution_fields = {
        "action": "wait_until_cleared",
        "message": message or "Solve CAPTCHA in the live session, then resume.",
        "handled_by": handled_by,
        "timeout_ms": timeout_ms,
        "poll_ms": poll_ms,
    }

    async def _handler(_ctx: CaptchaContext) -> CaptchaResolution:
        # A fresh resolution object is built for every detection.
        return CaptchaResolution(**resolution_fields)

    return _handler


def VisionSolver(
    *,
    message: str | None = None,
    handled_by: str | None = "customer_system",
    timeout_ms: int | None = None,
    poll_ms: int | None = None,
) -> CaptchaHandler:
    """Build a handler that takes no action and waits for the CAPTCHA to clear.

    The returned async handler ignores its context and always answers with a
    ``wait_until_cleared`` resolution.

    Args:
        message: Note attached to the resolution; a default is used when
            omitted or empty.
        handled_by: Label recorded on the resolution.
        timeout_ms: Optional timeout carried on the resolution.
        poll_ms: Optional poll interval carried on the resolution.

    Returns:
        An async CAPTCHA handler.
    """
    note = message or "Waiting for CAPTCHA to clear (vision verification)."

    async def _handler(_ctx: CaptchaContext) -> CaptchaResolution:
        waiting = CaptchaResolution(
            action="wait_until_cleared",
            message=note,
            handled_by=handled_by,
            timeout_ms=timeout_ms,
            poll_ms=poll_ms,
        )
        return waiting

    return _handler


def ExternalSolver(
    resolver: Callable[[CaptchaContext], None | bool | dict],
    *,
    message: str | None = None,
    handled_by: str | None = "customer_system",
    timeout_ms: int | None = None,
    poll_ms: int | None = None,
) -> CaptchaHandler:
    """Build a handler that notifies an external system and then waits.

    On each detection the returned async handler calls ``resolver`` with the
    context, awaits the result if it is awaitable, discards it, and answers
    with a ``wait_until_cleared`` resolution.

    Args:
        resolver: Sync or async callable invoked with the CAPTCHA context.
            Its return value is not used.
        message: Note attached to the resolution; a default is used when
            omitted or empty.
        handled_by: Label recorded on the resolution.
        timeout_ms: Optional timeout carried on the resolution.
        poll_ms: Optional poll interval carried on the resolution.

    Returns:
        An async CAPTCHA handler.
    """
    note = message or "External solver invoked; waiting for clearance."

    async def _handler(ctx: CaptchaContext) -> CaptchaResolution:
        pending = resolver(ctx)
        # Async resolvers hand back an awaitable; let it finish before waiting.
        if inspect.isawaitable(pending):
            await pending
        return CaptchaResolution(
            action="wait_until_cleared",
            message=note,
            handled_by=handled_by,
            timeout_ms=timeout_ms,
            poll_ms=poll_ms,
        )

    return _handler
Loading
Loading