|
70 | 70 | from dataclasses import dataclass |
71 | 71 | from typing import TYPE_CHECKING, Any |
72 | 72 |
|
| 73 | +from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution |
73 | 74 | from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions |
74 | 75 | from .models import Snapshot, SnapshotOptions |
75 | 76 | from .verification import AssertContext, AssertOutcome, Predicate |
@@ -153,6 +154,10 @@ def __init__( |
153 | 154 | self._task_done: bool = False |
154 | 155 | self._task_done_label: str | None = None |
155 | 156 |
|
| 157 | + # CAPTCHA handling (optional, disabled by default) |
| 158 | + self._captcha_options: CaptchaOptions | None = None |
| 159 | + self._captcha_retry_count: int = 0 |
| 160 | + |
156 | 161 | @classmethod |
157 | 162 | async def from_sentience_browser( |
158 | 163 | cls, |
@@ -248,13 +253,132 @@ async def snapshot(self, **kwargs: Any) -> Snapshot: |
248 | 253 | from .backends.snapshot import snapshot as backend_snapshot |
249 | 254 |
|
250 | 255 | # Merge default options with call-specific kwargs |
| 256 | + skip_captcha_handling = bool(kwargs.pop("_skip_captcha_handling", False)) |
251 | 257 | options_dict = self._snapshot_options.model_dump(exclude_none=True) |
252 | 258 | options_dict.update(kwargs) |
253 | 259 | options = SnapshotOptions(**options_dict) |
254 | 260 |
|
255 | 261 | self.last_snapshot = await backend_snapshot(self.backend, options=options) |
| 262 | + if not skip_captcha_handling: |
| 263 | + await self._handle_captcha_if_needed(self.last_snapshot, source="gateway") |
256 | 264 | return self.last_snapshot |
257 | 265 |
|
def set_captcha_options(self, options: CaptchaOptions) -> None:
    """Install the CAPTCHA handling configuration for this runtime.

    CAPTCHA handling stays inactive until options are provided here.
    Installing options also restarts the new-session retry counter at zero.

    Args:
        options: The CAPTCHA handling configuration to store on the runtime.
    """
    # The two assignments are independent; the counter restarts with every
    # (re)configuration.
    self._captcha_retry_count = 0
    self._captcha_options = options
| 272 | + |
def _is_captcha_detected(self, snapshot: Snapshot) -> bool:
    """Return True when the snapshot reports a CAPTCHA we should act on.

    A CAPTCHA counts as detected only when all of the following hold:
    CAPTCHA options are configured, the snapshot diagnostics carry a
    ``captcha`` entry whose ``detected`` flag is truthy, and its
    ``confidence`` is at least ``min_confidence`` from the options.

    Args:
        snapshot: The snapshot whose diagnostics are inspected.

    Returns:
        True if a CAPTCHA is reported with sufficient confidence, else False.
    """
    options = self._captcha_options
    if not options:
        return False

    diagnostics = snapshot.diagnostics
    captcha = getattr(diagnostics, "captcha", None) if diagnostics else None
    if not captcha or not getattr(captcha, "detected", False):
        return False

    # The getattr default only covers a *missing* attribute. A confidence
    # that is present but None would make the comparison below raise
    # TypeError, so treat it the same as a missing value (0.0).
    confidence = getattr(captcha, "confidence", None)
    if confidence is None:
        confidence = 0.0
    return confidence >= options.min_confidence
| 281 | + |
def _build_captcha_context(self, snapshot: Snapshot, source: str) -> CaptchaContext:
    """Assemble the context object describing a CAPTCHA encounter.

    Args:
        snapshot: Snapshot supplying the page URL and the ``captcha``
            diagnostics entry (``None`` when the diagnostics have none).
        source: Label for where the snapshot came from; forwarded as-is.

    Returns:
        A ``CaptchaContext`` populated with the tracer's run id, the current
        step index, the snapshot URL, the source label and the CAPTCHA
        diagnostics.
    """
    captcha_diagnostics = getattr(snapshot.diagnostics, "captcha", None)
    context = CaptchaContext(
        run_id=self.tracer.run_id,
        step_index=self.step_index,
        url=snapshot.url,
        source=source,  # type: ignore[arg-type]
        captcha=captcha_diagnostics,
    )
    return context
| 291 | + |
def _emit_captcha_event(self, reason_code: str, details: dict[str, Any] | None = None) -> None:
    """Emit a CAPTCHA-related ``verification`` event on the tracer.

    The event payload has kind ``"captcha"``, ``passed`` set to False, and
    uses ``reason_code`` as its label.

    Args:
        reason_code: Identifier for what happened; used as the label and
            stored under ``details["reason_code"]``.
        details: Optional extra key/values merged into the event details.
            Keys supplied here override ``reason_code`` on collision.
    """
    # Start from the reason code, then let caller-provided details overlay it.
    merged_details: dict[str, Any] = {"reason_code": reason_code}
    merged_details.update(details or {})

    self.tracer.emit(
        "verification",
        data={
            "kind": "captcha",
            "passed": False,
            "label": reason_code,
            "details": merged_details,
        },
        step_id=self.step_id,
    )
| 300 | + |
async def _handle_captcha_if_needed(self, snapshot: Snapshot, source: str) -> None:
    """Detect a CAPTCHA in ``snapshot`` and act on it per the configured policy.

    Does nothing when CAPTCHA options are unset or no CAPTCHA is detected.
    Otherwise a ``captcha_detected`` event is emitted and a resolution is
    obtained: from the configured handler when the policy is ``"callback"``,
    or an ``abort`` resolution for any other policy. The resolution is then
    applied.

    Args:
        snapshot: The snapshot to inspect.
        source: Label for where the snapshot came from; passed through to
            the handler context and to resolution handling.

    Raises:
        CaptchaHandlingError: With code ``captcha_handler_error`` when the
            policy is ``"callback"`` but no handler is set, or when building
            the context or running the handler raises. Errors raised while
            applying the resolution propagate unchanged.
    """
    options = self._captcha_options
    if not options or not self._is_captcha_detected(snapshot):
        return

    detected = getattr(snapshot.diagnostics, "captcha", None)
    # Use the object's own model_dump() when it has one; otherwise report
    # the raw object.
    serialize = getattr(detected, "model_dump", lambda: detected)
    self._emit_captcha_event("captcha_detected", {"captcha": serialize()})

    if options.policy != "callback":
        # Every non-callback policy resolves to an abort.
        await self._apply_captcha_resolution(
            CaptchaResolution(action="abort"), snapshot, source
        )
        return

    if not options.handler:
        self._emit_captcha_event("captcha_handler_error")
        raise CaptchaHandlingError(
            "captcha_handler_error",
            'Captcha handler is required for policy="callback".',
        )

    try:
        context = self._build_captcha_context(snapshot, source)
        resolution: CaptchaResolution = await options.handler(context)
    except Exception as exc:  # pragma: no cover - defensive
        self._emit_captcha_event("captcha_handler_error", {"error": str(exc)})
        raise CaptchaHandlingError(
            "captcha_handler_error", "Captcha handler failed."
        ) from exc

    await self._apply_captcha_resolution(resolution, snapshot, source)
| 334 | + |
async def _apply_captcha_resolution(
    self,
    resolution: CaptchaResolution,
    snapshot: Snapshot,
    source: str,
) -> None:
    """Carry out the action requested by a CAPTCHA resolution.

    Supported actions:

    * ``"abort"``: emit ``captcha_policy_abort`` and raise.
    * ``"retry_new_session"``: bump the retry counter, emit
      ``captcha_retry_new_session``, then either raise (retries exhausted
      or no ``reset_session`` callback) or await ``reset_session()``.
    * ``"wait_until_cleared"``: poll until the CAPTCHA clears, using the
      resolution's ``timeout_ms``/``poll_ms`` when truthy and the configured
      defaults otherwise, then emit ``captcha_resumed``.

    Args:
        resolution: The resolution to apply.
        snapshot: The snapshot that triggered handling (not read here).
        source: Label for where the snapshot came from; forwarded to the wait.

    Raises:
        CaptchaHandlingError: For ``abort``, for exhausted retries, for a
            missing ``reset_session`` callback, or propagated from the wait.
    """
    action = resolution.action

    if action == "abort":
        message = resolution.message
        self._emit_captcha_event("captcha_policy_abort", {"message": message})
        raise CaptchaHandlingError(
            "captcha_policy_abort",
            message or "Captcha detected. Aborting per policy.",
        )

    if action == "retry_new_session":
        # The attempt is counted and announced before the limit is checked.
        self._captcha_retry_count += 1
        self._emit_captcha_event("captcha_retry_new_session")
        if self._captcha_retry_count > self._captcha_options.max_retries_new_session:
            self._emit_captcha_event("captcha_retry_exhausted")
            raise CaptchaHandlingError(
                "captcha_retry_exhausted",
                "Captcha retry_new_session exhausted.",
            )
        reset_session = self._captcha_options.reset_session
        if not reset_session:
            raise CaptchaHandlingError(
                "captcha_retry_new_session",
                "reset_session callback is required for retry_new_session.",
            )
        await reset_session()
        return

    if action == "wait_until_cleared":
        defaults = self._captcha_options
        # Falsy per-resolution values (None or 0) defer to the configured defaults.
        wait_timeout_ms = resolution.timeout_ms or defaults.timeout_ms
        wait_poll_ms = resolution.poll_ms or defaults.poll_ms
        await self._wait_until_cleared(
            timeout_ms=wait_timeout_ms,
            poll_ms=wait_poll_ms,
            source=source,
        )
        self._emit_captcha_event("captcha_resumed")
| 370 | + |
async def _wait_until_cleared(self, *, timeout_ms: int, poll_ms: int, source: str) -> None:
    """Poll with fresh snapshots until the CAPTCHA is no longer detected.

    Each iteration sleeps ``poll_ms`` milliseconds, takes a snapshot with
    ``_skip_captcha_handling=True`` (so the snapshot call does not re-enter
    CAPTCHA handling), and re-checks detection. When the CAPTCHA is gone a
    ``captcha_cleared`` event is emitted and the method returns.

    Args:
        timeout_ms: Maximum time to keep polling, in milliseconds.
        poll_ms: Delay before each snapshot, in milliseconds.
        source: Label recorded in the ``captcha_cleared`` event details.

    Raises:
        CaptchaHandlingError: With code ``captcha_wait_timeout`` when the
            deadline passes while the CAPTCHA is still detected (a
            ``captcha_wait_timeout`` event is emitted first).
    """
    # Measure the deadline on the monotonic clock: wall-clock time can jump
    # (NTP sync, manual changes), which would shorten or extend the wait.
    deadline = time.monotonic() + timeout_ms / 1000.0
    while time.monotonic() <= deadline:
        await asyncio.sleep(poll_ms / 1000.0)
        snap = await self.snapshot(_skip_captcha_handling=True)
        if not self._is_captcha_detected(snap):
            self._emit_captcha_event("captcha_cleared", {"source": source})
            return
    self._emit_captcha_event("captcha_wait_timeout", {"timeout_ms": timeout_ms})
    raise CaptchaHandlingError("captcha_wait_timeout", "Captcha wait_until_cleared timed out.")
| 381 | + |
258 | 382 | async def enable_failure_artifacts( |
259 | 383 | self, |
260 | 384 | options: FailureArtifactsOptions | None = None, |
|
0 commit comments