Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions sentience/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,25 @@ def _compute_hash(self, text: str) -> str:
"""Compute SHA256 hash of text."""
return hashlib.sha256(text.encode("utf-8")).hexdigest()

def _best_effort_post_snapshot_digest(self, goal: str) -> str | None:
    """
    Take a small post-action snapshot and return its digest for tracing.

    The snapshot is requested with at most 10 elements (or
    ``self.default_snapshot_limit`` if that is smaller), with the screenshot
    disabled and the goal suffixed with ``" (post)"``.

    Returns:
        ``"sha256:<hex>"`` where the hash input is the snapshot URL followed
        by its timestamp, or ``None`` when the snapshot status is not
        ``"success"`` or when any step raises.
    """
    try:
        options = SnapshotOptions(
            limit=min(10, self.default_snapshot_limit),
            goal=f"{goal} (post)",
        )
        options.screenshot = False
        if self.config:
            options.show_overlay = self.config.show_overlay
        else:
            options.show_overlay = None

        result = snapshot(self.browser, options)
        if result.status == "success":
            payload = f"{result.url}{result.timestamp}"
            return f"sha256:{self._compute_hash(payload)}"
        return None
    except Exception:
        # Best-effort only: any failure here yields "no digest" instead of raising.
        return None

def _get_element_bbox(self, element_id: int | None, snap: Snapshot) -> dict[str, float] | None:
"""Get bounding box for an element from snapshot."""
if element_id is None:
Expand Down Expand Up @@ -513,6 +532,10 @@ def act( # noqa: C901
snapshot_event_data = TraceEventBuilder.build_snapshot_event(snap_with_diff)
pre_elements = snapshot_event_data.get("elements", [])

post_snapshot_digest = (
self._best_effort_post_snapshot_digest(goal) if self.tracer else None
)

# Build complete step_end event
step_end_data = TraceEventBuilder.build_step_end_event(
step_id=step_id,
Expand All @@ -522,6 +545,7 @@ def act( # noqa: C901
pre_url=pre_url,
post_url=post_url,
snapshot_digest=snapshot_digest,
post_snapshot_digest=post_snapshot_digest,
llm_data=llm_data,
exec_data=exec_data,
verify_data=verify_data,
Expand Down Expand Up @@ -601,6 +625,7 @@ def act( # noqa: C901
pre_url=_step_pre_url,
post_url=post_url,
snapshot_digest=snapshot_digest,
post_snapshot_digest=None,
llm_data=llm_data,
exec_data=exec_data,
verify_data=None,
Expand Down Expand Up @@ -1155,6 +1180,10 @@ async def act( # noqa: C901
snapshot_event_data = TraceEventBuilder.build_snapshot_event(snap_with_diff)
pre_elements = snapshot_event_data.get("elements", [])

post_snapshot_digest = (
self._best_effort_post_snapshot_digest(goal) if self.tracer else None
)

# Build complete step_end event
step_end_data = TraceEventBuilder.build_step_end_event(
step_id=step_id,
Expand All @@ -1164,6 +1193,7 @@ async def act( # noqa: C901
pre_url=pre_url,
post_url=post_url,
snapshot_digest=snapshot_digest,
post_snapshot_digest=post_snapshot_digest,
llm_data=llm_data,
exec_data=exec_data,
verify_data=verify_data,
Expand Down Expand Up @@ -1243,6 +1273,7 @@ async def act( # noqa: C901
pre_url=_step_pre_url,
post_url=post_url,
snapshot_digest=snapshot_digest,
post_snapshot_digest=None,
llm_data=llm_data,
exec_data=exec_data,
verify_data=None,
Expand Down
130 changes: 130 additions & 0 deletions sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,15 @@

import asyncio
import difflib
import hashlib
import time
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions
from .models import Snapshot, SnapshotOptions
from .trace_event_builder import TraceEventBuilder
from .verification import AssertContext, AssertOutcome, Predicate

if TYPE_CHECKING:
Expand Down Expand Up @@ -138,6 +140,8 @@ def __init__(

# Snapshot state
self.last_snapshot: Snapshot | None = None
self._step_pre_snapshot: Snapshot | None = None
self._step_pre_url: str | None = None

# Failure artifacts (Phase 1)
self._artifact_buffer: FailureArtifactBuffer | None = None
Expand All @@ -148,6 +152,12 @@ def __init__(

# Assertions accumulated during current step
self._assertions_this_step: list[dict[str, Any]] = []
self._step_goal: str | None = None
self._last_action: str | None = None
self._last_action_error: str | None = None
self._last_action_outcome: str | None = None
self._last_action_duration_ms: int | None = None
self._last_action_success: bool | None = None

# Task completion tracking
self._task_done: bool = False
Expand Down Expand Up @@ -250,6 +260,11 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
# Check if using legacy browser (backward compat)
if hasattr(self, "_legacy_browser") and hasattr(self, "_legacy_page"):
self.last_snapshot = await self._legacy_browser.snapshot(self._legacy_page, **kwargs)
if self.last_snapshot is not None:
self._cached_url = self.last_snapshot.url
if self._step_pre_snapshot is None:
self._step_pre_snapshot = self.last_snapshot
self._step_pre_url = self.last_snapshot.url
return self.last_snapshot

# Use backend-agnostic snapshot
Expand All @@ -262,6 +277,11 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
options = SnapshotOptions(**options_dict)

self.last_snapshot = await backend_snapshot(self.backend, options=options)
if self.last_snapshot is not None:
self._cached_url = self.last_snapshot.url
if self._step_pre_snapshot is None:
self._step_pre_snapshot = self.last_snapshot
self._step_pre_url = self.last_snapshot.url
if not skip_captcha_handling:
await self._handle_captcha_if_needed(self.last_snapshot, source="gateway")
return self.last_snapshot
Expand Down Expand Up @@ -414,6 +434,7 @@ async def record_action(
"""
Record an action in the artifact timeline and capture a frame if enabled.
"""
self._last_action = action
if not self._artifact_buffer:
return
self._artifact_buffer.record_step(
Expand All @@ -425,6 +446,107 @@ async def record_action(
if self._artifact_buffer.options.capture_on_action:
await self._capture_artifact_frame()

def _compute_snapshot_digest(self, snap: Snapshot | None) -> str | None:
    """
    Return a ``sha256:``-prefixed digest identifying a snapshot.

    The hash input is the snapshot's ``url`` immediately followed by its
    ``timestamp``, encoded as UTF-8.

    Returns:
        ``"sha256:<hex digest>"``, or ``None`` when ``snap`` is ``None`` or
        when reading/hashing its fields raises.
    """
    if snap is None:
        return None
    try:
        fingerprint = f"{snap.url}{snap.timestamp}"
        hex_digest = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
    except Exception:
        # Digest is optional trace metadata; never let it raise.
        return None
    return "sha256:" + hex_digest

async def emit_step_end(
    self,
    *,
    action: str | None = None,
    success: bool | None = None,
    error: str | None = None,
    outcome: str | None = None,
    duration_ms: int | None = None,
    attempt: int = 0,
    verify_passed: bool | None = None,
    verify_signals: dict[str, Any] | None = None,
    post_url: str | None = None,
    post_snapshot_digest: str | None = None,
) -> dict[str, Any]:
    """
    Build a step_end payload with TraceEventBuilder and emit it on the tracer.

    Values that are not passed explicitly are filled in from per-step state:

    - goal: ``self._step_goal`` (empty string if unset).
    - pre URL: ``self._step_pre_url``, else the pre-step (or last) snapshot's
      URL, else ``self._cached_url``, else ``""``.
    - post URL: ``post_url``, else ``await self.get_url()``; if that raises,
      the last snapshot's URL or ``self._cached_url``; finally the pre URL.
    - post digest: ``post_snapshot_digest``, else the digest of
      ``self.last_snapshot``.
    - verification result: ``verify_passed``, else
      ``self.required_assertions_passed()``.
    - execution success: ``success``, else ``self._last_action_success``,
      else the verification result.

    Returns:
        The step_end data dict that was emitted.
    """
    step_goal = self._step_goal or ""
    baseline_snap = self._step_pre_snapshot or self.last_snapshot

    # Pre-step URL: explicit per-step value wins, then snapshot, then cache.
    pre_url = self._step_pre_url
    if not pre_url:
        pre_url = (baseline_snap.url if baseline_snap else None) or self._cached_url or ""

    # Post-step URL: ask the live page unless the caller supplied one.
    resolved_post_url = post_url
    if resolved_post_url is None:
        try:
            resolved_post_url = await self.get_url()
        except Exception:
            latest = self.last_snapshot
            resolved_post_url = (latest.url if latest else None) or self._cached_url
    if not resolved_post_url:
        resolved_post_url = pre_url

    pre_digest = self._compute_snapshot_digest(baseline_snap)
    post_digest = post_snapshot_digest
    if not post_digest:
        post_digest = self._compute_snapshot_digest(self.last_snapshot)

    url_changed = False
    if pre_url and resolved_post_url:
        url_changed = str(pre_url) != str(resolved_post_url)

    recorded_assertions = self.get_assertions_for_step_end().get("assertions") or []

    # Copy caller-provided signals so the caller's dict is not mutated.
    signals = dict(verify_signals) if verify_signals else {}
    if "url_changed" not in signals:
        signals["url_changed"] = url_changed
    if error and "error" not in signals:
        signals["error"] = error

    if verify_passed is None:
        passed = self.required_assertions_passed()
    else:
        passed = bool(verify_passed)

    if success is not None:
        exec_success = bool(success)
    elif self._last_action_success is not None:
        exec_success = bool(self._last_action_success)
    else:
        exec_success = bool(passed)

    exec_data: dict[str, Any] = {
        "success": exec_success,
        "action": action or self._last_action or "unknown",
        "outcome": outcome or self._last_action_outcome or "",
    }
    if duration_ms is not None:
        exec_data["duration_ms"] = int(duration_ms)
    if error:
        exec_data["error"] = error

    verify_data = {"passed": bool(passed), "signals": signals}

    step_end_data = TraceEventBuilder.build_step_end_event(
        step_id=self.step_id or "",
        step_index=int(self.step_index),
        goal=step_goal,
        attempt=int(attempt),
        pre_url=str(pre_url or ""),
        post_url=str(resolved_post_url or ""),
        snapshot_digest=pre_digest,
        llm_data={},
        exec_data=exec_data,
        verify_data=verify_data,
        pre_elements=None,
        assertions=recorded_assertions,
        post_snapshot_digest=post_digest,
    )
    self.tracer.emit("step_end", step_end_data, step_id=self.step_id)
    return step_end_data

async def _capture_artifact_frame(self) -> None:
if not self._artifact_buffer:
return
Expand Down Expand Up @@ -511,6 +633,14 @@ def begin_step(self, goal: str, step_index: int | None = None) -> str:
"""
# Clear previous step state
self._assertions_this_step = []
self._step_pre_snapshot = None
self._step_pre_url = None
self._step_goal = goal
self._last_action = None
self._last_action_error = None
self._last_action_outcome = None
self._last_action_duration_ms = None
self._last_action_success = None

# Update step index
if step_index is not None:
Expand Down
1 change: 1 addition & 0 deletions sentience/integrations/langchain/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ async def _trace(self, tool_name: str, exec_coro, exec_meta: dict[str, Any]):
pre_url=pre_url or "",
post_url=post_url or "",
snapshot_digest=None,
post_snapshot_digest=None,
llm_data={},
exec_data=exec_data,
verify_data=verify_data,
Expand Down
1 change: 1 addition & 0 deletions sentience/integrations/pydanticai/toolset.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ async def _trace_tool_call(ctx: Any, tool_name: str, exec_coro, exec_meta: dict[
pre_url=pre_url or "",
post_url=post_url or "",
snapshot_digest=None,
post_snapshot_digest=None,
llm_data={},
exec_data=exec_data,
verify_data=verify_data,
Expand Down
9 changes: 9 additions & 0 deletions sentience/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@ class GridInfo(BaseModel):
)
is_dominant: bool = False # Whether this grid is the dominant group (main content area)

# Z-index and modal detection fields (from gateway/sentience-core)
z_index: int = 0 # Z-index of this grid (max among elements in this grid)
z_index_max: int = 0 # Global max z-index across ALL grids (for comparison)
blocks_interaction: bool = False # Whether this grid blocks interaction with content behind it
viewport_coverage: float = 0.0 # Ratio of grid area to viewport area (0.0-1.0)


class Snapshot(BaseModel):
"""Snapshot response from extension"""
Expand All @@ -161,6 +167,9 @@ class Snapshot(BaseModel):
dominant_group_key: str | None = None # The most common group_key (main content group)
# Phase 2: Runtime stability/debug info (confidence/reasons/metrics)
diagnostics: SnapshotDiagnostics | None = None
# Modal detection fields (from gateway)
modal_detected: bool | None = None # True if a modal/overlay grid was detected
modal_grids: list[GridInfo] | None = None # Array of GridInfo for detected modal grids

def save(self, filepath: str) -> None:
"""Save snapshot as JSON file"""
Expand Down
56 changes: 40 additions & 16 deletions sentience/runtime_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,26 +86,50 @@ async def run_step(
step: RuntimeStep,
) -> bool:
self.runtime.begin_step(step.goal)
emitted = False
ok = False
try:
snap = await self._snapshot_with_ramp(step=step)

snap = await self._snapshot_with_ramp(step=step)

if await self._should_short_circuit_to_vision(step=step, snap=snap):
ok = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
return ok
if await self._should_short_circuit_to_vision(step=step, snap=snap):
ok = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
return ok

# 1) Structured executor attempt.
action = self._propose_structured_action(task_goal=task_goal, step=step, snap=snap)
await self._execute_action(action=action, snap=snap)
ok = await self._apply_verifications(step=step)
if ok:
return True
# 1) Structured executor attempt.
action = self._propose_structured_action(task_goal=task_goal, step=step, snap=snap)
await self._execute_action(action=action, snap=snap)
ok = await self._apply_verifications(step=step)
if ok:
return True

# 2) Optional vision executor fallback (bounded).
if step.vision_executor_enabled and step.max_vision_executor_attempts > 0:
ok2 = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
return ok2
# 2) Optional vision executor fallback (bounded).
if step.vision_executor_enabled and step.max_vision_executor_attempts > 0:
ok = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
return ok

return False
return False
except Exception as exc:
try:
await self.runtime.emit_step_end(
success=False,
error=str(exc),
outcome="exception",
verify_passed=False,
)
emitted = True
except Exception:
pass
raise
finally:
if not emitted:
try:
await self.runtime.emit_step_end(
success=ok,
outcome=("ok" if ok else "verification_failed"),
verify_passed=ok,
)
except Exception:
pass

async def _snapshot_with_ramp(self, *, step: RuntimeStep) -> Snapshot:
limit = step.snapshot_limit_base
Expand Down
Loading
Loading