Skip to content

Commit 3ce46f7

Browse files
authored
Merge pull request #180 from SentienceAPI/verification_payload
add verification/assertion results to trace
2 parents 59fc5bd + 3c5c904 commit 3ce46f7

File tree

9 files changed

+316
-16
lines changed

9 files changed

+316
-16
lines changed

sentience/agent.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,25 @@ def _compute_hash(self, text: str) -> str:
143143
"""Compute SHA256 hash of text."""
144144
return hashlib.sha256(text.encode("utf-8")).hexdigest()
145145

146+
def _best_effort_post_snapshot_digest(self, goal: str) -> str | None:
147+
"""
148+
Best-effort post-action snapshot digest for tracing.
149+
"""
150+
try:
151+
snap_opts = SnapshotOptions(
152+
limit=min(10, self.default_snapshot_limit),
153+
goal=f"{goal} (post)",
154+
)
155+
snap_opts.screenshot = False
156+
snap_opts.show_overlay = self.config.show_overlay if self.config else None
157+
post_snap = snapshot(self.browser, snap_opts)
158+
if post_snap.status != "success":
159+
return None
160+
digest_input = f"{post_snap.url}{post_snap.timestamp}"
161+
return f"sha256:{self._compute_hash(digest_input)}"
162+
except Exception:
163+
return None
164+
146165
def _get_element_bbox(self, element_id: int | None, snap: Snapshot) -> dict[str, float] | None:
147166
"""Get bounding box for an element from snapshot."""
148167
if element_id is None:
@@ -513,6 +532,10 @@ def act( # noqa: C901
513532
snapshot_event_data = TraceEventBuilder.build_snapshot_event(snap_with_diff)
514533
pre_elements = snapshot_event_data.get("elements", [])
515534

535+
post_snapshot_digest = (
536+
self._best_effort_post_snapshot_digest(goal) if self.tracer else None
537+
)
538+
516539
# Build complete step_end event
517540
step_end_data = TraceEventBuilder.build_step_end_event(
518541
step_id=step_id,
@@ -522,6 +545,7 @@ def act( # noqa: C901
522545
pre_url=pre_url,
523546
post_url=post_url,
524547
snapshot_digest=snapshot_digest,
548+
post_snapshot_digest=post_snapshot_digest,
525549
llm_data=llm_data,
526550
exec_data=exec_data,
527551
verify_data=verify_data,
@@ -601,6 +625,7 @@ def act( # noqa: C901
601625
pre_url=_step_pre_url,
602626
post_url=post_url,
603627
snapshot_digest=snapshot_digest,
628+
post_snapshot_digest=None,
604629
llm_data=llm_data,
605630
exec_data=exec_data,
606631
verify_data=None,
@@ -1155,6 +1180,10 @@ async def act( # noqa: C901
11551180
snapshot_event_data = TraceEventBuilder.build_snapshot_event(snap_with_diff)
11561181
pre_elements = snapshot_event_data.get("elements", [])
11571182

1183+
post_snapshot_digest = (
1184+
self._best_effort_post_snapshot_digest(goal) if self.tracer else None
1185+
)
1186+
11581187
# Build complete step_end event
11591188
step_end_data = TraceEventBuilder.build_step_end_event(
11601189
step_id=step_id,
@@ -1164,6 +1193,7 @@ async def act( # noqa: C901
11641193
pre_url=pre_url,
11651194
post_url=post_url,
11661195
snapshot_digest=snapshot_digest,
1196+
post_snapshot_digest=post_snapshot_digest,
11671197
llm_data=llm_data,
11681198
exec_data=exec_data,
11691199
verify_data=verify_data,
@@ -1243,6 +1273,7 @@ async def act( # noqa: C901
12431273
pre_url=_step_pre_url,
12441274
post_url=post_url,
12451275
snapshot_digest=snapshot_digest,
1276+
post_snapshot_digest=None,
12461277
llm_data=llm_data,
12471278
exec_data=exec_data,
12481279
verify_data=None,

sentience/agent_runtime.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,15 @@
6565

6666
import asyncio
6767
import difflib
68+
import hashlib
6869
import time
6970
from dataclasses import dataclass
7071
from typing import TYPE_CHECKING, Any
7172

7273
from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
7374
from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions
7475
from .models import Snapshot, SnapshotOptions
76+
from .trace_event_builder import TraceEventBuilder
7577
from .verification import AssertContext, AssertOutcome, Predicate
7678

7779
if TYPE_CHECKING:
@@ -138,6 +140,8 @@ def __init__(
138140

139141
# Snapshot state
140142
self.last_snapshot: Snapshot | None = None
143+
self._step_pre_snapshot: Snapshot | None = None
144+
self._step_pre_url: str | None = None
141145

142146
# Failure artifacts (Phase 1)
143147
self._artifact_buffer: FailureArtifactBuffer | None = None
@@ -148,6 +152,12 @@ def __init__(
148152

149153
# Assertions accumulated during current step
150154
self._assertions_this_step: list[dict[str, Any]] = []
155+
self._step_goal: str | None = None
156+
self._last_action: str | None = None
157+
self._last_action_error: str | None = None
158+
self._last_action_outcome: str | None = None
159+
self._last_action_duration_ms: int | None = None
160+
self._last_action_success: bool | None = None
151161

152162
# Task completion tracking
153163
self._task_done: bool = False
@@ -250,6 +260,11 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
250260
# Check if using legacy browser (backward compat)
251261
if hasattr(self, "_legacy_browser") and hasattr(self, "_legacy_page"):
252262
self.last_snapshot = await self._legacy_browser.snapshot(self._legacy_page, **kwargs)
263+
if self.last_snapshot is not None:
264+
self._cached_url = self.last_snapshot.url
265+
if self._step_pre_snapshot is None:
266+
self._step_pre_snapshot = self.last_snapshot
267+
self._step_pre_url = self.last_snapshot.url
253268
return self.last_snapshot
254269

255270
# Use backend-agnostic snapshot
@@ -262,6 +277,11 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
262277
options = SnapshotOptions(**options_dict)
263278

264279
self.last_snapshot = await backend_snapshot(self.backend, options=options)
280+
if self.last_snapshot is not None:
281+
self._cached_url = self.last_snapshot.url
282+
if self._step_pre_snapshot is None:
283+
self._step_pre_snapshot = self.last_snapshot
284+
self._step_pre_url = self.last_snapshot.url
265285
if not skip_captcha_handling:
266286
await self._handle_captcha_if_needed(self.last_snapshot, source="gateway")
267287
return self.last_snapshot
@@ -414,6 +434,7 @@ async def record_action(
414434
"""
415435
Record an action in the artifact timeline and capture a frame if enabled.
416436
"""
437+
self._last_action = action
417438
if not self._artifact_buffer:
418439
return
419440
self._artifact_buffer.record_step(
@@ -425,6 +446,107 @@ async def record_action(
425446
if self._artifact_buffer.options.capture_on_action:
426447
await self._capture_artifact_frame()
427448

449+
def _compute_snapshot_digest(self, snap: Snapshot | None) -> str | None:
450+
if snap is None:
451+
return None
452+
try:
453+
return (
454+
"sha256:"
455+
+ hashlib.sha256(f"{snap.url}{snap.timestamp}".encode("utf-8")).hexdigest()
456+
)
457+
except Exception:
458+
return None
459+
460+
async def emit_step_end(
    self,
    *,
    action: str | None = None,
    success: bool | None = None,
    error: str | None = None,
    outcome: str | None = None,
    duration_ms: int | None = None,
    attempt: int = 0,
    verify_passed: bool | None = None,
    verify_signals: dict[str, Any] | None = None,
    post_url: str | None = None,
    post_snapshot_digest: str | None = None,
) -> dict[str, Any]:
    """
    Build a ``step_end`` payload for the current step and emit it.

    The payload is assembled by ``TraceEventBuilder.build_step_end_event``
    from the state recorded on this runtime plus the overrides given here,
    and is sent through ``self.tracer.emit("step_end", ...)``.

    Args:
        action: Action name; falls back to ``self._last_action``, then
            ``"unknown"``.
        success: Execution success; falls back to
            ``self._last_action_success``, then to the verification result.
        error: Error text; when truthy it is stored in the exec data and,
            unless already present, in the verification signals.
        outcome: Outcome label; falls back to ``self._last_action_outcome``,
            then ``""``.
        duration_ms: Duration to record; omitted from the payload when
            ``None``.
        attempt: Attempt number passed to the builder.
        verify_passed: Verification result; when ``None`` the result of
            ``self.required_assertions_passed()`` is used.
        verify_signals: Extra verification signals; ``url_changed`` is added
            if the caller did not supply it.
        post_url: URL after the action; when ``None`` it is read via
            ``self.get_url()``, falling back to the last snapshot URL or the
            cached URL if that raises.
        post_snapshot_digest: Digest of the post-action snapshot; when falsy
            it is computed from ``self.last_snapshot``.

    Returns:
        The ``step_end`` payload that was emitted.
    """
    goal = self._step_goal or ""

    # Pre-action state: prefer what was captured at the start of the step.
    start_snapshot = self._step_pre_snapshot or self.last_snapshot
    start_url = self._step_pre_url
    if not start_url:
        start_url = start_snapshot.url if start_snapshot else None
    if not start_url:
        start_url = self._cached_url
    if not start_url:
        start_url = ""

    if post_url is None:
        try:
            post_url = await self.get_url()
        except Exception:
            # Live URL lookup failed; use the most recent known URL instead.
            latest = self.last_snapshot
            post_url = (latest.url if latest else None) or self._cached_url
    # NOTE(review): the scraped diff lost its indentation, so it is unclear
    # whether this fallback belongs inside the ``if`` above - confirm against
    # the repository.
    post_url = post_url or start_url

    pre_digest = self._compute_snapshot_digest(start_snapshot)
    post_digest = post_snapshot_digest
    if not post_digest:
        post_digest = self._compute_snapshot_digest(self.last_snapshot)

    if start_url and post_url:
        url_changed = str(start_url) != str(post_url)
    else:
        url_changed = False

    step_assertions = self.get_assertions_for_step_end().get("assertions") or []

    # Copy so the caller's dict is never mutated.
    signals = dict(verify_signals or {})
    if "url_changed" not in signals:
        signals["url_changed"] = url_changed
    if error and "error" not in signals:
        signals["error"] = error

    if verify_passed is None:
        passed = self.required_assertions_passed()
    else:
        passed = bool(verify_passed)

    if success is not None:
        exec_success = bool(success)
    elif self._last_action_success is not None:
        exec_success = bool(self._last_action_success)
    else:
        exec_success = bool(passed)

    action_name = action or self._last_action or "unknown"
    outcome_label = outcome or self._last_action_outcome or ""
    exec_data: dict[str, Any] = {
        "success": exec_success,
        "action": action_name,
        "outcome": outcome_label,
    }
    if duration_ms is not None:
        exec_data["duration_ms"] = int(duration_ms)
    if error:
        exec_data["error"] = error

    verify_data = {"passed": bool(passed), "signals": signals}

    payload = TraceEventBuilder.build_step_end_event(
        step_id=self.step_id or "",
        step_index=int(self.step_index),
        goal=goal,
        attempt=int(attempt),
        pre_url=str(start_url or ""),
        post_url=str(post_url or ""),
        snapshot_digest=pre_digest,
        llm_data={},
        exec_data=exec_data,
        verify_data=verify_data,
        pre_elements=None,
        assertions=step_assertions,
        post_snapshot_digest=post_digest,
    )
    self.tracer.emit("step_end", payload, step_id=self.step_id)
    return payload
549+
428550
async def _capture_artifact_frame(self) -> None:
429551
if not self._artifact_buffer:
430552
return
@@ -511,6 +633,14 @@ def begin_step(self, goal: str, step_index: int | None = None) -> str:
511633
"""
512634
# Clear previous step state
513635
self._assertions_this_step = []
636+
self._step_pre_snapshot = None
637+
self._step_pre_url = None
638+
self._step_goal = goal
639+
self._last_action = None
640+
self._last_action_error = None
641+
self._last_action_outcome = None
642+
self._last_action_duration_ms = None
643+
self._last_action_success = None
514644

515645
# Update step index
516646
if step_index is not None:

sentience/integrations/langchain/core.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ async def _trace(self, tool_name: str, exec_coro, exec_meta: dict[str, Any]):
113113
pre_url=pre_url or "",
114114
post_url=post_url or "",
115115
snapshot_digest=None,
116+
post_snapshot_digest=None,
116117
llm_data={},
117118
exec_data=exec_data,
118119
verify_data=verify_data,

sentience/integrations/pydanticai/toolset.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ async def _trace_tool_call(ctx: Any, tool_name: str, exec_coro, exec_meta: dict[
126126
pre_url=pre_url or "",
127127
post_url=post_url or "",
128128
snapshot_digest=None,
129+
post_snapshot_digest=None,
129130
llm_data={},
130131
exec_data=exec_data,
131132
verify_data=verify_data,

sentience/models.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,12 @@ class GridInfo(BaseModel):
144144
)
145145
is_dominant: bool = False # Whether this grid is the dominant group (main content area)
146146

147+
# Z-index and modal detection fields (from gateway/sentience-core)
148+
z_index: int = 0 # Z-index of this grid (max among elements in this grid)
149+
z_index_max: int = 0 # Global max z-index across ALL grids (for comparison)
150+
blocks_interaction: bool = False # Whether this grid blocks interaction with content behind it
151+
viewport_coverage: float = 0.0 # Ratio of grid area to viewport area (0.0-1.0)
152+
147153

148154
class Snapshot(BaseModel):
149155
"""Snapshot response from extension"""
@@ -161,6 +167,9 @@ class Snapshot(BaseModel):
161167
dominant_group_key: str | None = None # The most common group_key (main content group)
162168
# Phase 2: Runtime stability/debug info (confidence/reasons/metrics)
163169
diagnostics: SnapshotDiagnostics | None = None
170+
# Modal detection fields (from gateway)
171+
modal_detected: bool | None = None # True if a modal/overlay grid was detected
172+
modal_grids: list[GridInfo] | None = None # Array of GridInfo for detected modal grids
164173

165174
def save(self, filepath: str) -> None:
166175
"""Save snapshot as JSON file"""

sentience/runtime_agent.py

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -86,26 +86,50 @@ async def run_step(
8686
step: RuntimeStep,
8787
) -> bool:
8888
self.runtime.begin_step(step.goal)
89+
emitted = False
90+
ok = False
91+
try:
92+
snap = await self._snapshot_with_ramp(step=step)
8993

90-
snap = await self._snapshot_with_ramp(step=step)
91-
92-
if await self._should_short_circuit_to_vision(step=step, snap=snap):
93-
ok = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
94-
return ok
94+
if await self._should_short_circuit_to_vision(step=step, snap=snap):
95+
ok = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
96+
return ok
9597

96-
# 1) Structured executor attempt.
97-
action = self._propose_structured_action(task_goal=task_goal, step=step, snap=snap)
98-
await self._execute_action(action=action, snap=snap)
99-
ok = await self._apply_verifications(step=step)
100-
if ok:
101-
return True
98+
# 1) Structured executor attempt.
99+
action = self._propose_structured_action(task_goal=task_goal, step=step, snap=snap)
100+
await self._execute_action(action=action, snap=snap)
101+
ok = await self._apply_verifications(step=step)
102+
if ok:
103+
return True
102104

103-
# 2) Optional vision executor fallback (bounded).
104-
if step.vision_executor_enabled and step.max_vision_executor_attempts > 0:
105-
ok2 = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
106-
return ok2
105+
# 2) Optional vision executor fallback (bounded).
106+
if step.vision_executor_enabled and step.max_vision_executor_attempts > 0:
107+
ok = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
108+
return ok
107109

108-
return False
110+
return False
111+
except Exception as exc:
112+
try:
113+
await self.runtime.emit_step_end(
114+
success=False,
115+
error=str(exc),
116+
outcome="exception",
117+
verify_passed=False,
118+
)
119+
emitted = True
120+
except Exception:
121+
pass
122+
raise
123+
finally:
124+
if not emitted:
125+
try:
126+
await self.runtime.emit_step_end(
127+
success=ok,
128+
outcome=("ok" if ok else "verification_failed"),
129+
verify_passed=ok,
130+
)
131+
except Exception:
132+
pass
109133

110134
async def _snapshot_with_ramp(self, *, step: RuntimeStep) -> Snapshot:
111135
limit = step.snapshot_limit_base

0 commit comments

Comments
 (0)