Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ await runtime.enable_failure_artifacts(
await runtime.record_action("CLICK")
```

**Video clip generation (optional):** To generate MP4 video clips from captured frames, install [ffmpeg](https://ffmpeg.org/) (version 4.0 or later; version 5.1+ recommended for best compatibility). If ffmpeg is not installed, frames are still captured but no video clip is generated.

### Redaction callback (Phase 3)

Provide a user-defined callback to redact snapshots and decide whether to persist frames. The SDK does not implement image/video redaction.
Expand Down
154 changes: 154 additions & 0 deletions sentience/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,13 @@ def act( # noqa: C901
pre_url=pre_url,
)

# Track data collected during step execution for step_end emission on failure
_step_snap_with_diff: Snapshot | None = None
_step_pre_url: str | None = None
_step_llm_response: LLMResponse | None = None
_step_result: AgentActionResult | None = None
_step_duration_ms: int = 0

for attempt in range(max_retries + 1):
try:
# 1. OBSERVE: Get refined semantic snapshot
Expand Down Expand Up @@ -254,6 +261,10 @@ def act( # noqa: C901
error=snap.error,
)

# Track for step_end emission on failure
_step_snap_with_diff = snap_with_diff
_step_pre_url = snap.url

# Update previous snapshot for next comparison
self._previous_snapshot = snap

Expand Down Expand Up @@ -311,6 +322,9 @@ def act( # noqa: C901
# 3. THINK: Query LLM for next action
llm_response = self.llm_handler.query_llm(context, goal)

# Track for step_end emission on failure
_step_llm_response = llm_response

# Emit LLM query trace event if tracer is enabled
if self.tracer:
_safe_tracer_call(
Expand Down Expand Up @@ -358,6 +372,10 @@ def act( # noqa: C901
cursor=result_dict.get("cursor"),
)

# Track for step_end emission on failure
_step_result = result
_step_duration_ms = duration_ms

# Emit action execution trace event if tracer is enabled
if self.tracer:
post_url = self.browser.page.url if self.browser.page else None
Expand Down Expand Up @@ -539,6 +557,65 @@ def act( # noqa: C901
time.sleep(1.0) # Brief delay before retry
continue
else:
# Emit step_end with whatever data we collected before failure
# This ensures diff_status and other fields are preserved in traces
if self.tracer and _step_snap_with_diff is not None:
post_url = self.browser.page.url if self.browser.page else None
snapshot_digest = f"sha256:{self._compute_hash(f'{_step_pre_url}{_step_snap_with_diff.timestamp}')}"

# Build pre_elements from snap_with_diff (includes diff_status)
snapshot_event_data = TraceEventBuilder.build_snapshot_event(
_step_snap_with_diff
)
pre_elements = snapshot_event_data.get("elements", [])

# Build LLM data if available
llm_data = None
if _step_llm_response:
llm_response_text = _step_llm_response.content
llm_response_hash = f"sha256:{self._compute_hash(llm_response_text)}"
llm_data = {
"response_text": llm_response_text,
"response_hash": llm_response_hash,
"usage": {
"prompt_tokens": _step_llm_response.prompt_tokens or 0,
"completion_tokens": _step_llm_response.completion_tokens or 0,
"total_tokens": _step_llm_response.total_tokens or 0,
},
}

# Build exec data (failure state)
exec_data = {
"success": False,
"action": _step_result.action if _step_result else "error",
"outcome": str(e),
"duration_ms": _step_duration_ms,
}

# Build step_end event for failed step
step_end_data = TraceEventBuilder.build_step_end_event(
step_id=step_id,
step_index=self._step_count,
goal=goal,
attempt=attempt,
pre_url=_step_pre_url,
post_url=post_url,
snapshot_digest=snapshot_digest,
llm_data=llm_data,
exec_data=exec_data,
verify_data=None,
pre_elements=pre_elements,
)

_safe_tracer_call(
self.tracer,
"emit",
self.verbose,
"step_end",
step_end_data,
step_id=step_id,
)

# Create error result
error_result = AgentActionResult(
success=False,
Expand Down Expand Up @@ -771,6 +848,13 @@ async def act( # noqa: C901
pre_url=pre_url,
)

# Track data collected during step execution for step_end emission on failure
_step_snap_with_diff: Snapshot | None = None
_step_pre_url: str | None = None
_step_llm_response: LLMResponse | None = None
_step_result: AgentActionResult | None = None
_step_duration_ms: int = 0

for attempt in range(max_retries + 1):
try:
# 1. OBSERVE: Get refined semantic snapshot
Expand Down Expand Up @@ -823,6 +907,10 @@ async def act( # noqa: C901
error=snap.error,
)

# Track for step_end emission on failure
_step_snap_with_diff = snap_with_diff
_step_pre_url = snap.url

# Update previous snapshot for next comparison
self._previous_snapshot = snap

Expand Down Expand Up @@ -880,6 +968,9 @@ async def act( # noqa: C901
# 3. THINK: Query LLM for next action
llm_response = self.llm_handler.query_llm(context, goal)

# Track for step_end emission on failure
_step_llm_response = llm_response

# Emit LLM query trace event if tracer is enabled
if self.tracer:
_safe_tracer_call(
Expand Down Expand Up @@ -926,6 +1017,10 @@ async def act( # noqa: C901
message=result_dict.get("message"),
)

# Track for step_end emission on failure
_step_result = result
_step_duration_ms = duration_ms

# Emit action execution trace event if tracer is enabled
if self.tracer:
post_url = self.browser.page.url if self.browser.page else None
Expand Down Expand Up @@ -1104,6 +1199,65 @@ async def act( # noqa: C901
await asyncio.sleep(1.0) # Brief delay before retry
continue
else:
# Emit step_end with whatever data we collected before failure
# This ensures diff_status and other fields are preserved in traces
if self.tracer and _step_snap_with_diff is not None:
post_url = self.browser.page.url if self.browser.page else None
snapshot_digest = f"sha256:{self._compute_hash(f'{_step_pre_url}{_step_snap_with_diff.timestamp}')}"

# Build pre_elements from snap_with_diff (includes diff_status)
snapshot_event_data = TraceEventBuilder.build_snapshot_event(
_step_snap_with_diff
)
pre_elements = snapshot_event_data.get("elements", [])

# Build LLM data if available
llm_data = None
if _step_llm_response:
llm_response_text = _step_llm_response.content
llm_response_hash = f"sha256:{self._compute_hash(llm_response_text)}"
llm_data = {
"response_text": llm_response_text,
"response_hash": llm_response_hash,
"usage": {
"prompt_tokens": _step_llm_response.prompt_tokens or 0,
"completion_tokens": _step_llm_response.completion_tokens or 0,
"total_tokens": _step_llm_response.total_tokens or 0,
},
}

# Build exec data (failure state)
exec_data = {
"success": False,
"action": _step_result.action if _step_result else "error",
"outcome": str(e),
"duration_ms": _step_duration_ms,
}

# Build step_end event for failed step
step_end_data = TraceEventBuilder.build_step_end_event(
step_id=step_id,
step_index=self._step_count,
goal=goal,
attempt=attempt,
pre_url=_step_pre_url,
post_url=post_url,
snapshot_digest=snapshot_digest,
llm_data=llm_data,
exec_data=exec_data,
verify_data=None,
pre_elements=pre_elements,
)

_safe_tracer_call(
self.tracer,
"emit",
self.verbose,
"step_end",
step_end_data,
step_id=step_id,
)

# Create error result
error_result = AgentActionResult(
success=False,
Expand Down
2 changes: 1 addition & 1 deletion sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,7 +583,7 @@ def assert_done(
True if task is complete (assertion passed), False otherwise
"""
# Convenience wrapper for assert_ with required=True
ok = self.assert_(predicate, label=label, required=True)
ok = self.assert_(predicate, label=label, required=True)
if ok:
self._task_done = True
self._task_done_label = label
Expand Down
5 changes: 4 additions & 1 deletion sentience/backends/snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,10 @@ async def _snapshot_via_api(
# Step 1: Get raw data from local extension (always happens locally)
raw_options: dict[str, Any] = {}
if options.screenshot is not False:
raw_options["screenshot"] = options.screenshot
if hasattr(options.screenshot, "model_dump"):
raw_options["screenshot"] = options.screenshot.model_dump()
else:
raw_options["screenshot"] = options.screenshot

# Call extension to get raw elements
raw_result = await _eval_with_navigation_retry(
Expand Down
Loading
Loading