Skip to content

Commit eb95eaf

Browse files
author
SentienceDEV
committed
fix trace upload issue in exception; ffmpeg fix
1 parent 4377276 commit eb95eaf

File tree

3 files changed

+217
-11
lines changed

3 files changed

+217
-11
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ await runtime.enable_failure_artifacts(
129129
await runtime.record_action("CLICK")
130130
```
131131

132+
**Video clip generation (optional):** To generate MP4 video clips from captured frames, install [ffmpeg](https://ffmpeg.org/) (version 4.0 or later; version 5.1+ recommended for best compatibility). If ffmpeg is not installed, frames are still captured but no video clip is generated.
133+
132134
### Redaction callback (Phase 3)
133135

134136
Provide a user-defined callback to redact snapshots and decide whether to persist frames. The SDK does not implement image/video redaction.

sentience/agent.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,13 @@ def act( # noqa: C901
205205
pre_url=pre_url,
206206
)
207207

208+
# Track data collected during step execution for step_end emission on failure
209+
_step_snap_with_diff: Optional[Snapshot] = None
210+
_step_pre_url: Optional[str] = None
211+
_step_llm_response: Optional[LLMResponse] = None
212+
_step_result: Optional[AgentActionResult] = None
213+
_step_duration_ms: int = 0
214+
208215
for attempt in range(max_retries + 1):
209216
try:
210217
# 1. OBSERVE: Get refined semantic snapshot
@@ -254,6 +261,10 @@ def act( # noqa: C901
254261
error=snap.error,
255262
)
256263

264+
# Track for step_end emission on failure
265+
_step_snap_with_diff = snap_with_diff
266+
_step_pre_url = snap.url
267+
257268
# Update previous snapshot for next comparison
258269
self._previous_snapshot = snap
259270

@@ -311,6 +322,9 @@ def act( # noqa: C901
311322
# 3. THINK: Query LLM for next action
312323
llm_response = self.llm_handler.query_llm(context, goal)
313324

325+
# Track for step_end emission on failure
326+
_step_llm_response = llm_response
327+
314328
# Emit LLM query trace event if tracer is enabled
315329
if self.tracer:
316330
_safe_tracer_call(
@@ -358,6 +372,10 @@ def act( # noqa: C901
358372
cursor=result_dict.get("cursor"),
359373
)
360374

375+
# Track for step_end emission on failure
376+
_step_result = result
377+
_step_duration_ms = duration_ms
378+
361379
# Emit action execution trace event if tracer is enabled
362380
if self.tracer:
363381
post_url = self.browser.page.url if self.browser.page else None
@@ -539,6 +557,63 @@ def act( # noqa: C901
539557
time.sleep(1.0) # Brief delay before retry
540558
continue
541559
else:
560+
# Emit step_end with whatever data we collected before failure
561+
# This ensures diff_status and other fields are preserved in traces
562+
if self.tracer and _step_snap_with_diff is not None:
563+
post_url = self.browser.page.url if self.browser.page else None
564+
snapshot_digest = f"sha256:{self._compute_hash(f'{_step_pre_url}{_step_snap_with_diff.timestamp}')}"
565+
566+
# Build pre_elements from snap_with_diff (includes diff_status)
567+
snapshot_event_data = TraceEventBuilder.build_snapshot_event(_step_snap_with_diff)
568+
pre_elements = snapshot_event_data.get("elements", [])
569+
570+
# Build LLM data if available
571+
llm_data = None
572+
if _step_llm_response:
573+
llm_response_text = _step_llm_response.content
574+
llm_response_hash = f"sha256:{self._compute_hash(llm_response_text)}"
575+
llm_data = {
576+
"response_text": llm_response_text,
577+
"response_hash": llm_response_hash,
578+
"usage": {
579+
"prompt_tokens": _step_llm_response.prompt_tokens or 0,
580+
"completion_tokens": _step_llm_response.completion_tokens or 0,
581+
"total_tokens": _step_llm_response.total_tokens or 0,
582+
},
583+
}
584+
585+
# Build exec data (failure state)
586+
exec_data = {
587+
"success": False,
588+
"action": _step_result.action if _step_result else "error",
589+
"outcome": str(e),
590+
"duration_ms": _step_duration_ms,
591+
}
592+
593+
# Build step_end event for failed step
594+
step_end_data = TraceEventBuilder.build_step_end_event(
595+
step_id=step_id,
596+
step_index=self._step_count,
597+
goal=goal,
598+
attempt=attempt,
599+
pre_url=_step_pre_url,
600+
post_url=post_url,
601+
snapshot_digest=snapshot_digest,
602+
llm_data=llm_data,
603+
exec_data=exec_data,
604+
verify_data=None,
605+
pre_elements=pre_elements,
606+
)
607+
608+
_safe_tracer_call(
609+
self.tracer,
610+
"emit",
611+
self.verbose,
612+
"step_end",
613+
step_end_data,
614+
step_id=step_id,
615+
)
616+
542617
# Create error result
543618
error_result = AgentActionResult(
544619
success=False,
@@ -771,6 +846,13 @@ async def act( # noqa: C901
771846
pre_url=pre_url,
772847
)
773848

849+
# Track data collected during step execution for step_end emission on failure
850+
_step_snap_with_diff: Optional[Snapshot] = None
851+
_step_pre_url: Optional[str] = None
852+
_step_llm_response: Optional[LLMResponse] = None
853+
_step_result: Optional[AgentActionResult] = None
854+
_step_duration_ms: int = 0
855+
774856
for attempt in range(max_retries + 1):
775857
try:
776858
# 1. OBSERVE: Get refined semantic snapshot
@@ -823,6 +905,10 @@ async def act( # noqa: C901
823905
error=snap.error,
824906
)
825907

908+
# Track for step_end emission on failure
909+
_step_snap_with_diff = snap_with_diff
910+
_step_pre_url = snap.url
911+
826912
# Update previous snapshot for next comparison
827913
self._previous_snapshot = snap
828914

@@ -880,6 +966,9 @@ async def act( # noqa: C901
880966
# 3. THINK: Query LLM for next action
881967
llm_response = self.llm_handler.query_llm(context, goal)
882968

969+
# Track for step_end emission on failure
970+
_step_llm_response = llm_response
971+
883972
# Emit LLM query trace event if tracer is enabled
884973
if self.tracer:
885974
_safe_tracer_call(
@@ -926,6 +1015,10 @@ async def act( # noqa: C901
9261015
message=result_dict.get("message"),
9271016
)
9281017

1018+
# Track for step_end emission on failure
1019+
_step_result = result
1020+
_step_duration_ms = duration_ms
1021+
9291022
# Emit action execution trace event if tracer is enabled
9301023
if self.tracer:
9311024
post_url = self.browser.page.url if self.browser.page else None
@@ -1104,6 +1197,63 @@ async def act( # noqa: C901
11041197
await asyncio.sleep(1.0) # Brief delay before retry
11051198
continue
11061199
else:
1200+
# Emit step_end with whatever data we collected before failure
1201+
# This ensures diff_status and other fields are preserved in traces
1202+
if self.tracer and _step_snap_with_diff is not None:
1203+
post_url = self.browser.page.url if self.browser.page else None
1204+
snapshot_digest = f"sha256:{self._compute_hash(f'{_step_pre_url}{_step_snap_with_diff.timestamp}')}"
1205+
1206+
# Build pre_elements from snap_with_diff (includes diff_status)
1207+
snapshot_event_data = TraceEventBuilder.build_snapshot_event(_step_snap_with_diff)
1208+
pre_elements = snapshot_event_data.get("elements", [])
1209+
1210+
# Build LLM data if available
1211+
llm_data = None
1212+
if _step_llm_response:
1213+
llm_response_text = _step_llm_response.content
1214+
llm_response_hash = f"sha256:{self._compute_hash(llm_response_text)}"
1215+
llm_data = {
1216+
"response_text": llm_response_text,
1217+
"response_hash": llm_response_hash,
1218+
"usage": {
1219+
"prompt_tokens": _step_llm_response.prompt_tokens or 0,
1220+
"completion_tokens": _step_llm_response.completion_tokens or 0,
1221+
"total_tokens": _step_llm_response.total_tokens or 0,
1222+
},
1223+
}
1224+
1225+
# Build exec data (failure state)
1226+
exec_data = {
1227+
"success": False,
1228+
"action": _step_result.action if _step_result else "error",
1229+
"outcome": str(e),
1230+
"duration_ms": _step_duration_ms,
1231+
}
1232+
1233+
# Build step_end event for failed step
1234+
step_end_data = TraceEventBuilder.build_step_end_event(
1235+
step_id=step_id,
1236+
step_index=self._step_count,
1237+
goal=goal,
1238+
attempt=attempt,
1239+
pre_url=_step_pre_url,
1240+
post_url=post_url,
1241+
snapshot_digest=snapshot_digest,
1242+
llm_data=llm_data,
1243+
exec_data=exec_data,
1244+
verify_data=None,
1245+
pre_elements=pre_elements,
1246+
)
1247+
1248+
_safe_tracer_call(
1249+
self.tracer,
1250+
"emit",
1251+
self.verbose,
1252+
"step_end",
1253+
step_end_data,
1254+
step_id=step_id,
1255+
)
1256+
11071257
# Create error result
11081258
error_result = AgentActionResult(
11091259
success=False,

sentience/failure_artifacts.py

Lines changed: 65 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import gzip
44
import json
55
import logging
6+
import re
67
import shutil
78
import subprocess
89
import tempfile
@@ -104,6 +105,26 @@ def _is_ffmpeg_available() -> bool:
104105
return False
105106

106107

108+
def _get_ffmpeg_version() -> tuple[int, int] | None:
109+
"""Get ffmpeg major and minor version. Returns (major, minor) or None if unavailable."""
110+
try:
111+
result = subprocess.run(
112+
["ffmpeg", "-version"],
113+
capture_output=True,
114+
timeout=5,
115+
)
116+
if result.returncode != 0:
117+
return None
118+
output = result.stdout.decode("utf-8", errors="replace")
119+
# Parse version from "ffmpeg version X.Y.Z ..."
120+
match = re.search(r"ffmpeg version (\d+)\.(\d+)", output)
121+
if match:
122+
return (int(match.group(1)), int(match.group(2)))
123+
return None
124+
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
125+
return None
126+
127+
107128
def _generate_clip_from_frames(
108129
frames_dir: Path,
109130
output_path: Path,
@@ -154,10 +175,17 @@ def _generate_clip_from_frames(
154175
# -f concat: use concat demuxer
155176
# -safe 0: allow unsafe file paths
156177
# -i: input file list
157-
# -vsync vfr: variable frame rate
178+
# -fps_mode vfr or -vsync vfr: variable frame rate
179+
# (-fps_mode replaces deprecated -vsync in ffmpeg 5.1+)
158180
# -pix_fmt yuv420p: compatibility with most players
159181
# -c:v libx264: H.264 codec
160182
# -crf 23: quality (lower = better, 23 is default)
183+
184+
# Detect ffmpeg version to use correct vsync option
185+
# -fps_mode was introduced in ffmpeg 5.1, which also deprecated -vsync
186+
ffmpeg_version = _get_ffmpeg_version()
187+
use_fps_mode = ffmpeg_version is not None and ffmpeg_version >= (5, 1)
188+
161189
cmd = [
162190
"ffmpeg",
163191
"-y",
@@ -166,17 +194,40 @@ def _generate_clip_from_frames(
166194
"-safe",
167195
"0",
168196
"-i",
169-
str(list_file),
170-
"-vsync",
171-
"vfr",
172-
"-pix_fmt",
173-
"yuv420p",
174-
"-c:v",
175-
"libx264",
176-
"-crf",
177-
"23",
178-
str(output_path),
197+
"frames_list.txt", # Use relative path since cwd=frames_dir
179198
]
199+
# Add vsync option based on ffmpeg version
200+
if use_fps_mode:
201+
cmd.extend(["-fps_mode", "vfr"])
202+
else:
203+
cmd.extend(["-vsync", "vfr"])
204+
cmd.extend(
205+
[
206+
"-pix_fmt",
207+
"yuv420p",
208+
"-c:v",
209+
"libx264",
210+
"-crf",
211+
"23",
212+
str(output_path),
213+
]
214+
)
215+
216+
# Log the command for debugging
217+
logger.debug(f"Running ffmpeg command: {' '.join(cmd)}")
218+
logger.debug(f"Working directory: {frames_dir}")
219+
logger.debug(f"Frame files found: {len(frame_files)}")
220+
221+
# Verify files exist before running ffmpeg
222+
if not list_file.exists():
223+
logger.warning(f"frames_list.txt does not exist: {list_file}")
224+
return False
225+
226+
# Verify all frame files referenced in the list exist
227+
for frame_file in frame_files:
228+
if not frame_file.exists():
229+
logger.warning(f"Frame file does not exist: {frame_file}")
230+
return False
180231

181232
result = subprocess.run(
182233
cmd,
@@ -187,9 +238,12 @@ def _generate_clip_from_frames(
187238

188239
if result.returncode != 0:
189240
stderr = result.stderr.decode("utf-8", errors="replace")[:500]
241+
stdout = result.stdout.decode("utf-8", errors="replace")[:200]
190242
logger.warning(
191243
f"ffmpeg failed with return code {result.returncode}: {stderr}"
192244
)
245+
if stdout:
246+
logger.debug(f"ffmpeg stdout: {stdout}")
193247
# Fallback: use glob input (handles non-uniform filenames)
194248
fallback_cmd = [
195249
"ffmpeg",

0 commit comments

Comments
 (0)