Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
import asyncio
import difflib
import time
import uuid
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

Expand Down Expand Up @@ -504,20 +503,20 @@ def begin_step(self, goal: str, step_index: int | None = None) -> str:
step_index: Optional explicit step index (otherwise auto-increments)

Returns:
Generated step_id
Generated step_id in format 'step-N' where N is the step index
"""
# Clear previous step state
self._assertions_this_step = []

# Generate new step_id
self.step_id = str(uuid.uuid4())

# Update step index
if step_index is not None:
self.step_index = step_index
else:
self.step_index += 1

# Generate step_id in 'step-N' format for Studio compatibility
self.step_id = f"step-{self.step_index}"

return self.step_id

def assert_(
Expand Down Expand Up @@ -583,7 +582,7 @@ def assert_done(
True if task is complete (assertion passed), False otherwise
"""
# Convenience wrapper for assert_ with required=True
ok = self.assertTrue(predicate, label=label, required=True)
ok = self.assert_(predicate, label=label, required=True)
if ok:
self._task_done = True
self._task_done_label = label
Expand Down
9 changes: 7 additions & 2 deletions sentience/schemas/trace_v1.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,12 @@
},
"step_id": {
"type": ["string", "null"],
"pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
"description": "UUID for the step (present for step-scoped events)"
"description": "Step identifier in 'step-N' format where N is the step index (present for step-scoped events)"
},
"step_index": {
"type": ["integer", "null"],
"minimum": 0,
"description": "Step index (0-based), present for step-scoped events"
},
"data": {
"type": "object",
Expand Down Expand Up @@ -67,6 +71,7 @@
"description": "snapshot or snapshot_taken data",
"properties": {
"step_id": {"type": ["string", "null"]},
"step_index": {"type": ["integer", "null"], "minimum": 0, "description": "Step index for Studio compatibility"},
"snapshot_id": {"type": ["string", "null"]},
"snapshot_digest": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"},
"snapshot_digest_loose": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"},
Expand Down
11 changes: 10 additions & 1 deletion sentience/trace_event_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class TraceEventBuilder:
def build_snapshot_event(
snapshot: Snapshot,
include_all_elements: bool = True,
step_index: int | None = None,
) -> dict[str, Any]:
"""
Build snapshot_taken trace event data.
Expand All @@ -31,6 +32,8 @@ def build_snapshot_event(
snapshot: Snapshot to build event from
include_all_elements: If True, include all elements (for DOM tree display).
If False, use filtered elements only.
step_index: Optional step index (0-based) for Studio compatibility.
Required when step_id is not in 'step-N' format (e.g., UUIDs).

Returns:
Dictionary with snapshot event data
Expand Down Expand Up @@ -64,13 +67,19 @@ def build_snapshot_event(
el_dict["importance_score"] = importance_score
elements_data.append(el_dict)

return {
result = {
"url": snapshot.url,
"element_count": len(snapshot.elements),
"timestamp": snapshot.timestamp,
"elements": elements_data, # Full element data for DOM tree
}

# Include step_index if provided (required for UUID step_ids)
if step_index is not None:
result["step_index"] = step_index

return result

@staticmethod
def build_step_end_event(
step_id: str,
Expand Down
23 changes: 21 additions & 2 deletions tests/test_agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,15 +152,34 @@ class TestAgentRuntimeBeginStep:
"""Tests for begin_step method."""

def test_begin_step_generates_step_id(self) -> None:
"""Test begin_step generates a UUID step_id."""
"""Test begin_step generates a step_id in 'step-N' format."""
backend = MockBackend()
tracer = MockTracer()
runtime = AgentRuntime(backend=backend, tracer=tracer)

step_id = runtime.begin_step(goal="Test step")

assert step_id is not None
assert len(step_id) == 36 # UUID length with dashes
assert step_id == "step-1" # First step should be step-1

def test_begin_step_id_matches_index(self) -> None:
    """Check that begin_step returns 'step-N' where N equals runtime.step_index."""
    runtime = AgentRuntime(backend=MockBackend(), tracer=MockTracer())

    # Auto-incremented steps: consecutive calls yield step-1, then step-2.
    for expected_index in (1, 2):
        returned_id = runtime.begin_step(goal=f"Step {expected_index}")
        assert returned_id == f"step-{expected_index}"
        assert runtime.step_index == expected_index

    # An explicit step_index replaces the auto-increment and drives the id.
    explicit_id = runtime.begin_step(goal="Step 10", step_index=10)
    assert explicit_id == "step-10"
    assert runtime.step_index == 10

def test_begin_step_increments_index(self) -> None:
"""Test begin_step auto-increments step_index."""
Expand Down
51 changes: 21 additions & 30 deletions tests/test_screenshot_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,10 @@ def test_extract_screenshots_from_trace(self):
}
)

# Close to write file
sink.close(blocking=False)

# Wait a bit for file to be written
import time

time.sleep(0.1)
# Finalize trace file synchronously (closes file handle properly)
# Using _finalize_trace_file_for_upload instead of close(blocking=False)
# to avoid Windows file locking issues in tests
sink._finalize_trace_file_for_upload()

# Extract screenshots
screenshots = sink._extract_screenshots_from_trace()
Expand All @@ -59,7 +56,7 @@ def test_extract_screenshots_from_trace(self):
assert screenshots[1]["format"] == "png"
assert screenshots[1]["step_id"] == "step-1"

# Cleanup
# Cleanup - file handle is already closed
cache_dir = Path.home() / ".sentience" / "traces" / "pending"
trace_path = cache_dir / f"{run_id}.jsonl"
if trace_path.exists():
Expand Down Expand Up @@ -93,15 +90,15 @@ def test_extract_screenshots_handles_multiple(self):
}
)

sink.close(blocking=False)
import time

time.sleep(0.1)
# Finalize trace file synchronously (closes file handle properly)
# Using _finalize_trace_file_for_upload instead of close(blocking=False)
# to avoid Windows file locking issues in tests
sink._finalize_trace_file_for_upload()

screenshots = sink._extract_screenshots_from_trace()
assert len(screenshots) == 3

# Cleanup
# Cleanup - file handle is already closed
cache_dir = Path.home() / ".sentience" / "traces" / "pending"
trace_path = cache_dir / f"{run_id}.jsonl"
if trace_path.exists():
Expand Down Expand Up @@ -130,15 +127,15 @@ def test_extract_screenshots_skips_events_without_screenshots(self):
}
)

sink.close(blocking=False)
import time

time.sleep(0.1)
# Finalize trace file synchronously (closes file handle properly)
# Using _finalize_trace_file_for_upload instead of close(blocking=False)
# to avoid Windows file locking issues in tests
sink._finalize_trace_file_for_upload()

screenshots = sink._extract_screenshots_from_trace()
assert len(screenshots) == 0

# Cleanup
# Cleanup - file handle is already closed
cache_dir = Path.home() / ".sentience" / "traces" / "pending"
trace_path = cache_dir / f"{run_id}.jsonl"
if trace_path.exists():
Expand Down Expand Up @@ -174,10 +171,8 @@ def test_create_cleaned_trace_removes_screenshot_fields(self):
}
)

sink.close(blocking=False)
import time

time.sleep(0.1)
# Finalize trace file synchronously to avoid Windows file locking issues
sink._finalize_trace_file_for_upload()

# Create cleaned trace
cache_dir = Path.home() / ".sentience" / "traces" / "pending"
Expand Down Expand Up @@ -223,10 +218,8 @@ def test_create_cleaned_trace_preserves_other_events(self):
}
)

sink.close(blocking=False)
import time

time.sleep(0.1)
# Finalize trace file synchronously to avoid Windows file locking issues
sink._finalize_trace_file_for_upload()

# Create cleaned trace
cache_dir = Path.home() / ".sentience" / "traces" / "pending"
Expand Down Expand Up @@ -436,10 +429,8 @@ def test_upload_removes_screenshot_base64_from_trace(self):
}
)

sink.close(blocking=False)
import time

time.sleep(0.1)
# Finalize trace file synchronously to avoid Windows file locking issues
sink._finalize_trace_file_for_upload()

# Mock gateway and upload responses
mock_upload_urls = {
Expand Down
22 changes: 22 additions & 0 deletions tests/test_trace_event_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,3 +320,25 @@ def test_build_step_end_event_with_none_verify_data():

# Verify should be empty dict when verify_data is None
assert result["verify"] == {}


def test_build_snapshot_event_with_step_index():
    """Test that build_snapshot_event includes step_index only when provided.

    Passing step_index puts the step position directly into the snapshot
    event data, so a consumer does not have to derive it from step_id. This
    matters for callers whose step_ids are not in 'step-N' format
    (e.g. UUIDs).
    """
    elements = [create_element(1, text="Test element")]
    snapshot = create_snapshot(elements)

    # Omitted step_index: the key must be absent rather than present as None.
    result_without = TraceEventBuilder.build_snapshot_event(snapshot)
    assert "step_index" not in result_without

    # 0 is falsy but still a valid index, so it must be emitted just like 5.
    for step_index in (0, 5):
        result = TraceEventBuilder.build_snapshot_event(snapshot, step_index=step_index)
        assert result["step_index"] == step_index
Loading