From 69ad24a0a6842207c7284612d4ed4d0cf658894f Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Mon, 19 Jan 2026 19:29:48 -0800 Subject: [PATCH 1/5] fix AgentRuntime step_id from UUID to step-N --- sentience/agent_runtime.py | 11 +++++------ sentience/trace_event_builder.py | 11 ++++++++++- tests/test_agent_runtime.py | 23 +++++++++++++++++++++-- tests/test_trace_event_builder.py | 22 ++++++++++++++++++++++ 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index 6dad018..01ba2ff 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -66,7 +66,6 @@ import asyncio import difflib import time -import uuid from dataclasses import dataclass from typing import TYPE_CHECKING, Any @@ -504,20 +503,20 @@ def begin_step(self, goal: str, step_index: int | None = None) -> str: step_index: Optional explicit step index (otherwise auto-increments) Returns: - Generated step_id + Generated step_id in format 'step-N' where N is the step index """ # Clear previous step state self._assertions_this_step = [] - # Generate new step_id - self.step_id = str(uuid.uuid4()) - # Update step index if step_index is not None: self.step_index = step_index else: self.step_index += 1 + # Generate step_id in 'step-N' format for Studio compatibility + self.step_id = f"step-{self.step_index}" + return self.step_id def assert_( @@ -583,7 +582,7 @@ def assert_done( True if task is complete (assertion passed), False otherwise """ # Convenience wrapper for assert_ with required=True - ok = self.assertTrue(predicate, label=label, required=True) + ok = self.assert_(predicate, label=label, required=True) if ok: self._task_done = True self._task_done_label = label diff --git a/sentience/trace_event_builder.py b/sentience/trace_event_builder.py index 44efa57..8b5b911 100644 --- a/sentience/trace_event_builder.py +++ b/sentience/trace_event_builder.py @@ -23,6 +23,7 @@ class TraceEventBuilder: def build_snapshot_event( snapshot: Snapshot, include_all_elements: bool = True, + step_index: int | None = None, ) -> dict[str, Any]: """ Build snapshot_taken trace event data. @@ -31,6 +32,8 @@ def build_snapshot_event( snapshot: Snapshot to build event from include_all_elements: If True, include all elements (for DOM tree display). If False, use filtered elements only. + step_index: Optional step index (0-based) for Studio compatibility. + Required when step_id is not in 'step-N' format (e.g., UUIDs). Returns: Dictionary with snapshot event data @@ -64,13 +67,19 @@ def build_snapshot_event( el_dict["importance_score"] = importance_score elements_data.append(el_dict) - return { + result = { "url": snapshot.url, "element_count": len(snapshot.elements), "timestamp": snapshot.timestamp, "elements": elements_data, # Full element data for DOM tree } + # Include step_index if provided (required for UUID step_ids) + if step_index is not None: + result["step_index"] = step_index + + return result + @staticmethod def build_step_end_event( step_id: str, diff --git a/tests/test_agent_runtime.py b/tests/test_agent_runtime.py index 4cfc2f4..c7f1b06 100644 --- a/tests/test_agent_runtime.py +++ b/tests/test_agent_runtime.py @@ -152,7 +152,7 @@ class TestAgentRuntimeBeginStep: """Tests for begin_step method.""" def test_begin_step_generates_step_id(self) -> None: - """Test begin_step generates a UUID step_id.""" + """Test begin_step generates a step_id in 'step-N' format.""" backend = MockBackend() tracer = MockTracer() runtime = AgentRuntime(backend=backend, tracer=tracer) @@ -160,7 +160,26 @@ def test_begin_step_generates_step_id(self) -> None: step_id = runtime.begin_step(goal="Test step") assert step_id is not None - assert len(step_id) == 36 # UUID length with dashes + assert step_id == "step-1" # First step should be step-1 + + def test_begin_step_id_matches_index(self) -> None: + """Test step_id format matches step_index for Studio compatibility.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + + step_id_1 = runtime.begin_step(goal="Step 1") + assert step_id_1 == "step-1" + assert runtime.step_index == 1 + + step_id_2 = runtime.begin_step(goal="Step 2") + assert step_id_2 == "step-2" + assert runtime.step_index == 2 + + # With explicit index + step_id_10 = runtime.begin_step(goal="Step 10", step_index=10) + assert step_id_10 == "step-10" + assert runtime.step_index == 10 def test_begin_step_increments_index(self) -> None: """Test begin_step auto-increments step_index.""" diff --git a/tests/test_trace_event_builder.py b/tests/test_trace_event_builder.py index b9b637c..e8923b8 100644 --- a/tests/test_trace_event_builder.py +++ b/tests/test_trace_event_builder.py @@ -320,3 +320,25 @@ def test_build_step_end_event_with_none_verify_data(): # Verify should be empty dict when verify_data is None assert result["verify"] == {} + + +def test_build_snapshot_event_with_step_index(): + """Test that build_snapshot_event includes step_index when provided. + + This is required for AgentRuntime which uses UUID step_ids that can't be + parsed by Studio's trace-parser to extract step_index. + """ + elements = [create_element(1, text="Test element")] + snapshot = create_snapshot(elements) + + # Without step_index + result_without = TraceEventBuilder.build_snapshot_event(snapshot) + assert "step_index" not in result_without + + # With step_index=0 + result_with_zero = TraceEventBuilder.build_snapshot_event(snapshot, step_index=0) + assert result_with_zero["step_index"] == 0 + + # With step_index=5 + result_with_five = TraceEventBuilder.build_snapshot_event(snapshot, step_index=5) + assert result_with_five["step_index"] == 5 From bb43056b8e30b7fc4c10cc5d169e0eb04cef6b52 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Mon, 19 Jan 2026 19:31:11 -0800 Subject: [PATCH 2/5] updated trace schema --- sentience/schemas/trace_v1.json | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sentience/schemas/trace_v1.json b/sentience/schemas/trace_v1.json index 392dda1..5c5aab4 100644 --- a/sentience/schemas/trace_v1.json +++ b/sentience/schemas/trace_v1.json @@ -37,8 +37,12 @@ }, "step_id": { "type": ["string", "null"], - "pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", - "description": "UUID for the step (present for step-scoped events)" + "description": "Step identifier in 'step-N' format where N is the step index (present for step-scoped events)" + }, + "step_index": { + "type": ["integer", "null"], + "minimum": 0, + "description": "Step index (0-based), present for step-scoped events" }, "data": { "type": "object", @@ -67,6 +71,7 @@ "description": "snapshot or snapshot_taken data", "properties": { "step_id": {"type": ["string", "null"]}, + "step_index": {"type": ["integer", "null"], "minimum": 0, "description": "Step index for Studio compatibility"}, "snapshot_id": {"type": ["string", "null"]}, "snapshot_digest": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"}, "snapshot_digest_loose": {"type": "string", "pattern": "^sha256:[0-9a-f]{64}$"}, From a3c31713fdee2fe3b1df8ca16e1b6e3d65d84ebf Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Mon, 19 Jan 2026 19:48:23 -0800 Subject: [PATCH 3/5] fix tests --- tests/test_screenshot_storage.py | 51 +++++++++++++------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/tests/test_screenshot_storage.py b/tests/test_screenshot_storage.py index a95a63d..a5fa129 100644 --- a/tests/test_screenshot_storage.py +++ b/tests/test_screenshot_storage.py @@ -42,13 +42,10 @@ def test_extract_screenshots_from_trace(self): } ) - # Close to write file - sink.close(blocking=False) - - # Wait a bit for file to be written - import time - - time.sleep(0.1) + # Finalize trace file synchronously (closes file handle properly) + # Using _finalize_trace_file_for_upload instead of close(blocking=False) + # to avoid Windows file locking issues in tests + sink._finalize_trace_file_for_upload() # Extract screenshots screenshots = sink._extract_screenshots_from_trace() @@ -59,7 +56,7 @@ def test_extract_screenshots_from_trace(self): assert screenshots[1]["format"] == "png" assert screenshots[1]["step_id"] == "step-1" - # Cleanup + # Cleanup - file handle is already closed cache_dir = Path.home() / ".sentience" / "traces" / "pending" trace_path = cache_dir / f"{run_id}.jsonl" if trace_path.exists(): @@ -93,15 +90,15 @@ def test_extract_screenshots_handles_multiple(self): } ) - sink.close(blocking=False) - import time - - time.sleep(0.1) + # Finalize trace file synchronously (closes file handle properly) + # Using _finalize_trace_file_for_upload instead of close(blocking=False) + # to avoid Windows file locking issues in tests + sink._finalize_trace_file_for_upload() screenshots = sink._extract_screenshots_from_trace() assert len(screenshots) == 3 - # Cleanup + # Cleanup - file handle is already closed cache_dir = Path.home() / ".sentience" / "traces" / "pending" trace_path = cache_dir / f"{run_id}.jsonl" if trace_path.exists(): @@ -130,15 +127,15 @@ def test_extract_screenshots_skips_events_without_screenshots(self): } ) - sink.close(blocking=False) - import time - - time.sleep(0.1) + # Finalize trace file synchronously (closes file handle properly) + # Using _finalize_trace_file_for_upload instead of close(blocking=False) + # to avoid Windows file locking issues in tests + sink._finalize_trace_file_for_upload() screenshots = sink._extract_screenshots_from_trace() assert len(screenshots) == 0 - # Cleanup + # Cleanup - file handle is already closed cache_dir = Path.home() / ".sentience" / "traces" / "pending" trace_path = cache_dir / f"{run_id}.jsonl" if trace_path.exists(): @@ -174,10 +171,8 @@ def test_create_cleaned_trace_removes_screenshot_fields(self): } ) - sink.close(blocking=False) - import time - - time.sleep(0.1) + # Finalize trace file synchronously to avoid Windows file locking issues + sink._finalize_trace_file_for_upload() # Create cleaned trace cache_dir = Path.home() / ".sentience" / "traces" / "pending" @@ -223,10 +218,8 @@ def test_create_cleaned_trace_preserves_other_events(self): } ) - sink.close(blocking=False) - import time - - time.sleep(0.1) + # Finalize trace file synchronously to avoid Windows file locking issues + sink._finalize_trace_file_for_upload() # Create cleaned trace cache_dir = Path.home() / ".sentience" / "traces" / "pending" @@ -436,10 +429,8 @@ def test_upload_removes_screenshot_base64_from_trace(self): } ) - sink.close(blocking=False) - import time - - time.sleep(0.1) + # Finalize trace file synchronously to avoid Windows file locking issues + sink._finalize_trace_file_for_upload() # Mock gateway and upload responses mock_upload_urls = { From a1dcd422ce83eef2b79700d7e9c890ce3c47ad6a Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Mon, 19 Jan 2026 19:48:37 -0800 Subject: [PATCH 4/5] fix tests --- sentience/agent_runtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index 01ba2ff..bac9837 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -582,7 +582,7 @@ def assert_done( True if task is complete (assertion passed), False otherwise """ # Convenience wrapper for assert_ with required=True - ok = self.assert_(predicate, label=label, required=True) + ok = self.assertTrue(predicate, label=label, required=True) if ok: self._task_done = True self._task_done_label = label From 5101e39427474f5992eaea606afc0837ffaacd45 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Mon, 19 Jan 2026 19:55:59 -0800 Subject: [PATCH 5/5] use self.assert_ --- sentience/agent_runtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index bac9837..01ba2ff 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -582,7 +582,7 @@ def assert_done( True if task is complete (assertion passed), False otherwise """ # Convenience wrapper for assert_ with required=True - ok = self.assertTrue(predicate, label=label, required=True) + ok = self.assert_(predicate, label=label, required=True) if ok: self._task_done = True self._task_done_label = label