diff --git a/examples/agent_runtime_verification.py b/examples/agent_runtime_verification.py index dc93d26..537cd12 100644 --- a/examples/agent_runtime_verification.py +++ b/examples/agent_runtime_verification.py @@ -5,65 +5,63 @@ The AgentRuntime provides assertion predicates to verify browser state during execution. Key features: +- BrowserBackendV0 protocol: Framework-agnostic browser integration - Predicate helpers: url_matches, url_contains, exists, not_exists, element_count - Combinators: all_of, any_of for complex conditions - Task completion: assert_done() for goal verification - Trace integration: Assertions emitted to trace for Studio timeline Requirements: -- SENTIENCE_API_KEY (Pro or Enterprise tier) +- SENTIENCE_API_KEY (Pro or Enterprise tier) - optional, enables Gateway refinement Usage: python examples/agent_runtime_verification.py """ +import asyncio import os -from sentience import ( - AgentRuntime, - SentienceBrowser, - all_of, - exists, - not_exists, - url_contains, - url_matches, -) -from sentience.tracer_factory import create_tracer +from sentience import AsyncSentienceBrowser +from sentience.agent_runtime import AgentRuntime +from sentience.tracing import JsonlTraceSink, Tracer +from sentience.verification import all_of, exists, not_exists, url_contains, url_matches -def main(): - # Get API key from environment +async def main(): + # Get API key from environment (optional - enables Pro tier features) sentience_key = os.environ.get("SENTIENCE_API_KEY") - if not sentience_key: - print("Error: SENTIENCE_API_KEY not set") - return - print("Starting Agent Runtime Verification Demo\n") # 1. Create tracer for verification event emission run_id = "verification-demo" - tracer = create_tracer(api_key=sentience_key, run_id=run_id, upload_trace=False) + sink = JsonlTraceSink(f"traces/{run_id}.jsonl") + tracer = Tracer(run_id=run_id, sink=sink) print(f"Run ID: {run_id}\n") - # 2. 
Create browser - browser = SentienceBrowser(api_key=sentience_key, headless=False) - browser.start() - - try: - # 3. Create AgentRuntime with browser, page, and tracer - runtime = AgentRuntime(browser, browser.page, tracer) + # 2. Create browser using AsyncSentienceBrowser + async with AsyncSentienceBrowser(headless=False) as browser: + page = await browser.new_page() + + # 3. Create AgentRuntime using from_sentience_browser factory + # This wraps the browser/page into the new BrowserBackendV0 architecture + runtime = await AgentRuntime.from_sentience_browser( + browser=browser, + page=page, + tracer=tracer, + sentience_api_key=sentience_key, # Optional: enables Pro tier Gateway refinement + ) # 4. Navigate to a page print("Navigating to example.com...\n") - browser.page.goto("https://example.com") - browser.page.wait_for_load_state("networkidle") + await page.goto("https://example.com") + await page.wait_for_load_state("networkidle") # 5. Begin a verification step runtime.begin_step("Verify page loaded correctly") # 6. Take a snapshot (required for element assertions) - snapshot = runtime.snapshot() + snapshot = await runtime.snapshot() print(f"Snapshot taken: {len(snapshot.elements)} elements found\n") # 7. 
Run assertions against current state @@ -108,19 +106,12 @@ def main(): print(f" Required passed: {runtime.required_assertions_passed()}") print(f" Task complete: {runtime.is_task_done}") - except Exception as e: - print(f"\nError during execution: {e}") - raise - - finally: - # Close tracer and browser - print("\nClosing tracer...") - tracer.close(blocking=True) - print(f"Trace saved to: ~/.sentience/traces/{run_id}.jsonl") - - browser.close() - print("Done!") + # Close tracer after browser context exits + print("\nClosing tracer...") + tracer.close() + print(f"Trace saved to: traces/{run_id}.jsonl") + print("Done!") if __name__ == "__main__": - main() + asyncio.run(main()) diff --git a/examples/browser-use/agent_runtime_browser_use.py b/examples/browser-use/agent_runtime_browser_use.py new file mode 100644 index 0000000..fe5ddaa --- /dev/null +++ b/examples/browser-use/agent_runtime_browser_use.py @@ -0,0 +1,142 @@ +""" +Example: Agent Runtime with browser-use Integration + +Demonstrates how to use AgentRuntime with browser-use library via BrowserBackendV0 protocol. +This pattern enables framework-agnostic browser integration for agent verification loops. 
+ +Key features: +- BrowserUseAdapter: Wraps browser-use BrowserSession into CDPBackendV0 +- BrowserBackendV0 protocol: Minimal interface for browser operations +- Direct AgentRuntime construction: No need for from_sentience_browser factory + +Requirements: +- browser-use library: pip install browser-use +- SENTIENCE_API_KEY (optional) - enables Pro tier Gateway refinement + +Usage: + python examples/browser-use/agent_runtime_browser_use.py +""" + +import asyncio +import os + +from sentience import get_extension_dir +from sentience.agent_runtime import AgentRuntime +from sentience.backends import BrowserUseAdapter +from sentience.tracing import JsonlTraceSink, Tracer +from sentience.verification import all_of, exists, not_exists, url_contains, url_matches + +# browser-use imports (requires: pip install browser-use) +try: + from browser_use import BrowserProfile, BrowserSession +except ImportError: + print("Error: browser-use library not installed.") + print("Install with: pip install browser-use") + exit(1) + + +async def main(): + # Get API key from environment (optional - enables Pro tier features) + sentience_key = os.environ.get("SENTIENCE_API_KEY") + + print("Starting Agent Runtime with browser-use Integration Demo\n") + + # 1. Create tracer for verification event emission + run_id = "browser-use-demo" + sink = JsonlTraceSink(f"traces/{run_id}.jsonl") + tracer = Tracer(run_id=run_id, sink=sink) + print(f"Run ID: {run_id}\n") + + # 2. Create browser-use session with Sentience extension loaded + # The extension is required for snapshot() to work + extension_dir = get_extension_dir() + profile = BrowserProfile( + args=[f"--load-extension={extension_dir}"], + headless=False, + ) + session = BrowserSession(browser_profile=profile) + await session.start() + + try: + # 3. 
Create BrowserBackendV0 using BrowserUseAdapter + # This wraps the browser-use session into the standard backend protocol + adapter = BrowserUseAdapter(session) + backend = await adapter.create_backend() + print("Created CDPBackendV0 from browser-use session\n") + + # 4. Create AgentRuntime directly with backend + # For Pro tier, pass sentience_api_key for Gateway element refinement + runtime = AgentRuntime( + backend=backend, + tracer=tracer, + sentience_api_key=sentience_key, # Optional: enables Pro tier + ) + + # 5. Navigate using browser-use + page = await session.get_current_page() + print("Navigating to example.com...\n") + await page.goto("https://example.com") + await page.wait_for_load_state("networkidle") + + # 6. Begin a verification step + runtime.begin_step("Verify page loaded correctly") + + # 7. Take a snapshot (uses Sentience extension via backend.eval()) + snapshot = await runtime.snapshot() + print(f"Snapshot taken: {len(snapshot.elements)} elements found\n") + + # 8. Run assertions against current state + print("Running assertions:\n") + + # URL assertions + url_ok = runtime.assert_(url_contains("example.com"), "on_example_domain") + print(f" [{'PASS' if url_ok else 'FAIL'}] on_example_domain") + + url_match = runtime.assert_(url_matches(r"https://.*example\.com"), "url_is_https") + print(f" [{'PASS' if url_match else 'FAIL'}] url_is_https") + + # Element assertions + has_heading = runtime.assert_(exists("role=heading"), "has_heading") + print(f" [{'PASS' if has_heading else 'FAIL'}] has_heading") + + no_error = runtime.assert_(not_exists("text~'Error'"), "no_error_message") + print(f" [{'PASS' if no_error else 'FAIL'}] no_error_message") + + # Combined assertion with all_of + page_ready = runtime.assert_( + all_of(url_contains("example"), exists("role=link")), + "page_fully_ready", + ) + print(f" [{'PASS' if page_ready else 'FAIL'}] page_fully_ready") + + # 9. 
Check if task is done (required assertion) + task_complete = runtime.assert_done( + exists("text~'Example Domain'"), + "reached_example_page", + ) + print(f"\n [{'DONE' if task_complete else 'NOT DONE'}] reached_example_page") + + # 10. Get accumulated assertions for step_end event + assertions_data = runtime.get_assertions_for_step_end() + print(f"\nTotal assertions: {len(assertions_data['assertions'])}") + print(f"Task done: {assertions_data.get('task_done', False)}") + + # 11. Check overall status + print("\nVerification Summary:") + print(f" All passed: {runtime.all_assertions_passed()}") + print(f" Required passed: {runtime.required_assertions_passed()}") + print(f" Task complete: {runtime.is_task_done}") + + finally: + # Close browser-use session + await session.close() + + # Close tracer + print("\nClosing tracer...") + tracer.close() + print(f"Trace saved to: traces/{run_id}.jsonl") + print("Done!") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/screenshot.png b/screenshot.png index 847eb73..4f8a30a 100644 Binary files a/screenshot.png and b/screenshot.png differ diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index 168659e..3197e24 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -2,7 +2,7 @@ Agent runtime for verification loop support. This module provides a thin runtime wrapper that combines: -1. Browser session management +1. Browser session management (via BrowserBackendV0 protocol) 2. Snapshot/query helpers 3. Tracer for event emission 4. Assertion/verification methods @@ -10,29 +10,55 @@ The AgentRuntime is designed to be used in agent verification loops where you need to repeatedly take snapshots, execute actions, and verify results. 
-Example usage: - from sentience import AsyncSentienceBrowser +Example usage with browser-use: + from browser_use import BrowserSession, BrowserProfile + from sentience import get_extension_dir + from sentience.backends import BrowserUseAdapter from sentience.agent_runtime import AgentRuntime from sentience.verification import url_matches, exists from sentience.tracing import Tracer, JsonlTraceSink + # Setup browser-use with Sentience extension + profile = BrowserProfile(args=[f"--load-extension={get_extension_dir()}"]) + session = BrowserSession(browser_profile=profile) + await session.start() + + # Create adapter and backend + adapter = BrowserUseAdapter(session) + backend = await adapter.create_backend() + + # Navigate using browser-use + page = await session.get_current_page() + await page.goto("https://example.com") + + # Create runtime with backend + sink = JsonlTraceSink("trace.jsonl") + tracer = Tracer(run_id="test-run", sink=sink) + runtime = AgentRuntime(backend=backend, tracer=tracer) + + # Take snapshot and run assertions + await runtime.snapshot() + runtime.assert_(url_matches(r"example\\.com"), label="on_homepage") + runtime.assert_(exists("role=button"), label="has_buttons") + + # Check if task is done + if runtime.assert_done(exists("text~'Success'"), label="task_complete"): + print("Task completed!") + +Example usage with AsyncSentienceBrowser (backward compatible): + from sentience import AsyncSentienceBrowser + from sentience.agent_runtime import AgentRuntime + async with AsyncSentienceBrowser() as browser: page = await browser.new_page() await page.goto("https://example.com") - sink = JsonlTraceSink("trace.jsonl") - tracer = Tracer(run_id="test-run", sink=sink) - - runtime = AgentRuntime(browser=browser, page=page, tracer=tracer) - - # Take snapshot and run assertions + runtime = await AgentRuntime.from_sentience_browser( + browser=browser, + page=page, + tracer=tracer, + ) await runtime.snapshot() - 
runtime.assert_(url_matches(r"example\\.com"), label="on_homepage") - runtime.assert_(exists("role=button"), label="has_buttons") - - # Check if task is done - if runtime.assert_done(exists("text~'Success'"), label="task_complete"): - print("Task completed!") """ from __future__ import annotations @@ -40,13 +66,14 @@ import uuid from typing import TYPE_CHECKING, Any -from .verification import AssertContext, AssertOutcome, Predicate +from .models import Snapshot, SnapshotOptions +from .verification import AssertContext, Predicate if TYPE_CHECKING: from playwright.async_api import Page + from .backends.protocol_v0 import BrowserBackendV0 from .browser import AsyncSentienceBrowser - from .models import Snapshot from .tracing import Tracer @@ -63,8 +90,7 @@ class AgentRuntime: to the tracer for Studio timeline display. Attributes: - browser: AsyncSentienceBrowser instance - page: Playwright Page instance + backend: BrowserBackendV0 instance for browser operations tracer: Tracer for event emission step_id: Current step identifier step_index: Current step index (0-based) @@ -73,22 +99,34 @@ class AgentRuntime: def __init__( self, - browser: AsyncSentienceBrowser, - page: Page, + backend: BrowserBackendV0, tracer: Tracer, + snapshot_options: SnapshotOptions | None = None, + sentience_api_key: str | None = None, ): """ - Initialize agent runtime. + Initialize agent runtime with any BrowserBackendV0-compatible browser. Args: - browser: AsyncSentienceBrowser instance for taking snapshots - page: Playwright Page for browser interaction + backend: Any browser implementing BrowserBackendV0 protocol. 
+ Examples: + - CDPBackendV0 (for browser-use via BrowserUseAdapter) + - PlaywrightBackend (future, for direct Playwright) tracer: Tracer for emitting verification events + snapshot_options: Default options for snapshots + sentience_api_key: API key for Pro/Enterprise tier (enables Gateway refinement) """ - self.browser = browser - self.page = page + self.backend = backend self.tracer = tracer + # Build default snapshot options with API key if provided + default_opts = snapshot_options or SnapshotOptions() + if sentience_api_key: + default_opts.sentience_api_key = sentience_api_key + if default_opts.use_api is None: + default_opts.use_api = True + self._snapshot_options = default_opts + # Step tracking self.step_id: str | None = None self.step_index: int = 0 @@ -96,6 +134,9 @@ def __init__( # Snapshot state self.last_snapshot: Snapshot | None = None + # Cached URL (updated on snapshot or explicit get_url call) + self._cached_url: str | None = None + # Assertions accumulated during current step self._assertions_this_step: list[dict[str, Any]] = [] @@ -103,6 +144,45 @@ def __init__( self._task_done: bool = False self._task_done_label: str | None = None + @classmethod + async def from_sentience_browser( + cls, + browser: AsyncSentienceBrowser, + page: Page, + tracer: Tracer, + snapshot_options: SnapshotOptions | None = None, + sentience_api_key: str | None = None, + ) -> AgentRuntime: + """ + Create AgentRuntime from AsyncSentienceBrowser (backward compatibility). + + This factory method wraps an AsyncSentienceBrowser + Page combination + into the new BrowserBackendV0-based AgentRuntime. 
+ + Args: + browser: AsyncSentienceBrowser instance + page: Playwright Page for browser interaction + tracer: Tracer for emitting verification events + snapshot_options: Default options for snapshots + sentience_api_key: API key for Pro/Enterprise tier + + Returns: + AgentRuntime instance + """ + from .backends.playwright_backend import PlaywrightBackend + + backend = PlaywrightBackend(page) + runtime = cls( + backend=backend, + tracer=tracer, + snapshot_options=snapshot_options, + sentience_api_key=sentience_api_key, + ) + # Store browser reference for snapshot() to use + runtime._legacy_browser = browser + runtime._legacy_page = page + return runtime + def _ctx(self) -> AssertContext: """ Build assertion context from current state. @@ -113,8 +193,8 @@ def _ctx(self) -> AssertContext: url = None if self.last_snapshot is not None: url = self.last_snapshot.url - elif self.page: - url = self.page.url + elif self._cached_url: + url = self._cached_url return AssertContext( snapshot=self.last_snapshot, @@ -122,19 +202,48 @@ def _ctx(self) -> AssertContext: step_id=self.step_id, ) - async def snapshot(self, **kwargs) -> Snapshot: + async def get_url(self) -> str: + """ + Get current page URL. + + Returns: + Current page URL + """ + url = await self.backend.get_url() + self._cached_url = url + return url + + async def snapshot(self, **kwargs: Any) -> Snapshot: """ Take a snapshot of the current page state. This updates last_snapshot which is used as context for assertions. Args: - **kwargs: Passed through to browser.snapshot() + **kwargs: Override default snapshot options for this call. 
+ Common options: + - limit: Maximum elements to return + - goal: Task goal for ordinal support + - screenshot: Include screenshot + - show_overlay: Show visual overlay Returns: Snapshot of current page state """ - self.last_snapshot = await self.browser.snapshot(self.page, **kwargs) + # Check if using legacy browser (backward compat) + if hasattr(self, "_legacy_browser") and hasattr(self, "_legacy_page"): + self.last_snapshot = await self._legacy_browser.snapshot(self._legacy_page, **kwargs) + return self.last_snapshot + + # Use backend-agnostic snapshot + from .backends.snapshot import snapshot as backend_snapshot + + # Merge default options with call-specific kwargs + options_dict = self._snapshot_options.model_dump(exclude_none=True) + options_dict.update(kwargs) + options = SnapshotOptions(**options_dict) + + self.last_snapshot = await backend_snapshot(self.backend, options=options) return self.last_snapshot def begin_step(self, goal: str, step_index: int | None = None) -> str: @@ -233,7 +342,7 @@ def assert_done( Returns: True if task is complete (assertion passed), False otherwise """ - ok = self.assertTrue(predicate, label=label, required=True) + ok = self.assert_(predicate, label=label, required=True) if ok: self._task_done = True diff --git a/sentience/backends/cdp_backend.py b/sentience/backends/cdp_backend.py index 1061e1a..4d9d7ba 100644 --- a/sentience/backends/cdp_backend.py +++ b/sentience/backends/cdp_backend.py @@ -386,3 +386,8 @@ async def wait_ready_state( # Poll every 100ms await asyncio.sleep(0.1) + + async def get_url(self) -> str: + """Get current page URL.""" + result = await self.eval("window.location.href") + return result if result else "" diff --git a/sentience/backends/playwright_backend.py b/sentience/backends/playwright_backend.py index 719561a..e589c93 100644 --- a/sentience/backends/playwright_backend.py +++ b/sentience/backends/playwright_backend.py @@ -185,6 +185,10 @@ async def wait_ready_state( await asyncio.sleep(0.1) + async 
def get_url(self) -> str: + """Get current page URL.""" + return self._page.url + # Verify protocol compliance at import time assert isinstance(PlaywrightBackend.__new__(PlaywrightBackend), BrowserBackendV0) diff --git a/sentience/backends/protocol_v0.py b/sentience/backends/protocol_v0.py index 2ac86cc..763647d 100644 --- a/sentience/backends/protocol_v0.py +++ b/sentience/backends/protocol_v0.py @@ -205,3 +205,12 @@ async def wait_ready_state( TimeoutError: If state not reached within timeout """ ... + + async def get_url(self) -> str: + """ + Get current page URL. + + Returns: + Current page URL (window.location.href) + """ + ... diff --git a/sentience/extension/background.js b/sentience/extension/background.js index 2923f55..aff49b0 100644 --- a/sentience/extension/background.js +++ b/sentience/extension/background.js @@ -28,14 +28,14 @@ async function handleSnapshotProcessing(rawData, options = {}) { const startTime = performance.now(); try { if (!Array.isArray(rawData)) throw new Error("rawData must be an array"); - if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), + if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), !wasmReady) throw new Error("WASM module not initialized"); let analyzedElements, prunedRawData; try { const wasmPromise = new Promise((resolve, reject) => { try { let result; - result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData), + result = options.limit || options.filter ? 
analyze_page_with_options(rawData, options) : analyze_page(rawData), resolve(result); } catch (e) { reject(e); @@ -101,4 +101,4 @@ initWASM().catch(err => {}), chrome.runtime.onMessage.addListener((request, send event.preventDefault(); }), self.addEventListener("unhandledrejection", event => { event.preventDefault(); -}); \ No newline at end of file +}); diff --git a/sentience/extension/content.js b/sentience/extension/content.js index e94cde1..9d5b3bf 100644 --- a/sentience/extension/content.js +++ b/sentience/extension/content.js @@ -82,7 +82,7 @@ if (!elements || !Array.isArray(elements)) return; removeOverlay(); const host = document.createElement("div"); - host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", + host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", document.body.appendChild(host); const shadow = host.attachShadow({ mode: "closed" @@ -94,15 +94,15 @@ let color; color = isTarget ? "#FF0000" : isPrimary ? "#0066FF" : "#00FF00"; const importanceRatio = maxImportance > 0 ? importance / maxImportance : .5, borderOpacity = isTarget ? 1 : isPrimary ? .9 : Math.max(.4, .5 + .5 * importanceRatio), fillOpacity = .2 * borderOpacity, borderWidth = isTarget ? 2 : isPrimary ? 
1.5 : Math.max(.5, Math.round(2 * importanceRatio)), hexOpacity = Math.round(255 * fillOpacity).toString(16).padStart(2, "0"), box = document.createElement("div"); - if (box.style.cssText = `\n position: absolute;\n left: ${bbox.x}px;\n top: ${bbox.y}px;\n width: ${bbox.width}px;\n height: ${bbox.height}px;\n border: ${borderWidth}px solid ${color};\n background-color: ${color}${hexOpacity};\n box-sizing: border-box;\n opacity: ${borderOpacity};\n pointer-events: none;\n `, + if (box.style.cssText = `\n position: absolute;\n left: ${bbox.x}px;\n top: ${bbox.y}px;\n width: ${bbox.width}px;\n height: ${bbox.height}px;\n border: ${borderWidth}px solid ${color};\n background-color: ${color}${hexOpacity};\n box-sizing: border-box;\n opacity: ${borderOpacity};\n pointer-events: none;\n `, importance > 0 || isPrimary) { const badge = document.createElement("span"); - badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, + badge.textContent = isPrimary ? 
`⭐${importance}` : `${importance}`, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, box.appendChild(badge); } if (isTarget) { const targetIndicator = document.createElement("span"); - targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", + targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", box.appendChild(targetIndicator); } shadow.appendChild(box); @@ -120,7 +120,7 @@ let overlayTimeout = null; function removeOverlay() { const existing = document.getElementById(OVERLAY_HOST_ID); - existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), + existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), overlayTimeout = null); } -}(); \ No newline at end of file +}(); diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index f8c1ec1..983b4da 100644 --- a/sentience/extension/injected_api.js +++ b/sentience/extension/injected_api.js @@ -112,7 +112,7 @@ if (labelEl) { let text = ""; try { - if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), + if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), !text && labelEl.getAttribute) { const ariaLabel = labelEl.getAttribute("aria-label"); ariaLabel && (text = ariaLabel.trim()); @@ -281,7 +281,7 @@ }); const checkStable = () => { const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime; - timeSinceLastChange >= quietPeriod || totalWait >= 
maxWait ? (observer.disconnect(), + timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(), resolve()) : setTimeout(checkStable, 50); }; checkStable(); @@ -301,7 +301,7 @@ }); const checkQuiet = () => { const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime; - timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), + timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), resolve()) : setTimeout(checkQuiet, 50); }; checkQuiet(); @@ -468,8 +468,8 @@ const requestId = `iframe-${idx}-${Date.now()}`, timeout = setTimeout(() => { resolve(null); }, 5e3), listener = event => { - "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), - window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot, + "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), + window.removeEventListener("message", listener), event.data.error ? 
resolve(null) : (event.data.snapshot, resolve({ iframe: iframe, data: event.data.snapshot, @@ -485,7 +485,7 @@ ...options, collectIframes: !0 } - }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), + }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null)); } catch (error) { clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null); @@ -535,7 +535,7 @@ }, 25e3), listener = e => { if ("SENTIENCE_SNAPSHOT_RESULT" === e.data.type && e.data.requestId === requestId) { if (resolved) return; - resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), + resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), e.data.error ? reject(new Error(e.data.error)) : resolve({ elements: e.data.elements, raw_elements: e.data.raw_elements, @@ -552,7 +552,7 @@ options: options }, "*"); } catch (error) { - resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), + resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), reject(new Error(`Failed to send snapshot request: ${error.message}`))); } }); @@ -562,7 +562,7 @@ options.screenshot && (screenshot = await function(options) { return new Promise(resolve => { const requestId = Math.random().toString(36).substring(7), listener = e => { - "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), + "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), resolve(e.data.screenshot)); }; window.addEventListener("message", listener), window.postMessage({ @@ -609,15 +609,15 @@ } if (node.nodeType !== Node.ELEMENT_NODE) return; const tag = node.tagName.toLowerCase(); - if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), - "h3" 
=== tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), - "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), - "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), + if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), + "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), + "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), + "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), "a" === tag) { const href = node.getAttribute("href"); markdown += href ? `](${href})` : "]", insideLink = !1; } - "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), + "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), insideLink || "h1" !== tag && "h2" !== tag && "h3" !== tag && "p" !== tag && "div" !== tag || (markdown += "\n"); }(tempDiv), markdown.replace(/\n{3,}/g, "\n\n").trim(); }(document.body) : function(root) { @@ -630,7 +630,7 @@ const style = window.getComputedStyle(node); if ("none" === style.display || "hidden" === style.visibility) return; const isBlock = "block" === style.display || "flex" === style.display || "P" === node.tagName || "DIV" === node.tagName; - isBlock && (text += " "), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), + isBlock && (text += " "), node.shadowRoot ? 
Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), isBlock && (text += "\n"); } } else text += node.textContent; @@ -729,25 +729,25 @@ } function startRecording(options = {}) { const {highlightColor: highlightColor = "#ff0000", successColor: successColor = "#00ff00", autoDisableTimeout: autoDisableTimeout = 18e5, keyboardShortcut: keyboardShortcut = "Ctrl+Shift+I"} = options; - if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), + if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), () => {}; window.sentience_registry_map = new Map, window.sentience_registry.forEach((el, idx) => { el && window.sentience_registry_map.set(el, idx); }); let highlightBox = document.getElementById("sentience-highlight-box"); - highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", - highlightBox.style.cssText = `\n position: fixed;\n pointer-events: none;\n z-index: 2147483647;\n border: 2px solid ${highlightColor};\n background: rgba(255, 0, 0, 0.1);\n display: none;\n transition: all 0.1s ease;\n box-sizing: border-box;\n `, + highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", + highlightBox.style.cssText = `\n position: fixed;\n pointer-events: none;\n z-index: 2147483647;\n border: 2px solid ${highlightColor};\n background: rgba(255, 0, 0, 0.1);\n display: none;\n transition: all 0.1s ease;\n box-sizing: border-box;\n `, document.body.appendChild(highlightBox)); let recordingIndicator = document.getElementById("sentience-recording-indicator"); - recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", - recordingIndicator.style.cssText = `\n position: fixed;\n top: 
0;\n left: 0;\n right: 0;\n height: 3px;\n background: ${highlightColor};\n z-index: 2147483646;\n pointer-events: none;\n `, + recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", + recordingIndicator.style.cssText = `\n position: fixed;\n top: 0;\n left: 0;\n right: 0;\n height: 3px;\n background: ${highlightColor};\n z-index: 2147483646;\n pointer-events: none;\n `, document.body.appendChild(recordingIndicator)), recordingIndicator.style.display = "block"; const mouseOverHandler = e => { const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; const rect = el.getBoundingClientRect(); - highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", - highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", + highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", + highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", highlightBox.style.height = rect.height + "px"; }, clickHandler = e => { e.preventDefault(), e.stopPropagation(); @@ -824,7 +824,7 @@ debug_snapshot: rawData }, jsonString = JSON.stringify(snippet, null, 2); navigator.clipboard.writeText(jsonString).then(() => { - highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", + highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", setTimeout(() => { highlightBox.style.border = `2px solid ${highlightColor}`, highlightBox.style.background = "rgba(255, 0, 0, 0.1)"; }, 500); @@ -834,15 +834,15 @@ }; let timeoutId = null; const stopRecording = () => { - document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), - 
document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), - timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), + document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), + document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), + timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), window.sentience_registry_map && window.sentience_registry_map.clear(), window.sentience_stopRecording === stopRecording && delete window.sentience_stopRecording; }, keyboardHandler = e => { - (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), + (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), stopRecording()); }; - return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), + return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), document.addEventListener("keydown", keyboardHandler, !0), autoDisableTimeout > 0 && (timeoutId = setTimeout(() => { stopRecording(); }, autoDisableTimeout)), window.sentience_stopRecording = stopRecording, stopRecording; @@ -902,4 +902,4 @@ } }), window.sentience_iframe_handler_setup = !0)); })(); -}(); \ No newline at end of file +}(); diff --git a/sentience/extension/pkg/sentience_core.js b/sentience/extension/pkg/sentience_core.js index ecba479..2696a64 100644 --- a/sentience/extension/pkg/sentience_core.js +++ b/sentience/extension/pkg/sentience_core.js @@ -47,7 +47,7 @@ function getArrayU8FromWasm0(ptr, len) { let cachedDataViewMemory0 = null; function getDataViewMemory0() { - return (null === cachedDataViewMemory0 || !0 === 
cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), + return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), cachedDataViewMemory0; } @@ -58,7 +58,7 @@ function getStringFromWasm0(ptr, len) { let cachedUint8ArrayMemory0 = null; function getUint8ArrayMemory0() { - return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), + return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), cachedUint8ArrayMemory0; } @@ -87,7 +87,7 @@ function isLikeNone(x) { function passStringToWasm0(arg, malloc, realloc) { if (void 0 === realloc) { const buf = cachedTextEncoder.encode(arg), ptr = malloc(buf.length, 1) >>> 0; - return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, + return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, ptr; } let len = arg.length, ptr = malloc(len, 1) >>> 0; @@ -188,7 +188,7 @@ function __wbg_get_imports() { return Number(getObject(arg0)); }, imports.wbg.__wbg___wbindgen_bigint_get_as_i64_6e32f5e6aff02e1d = function(arg0, arg1) { const v = getObject(arg1), ret = "bigint" == typeof v ? v : void 0; - getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? BigInt(0) : ret, !0), + getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? 
BigInt(0) : ret, !0), getDataViewMemory0().setInt32(arg0 + 0, !isLikeNone(ret), !0); }, imports.wbg.__wbg___wbindgen_boolean_get_dea25b33882b895b = function(arg0) { const v = getObject(arg0), ret = "boolean" == typeof v ? v : void 0; @@ -296,7 +296,7 @@ function __wbg_get_imports() { } function __wbg_finalize_init(instance, module) { - return wasm = instance.exports, __wbg_init.__wbindgen_wasm_module = module, cachedDataViewMemory0 = null, + return wasm = instance.exports, __wbg_init.__wbindgen_wasm_module = module, cachedDataViewMemory0 = null, cachedUint8ArrayMemory0 = null, wasm; } @@ -310,7 +310,7 @@ function initSync(module) { async function __wbg_init(module_or_path) { if (void 0 !== wasm) return wasm; - void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), + void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), void 0 === module_or_path && (module_or_path = new URL("sentience_core_bg.wasm", import.meta.url)); const imports = __wbg_get_imports(); ("string" == typeof module_or_path || "function" == typeof Request && module_or_path instanceof Request || "function" == typeof URL && module_or_path instanceof URL) && (module_or_path = fetch(module_or_path)); @@ -320,4 +320,4 @@ async function __wbg_init(module_or_path) { export { initSync }; -export default __wbg_init; \ No newline at end of file +export default __wbg_init; diff --git a/tests/test_agent_runtime.py b/tests/test_agent_runtime.py new file mode 100644 index 0000000..d708c84 --- /dev/null +++ b/tests/test_agent_runtime.py @@ -0,0 +1,561 @@ +""" +Tests for AgentRuntime. + +These tests verify the AgentRuntime works correctly with the new +BrowserBackendV0-based architecture. 
+""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from sentience.agent_runtime import AgentRuntime +from sentience.models import SnapshotOptions +from sentience.verification import AssertContext, AssertOutcome + + +class MockBackend: + """Mock BrowserBackendV0 implementation for testing.""" + + def __init__(self) -> None: + self._url = "https://example.com" + self.eval_results: dict[str, any] = {} + + async def get_url(self) -> str: + return self._url + + async def eval(self, expression: str) -> any: + return self.eval_results.get(expression) + + async def refresh_page_info(self): + pass + + async def call(self, function_declaration: str, args=None): + pass + + async def get_layout_metrics(self): + pass + + async def screenshot_png(self) -> bytes: + return b"" + + async def mouse_move(self, x: float, y: float) -> None: + pass + + async def mouse_click(self, x: float, y: float, button="left", click_count=1) -> None: + pass + + async def wheel(self, delta_y: float, x=None, y=None) -> None: + pass + + async def type_text(self, text: str) -> None: + pass + + async def wait_ready_state(self, state="interactive", timeout_ms=15000) -> None: + pass + + +class MockTracer: + """Mock Tracer for testing.""" + + def __init__(self) -> None: + self.events: list[dict] = [] + + def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None: + self.events.append( + { + "type": event_type, + "data": data, + "step_id": step_id, + } + ) + + +class TestAgentRuntimeInit: + """Tests for AgentRuntime initialization.""" + + def test_init_with_backend(self) -> None: + """Test basic initialization with backend.""" + backend = MockBackend() + tracer = MockTracer() + + runtime = AgentRuntime(backend=backend, tracer=tracer) + + assert runtime.backend is backend + assert runtime.tracer is tracer + assert runtime.step_id is None + assert runtime.step_index == 0 + assert runtime.last_snapshot is None + assert runtime.is_task_done is False + + def 
test_init_with_snapshot_options(self) -> None: + """Test initialization with custom snapshot options.""" + backend = MockBackend() + tracer = MockTracer() + options = SnapshotOptions(limit=100, goal="test goal") + + runtime = AgentRuntime(backend=backend, tracer=tracer, snapshot_options=options) + + assert runtime._snapshot_options.limit == 100 + assert runtime._snapshot_options.goal == "test goal" + + def test_init_with_api_key(self) -> None: + """Test initialization with API key enables use_api.""" + backend = MockBackend() + tracer = MockTracer() + + runtime = AgentRuntime( + backend=backend, + tracer=tracer, + sentience_api_key="sk_test_key", + ) + + assert runtime._snapshot_options.sentience_api_key == "sk_test_key" + assert runtime._snapshot_options.use_api is True + + def test_init_with_api_key_and_options(self) -> None: + """Test API key merges with provided options.""" + backend = MockBackend() + tracer = MockTracer() + options = SnapshotOptions(limit=50) + + runtime = AgentRuntime( + backend=backend, + tracer=tracer, + snapshot_options=options, + sentience_api_key="sk_pro_key", + ) + + assert runtime._snapshot_options.limit == 50 + assert runtime._snapshot_options.sentience_api_key == "sk_pro_key" + assert runtime._snapshot_options.use_api is True + + +class TestAgentRuntimeGetUrl: + """Tests for get_url method.""" + + @pytest.mark.asyncio + async def test_get_url(self) -> None: + """Test get_url returns URL from backend.""" + backend = MockBackend() + backend._url = "https://test.example.com/page" + tracer = MockTracer() + + runtime = AgentRuntime(backend=backend, tracer=tracer) + url = await runtime.get_url() + + assert url == "https://test.example.com/page" + assert runtime._cached_url == "https://test.example.com/page" + + +class TestAgentRuntimeBeginStep: + """Tests for begin_step method.""" + + def test_begin_step_generates_step_id(self) -> None: + """Test begin_step generates a UUID step_id.""" + backend = MockBackend() + tracer = MockTracer() + 
runtime = AgentRuntime(backend=backend, tracer=tracer) + + step_id = runtime.begin_step(goal="Test step") + + assert step_id is not None + assert len(step_id) == 36 # UUID length with dashes + + def test_begin_step_increments_index(self) -> None: + """Test begin_step auto-increments step_index.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + + runtime.begin_step(goal="Step 1") + assert runtime.step_index == 1 + + runtime.begin_step(goal="Step 2") + assert runtime.step_index == 2 + + def test_begin_step_explicit_index(self) -> None: + """Test begin_step with explicit step_index.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + + runtime.begin_step(goal="Custom step", step_index=10) + assert runtime.step_index == 10 + + def test_begin_step_clears_assertions(self) -> None: + """Test begin_step clears previous assertions.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + + # Add some assertions + runtime._assertions_this_step = [{"label": "old", "passed": True}] + + runtime.begin_step(goal="New step") + + assert runtime._assertions_this_step == [] + + +class TestAgentRuntimeAssertions: + """Tests for assertion methods.""" + + def test_assert_passing(self) -> None: + """Test assert_ with passing predicate.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime.begin_step(goal="Test") + + # Create a passing predicate + def passing_predicate(ctx: AssertContext) -> AssertOutcome: + return AssertOutcome(passed=True, reason="Matched", details={}) + + result = runtime.assert_(passing_predicate, label="test_label") + + assert result is True + assert len(runtime._assertions_this_step) == 1 + assert runtime._assertions_this_step[0]["label"] == "test_label" + assert runtime._assertions_this_step[0]["passed"] is True + + def 
test_assert_failing(self) -> None: + """Test assert_ with failing predicate.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime.begin_step(goal="Test") + + def failing_predicate(ctx: AssertContext) -> AssertOutcome: + return AssertOutcome(passed=False, reason="Not matched", details={}) + + result = runtime.assert_(failing_predicate, label="fail_label") + + assert result is False + assert runtime._assertions_this_step[0]["passed"] is False + + def test_assert_emits_event(self) -> None: + """Test assert_ emits verification event.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime.begin_step(goal="Test") + + def predicate(ctx: AssertContext) -> AssertOutcome: + return AssertOutcome(passed=True, reason="OK", details={"key": "value"}) + + runtime.assert_(predicate, label="test_emit") + + assert len(tracer.events) == 1 + event = tracer.events[0] + assert event["type"] == "verification" + assert event["data"]["kind"] == "assert" + assert event["data"]["passed"] is True + assert event["data"]["label"] == "test_emit" + + def test_assert_done_marks_task_complete(self) -> None: + """Test assert_done marks task as done on success.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime.begin_step(goal="Test") + + def passing_predicate(ctx: AssertContext) -> AssertOutcome: + return AssertOutcome(passed=True, reason="Done", details={}) + + result = runtime.assert_done(passing_predicate, label="task_complete") + + assert result is True + assert runtime.is_task_done is True + assert runtime._task_done_label == "task_complete" + + def test_assert_done_does_not_mark_on_failure(self) -> None: + """Test assert_done doesn't mark task done on failure.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + 
runtime.begin_step(goal="Test") + + def failing_predicate(ctx: AssertContext) -> AssertOutcome: + return AssertOutcome(passed=False, reason="Not done", details={}) + + result = runtime.assert_done(failing_predicate, label="task_incomplete") + + assert result is False + assert runtime.is_task_done is False + + +class TestAgentRuntimeAssertionHelpers: + """Tests for assertion helper methods.""" + + def test_all_assertions_passed_empty(self) -> None: + """Test all_assertions_passed with no assertions.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + + assert runtime.all_assertions_passed() is True + + def test_all_assertions_passed_true(self) -> None: + """Test all_assertions_passed when all pass.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime._assertions_this_step = [ + {"passed": True}, + {"passed": True}, + ] + + assert runtime.all_assertions_passed() is True + + def test_all_assertions_passed_false(self) -> None: + """Test all_assertions_passed when one fails.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime._assertions_this_step = [ + {"passed": True}, + {"passed": False}, + ] + + assert runtime.all_assertions_passed() is False + + def test_required_assertions_passed(self) -> None: + """Test required_assertions_passed ignores optional failures.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime._assertions_this_step = [ + {"passed": True, "required": True}, + {"passed": False, "required": False}, # Optional failure + ] + + assert runtime.required_assertions_passed() is True + + def test_required_assertions_failed(self) -> None: + """Test required_assertions_passed fails on required failure.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, 
tracer=tracer) + runtime._assertions_this_step = [ + {"passed": True, "required": True}, + {"passed": False, "required": True}, # Required failure + ] + + assert runtime.required_assertions_passed() is False + + +class TestAgentRuntimeFlushAssertions: + """Tests for flush_assertions method.""" + + def test_flush_assertions(self) -> None: + """Test flush_assertions returns and clears assertions.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime._assertions_this_step = [ + {"label": "a", "passed": True}, + {"label": "b", "passed": False}, + ] + + assertions = runtime.flush_assertions() + + assert len(assertions) == 2 + assert assertions[0]["label"] == "a" + assert runtime._assertions_this_step == [] + + +class TestAgentRuntimeGetAssertionsForStepEnd: + """Tests for get_assertions_for_step_end method.""" + + def test_get_assertions_basic(self) -> None: + """Test get_assertions_for_step_end returns assertions.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime._assertions_this_step = [{"label": "test", "passed": True}] + + result = runtime.get_assertions_for_step_end() + + assert "assertions" in result + assert len(result["assertions"]) == 1 + assert "task_done" not in result + + def test_get_assertions_with_task_done(self) -> None: + """Test get_assertions_for_step_end includes task_done.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime._task_done = True + runtime._task_done_label = "completed" + + result = runtime.get_assertions_for_step_end() + + assert result["task_done"] is True + assert result["task_done_label"] == "completed" + + +class TestAgentRuntimeResetTaskDone: + """Tests for reset_task_done method.""" + + def test_reset_task_done(self) -> None: + """Test reset_task_done clears task state.""" + backend = MockBackend() + tracer = MockTracer() + 
runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime._task_done = True + runtime._task_done_label = "was_done" + + runtime.reset_task_done() + + assert runtime.is_task_done is False + assert runtime._task_done_label is None + + +class TestAgentRuntimeContext: + """Tests for _ctx method.""" + + def test_ctx_with_snapshot(self) -> None: + """Test _ctx uses snapshot URL.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime.begin_step(goal="Test") + + # Mock snapshot with URL + mock_snapshot = MagicMock() + mock_snapshot.url = "https://snapshot-url.com" + runtime.last_snapshot = mock_snapshot + + ctx = runtime._ctx() + + assert ctx.url == "https://snapshot-url.com" + assert ctx.snapshot is mock_snapshot + assert ctx.step_id == runtime.step_id + + def test_ctx_fallback_to_cached_url(self) -> None: + """Test _ctx falls back to cached URL.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + runtime._cached_url = "https://cached-url.com" + runtime.begin_step(goal="Test") + + ctx = runtime._ctx() + + assert ctx.url == "https://cached-url.com" + assert ctx.snapshot is None + + +class TestAgentRuntimeFromSentienceBrowser: + """Tests for from_sentience_browser factory method.""" + + @pytest.mark.asyncio + async def test_from_sentience_browser_creates_runtime(self) -> None: + """Test from_sentience_browser creates runtime with legacy support.""" + mock_browser = MagicMock() + mock_page = MagicMock() + mock_page.url = "https://example.com" + tracer = MockTracer() + + with patch("sentience.backends.playwright_backend.PlaywrightBackend") as MockPWBackend: + mock_backend_instance = MagicMock() + MockPWBackend.return_value = mock_backend_instance + + runtime = await AgentRuntime.from_sentience_browser( + browser=mock_browser, + page=mock_page, + tracer=tracer, + ) + + assert runtime.backend is mock_backend_instance + assert 
runtime._legacy_browser is mock_browser + assert runtime._legacy_page is mock_page + MockPWBackend.assert_called_once_with(mock_page) + + @pytest.mark.asyncio + async def test_from_sentience_browser_with_api_key(self) -> None: + """Test from_sentience_browser passes API key.""" + mock_browser = MagicMock() + mock_page = MagicMock() + tracer = MockTracer() + + with patch("sentience.backends.playwright_backend.PlaywrightBackend"): + runtime = await AgentRuntime.from_sentience_browser( + browser=mock_browser, + page=mock_page, + tracer=tracer, + sentience_api_key="sk_test", + ) + + assert runtime._snapshot_options.sentience_api_key == "sk_test" + assert runtime._snapshot_options.use_api is True + + +class TestAgentRuntimeSnapshot: + """Tests for snapshot method.""" + + @pytest.mark.asyncio + async def test_snapshot_with_legacy_browser(self) -> None: + """Test snapshot uses legacy browser when available.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + + # Set up legacy browser + mock_browser = MagicMock() + mock_page = MagicMock() + mock_snapshot = MagicMock() + mock_browser.snapshot = AsyncMock(return_value=mock_snapshot) + + runtime._legacy_browser = mock_browser + runtime._legacy_page = mock_page + + result = await runtime.snapshot(limit=30) + + mock_browser.snapshot.assert_called_once_with(mock_page, limit=30) + assert result is mock_snapshot + assert runtime.last_snapshot is mock_snapshot + + @pytest.mark.asyncio + async def test_snapshot_with_backend(self) -> None: + """Test snapshot uses backend-agnostic snapshot.""" + backend = MockBackend() + tracer = MockTracer() + runtime = AgentRuntime(backend=backend, tracer=tracer) + + mock_snapshot = MagicMock() + + with patch("sentience.backends.snapshot.snapshot", new_callable=AsyncMock) as mock_snap_fn: + mock_snap_fn.return_value = mock_snapshot + + result = await runtime.snapshot(goal="test goal") + + mock_snap_fn.assert_called_once() + call_args = 
mock_snap_fn.call_args + assert call_args[0][0] is backend + assert call_args[1]["options"].goal == "test goal" + assert result is mock_snapshot + assert runtime.last_snapshot is mock_snapshot + + @pytest.mark.asyncio + async def test_snapshot_merges_options(self) -> None: + """Test snapshot merges default and call-specific options.""" + backend = MockBackend() + tracer = MockTracer() + default_options = SnapshotOptions(limit=100, screenshot=True) + runtime = AgentRuntime( + backend=backend, + tracer=tracer, + snapshot_options=default_options, + ) + + with patch("sentience.backends.snapshot.snapshot", new_callable=AsyncMock) as mock_snap_fn: + mock_snap_fn.return_value = MagicMock() + + await runtime.snapshot(goal="override goal") + + call_args = mock_snap_fn.call_args + options = call_args[1]["options"] + assert options.limit == 100 # From default + assert options.screenshot is True # From default + assert options.goal == "override goal" # From call diff --git a/tests/test_backends.py b/tests/test_backends.py index 00e4325..ef725e3 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -341,6 +341,30 @@ async def test_wait_ready_state_timeout( with pytest.raises(TimeoutError, match="Timed out"): await backend.wait_ready_state(state="complete", timeout_ms=200) + @pytest.mark.asyncio + async def test_get_url(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None: + """Test get_url returns current page URL.""" + transport.set_response( + "Runtime.evaluate", + {"result": {"type": "string", "value": "https://example.com/page"}}, + ) + + url = await backend.get_url() + + assert url == "https://example.com/page" + + @pytest.mark.asyncio + async def test_get_url_empty(self, backend: CDPBackendV0, transport: MockCDPTransport) -> None: + """Test get_url returns empty string when URL is None.""" + transport.set_response( + "Runtime.evaluate", + {"result": {"type": "undefined"}}, + ) + + url = await backend.get_url() + + assert url == "" + class 
TestCDPBackendProtocol: """Test that CDPBackendV0 implements BrowserBackendV0 protocol.""" @@ -736,6 +760,17 @@ async def test_screenshot_png(self) -> None: assert result.startswith(b"\x89PNG") mock_page.screenshot.assert_called_once_with(type="png") + @pytest.mark.asyncio + async def test_get_url(self) -> None: + """Test get_url returns page.url.""" + mock_page = MagicMock() + mock_page.url = "https://example.com/test" + + backend = PlaywrightBackend(mock_page) + url = await backend.get_url() + + assert url == "https://example.com/test" + class TestCachedSnapshot: """Tests for CachedSnapshot caching behavior."""