From e454ad95cd8c792f3cb7d3a7a51d3b57bf3b4f6a Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 11 Jan 2026 20:53:49 -0800 Subject: [PATCH] Add machine-verifiable assertions to SentienceAgent examples --- browser_use/integrations/sentience/agent.py | 291 +++++++++++++++--- .../integrations/sentience_agent_example.py | 98 +++++- .../integrations/sentience_agent_local_llm.py | 63 +++- 3 files changed, 396 insertions(+), 56 deletions(-) diff --git a/browser_use/integrations/sentience/agent.py b/browser_use/integrations/sentience/agent.py index 5857c8cb00..aad6685d01 100644 --- a/browser_use/integrations/sentience/agent.py +++ b/browser_use/integrations/sentience/agent.py @@ -3,14 +3,20 @@ This agent uses Sentience SDK snapshots as the primary, compact prompt format, with automatic fallback to vision mode when snapshots fail. + +Features: +- Sentience snapshot as compact prompt (~3K tokens vs ~40K for vision) +- Vision fallback when snapshot fails +- Native AgentRuntime integration for verification assertions +- Machine-verifiable task completion via assert_done() """ from __future__ import annotations import asyncio import logging -from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Literal +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any, Callable, Literal from pydantic import BaseModel, Field @@ -22,6 +28,9 @@ if TYPE_CHECKING: from browser_use.browser.session import BrowserSession from browser_use.tools.service import Tools + from sentience.agent_runtime import AgentRuntime + from sentience.tracing import Tracer + from sentience.verification import Predicate logger = logging.getLogger(__name__) @@ -66,6 +75,49 @@ class VisionFallbackConfig: """Whether to include screenshots in vision fallback.""" +@dataclass +class VerificationConfig: + """Configuration for Sentience SDK verification (AgentRuntime integration). 
+ + This enables machine-verifiable assertions during agent execution, + providing observability into agent behavior and task completion status. + """ + + enabled: bool = False + """Whether to enable verification via AgentRuntime.""" + + step_assertions: list[dict[str, Any]] = field(default_factory=list) + """Per-step assertions to run after each action. + + Each assertion dict should have: + - predicate: A Predicate callable (e.g., url_contains("example.com")) + - label: String label for the assertion + - required: Optional bool, if True failing this assertion marks step as failed + + Example: + step_assertions=[ + {"predicate": url_contains("news.ycombinator.com"), "label": "on_hackernews", "required": True}, + {"predicate": exists("role=link[text*='Show HN']"), "label": "show_hn_visible"}, + ] + """ + + done_assertion: Any | None = None + """Predicate for machine-verifiable task completion. + + When set, this predicate is evaluated after each step. If it returns True, + the task is considered complete (independent of LLM's done action). + + Example: + done_assertion=all_of( + url_contains("news.ycombinator.com/show"), + exists("role=link[text*='Show HN']"), + ) + """ + + trace_dir: str = "traces" + """Directory for trace output files.""" + + class SentienceAgentSettings(BaseModel): """Settings for SentienceAgent.""" @@ -88,14 +140,20 @@ class SentienceAgentSettings(BaseModel): description="Configuration for vision fallback behavior" ) + # Verification configuration (AgentRuntime integration) + verification: VerificationConfig = Field( + default_factory=VerificationConfig, + description="Configuration for Sentience SDK verification assertions" + ) + class SentienceAgent: """ Custom agent with full control over prompt construction. 
Features: - - Primary: Sentience snapshot as compact prompt (~3K tokens) - - Fallback: Vision mode when snapshot fails (~40K tokens) + - Primary: Sentience snapshot as compact prompt (~1.3K tokens) + - Fallback: Vision mode when snapshot fails (~4K tokens) - Token usage tracking via browser-use utilities - Clear isolation from built-in vision model """ @@ -108,13 +166,7 @@ def __init__( tools: Tools | None = None, *, # Sentience configuration - sentience_api_key: str | None = None, - sentience_use_api: bool | None = None, - sentience_max_elements: int = 40, - sentience_show_overlay: bool = False, - sentience_wait_for_extension_ms: int = 5000, - sentience_retries: int = 2, - sentience_retry_delay_s: float = 1.0, + sentience_config: SentienceAgentConfig, # Vision fallback configuration vision_fallback_enabled: bool = True, vision_detail_level: Literal['auto', 'low', 'high'] = 'auto', @@ -127,6 +179,12 @@ def __init__( max_failures: int = 3, llm_timeout: int = 60, step_timeout: int = 120, + # Verification configuration (Sentience SDK AgentRuntime) + enable_verification: bool = False, + step_assertions: list[dict[str, Any]] | None = None, + done_assertion: Any | None = None, + tracer: Any | None = None, + trace_dir: str = "traces", **kwargs, ): """ @@ -137,13 +195,7 @@ def __init__( llm: Language model to use (primary model for Sentience snapshots) browser_session: Browser session instance tools: Tools registry (optional) - sentience_api_key: Sentience API key for gateway mode - sentience_use_api: Force API vs extension mode - sentience_max_elements: Maximum elements in snapshot - sentience_show_overlay: Show visual overlay - sentience_wait_for_extension_ms: Wait time for extension - sentience_retries: Number of snapshot retries - sentience_retry_delay_s: Delay between retries + sentience_config: SentienceAgentConfig object with all Sentience configuration vision_fallback_enabled: Enable vision fallback vision_detail_level: Vision detail level 
vision_include_screenshots: Include screenshots in fallback @@ -154,6 +206,11 @@ def __init__( max_failures: Maximum failures llm_timeout: LLM timeout step_timeout: Step timeout + enable_verification: Enable Sentience SDK verification via AgentRuntime + step_assertions: Per-step assertions (list of dicts with predicate, label, required) + done_assertion: Predicate for machine-verifiable task completion + tracer: Optional Tracer instance (auto-created if None and verification enabled) + trace_dir: Directory for trace output files """ self.task = task self.llm = llm @@ -173,20 +230,17 @@ def __init__( self.file_system = FileSystem(base_dir=tempfile.mkdtemp(prefix="sentience_agent_")) # Build settings - sentience_config = SentienceAgentConfig( - sentience_api_key=sentience_api_key, - sentience_use_api=sentience_use_api, - sentience_max_elements=sentience_max_elements, - sentience_show_overlay=sentience_show_overlay, - sentience_wait_for_extension_ms=sentience_wait_for_extension_ms, - sentience_retries=sentience_retries, - sentience_retry_delay_s=sentience_retry_delay_s, - ) vision_fallback = VisionFallbackConfig( enabled=vision_fallback_enabled, detail_level=vision_detail_level, include_screenshots=vision_include_screenshots, ) + verification_config = VerificationConfig( + enabled=enable_verification, + step_assertions=step_assertions or [], + done_assertion=done_assertion, + trace_dir=trace_dir, + ) self.settings = SentienceAgentSettings( task=task, max_steps=max_steps, @@ -196,11 +250,17 @@ def __init__( step_timeout=step_timeout, sentience_config=sentience_config, vision_fallback=vision_fallback, + verification=verification_config, ) # Initialize SentienceContext (lazy import to avoid hard dependency) self._sentience_context: Any | None = None + # Initialize AgentRuntime for verification (if enabled) + self._runtime: Any | None = None + self._tracer: Any | None = tracer + self._verification_initialized = False + # Initialize token cost service 
self.token_cost_service = TokenCost(include_cost=calculate_cost) self.token_cost_service.register_llm(llm) @@ -223,10 +283,62 @@ def __init__( logger.info( f"Initialized SentienceAgent: task='{task}', " - f"sentience_max_elements={sentience_max_elements}, " + f"sentience_max_elements={sentience_config.sentience_max_elements}, " f"vision_fallback={'enabled' if vision_fallback_enabled else 'disabled'}" ) + @property + def runtime(self) -> Any | None: + """Access the AgentRuntime instance (if verification is enabled).""" + return self._runtime + + async def _initialize_verification(self) -> None: + """Initialize AgentRuntime for verification assertions. + + Creates a BrowserBackend from the browser_session and sets up + the AgentRuntime with tracer for verification events. + """ + if self._verification_initialized or not self.settings.verification.enabled: + return + + try: + from sentience.agent_runtime import AgentRuntime + from sentience.backends import BrowserUseAdapter + from sentience.tracing import JsonlTraceSink, Tracer + + # Create backend from browser_session + adapter = BrowserUseAdapter(self.browser_session) + backend = await adapter.create_backend() + + # Create tracer if not provided + if self._tracer is None: + import os + import time + os.makedirs(self.settings.verification.trace_dir, exist_ok=True) + run_id = f"sentience-agent-{int(time.time())}" + sink = JsonlTraceSink( + f"{self.settings.verification.trace_dir}/{run_id}.jsonl" + ) + self._tracer = Tracer(run_id=run_id, sink=sink) + logger.info(f"📝 Verification trace: {self.settings.verification.trace_dir}/{run_id}.jsonl") + + # Create AgentRuntime + self._runtime = AgentRuntime( + backend=backend, + tracer=self._tracer, + sentience_api_key=self.settings.sentience_config.sentience_api_key, + ) + + self._verification_initialized = True + logger.info("✅ Verification enabled via Sentience AgentRuntime") + + except ImportError as e: + logger.warning( + f"⚠️ Verification requested but Sentience 
SDK not fully installed: {e}. " + "Install with: pip install sentienceapi" + ) + self.settings.verification.enabled = False + def _get_sentience_context(self) -> Any: """Get or create SentienceContext instance.""" if self._sentience_context is None: @@ -809,6 +921,59 @@ def traverse_node(node: SimplifiedNode) -> None: traverse_node(dom_state._root) return stats + async def _run_verification_assertions( + self, + step: int, + sentience_state: Any | None, + model_output: Any | None, + ) -> tuple[bool, bool]: + """Run verification assertions for the current step. + + Args: + step: Current step number + sentience_state: Current Sentience snapshot state (if available) + model_output: Model output with next_goal (for step labeling) + + Returns: + Tuple of (all_step_assertions_passed, task_done_by_assertion) + """ + if not self._runtime or not self.settings.verification.enabled: + return True, False + + # Begin step in AgentRuntime + goal = "" + if model_output and hasattr(model_output, "next_goal"): + goal = model_output.next_goal or "" + self._runtime.begin_step(goal=f"Step {step + 1}: {goal[:50]}") + + # Inject current snapshot into runtime (avoid double-snapshot) + if sentience_state and hasattr(sentience_state, "snapshot"): + self._runtime.last_snapshot = sentience_state.snapshot + self._runtime._cached_url = sentience_state.snapshot.url if hasattr(sentience_state.snapshot, "url") else None + + # Run step assertions + all_passed = True + for assertion_config in self.settings.verification.step_assertions: + predicate = assertion_config.get("predicate") + label = assertion_config.get("label", "unnamed") + required = assertion_config.get("required", False) + + if predicate: + passed = self._runtime.assert_(predicate, label=label, required=required) + if required and not passed: + all_passed = False + logger.info(f" 🔍 Assertion '{label}': {'✅ PASS' if passed else '❌ FAIL'}") + + # Check done assertion + task_done = False + done_assertion = 
self.settings.verification.done_assertion + if done_assertion: + task_done = self._runtime.assert_done(done_assertion, label="task_complete") + if task_done: + logger.info(" 🎯 Task verified complete by assertion!") + + return all_passed, task_done + async def run(self) -> Any: """ Run the agent loop with full action execution and history tracking. @@ -823,6 +988,10 @@ async def run(self) -> Any: # Initialize browser session if needed (start() is idempotent) await self.browser_session.start() + # Initialize verification if enabled + if self.settings.verification.enabled: + await self._initialize_verification() + # Get AgentOutput type from tools registry # Create action model from registered actions action_model = self.tools.registry.create_action_model() @@ -833,6 +1002,7 @@ async def run(self) -> Any: # Track execution history execution_history: list[dict[str, Any]] = [] sentience_used_in_any_step = False # Track if Sentience was used in ANY step + verification_task_done = False # Track if task completed by assertion # Main agent loop for step in range(self.settings.max_steps): @@ -1016,6 +1186,13 @@ async def run(self) -> Any: if model_output.action: action_results = await self._execute_actions(model_output.action) + # Run verification assertions (if enabled) + assertions_passed, verification_task_done = await self._run_verification_assertions( + step=step, + sentience_state=self._current_sentience_state, + model_output=model_output, + ) + # Update history with model output and action results self.message_manager.update_history( model_output=model_output, @@ -1027,20 +1204,25 @@ async def run(self) -> Any: if sentience_used: sentience_used_in_any_step = True - # Track in execution history - execution_history.append( - { - "step": step + 1, - "model_output": model_output, - "action_results": action_results, - "sentience_used": sentience_used, - } - ) - - # Check if done + # Track in execution history (include verification results) + step_entry = { + "step": 
step + 1, + "model_output": model_output, + "action_results": action_results, + "sentience_used": sentience_used, + } + if self.settings.verification.enabled: + step_entry["assertions_passed"] = assertions_passed + step_entry["verification_task_done"] = verification_task_done + execution_history.append(step_entry) + + # Check if done (by LLM action OR by verification assertion) is_done = any(result.is_done for result in action_results if result.is_done) if is_done: - logger.info("✅ Task completed") + logger.info("✅ Task completed (LLM done action)") + break + if verification_task_done: + logger.info("✅ Task completed (verified by assertion)") break # Check for errors @@ -1086,8 +1268,29 @@ async def run(self) -> Any: steps_using_sentience = sum(1 for entry in execution_history if entry.get("sentience_used", False)) total_steps = len(execution_history) + # Build verification summary (if enabled) + verification_summary = None + if self.settings.verification.enabled and self._runtime: + verification_summary = { + "enabled": True, + "all_assertions_passed": self._runtime.all_assertions_passed(), + "required_assertions_passed": self._runtime.required_assertions_passed(), + "task_verified_complete": self._runtime.is_task_done, + "assertions": self._runtime.get_assertions_for_step_end().get("assertions", []), + } + logger.info( + f"📊 Verification Summary: " + f"all_passed={verification_summary['all_assertions_passed']}, " + f"task_done={verification_summary['task_verified_complete']}" + ) + + # Close tracer if we created it + if self._tracer and hasattr(self._tracer, "close"): + self._tracer.close() + logger.info(f"📝 Trace saved to: {self.settings.verification.trace_dir}/") + # Return execution summary (will return AgentHistoryList in future phases) - return { + result = { "steps": self._current_step + 1, "sentience_used": sentience_used_in_any_step, "sentience_usage_stats": { @@ -1099,6 +1302,12 @@ async def run(self) -> Any: "execution_history": 
execution_history, } + # Add verification results if enabled + if verification_summary: + result["verification"] = verification_summary + + return result + async def _execute_actions(self, actions: list[Any]) -> list[Any]: """ Execute a list of actions. diff --git a/examples/integrations/sentience_agent_example.py b/examples/integrations/sentience_agent_example.py index fc7553d41f..f5f741bbf6 100644 --- a/examples/integrations/sentience_agent_example.py +++ b/examples/integrations/sentience_agent_example.py @@ -1,10 +1,18 @@ """ -Example usage of SentienceAgent. +Example usage of SentienceAgent with Verification. This example demonstrates how to use SentienceAgent with: - Sentience snapshot as primary prompt (compact, token-efficient) - Vision fallback when snapshot fails - Token usage tracking +- **NEW: Machine-verifiable assertions via Sentience SDK AgentRuntime** +- **NEW: Declarative task completion verification** + +The verification feature showcases the full power of the Sentience SDK: +- Per-step assertions (url_contains, exists, not_exists, etc.) +- Predicate combinators (all_of, any_of) +- Machine-verifiable task completion (assert_done) +- Trace output for observability (Studio timeline) """ import asyncio @@ -15,9 +23,20 @@ from dotenv import load_dotenv from browser_use import BrowserProfile, BrowserSession, ChatBrowserUse -from browser_use.integrations.sentience import SentienceAgent +from browser_use.integrations.sentience import SentienceAgent, SentienceAgentConfig from sentience import get_extension_dir +# Import Sentience SDK verification helpers +from sentience.verification import ( + url_contains, + exists, + not_exists, + all_of, + any_of, +) +# Import the assertion DSL for expressive queries +from sentience.asserts import E, expect, in_dominant_list + # Note: This example requires: # 1. Sentience SDK installed: pip install sentienceapi # 2. Sentience extension loaded in browser @@ -130,18 +149,56 @@ async def main(): 3. 
Call the done action with the element ID and title in this format: "Top post: element ID [index], title: [title]" """ - log(f"\n🚀 Starting SentienceAgent: {task}\n") + log(f"\n🚀 Starting SentienceAgent with Verification: {task}\n") + + # Define verification assertions + # These will be checked after each step to provide machine-verifiable observability + step_assertions = [ + # Verify we're on Hacker News + { + "predicate": url_contains("news.ycombinator.com"), + "label": "on_hackernews", + "required": True, # Required: agent fails if this doesn't pass + }, + # Verify Show HN posts are visible + { + "predicate": exists("role=link text~'Show HN'"), + "label": "show_hn_posts_visible", + }, + # Verify no error messages + { + "predicate": not_exists("text~'Error'"), + "label": "no_error_message", + }, + ] + + # Define task completion assertion + # This is machine-verifiable: if this passes, the task is done! + done_assertion = all_of( + url_contains("news.ycombinator.com/show"), + exists("role=link text~'Show HN'"), + ) + + log("📋 Verification assertions configured:") + log(" - on_hackernews (required): URL contains 'news.ycombinator.com'") + log(" - show_hn_posts_visible: Show HN links are visible") + log(" - no_error_message: No error text on page") + log(" - done_assertion: URL is /show AND Show HN links visible\n") + + # Create Sentience configuration + sentience_config = SentienceAgentConfig( + sentience_api_key=os.getenv("SENTIENCE_API_KEY"), + sentience_use_api=True, # Use gateway/API mode + sentience_max_elements=40, + sentience_show_overlay=True, + ) agent = SentienceAgent( task=task, llm=llm, browser_session=browser_session, tools=None, # Will use default tools - # Sentience configuration - sentience_api_key=os.getenv("SENTIENCE_API_KEY"), - sentience_use_api=True, # Use gateway/API mode - sentience_max_elements=40, - sentience_show_overlay=True, + sentience_config=sentience_config, # Vision fallback configuration vision_fallback_enabled=True, 
vision_detail_level="auto", @@ -151,6 +208,11 @@ async def main(): # Agent settings max_steps=10, # Limit steps for example max_failures=3, + # ✨ NEW: Verification configuration (Sentience SDK AgentRuntime) + enable_verification=True, + step_assertions=step_assertions, + done_assertion=done_assertion, + trace_dir="traces", # Trace output for Studio timeline ) # Run agent @@ -174,6 +236,26 @@ async def main(): else: log(f" Sentience used: {result.get('sentience_used', 'unknown')}") + # ✨ NEW: Show verification results + verification = result.get("verification") + if verification: + log(f"\n🔍 Verification Summary:") + log(f" All assertions passed: {verification.get('all_assertions_passed', 'N/A')}") + log(f" Required assertions passed: {verification.get('required_assertions_passed', 'N/A')}") + log(f" Task verified complete: {verification.get('task_verified_complete', False)}") + + # Show individual assertions + assertions = verification.get("assertions", []) + if assertions: + log(f"\n Assertion Details ({len(assertions)} total):") + for assertion in assertions: + status = "✅" if assertion.get("passed") else "❌" + label = assertion.get("label", "unnamed") + required = " (required)" if assertion.get("required") else "" + log(f" {status} {label}{required}") + else: + log(f"\n🔍 Verification: disabled") + except ImportError as e: log(f"❌ Import error: {e}") log("Make sure Sentience SDK is installed: pip install sentienceapi") diff --git a/examples/integrations/sentience_agent_local_llm.py b/examples/integrations/sentience_agent_local_llm.py index 9ee12e5a51..5ecf20ffff 100644 --- a/examples/integrations/sentience_agent_local_llm.py +++ b/examples/integrations/sentience_agent_local_llm.py @@ -4,6 +4,8 @@ This example demonstrates how to use SentienceAgent with: - Primary: Local LLM (Qwen 2.5 3B) for Sentience snapshots (fast, free) - Fallback: Cloud vision model (GPT-4o) for vision mode when Sentience fails +- **NEW: Machine-verifiable assertions via 
Sentience SDK AgentRuntime** +- **NEW: Declarative task completion verification** Requirements: 1. Install transformers: pip install transformers torch accelerate @@ -26,11 +28,19 @@ from dotenv import load_dotenv from browser_use import BrowserProfile, BrowserSession -from browser_use.integrations.sentience import SentienceAgent +from browser_use.integrations.sentience import SentienceAgent, SentienceAgentConfig from browser_use.llm import ChatHuggingFace, ChatOpenAI from browser_use.llm.messages import SystemMessage, UserMessage from sentience import get_extension_dir +# Import Sentience SDK verification helpers +from sentience.verification import ( + url_contains, + exists, + not_exists, + all_of, +) + load_dotenv() # Enable debug logging to see detailed Sentience extension errors @@ -220,7 +230,36 @@ async def main(): 3. Call the done action with the element ID and title in this format: "Top post: element ID [index], title: [title]" """ - log(f"\n🚀 Starting SentienceAgent: {task}\n") + log(f"\n🚀 Starting SentienceAgent with Verification: {task}\n") + + # Define verification assertions for local LLM + step_assertions = [ + { + "predicate": url_contains("news.ycombinator.com"), + "label": "on_hackernews", + "required": True, + }, + { + "predicate": exists("role=link text~'Show HN'"), + "label": "show_hn_posts_visible", + }, + ] + + # Task completion assertion + done_assertion = all_of( + url_contains("news.ycombinator.com/show"), + exists("role=link text~'Show HN'"), + ) + + log("📋 Verification enabled (assertions will be checked each step)") + + # Create Sentience configuration + sentience_config = SentienceAgentConfig( + sentience_api_key=os.getenv("SENTIENCE_API_KEY"), + sentience_use_api=True, # Use gateway/API mode + sentience_max_elements=40, + sentience_show_overlay=True, + ) agent = SentienceAgent( task=task, @@ -228,11 +267,7 @@ async def main(): vision_llm=vision_llm, # Fallback LLM: GPT-4o for vision mode browser_session=browser_session, 
tools=None, # Will use default tools - # Sentience configuration - sentience_api_key=os.getenv("SENTIENCE_API_KEY"), - sentience_use_api=True, # Use gateway/API mode - sentience_max_elements=40, - sentience_show_overlay=True, + sentience_config=sentience_config, # Vision fallback configuration vision_fallback_enabled=True, vision_detail_level="auto", @@ -246,6 +281,11 @@ async def main(): max_history_items=5, # Keep minimal history for small models llm_timeout=300, # Increased timeout for local LLMs (5 minutes) step_timeout=360, # Increased step timeout (6 minutes) + # ✨ Verification configuration (Sentience SDK AgentRuntime) + enable_verification=True, + step_assertions=step_assertions, + done_assertion=done_assertion, + trace_dir="traces", ) # Run agent @@ -269,6 +309,15 @@ async def main(): else: log(f" Sentience used: {result.get('sentience_used', 'unknown')}") + # ✨ Show verification results + verification = result.get("verification") + if verification: + log(f"\n🔍 Verification Summary:") + log(f" All assertions passed: {verification.get('all_assertions_passed', 'N/A')}") + log(f" Task verified complete: {verification.get('task_verified_complete', False)}") + else: + log(f"\n🔍 Verification: disabled") + except ImportError as e: log(f"❌ Import error: {e}") log("\nPlease install required packages:")