diff --git a/examples/lang-chain/README.md b/examples/lang-chain/README.md
new file mode 100644
index 0000000..2dedf51
--- /dev/null
+++ b/examples/lang-chain/README.md
@@ -0,0 +1,13 @@
+### LangChain / LangGraph examples (Python)
+
+These examples show how to use Sentience as a **tool layer** inside LangChain and LangGraph.
+
+Install:
+
+```bash
+pip install sentienceapi[langchain]
+```
+
+Examples:
+- `langchain_tools_demo.py`: build a Sentience tool pack for LangChain
+- `langgraph_self_correcting_graph.py`: observe → act → verify → branch (retry) template
diff --git a/examples/lang-chain/langchain_tools_demo.py b/examples/lang-chain/langchain_tools_demo.py
new file mode 100644
index 0000000..232f569
--- /dev/null
+++ b/examples/lang-chain/langchain_tools_demo.py
@@ -0,0 +1,41 @@
+"""
+Example: Build Sentience LangChain tools (async-only).
+
+Install:
+    pip install sentienceapi[langchain]
+
+Run:
+    python examples/lang-chain/langchain_tools_demo.py
+
+Notes:
+- This example focuses on creating the tools. Hook them into your agent of choice.
+"""
+
+from __future__ import annotations
+
+import asyncio
+
+from sentience import AsyncSentienceBrowser
+from sentience.integrations.langchain import (
+    SentienceLangChainContext,
+    build_sentience_langchain_tools,
+)
+
+
+async def main() -> None:
+    browser = AsyncSentienceBrowser(headless=False)
+    await browser.start()
+    await browser.goto("https://example.com")
+
+    ctx = SentienceLangChainContext(browser=browser)
+    tools = build_sentience_langchain_tools(ctx)
+
+    print("Registered tools:")
+    for t in tools:
+        print(f"- {t.name}")
+
+    await browser.close()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/lang-chain/langgraph_self_correcting_graph.py b/examples/lang-chain/langgraph_self_correcting_graph.py
new file mode 100644
index 0000000..4daa905
--- /dev/null
+++ b/examples/lang-chain/langgraph_self_correcting_graph.py
@@ -0,0 +1,80 @@
+"""
+LangGraph reference example: Sentience observe → act → verify → branch (self-correcting).
+
+Install:
+    pip install sentienceapi[langchain]
+
+Run:
+    python examples/lang-chain/langgraph_self_correcting_graph.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+from dataclasses import dataclass
+
+from sentience import AsyncSentienceBrowser
+from sentience.integrations.langchain import SentienceLangChainContext, SentienceLangChainCore
+
+
+@dataclass
+class State:
+    url: str | None = None
+    last_action: str | None = None
+    attempts: int = 0
+    done: bool = False
+
+
+async def main() -> None:
+    from langgraph.graph import END, StateGraph
+
+    browser = AsyncSentienceBrowser(headless=False)
+    await browser.start()
+
+    core = SentienceLangChainCore(SentienceLangChainContext(browser=browser))
+
+    async def observe(state: State) -> State:
+        s = await core.snapshot_state()
+        state.url = s.url
+        return state
+
+    async def act(state: State) -> State:
+        # Replace with an LLM decision node. For demo we just navigate once.
+ if state.attempts == 0: + await core.navigate("https://example.com") + state.last_action = "navigate" + else: + state.last_action = "noop" + state.attempts += 1 + return state + + async def verify(state: State) -> State: + out = await core.verify_url_matches(r"example\.com") + state.done = bool(out.passed) + return state + + def branch(state: State) -> str: + if state.done: + return "done" + if state.attempts >= 3: + return "done" + return "retry" + + g = StateGraph(State) + g.add_node("observe", observe) + g.add_node("act", act) + g.add_node("verify", verify) + g.set_entry_point("observe") + g.add_edge("observe", "act") + g.add_edge("act", "verify") + g.add_conditional_edges("verify", branch, {"retry": "observe", "done": END}) + app = g.compile() + + final = await app.ainvoke(State()) + print(final) + + await browser.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/langgraph/sentience_self_correcting_graph.py b/examples/langgraph/sentience_self_correcting_graph.py new file mode 100644 index 0000000..cb38e79 --- /dev/null +++ b/examples/langgraph/sentience_self_correcting_graph.py @@ -0,0 +1,88 @@ +""" +LangGraph reference example: Sentience observe → act → verify → branch (self-correcting). + +Install: + pip install sentienceapi[langchain] + +Run: + python examples/langgraph/sentience_self_correcting_graph.py + +Notes: +- This is a template demonstrating control flow; you can replace the "decide" node + with an LLM step (LangChain) that chooses actions based on snapshot_state/read_page. +""" + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from typing import Optional + +from sentience import AsyncSentienceBrowser +from sentience.integrations.langchain import SentienceLangChainContext, SentienceLangChainCore + + +@dataclass +class State: + url: str | None = None + last_action: str | None = None + attempts: int = 0 + done: bool = False + + +async def main() -> None: + # Lazy import so the file can exist without langgraph installed + from langgraph.graph import END, StateGraph + + browser = AsyncSentienceBrowser(headless=False) + await browser.start() + + core = SentienceLangChainCore(SentienceLangChainContext(browser=browser)) + + async def observe(state: State) -> State: + s = await core.snapshot_state() + state.url = s.url + return state + + async def act(state: State) -> State: + # Replace this with an LLM-driven decision. For demo purposes, we just navigate once. 
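+        # One possible shape for that LLM step (an illustrative sketch; assumes a
+        # tool-calling chat model such as ChatOpenAI from langchain-openai):
+        #     llm = ChatOpenAI(model="gpt-4o-mini").bind_tools(
+        #         build_sentience_langchain_tools(SentienceLangChainContext(browser=browser))
+        #     )
+        #     decision = await llm.ainvoke(f"Current URL: {state.url}. Pick the next tool call.")
+        # The returned tool calls would then be dispatched through `core`
+        # (navigate / click / type_text / ...) instead of the fixed branch below.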
+ if state.attempts == 0: + await core.navigate("https://example.com") + state.last_action = "navigate" + else: + state.last_action = "noop" + state.attempts += 1 + return state + + async def verify(state: State) -> State: + # Guard condition: URL should contain example.com + out = await core.verify_url_matches(r"example\.com") + state.done = bool(out.passed) + return state + + def should_continue(state: State) -> str: + # Self-correcting loop: retry observe→act→verify up to 3 attempts + if state.done: + return "done" + if state.attempts >= 3: + return "done" + return "retry" + + g = StateGraph(State) + g.add_node("observe", observe) + g.add_node("act", act) + g.add_node("verify", verify) + g.set_entry_point("observe") + g.add_edge("observe", "act") + g.add_edge("act", "verify") + g.add_conditional_edges("verify", should_continue, {"retry": "observe", "done": END}) + app = g.compile() + + final = await app.ainvoke(State()) + print(final) + + await browser.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/sentience/cloud_tracing.py b/sentience/cloud_tracing.py index 366279d..8f1e9bc 100644 --- a/sentience/cloud_tracing.py +++ b/sentience/cloud_tracing.py @@ -148,40 +148,80 @@ def close( self._closed = True - # Flush and sync file to disk before closing to ensure all data is written - # This is critical on CI systems where file system operations may be slower - self._trace_file.flush() + if not blocking: + # Fire-and-forget background finalize+upload. + # + # IMPORTANT: for truly non-blocking close, we avoid synchronous work here + # (flush/fsync/index generation). That work happens in the background thread. + thread = threading.Thread( + target=self._close_and_upload_background, + args=(on_progress,), + daemon=True, + ) + thread.start() + return # Return immediately + + # Blocking mode: finalize trace file and upload now. + if not self._finalize_trace_file_for_upload(): + return + self._do_upload(on_progress) + + def _finalize_trace_file_for_upload(self) -> bool: + """ + Finalize the local trace file so it is ready for upload. + + Returns: + True if there is data to upload, False if the trace is empty/missing. + """ + # Flush and sync file to disk before closing to ensure all data is written. + # This can be slow on CI file systems; in non-blocking close we do this in background. + try: + self._trace_file.flush() + except Exception: + pass try: - # Force OS to write buffered data to disk os.fsync(self._trace_file.fileno()) except (OSError, AttributeError): - # Some file handles don't support fsync (e.g., StringIO in tests) - # This is fine - flush() is usually sufficient + # Some file handles don't support fsync; flush is usually sufficient. 
+ pass + try: + self._trace_file.close() + except Exception: pass - self._trace_file.close() # Ensure file exists and has content before proceeding - if not self._path.exists() or self._path.stat().st_size == 0: - # No events were emitted, nothing to upload - if self.logger: - self.logger.warning("No trace events to upload (file is empty or missing)") - return + try: + if not self._path.exists() or self._path.stat().st_size == 0: + if self.logger: + self.logger.warning("No trace events to upload (file is empty or missing)") + return False + except Exception: + # If we can't stat, don't attempt upload + return False # Generate index after closing file self._generate_index() + return True - if not blocking: - # Fire-and-forget background upload - thread = threading.Thread( - target=self._do_upload, - args=(on_progress,), - daemon=True, - ) - thread.start() - return # Return immediately + def _close_and_upload_background( + self, on_progress: Callable[[int, int], None] | None = None + ) -> None: + """ + Background worker for non-blocking close. - # Blocking mode - self._do_upload(on_progress) + Performs file finalization + index generation + upload. + """ + try: + if not self._finalize_trace_file_for_upload(): + return + self._do_upload(on_progress) + except Exception as e: + # Non-fatal: preserve trace locally + self._upload_successful = False + print(f"❌ [Sentience] Error uploading trace (background): {e}") + print(f" Local trace preserved at: {self._path}") + if self.logger: + self.logger.error(f"Error uploading trace (background): {e}") def _do_upload(self, on_progress: Callable[[int, int], None] | None = None) -> None: """ diff --git a/sentience/integrations/langchain/__init__.py b/sentience/integrations/langchain/__init__.py new file mode 100644 index 0000000..3441406 --- /dev/null +++ b/sentience/integrations/langchain/__init__.py @@ -0,0 +1,12 @@ +""" +LangChain / LangGraph integration helpers (optional). + +This package is designed so the base SDK can be imported without LangChain installed. +All LangChain imports are done lazily inside tool-builder functions. +""" + +from .context import SentienceLangChainContext +from .core import SentienceLangChainCore +from .tools import build_sentience_langchain_tools + +__all__ = ["SentienceLangChainContext", "SentienceLangChainCore", "build_sentience_langchain_tools"] diff --git a/sentience/integrations/langchain/context.py b/sentience/integrations/langchain/context.py new file mode 100644 index 0000000..bc26c05 --- /dev/null +++ b/sentience/integrations/langchain/context.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from dataclasses import dataclass + +from sentience.browser import AsyncSentienceBrowser +from sentience.tracing import Tracer + + +@dataclass +class SentienceLangChainContext: + """ + Context for LangChain/LangGraph integrations. + + We keep this small and explicit; it mirrors the PydanticAI deps object. 
+ """ + + browser: AsyncSentienceBrowser + tracer: Tracer | None = None diff --git a/sentience/integrations/langchain/core.py b/sentience/integrations/langchain/core.py new file mode 100644 index 0000000..ea24073 --- /dev/null +++ b/sentience/integrations/langchain/core.py @@ -0,0 +1,326 @@ +from __future__ import annotations + +import asyncio +import re +import time +from typing import Any, Literal + +from sentience.actions import ( + click_async, + click_rect_async, + press_async, + scroll_to_async, + type_text_async, +) +from sentience.integrations.models import AssertionResult, BrowserState, ElementSummary +from sentience.models import ReadResult, SnapshotOptions, TextRectSearchResult +from sentience.read import read_async +from sentience.snapshot import snapshot_async +from sentience.text_search import find_text_rect_async +from sentience.trace_event_builder import TraceEventBuilder + +from .context import SentienceLangChainContext + + +class SentienceLangChainCore: + """ + Framework-agnostic (LangChain-friendly) async wrappers around Sentience SDK. + + - No LangChain imports + - Optional Sentience tracing (local/cloud) if ctx.tracer is provided + """ + + def __init__(self, ctx: SentienceLangChainContext): + self.ctx = ctx + self._step_counter = 0 + + def _safe_tracer_call(self, method_name: str, *args, **kwargs) -> None: + tracer = self.ctx.tracer + if not tracer: + return + try: + getattr(tracer, method_name)(*args, **kwargs) + except Exception: + # Tracing must be non-fatal + pass + + async def _trace(self, tool_name: str, exec_coro, exec_meta: dict[str, Any]): + tracer = self.ctx.tracer + browser = self.ctx.browser + + pre_url = getattr(getattr(browser, "page", None), "url", None) + + # Emit run_start once (best-effort) + if tracer and getattr(tracer, "started_at", None) is None: + self._safe_tracer_call( + "emit_run_start", + agent="LangChain+SentienceTools", + llm_model=None, + config={"integration": "langchain"}, + ) + + step_id = None + step_index = None + start = time.time() + if tracer: + self._step_counter += 1 + step_index = self._step_counter + step_id = f"tool-{step_index}:{tool_name}" + self._safe_tracer_call( + "emit_step_start", + step_id=step_id, + step_index=step_index, + goal=f"tool:{tool_name}", + attempt=0, + pre_url=pre_url, + ) + + try: + result = await exec_coro() + + if tracer and step_id and step_index: + post_url = getattr(getattr(browser, "page", None), "url", pre_url) + duration_ms = int((time.time() - start) * 1000) + + success: bool | None = None + if hasattr(result, "success"): + success = bool(getattr(result, "success")) + elif hasattr(result, "status"): + success = getattr(result, "status") == "success" + elif isinstance(result, dict): + if "success" in result: + try: + success = bool(result.get("success")) + except Exception: + success = None + elif "status" in result: + success = result.get("status") == "success" + + exec_data = {"tool": tool_name, "duration_ms": duration_ms, **exec_meta} + if success is not None: + exec_data["success"] = success + + verify_data = { + "passed": bool(success) if success is not None else True, + "signals": {}, + } + + step_end_data = TraceEventBuilder.build_step_end_event( + step_id=step_id, + step_index=step_index, + goal=f"tool:{tool_name}", + attempt=0, + pre_url=pre_url or "", + post_url=post_url or "", + snapshot_digest=None, + llm_data={}, + exec_data=exec_data, + verify_data=verify_data, + ) + self._safe_tracer_call("emit", "step_end", step_end_data, step_id=step_id) + + return result + except Exception as e: 
+ if tracer and step_id: + self._safe_tracer_call("emit_error", step_id=step_id, error=str(e), attempt=0) + raise + + # ===== Observe ===== + async def snapshot_state( + self, limit: int = 50, include_screenshot: bool = False + ) -> BrowserState: + async def _run(): + opts = SnapshotOptions(limit=limit, screenshot=include_screenshot) + snap = await snapshot_async(self.ctx.browser, opts) + if getattr(snap, "status", "success") != "success": + raise RuntimeError(getattr(snap, "error", None) or "snapshot failed") + elements = [ + ElementSummary( + id=e.id, + role=e.role, + text=e.text, + importance=e.importance, + bbox=e.bbox, + ) + for e in snap.elements + ] + return BrowserState(url=snap.url, elements=elements) + + return await self._trace( + "snapshot_state", + _run, + {"limit": limit, "include_screenshot": include_screenshot}, + ) + + async def read_page( + self, + format: Literal["raw", "text", "markdown"] = "text", + enhance_markdown: bool = True, + ) -> ReadResult: + async def _run(): + return await read_async( + self.ctx.browser, output_format=format, enhance_markdown=enhance_markdown + ) + + return await self._trace( + "read_page", + _run, + {"format": format, "enhance_markdown": enhance_markdown}, + ) + + # ===== Act ===== + async def click(self, element_id: int): + return await self._trace( + "click", + lambda: click_async(self.ctx.browser, element_id), + {"element_id": element_id}, + ) + + async def type_text(self, element_id: int, text: str): + # avoid tracing text (PII) + return await self._trace( + "type_text", + lambda: type_text_async(self.ctx.browser, element_id, text), + {"element_id": element_id}, + ) + + async def press_key(self, key: str): + return await self._trace( + "press_key", lambda: press_async(self.ctx.browser, key), {"key": key} + ) + + async def scroll_to( + self, + element_id: int, + behavior: Literal["smooth", "instant", "auto"] = "smooth", + block: Literal["start", "center", "end", "nearest"] = "center", + ): + return await self._trace( + "scroll_to", + lambda: scroll_to_async(self.ctx.browser, element_id, behavior=behavior, block=block), + {"element_id": element_id, "behavior": behavior, "block": block}, + ) + + async def navigate(self, url: str) -> dict[str, Any]: + async def _run(): + await self.ctx.browser.goto(url) + post_url = getattr(getattr(self.ctx.browser, "page", None), "url", None) + return {"success": True, "url": post_url or url} + + return await self._trace("navigate", _run, {"url": url}) + + async def click_rect( + self, + *, + x: float, + y: float, + width: float, + height: float, + button: Literal["left", "right", "middle"] = "left", + click_count: int = 1, + ): + async def _run(): + return await click_rect_async( + self.ctx.browser, + {"x": x, "y": y, "w": width, "h": height}, + button=button, + click_count=click_count, + ) + + return await self._trace( + "click_rect", + _run, + { + "x": x, + "y": y, + "width": width, + "height": height, + "button": button, + "click_count": click_count, + }, + ) + + async def find_text_rect( + self, + text: str, + case_sensitive: bool = False, + whole_word: bool = False, + max_results: int = 10, + ) -> TextRectSearchResult: + async def _run(): + return await find_text_rect_async( + self.ctx.browser, + text, + case_sensitive=case_sensitive, + whole_word=whole_word, + max_results=max_results, + ) + + return await self._trace( + "find_text_rect", + _run, + { + "query": text, + "case_sensitive": case_sensitive, + "whole_word": whole_word, + "max_results": max_results, + }, + ) + + # ===== Verify / guard ===== 
+ async def verify_url_matches(self, pattern: str, flags: int = 0) -> AssertionResult: + async def _run(): + page = getattr(self.ctx.browser, "page", None) + if not page: + return AssertionResult(passed=False, reason="Browser not started (page is None)") + url = page.url + ok = re.search(pattern, url, flags) is not None + return AssertionResult( + passed=ok, + reason="" if ok else f"URL did not match pattern. url={url!r} pattern={pattern!r}", + details={"url": url, "pattern": pattern}, + ) + + return await self._trace("verify_url_matches", _run, {"pattern": pattern}) + + async def verify_text_present( + self, + text: str, + *, + format: Literal["text", "markdown", "raw"] = "text", + case_sensitive: bool = False, + ) -> AssertionResult: + async def _run(): + result = await read_async(self.ctx.browser, output_format=format, enhance_markdown=True) + if result.status != "success": + return AssertionResult( + passed=False, reason=f"read failed: {result.error}", details={} + ) + + haystack = result.content if case_sensitive else result.content.lower() + needle = text if case_sensitive else text.lower() + ok = needle in haystack + return AssertionResult( + passed=ok, + reason="" if ok else f"Text not present: {text!r}", + details={"format": format, "query": text, "length": result.length}, + ) + + return await self._trace("verify_text_present", _run, {"query": text, "format": format}) + + async def assert_eventually_url_matches( + self, + pattern: str, + *, + timeout_s: float = 10.0, + poll_s: float = 0.25, + flags: int = 0, + ) -> AssertionResult: + deadline = time.monotonic() + timeout_s + last: AssertionResult | None = None + while time.monotonic() <= deadline: + last = await self.verify_url_matches(pattern, flags) + if last.passed: + return last + await asyncio.sleep(poll_s) + return last or AssertionResult(passed=False, reason="No attempts executed", details={}) diff --git a/sentience/integrations/langchain/tools.py b/sentience/integrations/langchain/tools.py new file mode 100644 index 0000000..57db09f --- /dev/null +++ b/sentience/integrations/langchain/tools.py @@ -0,0 +1,180 @@ +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import BaseModel, Field + +from .context import SentienceLangChainContext +from .core import SentienceLangChainCore + + +def build_sentience_langchain_tools(ctx: SentienceLangChainContext) -> list[Any]: + """ + Build LangChain tools backed by Sentience. + + LangChain is an optional dependency; imports are done lazily here so that + `import sentience` works without LangChain installed. 
+ """ + + try: + from langchain_core.tools import StructuredTool + except Exception: # pragma: no cover + from langchain.tools import StructuredTool # type: ignore + + core = SentienceLangChainCore(ctx) + + # ---- Schemas ---- + class SnapshotStateArgs(BaseModel): + limit: int = Field(50, ge=1, le=500, description="Max elements to return (default 50)") + include_screenshot: bool = Field( + False, description="Include screenshot in snapshot (default false)" + ) + + class ReadPageArgs(BaseModel): + format: Literal["raw", "text", "markdown"] = Field("text", description="Output format") + enhance_markdown: bool = Field( + True, description="Enhance markdown conversion (default true)" + ) + + class ClickArgs(BaseModel): + element_id: int = Field(..., description="Sentience element id from snapshot_state()") + + class TypeTextArgs(BaseModel): + element_id: int = Field(..., description="Sentience element id from snapshot_state()") + text: str = Field(..., description="Text to type") + + class PressKeyArgs(BaseModel): + key: str = Field(..., description="Key to press (e.g., Enter, Escape, Tab)") + + class ScrollToArgs(BaseModel): + element_id: int = Field(..., description="Sentience element id from snapshot_state()") + behavior: Literal["smooth", "instant", "auto"] = Field( + "smooth", description="Scroll behavior" + ) + block: Literal["start", "center", "end", "nearest"] = Field( + "center", description="Vertical alignment" + ) + + class NavigateArgs(BaseModel): + url: str = Field(..., description="URL to navigate to") + + class ClickRectArgs(BaseModel): + x: float = Field(..., description="Rect x (px)") + y: float = Field(..., description="Rect y (px)") + width: float = Field(..., description="Rect width (px)") + height: float = Field(..., description="Rect height (px)") + button: Literal["left", "right", "middle"] = Field("left", description="Mouse button") + click_count: int = Field(1, ge=1, le=3, description="Click count") + + class FindTextRectArgs(BaseModel): + text: str = Field(..., description="Text to search for") + case_sensitive: bool = Field(False, description="Case sensitive search") + whole_word: bool = Field(False, description="Whole-word match only") + max_results: int = Field(10, ge=1, le=100, description="Max matches (capped at 100)") + + class VerifyUrlMatchesArgs(BaseModel): + pattern: str = Field(..., description="Regex pattern to match against current URL") + + class VerifyTextPresentArgs(BaseModel): + text: str = Field(..., description="Text to check for in read_page output") + format: Literal["text", "markdown", "raw"] = Field("text", description="Read format") + case_sensitive: bool = Field(False, description="Case sensitive check") + + class AssertEventuallyUrlMatchesArgs(BaseModel): + pattern: str = Field(..., description="Regex pattern to match against current URL") + timeout_s: float = Field(10.0, ge=0.1, description="Timeout seconds") + poll_s: float = Field(0.25, ge=0.05, description="Polling interval seconds") + + # ---- Sync wrappers (explicitly unsupported) ---- + def _sync_unsupported(*args, **kwargs): + raise RuntimeError( + "Sentience LangChain tools are async-only. Use an async LangChain agent/runner." 
+ ) + + # ---- Tools ---- + return [ + StructuredTool( + name="sentience_snapshot_state", + description="Observe: take a bounded Sentience snapshot and return a typed BrowserState (url + elements).", + args_schema=SnapshotStateArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.snapshot_state(**kw), + ), + StructuredTool( + name="sentience_read_page", + description="Observe: read page content as text/markdown/raw HTML.", + args_schema=ReadPageArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.read_page(**kw), + ), + StructuredTool( + name="sentience_click", + description="Act: click an element by element_id from snapshot_state.", + args_schema=ClickArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.click(**kw), + ), + StructuredTool( + name="sentience_type_text", + description="Act: type text into an element by element_id from snapshot_state.", + args_schema=TypeTextArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.type_text(**kw), + ), + StructuredTool( + name="sentience_press_key", + description="Act: press a keyboard key (Enter/Escape/Tab/etc.).", + args_schema=PressKeyArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.press_key(**kw), + ), + StructuredTool( + name="sentience_scroll_to", + description="Act: scroll an element into view by element_id from snapshot_state.", + args_schema=ScrollToArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.scroll_to(**kw), + ), + StructuredTool( + name="sentience_navigate", + description="Act: navigate to a URL using the underlying Playwright page.goto.", + args_schema=NavigateArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.navigate(**kw), + ), + StructuredTool( + name="sentience_click_rect", + description="Act: click a rectangle by pixel coordinates (useful with find_text_rect).", + args_schema=ClickRectArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.click_rect(**kw), + ), + StructuredTool( + name="sentience_find_text_rect", + description="Locate: find text occurrences on the page and return pixel coordinates.", + args_schema=FindTextRectArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.find_text_rect(**kw), + ), + StructuredTool( + name="sentience_verify_url_matches", + description="Verify: check current URL matches a regex pattern (post-action guard).", + args_schema=VerifyUrlMatchesArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.verify_url_matches(**kw), + ), + StructuredTool( + name="sentience_verify_text_present", + description="Verify: check that a text substring is present in read_page output.", + args_schema=VerifyTextPresentArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.verify_text_present(**kw), + ), + StructuredTool( + name="sentience_assert_eventually_url_matches", + description="Verify: retry URL regex match until timeout (use for delayed navigation/redirects).", + args_schema=AssertEventuallyUrlMatchesArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.assert_eventually_url_matches(**kw), + ), + ] diff --git a/sentience/integrations/pydanticai/toolset.py b/sentience/integrations/pydanticai/toolset.py index e592dd3..033f606 100644 --- a/sentience/integrations/pydanticai/toolset.py +++ b/sentience/integrations/pydanticai/toolset.py @@ -3,7 +3,9 @@ import asyncio import re import time -from typing import Any, Literal +from typing import Annotated, Any, Literal + +from pydantic import Field from sentience.actions import ( click_async, @@ -140,7 +142,7 @@ async def _trace_tool_call(ctx: Any, 
tool_name: str, exec_coro, exec_meta: dict[ @agent.tool async def snapshot_state( ctx: Any, - limit: int = 50, + limit: Annotated[int, Field(ge=1, le=500)] = 50, include_screenshot: bool = False, ) -> BrowserState: """ @@ -200,7 +202,7 @@ async def _run(): @agent.tool async def click( ctx: Any, - element_id: int, + element_id: Annotated[int, Field(ge=0)], ): """ Click an element by Sentience element id (from snapshot). @@ -215,8 +217,9 @@ async def _run(): @agent.tool async def type_text( ctx: Any, - element_id: int, + element_id: Annotated[int, Field(ge=0)], text: str, + delay_ms: Annotated[float, Field(ge=0, le=250)] = 0, ): """ Type text into an element by Sentience element id (from snapshot). @@ -224,10 +227,15 @@ async def type_text( async def _run(): deps: SentiencePydanticDeps = ctx.deps - return await type_text_async(deps.browser, element_id, text) + return await type_text_async(deps.browser, element_id, text, delay_ms=delay_ms) # NOTE: we intentionally don't trace full `text` to avoid accidental PII leakage - return await _trace_tool_call(ctx, "type_text", _run, {"element_id": element_id}) + return await _trace_tool_call( + ctx, + "type_text", + _run, + {"element_id": element_id, "delay_ms": delay_ms}, + ) @agent.tool async def press_key( @@ -247,7 +255,7 @@ async def _run(): @agent.tool async def scroll_to( ctx: Any, - element_id: int, + element_id: Annotated[int, Field(ge=0)], behavior: Literal["smooth", "instant", "auto"] = "smooth", block: Literal["start", "center", "end", "nearest"] = "center", ): @@ -269,7 +277,7 @@ async def _run(): @agent.tool async def navigate( ctx: Any, - url: str, + url: Annotated[str, Field(min_length=1)], ) -> dict[str, Any]: """ Navigate to a URL using Playwright page.goto via AsyncSentienceBrowser. @@ -289,12 +297,12 @@ async def _run(): async def click_rect( ctx: Any, *, - x: float, - y: float, - width: float, - height: float, + x: Annotated[float, Field()], + y: Annotated[float, Field()], + width: Annotated[float, Field(gt=0)], + height: Annotated[float, Field(gt=0)], button: Literal["left", "right", "middle"] = "left", - click_count: int = 1, + click_count: Annotated[int, Field(ge=1, le=3)] = 1, ): """ Click by pixel coordinates (rectangle), useful with `find_text_rect`. @@ -326,10 +334,10 @@ async def _run(): @agent.tool async def find_text_rect( ctx: Any, - text: str, + text: Annotated[str, Field(min_length=1)], case_sensitive: bool = False, whole_word: bool = False, - max_results: int = 10, + max_results: Annotated[int, Field(ge=1, le=100)] = 10, ) -> TextRectSearchResult: """ Find text occurrences and return pixel coordinates. 
@@ -360,7 +368,7 @@ async def _run(): @agent.tool async def verify_url_matches( ctx: Any, - pattern: str, + pattern: Annotated[str, Field(min_length=1)], flags: int = 0, ) -> AssertionResult: """ @@ -390,7 +398,7 @@ async def _run(): @agent.tool async def verify_text_present( ctx: Any, - text: str, + text: Annotated[str, Field(min_length=1)], *, format: Literal["text", "markdown", "raw"] = "text", case_sensitive: bool = False, @@ -426,10 +434,10 @@ async def _run(): @agent.tool async def assert_eventually_url_matches( ctx: Any, - pattern: str, + pattern: Annotated[str, Field(min_length=1)], *, - timeout_s: float = 10.0, - poll_s: float = 0.25, + timeout_s: Annotated[float, Field(gt=0)] = 10.0, + poll_s: Annotated[float, Field(gt=0)] = 0.25, flags: int = 0, ) -> AssertionResult: """ diff --git a/sentience/models.py b/sentience/models.py index 9b483fa..74560ea 100644 --- a/sentience/models.py +++ b/sentience/models.py @@ -31,6 +31,7 @@ class VisualCues(BaseModel): is_primary: bool background_color_name: str | None = None + fallback_background_color_name: str | None = None is_clickable: bool @@ -66,6 +67,9 @@ class Element(BaseModel): # Hyperlink URL (for link elements) href: str | None = None + # Nearby static text (best-effort, usually only for top-ranked elements) + nearby_text: str | None = None + # ===== v1 state-aware assertion fields (optional) ===== # Best-effort accessible name/label for controls (distinct from visible text) name: str | None = None diff --git a/tests/unit/test_langchain_integration_core.py b/tests/unit/test_langchain_integration_core.py new file mode 100644 index 0000000..55d64b0 --- /dev/null +++ b/tests/unit/test_langchain_integration_core.py @@ -0,0 +1,99 @@ +import pytest + +from sentience.integrations.langchain.context import SentienceLangChainContext +from sentience.integrations.langchain.core import SentienceLangChainCore +from sentience.models import BBox, Element, Snapshot + + +class _FakeAsyncPage: + url = "https://example.com/" + + +class _FakeAsyncBrowser: + def __init__(self): + self.page = _FakeAsyncPage() + self.api_key = None + self.api_url = None + + async def goto(self, url: str) -> None: + self.page.url = url + + +class _FakeTracer: + def __init__(self): + self.started_at = None + self.calls = [] + + def emit_run_start(self, agent, llm_model=None, config=None): + self.started_at = object() + self.calls.append(("run_start", {"agent": agent, "config": config})) + + def emit_step_start(self, **kwargs): + self.calls.append(("step_start", kwargs)) + + def emit(self, event_type, data, step_id=None): + self.calls.append((event_type, {"step_id": step_id, "data": data})) + + def emit_error(self, **kwargs): + self.calls.append(("error", kwargs)) + + +@pytest.mark.asyncio +async def test_core_verify_url_matches_and_tracing(): + tracer = _FakeTracer() + ctx = SentienceLangChainContext(browser=_FakeAsyncBrowser(), tracer=tracer) # type: ignore[arg-type] + core = SentienceLangChainCore(ctx) + + ok = await core.verify_url_matches(r"example\.com") + assert ok.passed is True + + types = [c[0] for c in tracer.calls] + assert "run_start" in types + assert "step_start" in types + assert "step_end" in types + + +@pytest.mark.asyncio +async def test_core_navigate_updates_url(): + ctx = SentienceLangChainContext(browser=_FakeAsyncBrowser()) # type: ignore[arg-type] + core = SentienceLangChainCore(ctx) + + out = await core.navigate("https://example.com/next") + assert out["success"] is True + assert ctx.browser.page.url == "https://example.com/next" + + 
+@pytest.mark.asyncio +async def test_core_snapshot_state_summarizes(monkeypatch): + async def _fake_snapshot_async(browser, options): + assert options.limit == 10 + return Snapshot( + status="success", + url="https://example.com/", + elements=[ + Element( + id=1, + role="button", + text="Sign in", + importance=10, + bbox=BBox(x=1, y=2, width=3, height=4), + visual_cues={ + "is_primary": False, + "is_clickable": True, + "background_color_name": None, + }, + ) + ], + ) + + monkeypatch.setattr( + "sentience.integrations.langchain.core.snapshot_async", _fake_snapshot_async + ) + + ctx = SentienceLangChainContext(browser=_FakeAsyncBrowser()) # type: ignore[arg-type] + core = SentienceLangChainCore(ctx) + + state = await core.snapshot_state(limit=10, include_screenshot=False) + assert state.url == "https://example.com/" + assert len(state.elements) == 1 + assert state.elements[0].id == 1 diff --git a/tests/unit/test_pydanticai_toolset.py b/tests/unit/test_pydanticai_toolset.py index c6e6e78..cf6d20e 100644 --- a/tests/unit/test_pydanticai_toolset.py +++ b/tests/unit/test_pydanticai_toolset.py @@ -172,6 +172,32 @@ async def test_navigate_sets_url_and_returns_success(): assert browser.page.url == "https://example.com/next" +@pytest.mark.asyncio +async def test_type_text_passes_delay_ms(monkeypatch): + agent = _FakeAgent() + tools = register_sentience_tools(agent) + + called = {} + + async def _fake_type_text_async(browser, element_id, text, take_snapshot=False, delay_ms=0): + called["element_id"] = element_id + called["delay_ms"] = delay_ms + return {"success": True} + + monkeypatch.setattr( + "sentience.integrations.pydanticai.toolset.type_text_async", + _fake_type_text_async, + ) + + deps = SentiencePydanticDeps(browser=_FakeAsyncBrowser()) # type: ignore[arg-type] + ctx = _Ctx(deps) + + out = await tools["type_text"](ctx, element_id=1, text="hello", delay_ms=10) + assert out["success"] is True + assert called["element_id"] == 1 + assert called["delay_ms"] == 10 + + @pytest.mark.asyncio async def test_click_rect_is_registered(monkeypatch): agent = _FakeAgent() @@ -199,3 +225,27 @@ async def _fake_click_rect_async(browser, rect, button="left", click_count=1, ** assert called["rect"] == {"x": 10, "y": 20, "w": 30, "h": 40} assert called["button"] == "left" assert called["click_count"] == 2 + + +@pytest.mark.asyncio +async def test_tracing_emits_error_on_exception(monkeypatch): + agent = _FakeAgent() + tools = register_sentience_tools(agent) + + tracer = _FakeTracer() + deps = SentiencePydanticDeps(browser=_FakeAsyncBrowser(), tracer=tracer) # type: ignore[arg-type] + ctx = _Ctx(deps) + + async def _boom(): + raise RuntimeError("boom") + + monkeypatch.setattr( + "sentience.integrations.pydanticai.toolset.read_async", + lambda *args, **kwargs: _boom(), + ) + + with pytest.raises(RuntimeError): + await tools["read_page"](ctx, format="text") + + types = [c[0] for c in tracer.calls] + assert "error" in types
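The tool-builder demo above stops after printing the registered tool names and leaves the agent hookup to the reader. Below is a minimal sketch (not from this diff) of one way to hand the tool pack to an agent, using LangGraph's prebuilt ReAct agent; it assumes `langgraph` and `langchain-openai` are installed, `OPENAI_API_KEY` is set, and `"gpt-4o-mini"` is only a placeholder model name — any tool-calling chat model should work.

```python
# Minimal sketch: wire the Sentience tool pack into LangGraph's prebuilt ReAct agent.
# Assumptions (not shown in the diff above): langgraph and langchain-openai are
# installed, OPENAI_API_KEY is set, and "gpt-4o-mini" is only a placeholder model.
import asyncio

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

from sentience import AsyncSentienceBrowser
from sentience.integrations.langchain import (
    SentienceLangChainContext,
    build_sentience_langchain_tools,
)


async def main() -> None:
    browser = AsyncSentienceBrowser(headless=True)
    await browser.start()
    await browser.goto("https://example.com")

    # Build the async-only Sentience tools and hand them to a tool-calling model.
    tools = build_sentience_langchain_tools(SentienceLangChainContext(browser=browser))
    agent = create_react_agent(ChatOpenAI(model="gpt-4o-mini"), tools)

    # The tools expose only coroutines, so use the async entry point.
    result = await agent.ainvoke(
        {"messages": [("user", "Check that the current page mentions 'Example Domain'.")]}
    )
    print(result["messages"][-1].content)

    await browser.close()


if __name__ == "__main__":
    asyncio.run(main())
```

Because every StructuredTool in the pack registers `_sync_unsupported` as its sync `func`, a synchronous executor would raise immediately; the async `ainvoke` path shown here is the supported route.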