diff --git a/examples/lang-chain/README.md b/examples/lang-chain/README.md new file mode 100644 index 0000000..2dedf51 --- /dev/null +++ b/examples/lang-chain/README.md @@ -0,0 +1,13 @@ +### LangChain / LangGraph examples (Python) + +These examples show how to use Sentience as a **tool layer** inside LangChain and LangGraph. + +Install: + +```bash +pip install sentienceapi[langchain] +``` + +Examples: +- `langchain_tools_demo.py`: build a Sentience tool pack for LangChain +- `langgraph_self_correcting_graph.py`: observe → act → verify → branch (retry) template diff --git a/examples/lang-chain/langchain_tools_demo.py b/examples/lang-chain/langchain_tools_demo.py new file mode 100644 index 0000000..232f569 --- /dev/null +++ b/examples/lang-chain/langchain_tools_demo.py @@ -0,0 +1,41 @@ +""" +Example: Build Sentience LangChain tools (async-only). + +Install: + pip install sentienceapi[langchain] + +Run: + python examples/lang-chain/langchain_tools_demo.py + +Notes: +- This example focuses on creating the tools. Hook them into your agent of choice. +""" + +from __future__ import annotations + +import asyncio + +from sentience import AsyncSentienceBrowser +from sentience.integrations.langchain import ( + SentienceLangChainContext, + build_sentience_langchain_tools, +) + + +async def main() -> None: + browser = AsyncSentienceBrowser(headless=False) + await browser.start() + await browser.goto("https://example.com") + + ctx = SentienceLangChainContext(browser=browser) + tools = build_sentience_langchain_tools(ctx) + + print("Registered tools:") + for t in tools: + print(f"- {t.name}") + + await browser.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/lang-chain/langgraph_self_correcting_graph.py b/examples/lang-chain/langgraph_self_correcting_graph.py new file mode 100644 index 0000000..4daa905 --- /dev/null +++ b/examples/lang-chain/langgraph_self_correcting_graph.py @@ -0,0 +1,80 @@ +""" +LangGraph reference example: Sentience observe → act → verify → branch (self-correcting). + +Install: + pip install sentienceapi[langchain] + +Run: + python examples/lang-chain/langgraph_self_correcting_graph.py +""" + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass + +from sentience import AsyncSentienceBrowser +from sentience.integrations.langchain import SentienceLangChainContext, SentienceLangChainCore + + +@dataclass +class State: + url: str | None = None + last_action: str | None = None + attempts: int = 0 + done: bool = False + + +async def main() -> None: + from langgraph.graph import END, StateGraph + + browser = AsyncSentienceBrowser(headless=False) + await browser.start() + + core = SentienceLangChainCore(SentienceLangChainContext(browser=browser)) + + async def observe(state: State) -> State: + s = await core.snapshot_state() + state.url = s.url + return state + + async def act(state: State) -> State: + # Replace with an LLM decision node. For demo we just navigate once. 
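+        # A possible LLM-driven variant (sketch only; `llm_with_tools` is a hypothetical
+        # LangChain chat model bound to the Sentience tools via `bind_tools`):
+        #   decision = await llm_with_tools.ainvoke(f"Current URL: {state.url}. Choose the next action.")
+        #   state.last_action = str(decision.content)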
+ if state.attempts == 0: + await core.navigate("https://example.com") + state.last_action = "navigate" + else: + state.last_action = "noop" + state.attempts += 1 + return state + + async def verify(state: State) -> State: + out = await core.verify_url_matches(r"example\.com") + state.done = bool(out.passed) + return state + + def branch(state: State) -> str: + if state.done: + return "done" + if state.attempts >= 3: + return "done" + return "retry" + + g = StateGraph(State) + g.add_node("observe", observe) + g.add_node("act", act) + g.add_node("verify", verify) + g.set_entry_point("observe") + g.add_edge("observe", "act") + g.add_edge("act", "verify") + g.add_conditional_edges("verify", branch, {"retry": "observe", "done": END}) + app = g.compile() + + final = await app.ainvoke(State()) + print(final) + + await browser.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/langgraph/sentience_self_correcting_graph.py b/examples/langgraph/sentience_self_correcting_graph.py new file mode 100644 index 0000000..cb38e79 --- /dev/null +++ b/examples/langgraph/sentience_self_correcting_graph.py @@ -0,0 +1,88 @@ +""" +LangGraph reference example: Sentience observe → act → verify → branch (self-correcting). + +Install: + pip install sentienceapi[langchain] + +Run: + python examples/langgraph/sentience_self_correcting_graph.py + +Notes: +- This is a template demonstrating control flow; you can replace the "decide" node + with an LLM step (LangChain) that chooses actions based on snapshot_state/read_page. +""" + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from typing import Optional + +from sentience import AsyncSentienceBrowser +from sentience.integrations.langchain import SentienceLangChainContext, SentienceLangChainCore + + +@dataclass +class State: + url: str | None = None + last_action: str | None = None + attempts: int = 0 + done: bool = False + + +async def main() -> None: + # Lazy import so the file can exist without langgraph installed + from langgraph.graph import END, StateGraph + + browser = AsyncSentienceBrowser(headless=False) + await browser.start() + + core = SentienceLangChainCore(SentienceLangChainContext(browser=browser)) + + async def observe(state: State) -> State: + s = await core.snapshot_state() + state.url = s.url + return state + + async def act(state: State) -> State: + # Replace this with an LLM-driven decision. For demo purposes, we just navigate once. 
+ if state.attempts == 0: + await core.navigate("https://example.com") + state.last_action = "navigate" + else: + state.last_action = "noop" + state.attempts += 1 + return state + + async def verify(state: State) -> State: + # Guard condition: URL should contain example.com + out = await core.verify_url_matches(r"example\.com") + state.done = bool(out.passed) + return state + + def should_continue(state: State) -> str: + # Self-correcting loop: retry observe→act→verify up to 3 attempts + if state.done: + return "done" + if state.attempts >= 3: + return "done" + return "retry" + + g = StateGraph(State) + g.add_node("observe", observe) + g.add_node("act", act) + g.add_node("verify", verify) + g.set_entry_point("observe") + g.add_edge("observe", "act") + g.add_edge("act", "verify") + g.add_conditional_edges("verify", should_continue, {"retry": "observe", "done": END}) + app = g.compile() + + final = await app.ainvoke(State()) + print(final) + + await browser.close() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/pydantic_ai/pydantic_ai_self_correcting_click.py b/examples/pydantic_ai/pydantic_ai_self_correcting_click.py new file mode 100644 index 0000000..7b1a5fc --- /dev/null +++ b/examples/pydantic_ai/pydantic_ai_self_correcting_click.py @@ -0,0 +1,43 @@ +""" +Example: PydanticAI + Sentience self-correcting action loop using URL guards. + +Run: + pip install sentienceapi[pydanticai] + python examples/pydantic_ai/pydantic_ai_self_correcting_click.py +""" + +from __future__ import annotations + +from sentience import AsyncSentienceBrowser +from sentience.integrations.pydanticai import SentiencePydanticDeps, register_sentience_tools + + +async def main() -> None: + from pydantic_ai import Agent + + browser = AsyncSentienceBrowser(headless=False) + await browser.start() + await browser.page.goto("https://example.com") # replace with a real target + + agent = Agent( + "openai:gpt-5", + deps_type=SentiencePydanticDeps, + output_type=str, + instructions=( + "Navigate on the site and click the appropriate link/button. " + "After clicking, use assert_eventually_url_matches to confirm the URL changed as expected." + ), + ) + register_sentience_tools(agent) + + deps = SentiencePydanticDeps(browser=browser) + result = await agent.run("Click something that navigates, then confirm URL changed.", deps=deps) + print(result.output) + + await browser.close() + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main()) diff --git a/examples/pydantic_ai/pydantic_ai_typed_extraction.py b/examples/pydantic_ai/pydantic_ai_typed_extraction.py new file mode 100644 index 0000000..43c8398 --- /dev/null +++ b/examples/pydantic_ai/pydantic_ai_typed_extraction.py @@ -0,0 +1,47 @@ +""" +Example: PydanticAI + Sentience typed extraction (Phase 1 integration). 
+ +Run: + pip install sentienceapi[pydanticai] + python examples/pydantic_ai/pydantic_ai_typed_extraction.py +""" + +from __future__ import annotations + +from pydantic import BaseModel, Field + +from sentience import AsyncSentienceBrowser +from sentience.integrations.pydanticai import SentiencePydanticDeps, register_sentience_tools + + +class ProductInfo(BaseModel): + title: str = Field(..., description="Product title") + price: str = Field(..., description="Displayed price string") + + +async def main() -> None: + from pydantic_ai import Agent + + browser = AsyncSentienceBrowser(headless=False) + await browser.start() + await browser.page.goto("https://example.com") # replace with a real target + + agent = Agent( + "openai:gpt-5", + deps_type=SentiencePydanticDeps, + output_type=ProductInfo, + instructions="Extract the product title and price from the page.", + ) + register_sentience_tools(agent) + + deps = SentiencePydanticDeps(browser=browser) + result = await agent.run("Extract title and price.", deps=deps) + print(result.output) + + await browser.close() + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main()) diff --git a/pyproject.toml b/pyproject.toml index 83f2b49..b7c8cb6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,15 @@ sentience = "sentience.cli:main" browser-use = [ "browser-use>=0.1.40", ] +pydanticai = [ + # PydanticAI framework (PyPI: pydantic-ai, import: pydantic_ai) + "pydantic-ai", +] +langchain = [ + # LangChain + LangGraph (kept optional to avoid forcing heavyweight deps on core SDK users) + "langchain", + "langgraph", +] vision-local = [ "pillow>=10.0.0", "torch>=2.2.0", diff --git a/sentience/cloud_tracing.py b/sentience/cloud_tracing.py index 366279d..8f1e9bc 100644 --- a/sentience/cloud_tracing.py +++ b/sentience/cloud_tracing.py @@ -148,40 +148,80 @@ def close( self._closed = True - # Flush and sync file to disk before closing to ensure all data is written - # This is critical on CI systems where file system operations may be slower - self._trace_file.flush() + if not blocking: + # Fire-and-forget background finalize+upload. + # + # IMPORTANT: for truly non-blocking close, we avoid synchronous work here + # (flush/fsync/index generation). That work happens in the background thread. + thread = threading.Thread( + target=self._close_and_upload_background, + args=(on_progress,), + daemon=True, + ) + thread.start() + return # Return immediately + + # Blocking mode: finalize trace file and upload now. + if not self._finalize_trace_file_for_upload(): + return + self._do_upload(on_progress) + + def _finalize_trace_file_for_upload(self) -> bool: + """ + Finalize the local trace file so it is ready for upload. + + Returns: + True if there is data to upload, False if the trace is empty/missing. + """ + # Flush and sync file to disk before closing to ensure all data is written. + # This can be slow on CI file systems; in non-blocking close we do this in background. + try: + self._trace_file.flush() + except Exception: + pass try: - # Force OS to write buffered data to disk os.fsync(self._trace_file.fileno()) except (OSError, AttributeError): - # Some file handles don't support fsync (e.g., StringIO in tests) - # This is fine - flush() is usually sufficient + # Some file handles don't support fsync; flush is usually sufficient. 
+ pass + try: + self._trace_file.close() + except Exception: pass - self._trace_file.close() # Ensure file exists and has content before proceeding - if not self._path.exists() or self._path.stat().st_size == 0: - # No events were emitted, nothing to upload - if self.logger: - self.logger.warning("No trace events to upload (file is empty or missing)") - return + try: + if not self._path.exists() or self._path.stat().st_size == 0: + if self.logger: + self.logger.warning("No trace events to upload (file is empty or missing)") + return False + except Exception: + # If we can't stat, don't attempt upload + return False # Generate index after closing file self._generate_index() + return True - if not blocking: - # Fire-and-forget background upload - thread = threading.Thread( - target=self._do_upload, - args=(on_progress,), - daemon=True, - ) - thread.start() - return # Return immediately + def _close_and_upload_background( + self, on_progress: Callable[[int, int], None] | None = None + ) -> None: + """ + Background worker for non-blocking close. - # Blocking mode - self._do_upload(on_progress) + Performs file finalization + index generation + upload. + """ + try: + if not self._finalize_trace_file_for_upload(): + return + self._do_upload(on_progress) + except Exception as e: + # Non-fatal: preserve trace locally + self._upload_successful = False + print(f"❌ [Sentience] Error uploading trace (background): {e}") + print(f" Local trace preserved at: {self._path}") + if self.logger: + self.logger.error(f"Error uploading trace (background): {e}") def _do_upload(self, on_progress: Callable[[int, int], None] | None = None) -> None: """ diff --git a/sentience/extension/background.js b/sentience/extension/background.js index 2923f55..aff49b0 100644 --- a/sentience/extension/background.js +++ b/sentience/extension/background.js @@ -28,14 +28,14 @@ async function handleSnapshotProcessing(rawData, options = {}) { const startTime = performance.now(); try { if (!Array.isArray(rawData)) throw new Error("rawData must be an array"); - if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), + if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), !wasmReady) throw new Error("WASM module not initialized"); let analyzedElements, prunedRawData; try { const wasmPromise = new Promise((resolve, reject) => { try { let result; - result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData), + result = options.limit || options.filter ? 
analyze_page_with_options(rawData, options) : analyze_page(rawData), resolve(result); } catch (e) { reject(e); @@ -101,4 +101,4 @@ initWASM().catch(err => {}), chrome.runtime.onMessage.addListener((request, send event.preventDefault(); }), self.addEventListener("unhandledrejection", event => { event.preventDefault(); -}); \ No newline at end of file +}); diff --git a/sentience/extension/content.js b/sentience/extension/content.js index b65cfb5..97923a2 100644 --- a/sentience/extension/content.js +++ b/sentience/extension/content.js @@ -82,7 +82,7 @@ if (!elements || !Array.isArray(elements)) return; removeOverlay(); const host = document.createElement("div"); - host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", + host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", document.body.appendChild(host); const shadow = host.attachShadow({ mode: "closed" @@ -94,15 +94,15 @@ let color; color = isTarget ? "#FF0000" : isPrimary ? "#0066FF" : "#00FF00"; const importanceRatio = maxImportance > 0 ? importance / maxImportance : .5, borderOpacity = isTarget ? 1 : isPrimary ? .9 : Math.max(.4, .5 + .5 * importanceRatio), fillOpacity = .2 * borderOpacity, borderWidth = isTarget ? 2 : isPrimary ? 1.5 : Math.max(.5, Math.round(2 * importanceRatio)), hexOpacity = Math.round(255 * fillOpacity).toString(16).padStart(2, "0"), box = document.createElement("div"); - if (box.style.cssText = `\n position: absolute;\n left: ${bbox.x}px;\n top: ${bbox.y}px;\n width: ${bbox.width}px;\n height: ${bbox.height}px;\n border: ${borderWidth}px solid ${color};\n background-color: ${color}${hexOpacity};\n box-sizing: border-box;\n opacity: ${borderOpacity};\n pointer-events: none;\n `, + if (box.style.cssText = `\n position: absolute;\n left: ${bbox.x}px;\n top: ${bbox.y}px;\n width: ${bbox.width}px;\n height: ${bbox.height}px;\n border: ${borderWidth}px solid ${color};\n background-color: ${color}${hexOpacity};\n box-sizing: border-box;\n opacity: ${borderOpacity};\n pointer-events: none;\n `, importance > 0 || isPrimary) { const badge = document.createElement("span"); - badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, + badge.textContent = isPrimary ? 
`⭐${importance}` : `${importance}`, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, box.appendChild(badge); } if (isTarget) { const targetIndicator = document.createElement("span"); - targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", + targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", box.appendChild(targetIndicator); } shadow.appendChild(box); @@ -122,7 +122,7 @@ if (!grids || !Array.isArray(grids)) return; removeOverlay(); const host = document.createElement("div"); - host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", + host.id = OVERLAY_HOST_ID, host.style.cssText = "\n position: fixed !important;\n top: 0 !important;\n left: 0 !important;\n width: 100vw !important;\n height: 100vh !important;\n pointer-events: none !important;\n z-index: 2147483647 !important;\n margin: 0 !important;\n padding: 0 !important;\n ", document.body.appendChild(host); const shadow = host.attachShadow({ mode: "closed" @@ -138,10 +138,10 @@ let labelText = grid.label ? `Grid ${grid.grid_id}: ${grid.label}` : `Grid ${grid.grid_id}`; grid.is_dominant && (labelText = `⭐ ${labelText} (dominant)`); const badge = document.createElement("span"); - if (badge.textContent = labelText, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, + if (badge.textContent = labelText, badge.style.cssText = `\n position: absolute;\n top: -18px;\n left: 0;\n background: ${color};\n color: white;\n font-size: 11px;\n font-weight: bold;\n padding: 2px 6px;\n font-family: Arial, sans-serif;\n border-radius: 3px;\n opacity: 0.95;\n white-space: nowrap;\n pointer-events: none;\n `, box.appendChild(badge), isTarget) { const targetIndicator = document.createElement("span"); - targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", + targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n position: absolute;\n top: -18px;\n right: 0;\n font-size: 16px;\n pointer-events: none;\n ", box.appendChild(targetIndicator); } shadow.appendChild(box); @@ -155,7 +155,7 @@ let overlayTimeout = null; function removeOverlay() { const existing = document.getElementById(OVERLAY_HOST_ID); - existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), + existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), overlayTimeout = null); } -}(); \ No newline at end of file +}(); diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js index abaee4b..daca8c4 100644 --- a/sentience/extension/injected_api.js +++ 
b/sentience/extension/injected_api.js @@ -112,7 +112,7 @@ if (labelEl) { let text = ""; try { - if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), + if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), !text && labelEl.getAttribute) { const ariaLabel = labelEl.getAttribute("aria-label"); ariaLabel && (text = ariaLabel.trim()); @@ -292,7 +292,7 @@ }); const checkStable = () => { const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime; - timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(), + timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(), resolve()) : setTimeout(checkStable, 50); }; checkStable(); @@ -318,7 +318,7 @@ }); const checkQuiet = () => { const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime; - timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), + timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), resolve()) : setTimeout(checkQuiet, 50); }; checkQuiet(); @@ -437,7 +437,7 @@ }(el); let safeValue = null, valueRedacted = null; try { - if (void 0 !== el.value || el.getAttribute && null !== el.getAttribute("value")) if (isPasswordInput) safeValue = null, + if (void 0 !== el.value || el.getAttribute && null !== el.getAttribute("value")) if (isPasswordInput) safeValue = null, valueRedacted = "true"; else { const rawValue = void 0 !== el.value ? String(el.value) : String(el.getAttribute("value")); safeValue = rawValue.length > 200 ? rawValue.substring(0, 200) : rawValue, valueRedacted = "false"; @@ -537,8 +537,8 @@ const requestId = `iframe-${idx}-${Date.now()}`, timeout = setTimeout(() => { resolve(null); }, 5e3), listener = event => { - "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), - window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot, + "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), + window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot, resolve({ iframe: iframe, data: event.data.snapshot, @@ -554,7 +554,7 @@ ...options, collectIframes: !0 } - }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), + }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null)); } catch (error) { clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null); @@ -604,7 +604,7 @@ }, 25e3), listener = e => { if ("SENTIENCE_SNAPSHOT_RESULT" === e.data.type && e.data.requestId === requestId) { if (resolved) return; - resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), + resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), e.data.error ? 
reject(new Error(e.data.error)) : resolve({ elements: e.data.elements, raw_elements: e.data.raw_elements, @@ -621,7 +621,7 @@ options: options }, "*"); } catch (error) { - resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), + resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), reject(new Error(`Failed to send snapshot request: ${error.message}`))); } }); @@ -631,7 +631,7 @@ options.screenshot && (screenshot = await function(options) { return new Promise(resolve => { const requestId = Math.random().toString(36).substring(7), listener = e => { - "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), + "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), resolve(e.data.screenshot)); }; window.addEventListener("message", listener), window.postMessage({ @@ -690,15 +690,15 @@ } if (node.nodeType !== Node.ELEMENT_NODE) return; const tag = node.tagName.toLowerCase(); - if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), - "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), - "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), - "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), + if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), + "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), + "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), + "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), "a" === tag) { const href = node.getAttribute("href"); markdown += href ? `](${href})` : "]", insideLink = !1; } - "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), + "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), insideLink || "h1" !== tag && "h2" !== tag && "h3" !== tag && "p" !== tag && "div" !== tag || (markdown += "\n"); }(tempDiv), markdown.replace(/\n{3,}/g, "\n\n").trim(); }(document.body) : function(root) { @@ -711,7 +711,7 @@ const style = window.getComputedStyle(node); if ("none" === style.display || "hidden" === style.visibility) return; const isBlock = "block" === style.display || "flex" === style.display || "P" === node.tagName || "DIV" === node.tagName; - isBlock && (text += " "), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), + isBlock && (text += " "), node.shadowRoot ? 
Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), isBlock && (text += "\n"); } } else text += node.textContent; @@ -810,25 +810,25 @@ } function startRecording(options = {}) { const {highlightColor: highlightColor = "#ff0000", successColor: successColor = "#00ff00", autoDisableTimeout: autoDisableTimeout = 18e5, keyboardShortcut: keyboardShortcut = "Ctrl+Shift+I"} = options; - if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), + if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), () => {}; window.sentience_registry_map = new Map, window.sentience_registry.forEach((el, idx) => { el && window.sentience_registry_map.set(el, idx); }); let highlightBox = document.getElementById("sentience-highlight-box"); - highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", - highlightBox.style.cssText = `\n position: fixed;\n pointer-events: none;\n z-index: 2147483647;\n border: 2px solid ${highlightColor};\n background: rgba(255, 0, 0, 0.1);\n display: none;\n transition: all 0.1s ease;\n box-sizing: border-box;\n `, + highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", + highlightBox.style.cssText = `\n position: fixed;\n pointer-events: none;\n z-index: 2147483647;\n border: 2px solid ${highlightColor};\n background: rgba(255, 0, 0, 0.1);\n display: none;\n transition: all 0.1s ease;\n box-sizing: border-box;\n `, document.body.appendChild(highlightBox)); let recordingIndicator = document.getElementById("sentience-recording-indicator"); - recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", - recordingIndicator.style.cssText = `\n position: fixed;\n top: 0;\n left: 0;\n right: 0;\n height: 3px;\n background: ${highlightColor};\n z-index: 2147483646;\n pointer-events: none;\n `, + recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", + recordingIndicator.style.cssText = `\n position: fixed;\n top: 0;\n left: 0;\n right: 0;\n height: 3px;\n background: ${highlightColor};\n z-index: 2147483646;\n pointer-events: none;\n `, document.body.appendChild(recordingIndicator)), recordingIndicator.style.display = "block"; const mouseOverHandler = e => { const el = e.target; if (!el || el === highlightBox || el === recordingIndicator) return; const rect = el.getBoundingClientRect(); - highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", - highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", + highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", + highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", highlightBox.style.height = rect.height + "px"; }, clickHandler = e => { e.preventDefault(), e.stopPropagation(); @@ -905,7 +905,7 @@ debug_snapshot: rawData }, jsonString = JSON.stringify(snippet, null, 2); navigator.clipboard.writeText(jsonString).then(() => { - highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", + highlightBox.style.border = `2px solid 
${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", setTimeout(() => { highlightBox.style.border = `2px solid ${highlightColor}`, highlightBox.style.background = "rgba(255, 0, 0, 0.1)"; }, 500); @@ -915,15 +915,15 @@ }; let timeoutId = null; const stopRecording = () => { - document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), - document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), - timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), + document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), + document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), + timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), window.sentience_registry_map && window.sentience_registry_map.clear(), window.sentience_stopRecording === stopRecording && delete window.sentience_stopRecording; }, keyboardHandler = e => { - (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), + (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), stopRecording()); }; - return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), + return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), document.addEventListener("keydown", keyboardHandler, !0), autoDisableTimeout > 0 && (timeoutId = setTimeout(() => { stopRecording(); }, autoDisableTimeout)), window.sentience_stopRecording = stopRecording, stopRecording; @@ -992,4 +992,4 @@ } }), window.sentience_iframe_handler_setup = !0)); })(); -}(); \ No newline at end of file +}(); diff --git a/sentience/extension/pkg/sentience_core.js b/sentience/extension/pkg/sentience_core.js index bb9cae0..c50ad61 100644 --- a/sentience/extension/pkg/sentience_core.js +++ b/sentience/extension/pkg/sentience_core.js @@ -25,7 +25,7 @@ function __wbg_get_imports() { }, __wbg___wbindgen_bigint_get_as_i64_8fcf4ce7f1ca72a2: function(arg0, arg1) { const v = getObject(arg1), ret = "bigint" == typeof v ? v : void 0; - getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? BigInt(0) : ret, !0), + getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? 
BigInt(0) : ret, !0), getDataViewMemory0().setInt32(arg0 + 0, !isLikeNone(ret), !0); }, __wbg___wbindgen_boolean_get_bbbb1c18aa2f5e25: function(arg0) { @@ -224,7 +224,7 @@ function getArrayU8FromWasm0(ptr, len) { let cachedDataViewMemory0 = null; function getDataViewMemory0() { - return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), + return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), cachedDataViewMemory0; } @@ -235,7 +235,7 @@ function getStringFromWasm0(ptr, len) { let cachedUint8ArrayMemory0 = null; function getUint8ArrayMemory0() { - return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), + return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), cachedUint8ArrayMemory0; } @@ -264,7 +264,7 @@ function isLikeNone(x) { function passStringToWasm0(arg, malloc, realloc) { if (void 0 === realloc) { const buf = cachedTextEncoder.encode(arg), ptr = malloc(buf.length, 1) >>> 0; - return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, + return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, ptr; } let len = arg.length, ptr = malloc(len, 1) >>> 0; @@ -319,7 +319,7 @@ const cachedTextEncoder = new TextEncoder; let wasmModule, wasm, WASM_VECTOR_LEN = 0; function __wbg_finalize_init(instance, module) { - return wasm = instance.exports, wasmModule = module, cachedDataViewMemory0 = null, + return wasm = instance.exports, wasmModule = module, cachedDataViewMemory0 = null, cachedUint8ArrayMemory0 = null, wasm; } @@ -360,7 +360,7 @@ function initSync(module) { async function __wbg_init(module_or_path) { if (void 0 !== wasm) return wasm; - void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), + void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), void 0 === module_or_path && (module_or_path = new URL("sentience_core_bg.wasm", import.meta.url)); const imports = __wbg_get_imports(); ("string" == typeof module_or_path || "function" == typeof Request && module_or_path instanceof Request || "function" == typeof URL && module_or_path instanceof URL) && (module_or_path = fetch(module_or_path)); @@ -368,4 +368,4 @@ async function __wbg_init(module_or_path) { return __wbg_finalize_init(instance, module); } -export { initSync, __wbg_init as default }; \ No newline at end of file +export { initSync, __wbg_init as default }; diff --git a/sentience/integrations/__init__.py b/sentience/integrations/__init__.py new file mode 100644 index 0000000..760bf4b --- /dev/null +++ b/sentience/integrations/__init__.py @@ -0,0 +1,6 @@ +""" +Integrations package (internal). + +This package is intended for framework integrations (e.g., PydanticAI, LangChain/LangGraph). +Public APIs should be introduced deliberately once the integration surface is stable. 
+""" diff --git a/sentience/integrations/langchain/__init__.py b/sentience/integrations/langchain/__init__.py new file mode 100644 index 0000000..3441406 --- /dev/null +++ b/sentience/integrations/langchain/__init__.py @@ -0,0 +1,12 @@ +""" +LangChain / LangGraph integration helpers (optional). + +This package is designed so the base SDK can be imported without LangChain installed. +All LangChain imports are done lazily inside tool-builder functions. +""" + +from .context import SentienceLangChainContext +from .core import SentienceLangChainCore +from .tools import build_sentience_langchain_tools + +__all__ = ["SentienceLangChainContext", "SentienceLangChainCore", "build_sentience_langchain_tools"] diff --git a/sentience/integrations/langchain/context.py b/sentience/integrations/langchain/context.py new file mode 100644 index 0000000..bc26c05 --- /dev/null +++ b/sentience/integrations/langchain/context.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from dataclasses import dataclass + +from sentience.browser import AsyncSentienceBrowser +from sentience.tracing import Tracer + + +@dataclass +class SentienceLangChainContext: + """ + Context for LangChain/LangGraph integrations. + + We keep this small and explicit; it mirrors the PydanticAI deps object. + """ + + browser: AsyncSentienceBrowser + tracer: Tracer | None = None diff --git a/sentience/integrations/langchain/core.py b/sentience/integrations/langchain/core.py new file mode 100644 index 0000000..ea24073 --- /dev/null +++ b/sentience/integrations/langchain/core.py @@ -0,0 +1,326 @@ +from __future__ import annotations + +import asyncio +import re +import time +from typing import Any, Literal + +from sentience.actions import ( + click_async, + click_rect_async, + press_async, + scroll_to_async, + type_text_async, +) +from sentience.integrations.models import AssertionResult, BrowserState, ElementSummary +from sentience.models import ReadResult, SnapshotOptions, TextRectSearchResult +from sentience.read import read_async +from sentience.snapshot import snapshot_async +from sentience.text_search import find_text_rect_async +from sentience.trace_event_builder import TraceEventBuilder + +from .context import SentienceLangChainContext + + +class SentienceLangChainCore: + """ + Framework-agnostic (LangChain-friendly) async wrappers around Sentience SDK. 
+ + - No LangChain imports + - Optional Sentience tracing (local/cloud) if ctx.tracer is provided + """ + + def __init__(self, ctx: SentienceLangChainContext): + self.ctx = ctx + self._step_counter = 0 + + def _safe_tracer_call(self, method_name: str, *args, **kwargs) -> None: + tracer = self.ctx.tracer + if not tracer: + return + try: + getattr(tracer, method_name)(*args, **kwargs) + except Exception: + # Tracing must be non-fatal + pass + + async def _trace(self, tool_name: str, exec_coro, exec_meta: dict[str, Any]): + tracer = self.ctx.tracer + browser = self.ctx.browser + + pre_url = getattr(getattr(browser, "page", None), "url", None) + + # Emit run_start once (best-effort) + if tracer and getattr(tracer, "started_at", None) is None: + self._safe_tracer_call( + "emit_run_start", + agent="LangChain+SentienceTools", + llm_model=None, + config={"integration": "langchain"}, + ) + + step_id = None + step_index = None + start = time.time() + if tracer: + self._step_counter += 1 + step_index = self._step_counter + step_id = f"tool-{step_index}:{tool_name}" + self._safe_tracer_call( + "emit_step_start", + step_id=step_id, + step_index=step_index, + goal=f"tool:{tool_name}", + attempt=0, + pre_url=pre_url, + ) + + try: + result = await exec_coro() + + if tracer and step_id and step_index: + post_url = getattr(getattr(browser, "page", None), "url", pre_url) + duration_ms = int((time.time() - start) * 1000) + + success: bool | None = None + if hasattr(result, "success"): + success = bool(getattr(result, "success")) + elif hasattr(result, "status"): + success = getattr(result, "status") == "success" + elif isinstance(result, dict): + if "success" in result: + try: + success = bool(result.get("success")) + except Exception: + success = None + elif "status" in result: + success = result.get("status") == "success" + + exec_data = {"tool": tool_name, "duration_ms": duration_ms, **exec_meta} + if success is not None: + exec_data["success"] = success + + verify_data = { + "passed": bool(success) if success is not None else True, + "signals": {}, + } + + step_end_data = TraceEventBuilder.build_step_end_event( + step_id=step_id, + step_index=step_index, + goal=f"tool:{tool_name}", + attempt=0, + pre_url=pre_url or "", + post_url=post_url or "", + snapshot_digest=None, + llm_data={}, + exec_data=exec_data, + verify_data=verify_data, + ) + self._safe_tracer_call("emit", "step_end", step_end_data, step_id=step_id) + + return result + except Exception as e: + if tracer and step_id: + self._safe_tracer_call("emit_error", step_id=step_id, error=str(e), attempt=0) + raise + + # ===== Observe ===== + async def snapshot_state( + self, limit: int = 50, include_screenshot: bool = False + ) -> BrowserState: + async def _run(): + opts = SnapshotOptions(limit=limit, screenshot=include_screenshot) + snap = await snapshot_async(self.ctx.browser, opts) + if getattr(snap, "status", "success") != "success": + raise RuntimeError(getattr(snap, "error", None) or "snapshot failed") + elements = [ + ElementSummary( + id=e.id, + role=e.role, + text=e.text, + importance=e.importance, + bbox=e.bbox, + ) + for e in snap.elements + ] + return BrowserState(url=snap.url, elements=elements) + + return await self._trace( + "snapshot_state", + _run, + {"limit": limit, "include_screenshot": include_screenshot}, + ) + + async def read_page( + self, + format: Literal["raw", "text", "markdown"] = "text", + enhance_markdown: bool = True, + ) -> ReadResult: + async def _run(): + return await read_async( + self.ctx.browser, 
output_format=format, enhance_markdown=enhance_markdown + ) + + return await self._trace( + "read_page", + _run, + {"format": format, "enhance_markdown": enhance_markdown}, + ) + + # ===== Act ===== + async def click(self, element_id: int): + return await self._trace( + "click", + lambda: click_async(self.ctx.browser, element_id), + {"element_id": element_id}, + ) + + async def type_text(self, element_id: int, text: str): + # avoid tracing text (PII) + return await self._trace( + "type_text", + lambda: type_text_async(self.ctx.browser, element_id, text), + {"element_id": element_id}, + ) + + async def press_key(self, key: str): + return await self._trace( + "press_key", lambda: press_async(self.ctx.browser, key), {"key": key} + ) + + async def scroll_to( + self, + element_id: int, + behavior: Literal["smooth", "instant", "auto"] = "smooth", + block: Literal["start", "center", "end", "nearest"] = "center", + ): + return await self._trace( + "scroll_to", + lambda: scroll_to_async(self.ctx.browser, element_id, behavior=behavior, block=block), + {"element_id": element_id, "behavior": behavior, "block": block}, + ) + + async def navigate(self, url: str) -> dict[str, Any]: + async def _run(): + await self.ctx.browser.goto(url) + post_url = getattr(getattr(self.ctx.browser, "page", None), "url", None) + return {"success": True, "url": post_url or url} + + return await self._trace("navigate", _run, {"url": url}) + + async def click_rect( + self, + *, + x: float, + y: float, + width: float, + height: float, + button: Literal["left", "right", "middle"] = "left", + click_count: int = 1, + ): + async def _run(): + return await click_rect_async( + self.ctx.browser, + {"x": x, "y": y, "w": width, "h": height}, + button=button, + click_count=click_count, + ) + + return await self._trace( + "click_rect", + _run, + { + "x": x, + "y": y, + "width": width, + "height": height, + "button": button, + "click_count": click_count, + }, + ) + + async def find_text_rect( + self, + text: str, + case_sensitive: bool = False, + whole_word: bool = False, + max_results: int = 10, + ) -> TextRectSearchResult: + async def _run(): + return await find_text_rect_async( + self.ctx.browser, + text, + case_sensitive=case_sensitive, + whole_word=whole_word, + max_results=max_results, + ) + + return await self._trace( + "find_text_rect", + _run, + { + "query": text, + "case_sensitive": case_sensitive, + "whole_word": whole_word, + "max_results": max_results, + }, + ) + + # ===== Verify / guard ===== + async def verify_url_matches(self, pattern: str, flags: int = 0) -> AssertionResult: + async def _run(): + page = getattr(self.ctx.browser, "page", None) + if not page: + return AssertionResult(passed=False, reason="Browser not started (page is None)") + url = page.url + ok = re.search(pattern, url, flags) is not None + return AssertionResult( + passed=ok, + reason="" if ok else f"URL did not match pattern. 
url={url!r} pattern={pattern!r}", + details={"url": url, "pattern": pattern}, + ) + + return await self._trace("verify_url_matches", _run, {"pattern": pattern}) + + async def verify_text_present( + self, + text: str, + *, + format: Literal["text", "markdown", "raw"] = "text", + case_sensitive: bool = False, + ) -> AssertionResult: + async def _run(): + result = await read_async(self.ctx.browser, output_format=format, enhance_markdown=True) + if result.status != "success": + return AssertionResult( + passed=False, reason=f"read failed: {result.error}", details={} + ) + + haystack = result.content if case_sensitive else result.content.lower() + needle = text if case_sensitive else text.lower() + ok = needle in haystack + return AssertionResult( + passed=ok, + reason="" if ok else f"Text not present: {text!r}", + details={"format": format, "query": text, "length": result.length}, + ) + + return await self._trace("verify_text_present", _run, {"query": text, "format": format}) + + async def assert_eventually_url_matches( + self, + pattern: str, + *, + timeout_s: float = 10.0, + poll_s: float = 0.25, + flags: int = 0, + ) -> AssertionResult: + deadline = time.monotonic() + timeout_s + last: AssertionResult | None = None + while time.monotonic() <= deadline: + last = await self.verify_url_matches(pattern, flags) + if last.passed: + return last + await asyncio.sleep(poll_s) + return last or AssertionResult(passed=False, reason="No attempts executed", details={}) diff --git a/sentience/integrations/langchain/tools.py b/sentience/integrations/langchain/tools.py new file mode 100644 index 0000000..57db09f --- /dev/null +++ b/sentience/integrations/langchain/tools.py @@ -0,0 +1,180 @@ +from __future__ import annotations + +from typing import Any, Literal + +from pydantic import BaseModel, Field + +from .context import SentienceLangChainContext +from .core import SentienceLangChainCore + + +def build_sentience_langchain_tools(ctx: SentienceLangChainContext) -> list[Any]: + """ + Build LangChain tools backed by Sentience. + + LangChain is an optional dependency; imports are done lazily here so that + `import sentience` works without LangChain installed. 
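+
+    Example (sketch, assuming a started `AsyncSentienceBrowser` named `browser`):
+
+        ctx = SentienceLangChainContext(browser=browser)
+        tools = build_sentience_langchain_tools(ctx)
+        # hand `tools` to an async LangChain agent/executor (the tools are async-only)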
+ """ + + try: + from langchain_core.tools import StructuredTool + except Exception: # pragma: no cover + from langchain.tools import StructuredTool # type: ignore + + core = SentienceLangChainCore(ctx) + + # ---- Schemas ---- + class SnapshotStateArgs(BaseModel): + limit: int = Field(50, ge=1, le=500, description="Max elements to return (default 50)") + include_screenshot: bool = Field( + False, description="Include screenshot in snapshot (default false)" + ) + + class ReadPageArgs(BaseModel): + format: Literal["raw", "text", "markdown"] = Field("text", description="Output format") + enhance_markdown: bool = Field( + True, description="Enhance markdown conversion (default true)" + ) + + class ClickArgs(BaseModel): + element_id: int = Field(..., description="Sentience element id from snapshot_state()") + + class TypeTextArgs(BaseModel): + element_id: int = Field(..., description="Sentience element id from snapshot_state()") + text: str = Field(..., description="Text to type") + + class PressKeyArgs(BaseModel): + key: str = Field(..., description="Key to press (e.g., Enter, Escape, Tab)") + + class ScrollToArgs(BaseModel): + element_id: int = Field(..., description="Sentience element id from snapshot_state()") + behavior: Literal["smooth", "instant", "auto"] = Field( + "smooth", description="Scroll behavior" + ) + block: Literal["start", "center", "end", "nearest"] = Field( + "center", description="Vertical alignment" + ) + + class NavigateArgs(BaseModel): + url: str = Field(..., description="URL to navigate to") + + class ClickRectArgs(BaseModel): + x: float = Field(..., description="Rect x (px)") + y: float = Field(..., description="Rect y (px)") + width: float = Field(..., description="Rect width (px)") + height: float = Field(..., description="Rect height (px)") + button: Literal["left", "right", "middle"] = Field("left", description="Mouse button") + click_count: int = Field(1, ge=1, le=3, description="Click count") + + class FindTextRectArgs(BaseModel): + text: str = Field(..., description="Text to search for") + case_sensitive: bool = Field(False, description="Case sensitive search") + whole_word: bool = Field(False, description="Whole-word match only") + max_results: int = Field(10, ge=1, le=100, description="Max matches (capped at 100)") + + class VerifyUrlMatchesArgs(BaseModel): + pattern: str = Field(..., description="Regex pattern to match against current URL") + + class VerifyTextPresentArgs(BaseModel): + text: str = Field(..., description="Text to check for in read_page output") + format: Literal["text", "markdown", "raw"] = Field("text", description="Read format") + case_sensitive: bool = Field(False, description="Case sensitive check") + + class AssertEventuallyUrlMatchesArgs(BaseModel): + pattern: str = Field(..., description="Regex pattern to match against current URL") + timeout_s: float = Field(10.0, ge=0.1, description="Timeout seconds") + poll_s: float = Field(0.25, ge=0.05, description="Polling interval seconds") + + # ---- Sync wrappers (explicitly unsupported) ---- + def _sync_unsupported(*args, **kwargs): + raise RuntimeError( + "Sentience LangChain tools are async-only. Use an async LangChain agent/runner." 
+ ) + + # ---- Tools ---- + return [ + StructuredTool( + name="sentience_snapshot_state", + description="Observe: take a bounded Sentience snapshot and return a typed BrowserState (url + elements).", + args_schema=SnapshotStateArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.snapshot_state(**kw), + ), + StructuredTool( + name="sentience_read_page", + description="Observe: read page content as text/markdown/raw HTML.", + args_schema=ReadPageArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.read_page(**kw), + ), + StructuredTool( + name="sentience_click", + description="Act: click an element by element_id from snapshot_state.", + args_schema=ClickArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.click(**kw), + ), + StructuredTool( + name="sentience_type_text", + description="Act: type text into an element by element_id from snapshot_state.", + args_schema=TypeTextArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.type_text(**kw), + ), + StructuredTool( + name="sentience_press_key", + description="Act: press a keyboard key (Enter/Escape/Tab/etc.).", + args_schema=PressKeyArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.press_key(**kw), + ), + StructuredTool( + name="sentience_scroll_to", + description="Act: scroll an element into view by element_id from snapshot_state.", + args_schema=ScrollToArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.scroll_to(**kw), + ), + StructuredTool( + name="sentience_navigate", + description="Act: navigate to a URL using the underlying Playwright page.goto.", + args_schema=NavigateArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.navigate(**kw), + ), + StructuredTool( + name="sentience_click_rect", + description="Act: click a rectangle by pixel coordinates (useful with find_text_rect).", + args_schema=ClickRectArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.click_rect(**kw), + ), + StructuredTool( + name="sentience_find_text_rect", + description="Locate: find text occurrences on the page and return pixel coordinates.", + args_schema=FindTextRectArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.find_text_rect(**kw), + ), + StructuredTool( + name="sentience_verify_url_matches", + description="Verify: check current URL matches a regex pattern (post-action guard).", + args_schema=VerifyUrlMatchesArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.verify_url_matches(**kw), + ), + StructuredTool( + name="sentience_verify_text_present", + description="Verify: check that a text substring is present in read_page output.", + args_schema=VerifyTextPresentArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.verify_text_present(**kw), + ), + StructuredTool( + name="sentience_assert_eventually_url_matches", + description="Verify: retry URL regex match until timeout (use for delayed navigation/redirects).", + args_schema=AssertEventuallyUrlMatchesArgs, + func=_sync_unsupported, + coroutine=lambda **kw: core.assert_eventually_url_matches(**kw), + ), + ] diff --git a/sentience/integrations/models.py b/sentience/integrations/models.py new file mode 100644 index 0000000..180c1a0 --- /dev/null +++ b/sentience/integrations/models.py @@ -0,0 +1,46 @@ +""" +Shared typed models for integrations (internal). + +These are intentionally small, framework-friendly return types for tool wrappers. +They wrap/derive from existing Sentience SDK types while keeping payloads bounded +and predictable for LLM tool calls. 
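+
+Example shape (illustrative values only):
+
+    BrowserState(
+        url="https://example.com",
+        elements=[ElementSummary(id=3, role="link", text="More information...")],
+    )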
+""" + +from __future__ import annotations + +from typing import Any + +from pydantic import BaseModel + +from sentience.models import BBox + + +class ElementSummary(BaseModel): + """A small, stable subset of `sentience.models.Element` suitable for tool returns.""" + + id: int + role: str + text: str | None = None + importance: int | None = None + bbox: BBox | None = None + + +class BrowserState(BaseModel): + """ + Minimal browser state for integrations. + + Notes: + - Keep this payload bounded: prefer `snapshot(limit=50)` and summarize elements. + - Integrations can extend this in their own packages without changing core SDK. + """ + + url: str + elements: list[ElementSummary] + + +class AssertionResult(BaseModel): + """Framework-friendly assertion/guard result.""" + + passed: bool + reason: str = "" + details: dict[str, Any] = {} diff --git a/sentience/integrations/pydanticai/__init__.py b/sentience/integrations/pydanticai/__init__.py new file mode 100644 index 0000000..b714042 --- /dev/null +++ b/sentience/integrations/pydanticai/__init__.py @@ -0,0 +1,15 @@ +""" +PydanticAI integration helpers (optional). + +This module does NOT import `pydantic_ai` at import time so the base SDK can be +installed without the optional dependency. Users should install: + + pip install sentienceapi[pydanticai] + +and then use `register_sentience_tools(...)` with a PydanticAI `Agent`. +""" + +from .deps import SentiencePydanticDeps +from .toolset import register_sentience_tools + +__all__ = ["SentiencePydanticDeps", "register_sentience_tools"] diff --git a/sentience/integrations/pydanticai/deps.py b/sentience/integrations/pydanticai/deps.py new file mode 100644 index 0000000..f667489 --- /dev/null +++ b/sentience/integrations/pydanticai/deps.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from sentience.browser import AsyncSentienceBrowser +from sentience.tracing import Tracer + + +@dataclass +class SentiencePydanticDeps: + """ + Dependencies passed into PydanticAI tools via ctx.deps. + + At minimum we carry the live `AsyncSentienceBrowser`. + """ + + browser: AsyncSentienceBrowser + runtime: Any | None = None + tracer: Tracer | None = None diff --git a/sentience/integrations/pydanticai/toolset.py b/sentience/integrations/pydanticai/toolset.py new file mode 100644 index 0000000..033f606 --- /dev/null +++ b/sentience/integrations/pydanticai/toolset.py @@ -0,0 +1,468 @@ +from __future__ import annotations + +import asyncio +import re +import time +from typing import Annotated, Any, Literal + +from pydantic import Field + +from sentience.actions import ( + click_async, + click_rect_async, + press_async, + scroll_to_async, + type_text_async, +) +from sentience.integrations.models import AssertionResult, BrowserState, ElementSummary +from sentience.models import ReadResult, SnapshotOptions, TextRectSearchResult +from sentience.read import read_async +from sentience.snapshot import snapshot_async +from sentience.text_search import find_text_rect_async +from sentience.trace_event_builder import TraceEventBuilder + +from .deps import SentiencePydanticDeps + + +def register_sentience_tools(agent: Any) -> dict[str, Any]: + """ + Register Sentience tools on a PydanticAI agent. + + This function is intentionally lightweight and avoids importing `pydantic_ai` + at module import time. It expects `agent` to provide a `.tool` decorator + compatible with PydanticAI's `Agent.tool`. 
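+
+    Example (sketch; mirrors examples/pydantic_ai and assumes an OpenAI model is configured):
+
+        agent = Agent("openai:gpt-5", deps_type=SentiencePydanticDeps, output_type=str)
+        register_sentience_tools(agent)
+        result = await agent.run("...", deps=SentiencePydanticDeps(browser=browser))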
+ + Returns: + Mapping of tool name -> underlying coroutine function (useful for tests). + """ + + # Per-agent counter for tool call steps (for tracing) + step_counter = {"n": 0} + + def _safe_tracer_call(tracer: Any, method_name: str, *args, **kwargs) -> None: + try: + getattr(tracer, method_name)(*args, **kwargs) + except Exception: + # Tracing must be non-fatal for tool execution + pass + + async def _trace_tool_call(ctx: Any, tool_name: str, exec_coro, exec_meta: dict[str, Any]): + """ + Wrap a tool execution with Sentience tracing if a tracer is present in deps. + """ + deps: SentiencePydanticDeps = ctx.deps + tracer = deps.tracer + + pre_url = None + if getattr(deps.browser, "page", None) is not None: + pre_url = getattr(deps.browser.page, "url", None) + + # Initialize run_start once (best-effort) + if tracer and getattr(tracer, "started_at", None) is None: + _safe_tracer_call( + tracer, + "emit_run_start", + agent="PydanticAI+SentienceToolset", + llm_model=None, + config={"integration": "pydanticai"}, + ) + + step_id = None + step_index = None + start = time.time() + if tracer: + step_counter["n"] += 1 + step_index = step_counter["n"] + step_id = f"tool-{step_index}:{tool_name}" + _safe_tracer_call( + tracer, + "emit_step_start", + step_id=step_id, + step_index=step_index, + goal=f"tool:{tool_name}", + attempt=0, + pre_url=pre_url, + ) + + try: + result = await exec_coro() + + if tracer and step_id and step_index: + post_url = pre_url + if getattr(deps.browser, "page", None) is not None: + post_url = getattr(deps.browser.page, "url", pre_url) + + duration_ms = int((time.time() - start) * 1000) + + # Best-effort success inference + success: bool | None = None + if hasattr(result, "success"): + success = bool(getattr(result, "success")) + elif hasattr(result, "status"): + success = getattr(result, "status") == "success" + elif isinstance(result, dict): + if "success" in result: + try: + success = bool(result.get("success")) + except Exception: + success = None + elif "status" in result: + success = result.get("status") == "success" + + exec_data = {"tool": tool_name, "duration_ms": duration_ms, **exec_meta} + if success is not None: + exec_data["success"] = success + + verify_data = { + "passed": bool(success) if success is not None else True, + "signals": {}, + } + + step_end_data = TraceEventBuilder.build_step_end_event( + step_id=step_id, + step_index=step_index, + goal=f"tool:{tool_name}", + attempt=0, + pre_url=pre_url or "", + post_url=post_url or "", + snapshot_digest=None, + llm_data={}, + exec_data=exec_data, + verify_data=verify_data, + ) + _safe_tracer_call(tracer, "emit", "step_end", step_end_data, step_id=step_id) + + return result + + except Exception as e: + if tracer and step_id: + _safe_tracer_call(tracer, "emit_error", step_id=step_id, error=str(e), attempt=0) + raise + + @agent.tool + async def snapshot_state( + ctx: Any, + limit: Annotated[int, Field(ge=1, le=500)] = 50, + include_screenshot: bool = False, + ) -> BrowserState: + """ + Take a bounded snapshot of the current page and return a small typed summary. 
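+
+        The return value is a bounded `BrowserState`; for example (values are
+        illustrative only):
+
+            BrowserState(
+                url="https://example.com/",
+                elements=[ElementSummary(id=1, role="button", text="Sign in")],
+            )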
+ """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + opts = SnapshotOptions(limit=limit, screenshot=include_screenshot) + snap = await snapshot_async(deps.browser, opts) + if getattr(snap, "status", "success") != "success": + raise RuntimeError(getattr(snap, "error", None) or "snapshot failed") + elements = [ + ElementSummary( + id=e.id, + role=e.role, + text=e.text, + importance=e.importance, + bbox=e.bbox, + ) + for e in snap.elements + ] + return BrowserState(url=snap.url, elements=elements) + + return await _trace_tool_call( + ctx, + "snapshot_state", + _run, + {"limit": limit, "include_screenshot": include_screenshot}, + ) + + @agent.tool + async def read_page( + ctx: Any, + format: Literal["raw", "text", "markdown"] = "text", + enhance_markdown: bool = True, + ) -> ReadResult: + """ + Read page content as raw HTML, text, or markdown. + """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + return await read_async( + deps.browser, + output_format=format, + enhance_markdown=enhance_markdown, + ) + + return await _trace_tool_call( + ctx, + "read_page", + _run, + {"format": format, "enhance_markdown": enhance_markdown}, + ) + + @agent.tool + async def click( + ctx: Any, + element_id: Annotated[int, Field(ge=0)], + ): + """ + Click an element by Sentience element id (from snapshot). + """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + return await click_async(deps.browser, element_id) + + return await _trace_tool_call(ctx, "click", _run, {"element_id": element_id}) + + @agent.tool + async def type_text( + ctx: Any, + element_id: Annotated[int, Field(ge=0)], + text: str, + delay_ms: Annotated[float, Field(ge=0, le=250)] = 0, + ): + """ + Type text into an element by Sentience element id (from snapshot). + """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + return await type_text_async(deps.browser, element_id, text, delay_ms=delay_ms) + + # NOTE: we intentionally don't trace full `text` to avoid accidental PII leakage + return await _trace_tool_call( + ctx, + "type_text", + _run, + {"element_id": element_id, "delay_ms": delay_ms}, + ) + + @agent.tool + async def press_key( + ctx: Any, + key: str, + ): + """ + Press a keyboard key (Enter, Escape, Tab, etc.). + """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + return await press_async(deps.browser, key) + + return await _trace_tool_call(ctx, "press_key", _run, {"key": key}) + + @agent.tool + async def scroll_to( + ctx: Any, + element_id: Annotated[int, Field(ge=0)], + behavior: Literal["smooth", "instant", "auto"] = "smooth", + block: Literal["start", "center", "end", "nearest"] = "center", + ): + """ + Scroll an element into view by Sentience element id (from snapshot). + """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + return await scroll_to_async(deps.browser, element_id, behavior=behavior, block=block) + + return await _trace_tool_call( + ctx, + "scroll_to", + _run, + {"element_id": element_id, "behavior": behavior, "block": block}, + ) + + @agent.tool + async def navigate( + ctx: Any, + url: Annotated[str, Field(min_length=1)], + ) -> dict[str, Any]: + """ + Navigate to a URL using Playwright page.goto via AsyncSentienceBrowser. 
+ """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + await deps.browser.goto(url) + post_url = None + if getattr(deps.browser, "page", None) is not None: + post_url = getattr(deps.browser.page, "url", None) + return {"success": True, "url": post_url or url} + + return await _trace_tool_call(ctx, "navigate", _run, {"url": url}) + + @agent.tool + async def click_rect( + ctx: Any, + *, + x: Annotated[float, Field()], + y: Annotated[float, Field()], + width: Annotated[float, Field(gt=0)], + height: Annotated[float, Field(gt=0)], + button: Literal["left", "right", "middle"] = "left", + click_count: Annotated[int, Field(ge=1, le=3)] = 1, + ): + """ + Click by pixel coordinates (rectangle), useful with `find_text_rect`. + """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + return await click_rect_async( + deps.browser, + {"x": x, "y": y, "w": width, "h": height}, + button=button, + click_count=click_count, + ) + + return await _trace_tool_call( + ctx, + "click_rect", + _run, + { + "x": x, + "y": y, + "width": width, + "height": height, + "button": button, + "click_count": click_count, + }, + ) + + @agent.tool + async def find_text_rect( + ctx: Any, + text: Annotated[str, Field(min_length=1)], + case_sensitive: bool = False, + whole_word: bool = False, + max_results: Annotated[int, Field(ge=1, le=100)] = 10, + ) -> TextRectSearchResult: + """ + Find text occurrences and return pixel coordinates. + """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + return await find_text_rect_async( + deps.browser, + text, + case_sensitive=case_sensitive, + whole_word=whole_word, + max_results=max_results, + ) + + return await _trace_tool_call( + ctx, + "find_text_rect", + _run, + { + "query": text, + "case_sensitive": case_sensitive, + "whole_word": whole_word, + "max_results": max_results, + }, + ) + + @agent.tool + async def verify_url_matches( + ctx: Any, + pattern: Annotated[str, Field(min_length=1)], + flags: int = 0, + ) -> AssertionResult: + """ + Verify the current page URL matches a regex pattern. + """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + if not deps.browser.page: + return AssertionResult(passed=False, reason="Browser not started (page is None)") + + url = deps.browser.page.url + ok = re.search(pattern, url, flags) is not None + return AssertionResult( + passed=ok, + reason="" if ok else f"URL did not match pattern. url={url!r} pattern={pattern!r}", + details={"url": url, "pattern": pattern}, + ) + + return await _trace_tool_call( + ctx, + "verify_url_matches", + _run, + {"pattern": pattern}, + ) + + @agent.tool + async def verify_text_present( + ctx: Any, + text: Annotated[str, Field(min_length=1)], + *, + format: Literal["text", "markdown", "raw"] = "text", + case_sensitive: bool = False, + ) -> AssertionResult: + """ + Verify a text substring is present in `read_page()` output. 
+ """ + + async def _run(): + deps: SentiencePydanticDeps = ctx.deps + result = await read_async(deps.browser, output_format=format, enhance_markdown=True) + if result.status != "success": + return AssertionResult( + passed=False, reason=f"read failed: {result.error}", details={} + ) + + haystack = result.content if case_sensitive else result.content.lower() + needle = text if case_sensitive else text.lower() + ok = needle in haystack + return AssertionResult( + passed=ok, + reason="" if ok else f"Text not present: {text!r}", + details={"format": format, "query": text, "length": result.length}, + ) + + return await _trace_tool_call( + ctx, + "verify_text_present", + _run, + {"query": text, "format": format}, + ) + + @agent.tool + async def assert_eventually_url_matches( + ctx: Any, + pattern: Annotated[str, Field(min_length=1)], + *, + timeout_s: Annotated[float, Field(gt=0)] = 10.0, + poll_s: Annotated[float, Field(gt=0)] = 0.25, + flags: int = 0, + ) -> AssertionResult: + """ + Retry until the page URL matches `pattern` or timeout is reached. + """ + deadline = time.monotonic() + timeout_s + last = None + while time.monotonic() <= deadline: + last = await verify_url_matches(ctx, pattern, flags) + if last.passed: + return last + await asyncio.sleep(poll_s) + return last or AssertionResult(passed=False, reason="No attempts executed", details={}) + + return { + "snapshot_state": snapshot_state, + "read_page": read_page, + "click": click, + "type_text": type_text, + "press_key": press_key, + "scroll_to": scroll_to, + "navigate": navigate, + "click_rect": click_rect, + "find_text_rect": find_text_rect, + "verify_url_matches": verify_url_matches, + "verify_text_present": verify_text_present, + "assert_eventually_url_matches": assert_eventually_url_matches, + } diff --git a/sentience/models.py b/sentience/models.py index 9b483fa..74560ea 100644 --- a/sentience/models.py +++ b/sentience/models.py @@ -31,6 +31,7 @@ class VisualCues(BaseModel): is_primary: bool background_color_name: str | None = None + fallback_background_color_name: str | None = None is_clickable: bool @@ -66,6 +67,9 @@ class Element(BaseModel): # Hyperlink URL (for link elements) href: str | None = None + # Nearby static text (best-effort, usually only for top-ranked elements) + nearby_text: str | None = None + # ===== v1 state-aware assertion fields (optional) ===== # Best-effort accessible name/label for controls (distinct from visible text) name: str | None = None diff --git a/sentience/text_search.py b/sentience/text_search.py index d0a5b3b..a9a67cd 100644 --- a/sentience/text_search.py +++ b/sentience/text_search.py @@ -5,6 +5,7 @@ from .browser import AsyncSentienceBrowser, SentienceBrowser from .browser_evaluator import BrowserEvaluator from .models import TextRectSearchResult +from .sentience_methods import SentienceMethod def find_text_rect( diff --git a/sentience/utils.py b/sentience/utils.py deleted file mode 100644 index 86014b6..0000000 --- a/sentience/utils.py +++ /dev/null @@ -1,296 +0,0 @@ -""" -Digest utilities for snapshot canonicalization and hashing. - -Provides functions to compute stable digests of snapshots for determinism diff. 
-Two digest strategies: -- strict: includes structure + normalized text -- loose: structure only (no text) - detects layout changes vs content changes -""" - -import hashlib -import json -import re -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Optional - -from playwright.sync_api import BrowserContext - - -@dataclass -class BBox: - """Bounding box with normalized coordinates.""" - - x: int - y: int - width: int - height: int - - @classmethod - def from_dict(cls, bbox_dict: dict[str, Any]) -> "BBox": - """Create BBox from dictionary.""" - return cls( - x=int(bbox_dict.get("x", 0)), - y=int(bbox_dict.get("y", 0)), - width=int(bbox_dict.get("width", 0)), - height=int(bbox_dict.get("height", 0)), - ) - - def to_normalized(self, bucket_size: int = 2) -> list[int]: - """ - Normalize bbox to fixed-size buckets to ignore minor jitter. - - Args: - bucket_size: Pixel bucket size (default 2px) - - Returns: - List of [x, y, width, height] rounded to buckets - """ - return [ - round(self.x / bucket_size) * bucket_size, - round(self.y / bucket_size) * bucket_size, - round(self.width / bucket_size) * bucket_size, - round(self.height / bucket_size) * bucket_size, - ] - - -@dataclass -class ElementFingerprint: - """Normalized element data for digest computation.""" - - id: int - role: str - bbox: list[int] # Normalized - clickable: int # 0 or 1 - primary: int # 0 or 1 - text: str = "" # Empty for loose digest - - def to_dict(self) -> dict[str, Any]: - """Convert to dictionary for JSON serialization.""" - data = { - "id": self.id, - "role": self.role, - "bbox": self.bbox, - "clickable": self.clickable, - "primary": self.primary, - } - if self.text: # Only include text if non-empty - data["text"] = self.text - return data - - -def normalize_text_strict(text: str | None, max_length: int = 80) -> str: - """ - Normalize text for strict digest (structure + content). - - Rules: - - Lowercase - - Trim and collapse whitespace - - Cap length at max_length - - Replace digit runs with '#' - - Normalize currency: $79.99 -> $# - - Normalize time patterns: 12:34 -> #:# - - Args: - text: Input text - max_length: Maximum text length (default 80) - - Returns: - Normalized text string - """ - if not text: - return "" - - # Lowercase and trim - text = text.strip().lower() - - # Collapse whitespace - text = " ".join(text.split()) - - # Cap length - text = text[:max_length] - - # Replace digit runs with # - text = re.sub(r"\d+", "#", text) - - # Normalize currency - text = re.sub(r"\$\s*#", "$#", text) - - # Normalize time patterns (HH:MM or similar) - text = re.sub(r"#:#", "#:#", text) - - # Normalize date patterns (YYYY-MM-DD or similar) - text = re.sub(r"#-#-#", "#-#-#", text) - - return text - - -def normalize_bbox(bbox: dict[str, Any] | BBox, bucket_size: int = 2) -> list[int]: - """ - Round bbox to fixed-size buckets to ignore jitter. - - Args: - bbox: BBox object or dict with x, y, width, height - bucket_size: Pixel bucket size (default 2px) - - Returns: - List of [x, y, width, height] rounded to buckets - """ - if isinstance(bbox, BBox): - return bbox.to_normalized(bucket_size) - - bbox_obj = BBox.from_dict(bbox) - return bbox_obj.to_normalized(bucket_size) - - -def extract_element_fingerprint( - element: dict[str, Any], - include_text: bool = True, -) -> ElementFingerprint: - """ - Extract normalized fingerprint from element dict. 
- - Args: - element: Element dict from snapshot - include_text: Whether to include normalized text (False for loose digest) - - Returns: - ElementFingerprint with normalized data - """ - # Extract basic fields - element_id = element.get("id", 0) - role = element.get("role", "unknown") - - # Extract and normalize bbox - bbox_data = element.get("bbox", {}) - bbox_normalized = normalize_bbox(bbox_data) - - # Extract visual cues - visual_cues = element.get("visual_cues", {}) - clickable = 1 if visual_cues.get("is_clickable", False) else 0 - primary = 1 if visual_cues.get("is_primary", False) else 0 - - # Extract and normalize text (if requested) - text = "" - if include_text: - raw_text = element.get("text", "") - text = normalize_text_strict(raw_text) - - return ElementFingerprint( - id=element_id, - role=role, - bbox=bbox_normalized, - clickable=clickable, - primary=primary, - text=text, - ) - - -def canonical_snapshot_strict(elements: list[dict[str, Any]]) -> str: - """ - Create strict snapshot digest (structure + normalized text). - - Args: - elements: List of element dicts from snapshot - - Returns: - Canonical JSON string for hashing - """ - fingerprints = [] - - for element in sorted(elements, key=lambda e: e.get("id", 0)): - fingerprint = extract_element_fingerprint(element, include_text=True) - fingerprints.append(fingerprint.to_dict()) - - return json.dumps(fingerprints, sort_keys=True, ensure_ascii=False) - - -def canonical_snapshot_loose(elements: list[dict[str, Any]]) -> str: - """ - Create loose snapshot digest (structure only, no text). - - This is more resistant to content churn (prices, ads, timestamps). - Use for detecting structural changes vs content changes. - - Args: - elements: List of element dicts from snapshot - - Returns: - Canonical JSON string for hashing - """ - fingerprints = [] - - for element in sorted(elements, key=lambda e: e.get("id", 0)): - fingerprint = extract_element_fingerprint(element, include_text=False) - fingerprints.append(fingerprint.to_dict()) - - return json.dumps(fingerprints, sort_keys=True, ensure_ascii=False) - - -def sha256_digest(canonical_str: str) -> str: - """ - Compute SHA256 hash with 'sha256:' prefix. - - Args: - canonical_str: Canonical string to hash - - Returns: - Hash string with format: "sha256:" - """ - hash_obj = hashlib.sha256(canonical_str.encode("utf-8")) - return f"sha256:{hash_obj.hexdigest()}" - - -def compute_snapshot_digests(elements: list[dict[str, Any]]) -> dict[str, str]: - """ - Compute both strict and loose digests for a snapshot. - - Args: - elements: List of element dicts from snapshot - - Returns: - Dict with 'strict' and 'loose' digest strings - """ - canonical_strict = canonical_snapshot_strict(elements) - canonical_loose = canonical_snapshot_loose(elements) - - return { - "strict": sha256_digest(canonical_strict), - "loose": sha256_digest(canonical_loose), - } - - -def save_storage_state(context: BrowserContext, file_path: str | Path) -> None: - """ - Save current browser storage state (cookies + localStorage) to a file. - - This is useful for capturing a logged-in session to reuse later. - - Args: - context: Playwright BrowserContext - file_path: Path to save the storage state JSON file - - Example: - ```python - from sentience import SentienceBrowser, save_storage_state - - browser = SentienceBrowser() - browser.start() - - # User logs in manually or via agent - browser.goto("https://example.com") - # ... login happens ... 
- - # Save session for later - save_storage_state(browser.context, "auth.json") - ``` - - Raises: - IOError: If file cannot be written - """ - storage_state = context.storage_state() - file_path_obj = Path(file_path) - file_path_obj.parent.mkdir(parents=True, exist_ok=True) - with open(file_path_obj, "w") as f: - json.dump(storage_state, f, indent=2) - print(f"✅ [Sentience] Saved storage state to {file_path_obj}") diff --git a/tests/conftest.py b/tests/conftest.py index fd79df5..b0c3a7f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,8 @@ Pytest configuration and fixtures for Sentience SDK tests """ +from __future__ import annotations + import os from pathlib import Path @@ -50,6 +52,72 @@ def extension_available(): return False +def _ensure_playwright_stubs() -> None: + """ + Provide minimal `playwright.*` stubs so the SDK can be imported in environments + where Playwright isn't installed (e.g., constrained CI/sandbox). + + This is only intended to support pure unit/contract tests that don't actually + launch browsers. + """ + + import sys + import types + + def ensure_module(name: str) -> types.ModuleType: + if name in sys.modules: + return sys.modules[name] + mod = types.ModuleType(name) + sys.modules[name] = mod + return mod + + playwright_mod = ensure_module("playwright") + async_api_mod = ensure_module("playwright.async_api") + sync_api_mod = ensure_module("playwright.sync_api") + impl_mod = ensure_module("playwright._impl") + impl_errors_mod = ensure_module("playwright._impl._errors") + + class _Dummy: + pass + + async_api_mod.BrowserContext = _Dummy + async_api_mod.Page = _Dummy + async_api_mod.Playwright = _Dummy + + async def _async_playwright(): + raise RuntimeError("Playwright is not available in this environment.") + + async_api_mod.async_playwright = _async_playwright + + sync_api_mod.BrowserContext = _Dummy + sync_api_mod.Page = _Dummy + sync_api_mod.Playwright = _Dummy + + def _sync_playwright(): + raise RuntimeError("Playwright is not available in this environment.") + + sync_api_mod.sync_playwright = _sync_playwright + + playwright_mod.async_api = async_api_mod + playwright_mod.sync_api = sync_api_mod + + # Some unit tests import internal Playwright exceptions directly + class TimeoutError(Exception): + pass + + impl_errors_mod.TimeoutError = TimeoutError + impl_mod._errors = impl_errors_mod + + +try: + import playwright # noqa: F401 + + PLAYWRIGHT_AVAILABLE = True +except Exception: + PLAYWRIGHT_AVAILABLE = False + _ensure_playwright_stubs() + + @pytest.fixture(autouse=True) def skip_if_no_extension(request, extension_available): """Automatically skip tests that require extension if it's not available""" @@ -63,3 +131,24 @@ def skip_if_no_extension(request, extension_available): pytest.skip("Extension not available in CI environment") else: pytest.skip("Extension not found. Build it first: cd ../sentience-chrome && ./build.sh") + + +@pytest.fixture(autouse=True) +def skip_non_unit_if_no_playwright(request): + """ + If Playwright isn't installed, skip non-unit tests. + + Rationale: many tests (and the SDK import surface) depend on Playwright; without it, + importing those tests will fail during collection. Unit tests can still run using + lightweight stubs. 
+ """ + + if PLAYWRIGHT_AVAILABLE: + return + + # Allow unit tests to run (tests/unit/**) + fspath = str(getattr(request.node, "fspath", "")) + if "/tests/unit/" in fspath.replace("\\", "/"): + return + + pytest.skip("Playwright not installed; skipping non-unit tests.") diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 0000000..dc65871 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,67 @@ +""" +Unit-test-only stubs. + +The core Sentience SDK imports Playwright at module import time (`sentience.browser`), +but many unit tests don't actually need a real browser. In CI and developer envs, +Playwright is usually installed; however in constrained environments it may not be. + +This conftest provides minimal `playwright.*` stubs so we can import the SDK and run +pure unit/contract tests without requiring Playwright. + +IMPORTANT: +- These stubs are only active during pytest runs (via conftest import order). +- Integration/E2E tests that need real Playwright should install Playwright and will + typically run in separate environments. +""" + +from __future__ import annotations + +import sys +import types + + +def _ensure_module(name: str) -> types.ModuleType: + if name in sys.modules: + return sys.modules[name] + mod = types.ModuleType(name) + sys.modules[name] = mod + return mod + + +# Create top-level playwright module and submodules +playwright_mod = _ensure_module("playwright") +async_api_mod = _ensure_module("playwright.async_api") +sync_api_mod = _ensure_module("playwright.sync_api") + + +class _Dummy: + """Placeholder type used for Playwright classes in unit tests.""" + + +# Minimal symbols imported by `sentience.browser` +async_api_mod.BrowserContext = _Dummy +async_api_mod.Page = _Dummy +async_api_mod.Playwright = _Dummy + + +async def _async_playwright(): + raise RuntimeError("Playwright is not available in this unit-test environment.") + + +async_api_mod.async_playwright = _async_playwright + +sync_api_mod.BrowserContext = _Dummy +sync_api_mod.Page = _Dummy +sync_api_mod.Playwright = _Dummy + + +def _sync_playwright(): + raise RuntimeError("Playwright is not available in this unit-test environment.") + + +sync_api_mod.sync_playwright = _sync_playwright + + +# Expose submodules on the top-level module for completeness +playwright_mod.async_api = async_api_mod +playwright_mod.sync_api = sync_api_mod diff --git a/tests/unit/test_integration_phase0_contracts.py b/tests/unit/test_integration_phase0_contracts.py new file mode 100644 index 0000000..cd201a2 --- /dev/null +++ b/tests/unit/test_integration_phase0_contracts.py @@ -0,0 +1,121 @@ +import pytest + +from sentience.models import SnapshotOptions +from sentience.read import read +from sentience.snapshot import snapshot +from sentience.text_search import find_text_rect + + +class _FakePage: + def __init__(self): + self.evaluate_calls: list[tuple[str, object | None]] = [] + self.url = "https://example.com/" + + def evaluate(self, expression: str, arg=None): + self.evaluate_calls.append((expression, arg)) + + # Snapshot path: return a minimal successful snapshot payload + if "window.sentience.snapshot" in expression: + return { + "status": "success", + "url": self.url, + "elements": [], + "raw_elements": [], + } + + # Read path: return a minimal successful read payload + if "window.sentience.read" in expression: + fmt = (arg or {}).get("format", "raw") + return { + "status": "success", + "url": self.url, + "format": fmt, + "content": "" if fmt == "raw" else "content", + "length": 7, + } + + # 
findTextRect availability check + if "typeof window.sentience.findTextRect" in expression: + return False + + raise AssertionError(f"Unexpected page.evaluate call: {expression!r}") + + +class _FakeBrowser: + def __init__(self, page: _FakePage): + self.page = page + self.api_key = None + self.api_url = None + + +def test_snapshot_default_limit_not_sent_to_extension(monkeypatch): + """ + Contract: SnapshotOptions.limit defaults to 50 and the SDK avoids sending + 'limit' to the extension unless it differs from default. + """ + # Avoid any real extension waiting logic + from sentience.browser_evaluator import BrowserEvaluator + + monkeypatch.setattr(BrowserEvaluator, "wait_for_extension", lambda *args, **kwargs: None) + + page = _FakePage() + browser = _FakeBrowser(page) + + snap = snapshot(browser) # type: ignore[arg-type] + assert snap.url == "https://example.com/" + + # Find the snapshot evaluate call and assert the options payload + snap_calls = [(expr, arg) for (expr, arg) in page.evaluate_calls if "snapshot(" in expr] + assert len(snap_calls) == 1 + _, options = snap_calls[0] + assert isinstance(options, dict) + assert "limit" not in options # default should not be sent + + +def test_snapshot_non_default_limit_is_sent_to_extension(monkeypatch): + from sentience.browser_evaluator import BrowserEvaluator + + monkeypatch.setattr(BrowserEvaluator, "wait_for_extension", lambda *args, **kwargs: None) + + page = _FakePage() + browser = _FakeBrowser(page) + + snapshot(browser, SnapshotOptions(limit=10)) # type: ignore[arg-type] + + snap_calls = [(expr, arg) for (expr, arg) in page.evaluate_calls if "snapshot(" in expr] + assert len(snap_calls) == 1 + _, options = snap_calls[0] + assert options["limit"] == 10 + + +def test_read_passes_requested_format(): + page = _FakePage() + browser = _FakeBrowser(page) + + result = read(browser, output_format="text", enhance_markdown=False) # type: ignore[arg-type] + assert result.format == "text" + + read_calls = [ + (expr, arg) for (expr, arg) in page.evaluate_calls if "window.sentience.read" in expr + ] + assert len(read_calls) == 1 + _, options = read_calls[0] + assert options == {"format": "text"} + + +def test_find_text_rect_unavailable_raises(monkeypatch): + """ + Contract: if the extension doesn't expose findTextRect, the SDK surfaces a clear error. 
+ """ + # Avoid any real extension waiting logic (and avoid sync/async page detection details) + from sentience.browser_evaluator import BrowserEvaluator + + monkeypatch.setattr(BrowserEvaluator, "wait_for_extension", lambda *args, **kwargs: None) + + page = _FakePage() + browser = _FakeBrowser(page) + + with pytest.raises(RuntimeError) as e: + find_text_rect(browser, "Sign In") # type: ignore[arg-type] + + assert "window.sentience.findTextRect is not available" in str(e.value) diff --git a/tests/unit/test_langchain_integration_core.py b/tests/unit/test_langchain_integration_core.py new file mode 100644 index 0000000..55d64b0 --- /dev/null +++ b/tests/unit/test_langchain_integration_core.py @@ -0,0 +1,99 @@ +import pytest + +from sentience.integrations.langchain.context import SentienceLangChainContext +from sentience.integrations.langchain.core import SentienceLangChainCore +from sentience.models import BBox, Element, Snapshot + + +class _FakeAsyncPage: + url = "https://example.com/" + + +class _FakeAsyncBrowser: + def __init__(self): + self.page = _FakeAsyncPage() + self.api_key = None + self.api_url = None + + async def goto(self, url: str) -> None: + self.page.url = url + + +class _FakeTracer: + def __init__(self): + self.started_at = None + self.calls = [] + + def emit_run_start(self, agent, llm_model=None, config=None): + self.started_at = object() + self.calls.append(("run_start", {"agent": agent, "config": config})) + + def emit_step_start(self, **kwargs): + self.calls.append(("step_start", kwargs)) + + def emit(self, event_type, data, step_id=None): + self.calls.append((event_type, {"step_id": step_id, "data": data})) + + def emit_error(self, **kwargs): + self.calls.append(("error", kwargs)) + + +@pytest.mark.asyncio +async def test_core_verify_url_matches_and_tracing(): + tracer = _FakeTracer() + ctx = SentienceLangChainContext(browser=_FakeAsyncBrowser(), tracer=tracer) # type: ignore[arg-type] + core = SentienceLangChainCore(ctx) + + ok = await core.verify_url_matches(r"example\.com") + assert ok.passed is True + + types = [c[0] for c in tracer.calls] + assert "run_start" in types + assert "step_start" in types + assert "step_end" in types + + +@pytest.mark.asyncio +async def test_core_navigate_updates_url(): + ctx = SentienceLangChainContext(browser=_FakeAsyncBrowser()) # type: ignore[arg-type] + core = SentienceLangChainCore(ctx) + + out = await core.navigate("https://example.com/next") + assert out["success"] is True + assert ctx.browser.page.url == "https://example.com/next" + + +@pytest.mark.asyncio +async def test_core_snapshot_state_summarizes(monkeypatch): + async def _fake_snapshot_async(browser, options): + assert options.limit == 10 + return Snapshot( + status="success", + url="https://example.com/", + elements=[ + Element( + id=1, + role="button", + text="Sign in", + importance=10, + bbox=BBox(x=1, y=2, width=3, height=4), + visual_cues={ + "is_primary": False, + "is_clickable": True, + "background_color_name": None, + }, + ) + ], + ) + + monkeypatch.setattr( + "sentience.integrations.langchain.core.snapshot_async", _fake_snapshot_async + ) + + ctx = SentienceLangChainContext(browser=_FakeAsyncBrowser()) # type: ignore[arg-type] + core = SentienceLangChainCore(ctx) + + state = await core.snapshot_state(limit=10, include_screenshot=False) + assert state.url == "https://example.com/" + assert len(state.elements) == 1 + assert state.elements[0].id == 1 diff --git a/tests/unit/test_pydanticai_toolset.py b/tests/unit/test_pydanticai_toolset.py new file mode 100644 index 
0000000..cf6d20e --- /dev/null +++ b/tests/unit/test_pydanticai_toolset.py @@ -0,0 +1,251 @@ +import types + +import pytest + +from sentience.integrations.pydanticai.deps import SentiencePydanticDeps +from sentience.integrations.pydanticai.toolset import register_sentience_tools +from sentience.models import BBox, Element, Snapshot + + +class _FakeAgent: + def __init__(self): + self._tools = {} + + def tool(self, fn): + # PydanticAI's decorator registers the function for tool calling. + # For unit tests we just store it by name and return it unchanged. + self._tools[fn.__name__] = fn + return fn + + +class _FakeAsyncPage: + url = "https://example.com/" + + +class _FakeAsyncBrowser: + def __init__(self): + self.page = _FakeAsyncPage() + self.api_key = None + self.api_url = None + + async def goto(self, url: str) -> None: + self.page.url = url + + +class _Ctx: + def __init__(self, deps): + self.deps = deps + + +class _FakeTracer: + def __init__(self): + self.started_at = None + self.calls = [] + + def emit_run_start(self, agent, llm_model=None, config=None): + # mimic Tracer behavior: set started_at so we don't re-emit + self.started_at = object() + self.calls.append(("run_start", {"agent": agent, "llm_model": llm_model, "config": config})) + + def emit_step_start(self, **kwargs): + self.calls.append(("step_start", kwargs)) + + def emit(self, event_type, data, step_id=None): + self.calls.append((event_type, {"data": data, "step_id": step_id})) + + def emit_error(self, **kwargs): + self.calls.append(("error", kwargs)) + + +@pytest.mark.asyncio +async def test_register_sentience_tools_registers_expected_names(): + agent = _FakeAgent() + tools = register_sentience_tools(agent) + + expected = { + "snapshot_state", + "read_page", + "click", + "click_rect", + "type_text", + "press_key", + "scroll_to", + "navigate", + "find_text_rect", + "verify_url_matches", + "verify_text_present", + "assert_eventually_url_matches", + } + assert set(tools.keys()) == expected + assert set(agent._tools.keys()) == expected + + +@pytest.mark.asyncio +async def test_snapshot_state_passes_limit_and_summarizes(monkeypatch): + agent = _FakeAgent() + tools = register_sentience_tools(agent) + + captured = {} + + async def _fake_snapshot_async(browser, options): + captured["limit"] = options.limit + captured["screenshot"] = options.screenshot + return Snapshot( + status="success", + url="https://example.com/", + elements=[ + Element( + id=1, + role="button", + text="Sign in", + importance=10, + bbox=BBox(x=1, y=2, width=3, height=4), + visual_cues={ + "is_primary": False, + "is_clickable": True, + "background_color_name": None, + }, + ) + ], + ) + + monkeypatch.setattr( + "sentience.integrations.pydanticai.toolset.snapshot_async", _fake_snapshot_async + ) + + deps = SentiencePydanticDeps(browser=_FakeAsyncBrowser()) # type: ignore[arg-type] + ctx = _Ctx(deps) + + result = await tools["snapshot_state"](ctx, limit=10, include_screenshot=False) + assert captured["limit"] == 10 + assert captured["screenshot"] is False + assert result.url == "https://example.com/" + assert len(result.elements) == 1 + assert result.elements[0].id == 1 + assert result.elements[0].role == "button" + + +@pytest.mark.asyncio +async def test_verify_url_matches_uses_page_url(): + agent = _FakeAgent() + tools = register_sentience_tools(agent) + + deps = SentiencePydanticDeps(browser=_FakeAsyncBrowser()) # type: ignore[arg-type] + ctx = _Ctx(deps) + + ok = await tools["verify_url_matches"](ctx, r"example\.com") + bad = await 
tools["verify_url_matches"](ctx, r"not-real") + + assert ok.passed is True + assert bad.passed is False + + +@pytest.mark.asyncio +async def test_tracing_emits_step_events_for_tool_calls(): + agent = _FakeAgent() + tools = register_sentience_tools(agent) + + tracer = _FakeTracer() + deps = SentiencePydanticDeps(browser=_FakeAsyncBrowser(), tracer=tracer) # type: ignore[arg-type] + ctx = _Ctx(deps) + + _ = await tools["verify_url_matches"](ctx, r"example\.com") + + # We should emit run_start once, step_start once, step_end once + types = [c[0] for c in tracer.calls] + assert "run_start" in types + assert "step_start" in types + assert "step_end" in types + + +@pytest.mark.asyncio +async def test_navigate_sets_url_and_returns_success(): + agent = _FakeAgent() + tools = register_sentience_tools(agent) + + browser = _FakeAsyncBrowser() + deps = SentiencePydanticDeps(browser=browser) # type: ignore[arg-type] + ctx = _Ctx(deps) + + out = await tools["navigate"](ctx, "https://example.com/next") + assert out["success"] is True + assert browser.page.url == "https://example.com/next" + + +@pytest.mark.asyncio +async def test_type_text_passes_delay_ms(monkeypatch): + agent = _FakeAgent() + tools = register_sentience_tools(agent) + + called = {} + + async def _fake_type_text_async(browser, element_id, text, take_snapshot=False, delay_ms=0): + called["element_id"] = element_id + called["delay_ms"] = delay_ms + return {"success": True} + + monkeypatch.setattr( + "sentience.integrations.pydanticai.toolset.type_text_async", + _fake_type_text_async, + ) + + deps = SentiencePydanticDeps(browser=_FakeAsyncBrowser()) # type: ignore[arg-type] + ctx = _Ctx(deps) + + out = await tools["type_text"](ctx, element_id=1, text="hello", delay_ms=10) + assert out["success"] is True + assert called["element_id"] == 1 + assert called["delay_ms"] == 10 + + +@pytest.mark.asyncio +async def test_click_rect_is_registered(monkeypatch): + agent = _FakeAgent() + tools = register_sentience_tools(agent) + + called = {} + + async def _fake_click_rect_async(browser, rect, button="left", click_count=1, **kwargs): + called["rect"] = rect + called["button"] = button + called["click_count"] = click_count + return {"success": True} + + monkeypatch.setattr( + "sentience.integrations.pydanticai.toolset.click_rect_async", _fake_click_rect_async + ) + + deps = SentiencePydanticDeps(browser=_FakeAsyncBrowser()) # type: ignore[arg-type] + ctx = _Ctx(deps) + + out = await tools["click_rect"]( + ctx, x=10, y=20, width=30, height=40, button="left", click_count=2 + ) + assert out["success"] is True + assert called["rect"] == {"x": 10, "y": 20, "w": 30, "h": 40} + assert called["button"] == "left" + assert called["click_count"] == 2 + + +@pytest.mark.asyncio +async def test_tracing_emits_error_on_exception(monkeypatch): + agent = _FakeAgent() + tools = register_sentience_tools(agent) + + tracer = _FakeTracer() + deps = SentiencePydanticDeps(browser=_FakeAsyncBrowser(), tracer=tracer) # type: ignore[arg-type] + ctx = _Ctx(deps) + + async def _boom(): + raise RuntimeError("boom") + + monkeypatch.setattr( + "sentience.integrations.pydanticai.toolset.read_async", + lambda *args, **kwargs: _boom(), + ) + + with pytest.raises(RuntimeError): + await tools["read_page"](ctx, format="text") + + types = [c[0] for c in tracer.calls] + assert "error" in types