From 01fc4d9c058adae443bd5ae71d9261565cc4557d Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Wed, 21 Jan 2026 18:07:24 -0800
Subject: [PATCH 1/2] Agent with integrated Runtime for verification gate

---
 examples/runtime_agent_minimal.py         |  89 +++++
 sentience/__init__.py                     |   4 +
 sentience/agent_runtime.py                |   2 +-
 sentience/extension/background.js         |   6 +-
 sentience/extension/content.js            |  18 +-
 sentience/extension/injected_api.js       |  80 ++--
 sentience/extension/pkg/sentience_core.js |  14 +-
 sentience/runtime_agent.py                | 423 ++++++++++++++++++++++
 tests/unit/test_runtime_agent.py          | 338 +++++++++++++++++
 9 files changed, 914 insertions(+), 60 deletions(-)
 create mode 100644 examples/runtime_agent_minimal.py
 create mode 100644 sentience/runtime_agent.py
 create mode 100644 tests/unit/test_runtime_agent.py

diff --git a/examples/runtime_agent_minimal.py b/examples/runtime_agent_minimal.py
new file mode 100644
index 0000000..8595354
--- /dev/null
+++ b/examples/runtime_agent_minimal.py
@@ -0,0 +1,89 @@
+"""
+Example: RuntimeAgent (AgentRuntime-backed) minimal demo.
+
+This demonstrates the verification-first loop:
+snapshot -> propose action (structured executor) -> execute -> verify (AgentRuntime predicates)
+
+Usage:
+  python examples/runtime_agent_minimal.py
+"""
+
+import asyncio
+
+from sentience import AsyncSentienceBrowser
+from sentience.agent_runtime import AgentRuntime
+from sentience.llm_provider import LLMProvider, LLMResponse
+from sentience.runtime_agent import RuntimeAgent, RuntimeStep, StepVerification
+from sentience.tracing import JsonlTraceSink, Tracer
+from sentience.verification import AssertContext, AssertOutcome, exists, url_contains
+
+
+class FixedActionProvider(LLMProvider):
+    """Canned provider for examples/tests: every generate() call yields one preset action."""
+
+    def __init__(self, action: str):
+        super().__init__(model="fixed-action")
+        self._action = action
+
+    def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
+        del system_prompt, user_prompt, kwargs  # inputs are deliberately ignored
+        return LLMResponse(model_name=self.model_name, content=self._action)
+
+    def supports_json_mode(self) -> bool:
+        return False
+
+    @property
+    def model_name(self) -> str:
+        return "fixed-action"
+
+
+async def main() -> None:
+    """Open example.com, run one verification-only step, and always close the tracer."""
+    # Local trace (viewable in Studio if uploaded later).
+    run_id = "runtime-agent-minimal"
+    tracer = Tracer(run_id=run_id, sink=JsonlTraceSink(f"traces/{run_id}.jsonl"))
+
+    try:
+        async with AsyncSentienceBrowser(headless=False) as browser:
+            page = await browser.new_page()
+            await page.goto("https://example.com")
+            await page.wait_for_load_state("networkidle")
+
+            runtime = await AgentRuntime.from_sentience_browser(browser=browser, page=page, tracer=tracer)
+            agent = RuntimeAgent(
+                runtime=runtime,
+                # Structured executor (for demo, we just return FINISH()).
+                executor=FixedActionProvider("FINISH()"),
+                # vision_executor=... (optional)
+                # vision_verifier=... (optional, for AgentRuntime assertion vision fallback)
+            )
+
+            # One step: no action needed; we just verify structure + URL.
+            def has_example_heading(ctx: AssertContext) -> AssertOutcome:
+                # Demonstrates custom predicates (you can also use exists/url_contains helpers).
+                snap = ctx.snapshot
+                ok = bool(snap and any((el.role == "heading" and (el.text or "").startswith("Example")) for el in snap.elements))
+                return AssertOutcome(passed=ok, reason="" if ok else "missing heading", details={})
+
+            step = RuntimeStep(
+                goal="Confirm Example Domain page is loaded",
+                verifications=[
+                    StepVerification(predicate=url_contains("example.com"), label="url_contains_example", required=True),
+                    StepVerification(predicate=exists("role=heading"), label="has_heading", required=True),
+                    StepVerification(predicate=has_example_heading, label="heading_text_matches", required=False),
+                ],
+                max_snapshot_attempts=2,
+                snapshot_limit_base=60,
+            )
+
+            ok = await agent.run_step(task_goal="Open example.com and verify", step=step)
+            print(f"step ok: {ok}")
+    finally:
+        # Runs on success and on any exception above, so the tracer is never left open.
+        tracer.close()
+    print(f"trace written to traces/{run_id}.jsonl")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+
diff --git a/sentience/__init__.py b/sentience/__init__.py
index 5da3711..b80d313 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -89,6 +89,7 @@
 from .query import find, query
 from .read import read
 from .recorder import Recorder, Trace, TraceStep, record
+from .runtime_agent import RuntimeAgent, RuntimeStep, StepVerification
 from .screenshot import screenshot
 from .sentience_methods import AgentAction, SentienceMethod
 from .snapshot import snapshot
@@ -210,6 +211,9 @@
     "MLXVLMProvider",
     "SentienceAgent",
     "SentienceAgentAsync",
+    "RuntimeAgent",
+    "RuntimeStep",
+    "StepVerification",
     "SentienceVisualAgent",
     "SentienceVisualAgentAsync",
     "ConversationalAgent",
diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
index 01ba2ff..bac9837 100644
--- a/sentience/agent_runtime.py
+++ b/sentience/agent_runtime.py
@@ -582,7 +582,7 @@ def assert_done(
             True if task is complete (assertion passed), False otherwise
         """
         # Convenience wrapper for assert_ with required=True
-        ok = self.assert_(predicate, label=label, required=True)
+        ok = self.assert_(predicate, label=label, required=True)  # own method, not unittest alias
         if ok:
             self._task_done = True
             self._task_done_label = label
diff --git a/sentience/extension/background.js b/sentience/extension/background.js
index 2923f55..aff49b0 100644
--- a/sentience/extension/background.js
+++ b/sentience/extension/background.js
@@ -28,14 +28,14 @@ async function handleSnapshotProcessing(rawData, options = {}) {
     const startTime = performance.now();
     try {
         if (!Array.isArray(rawData)) throw new Error("rawData must be an array");
-        if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(), 
+        if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(),
         !wasmReady) throw new Error("WASM module not initialized");
         let analyzedElements, prunedRawData;
         try {
             const wasmPromise = new Promise((resolve, reject) => {
                 try {
                     let result;
-                    result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData), 
+                    result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData),
                     resolve(result);
                 } catch (e) {
                     reject(e);
@@ -101,4 +101,4 @@ initWASM().catch(err => {}), chrome.runtime.onMessage.addListener((request, send
     event.preventDefault();
 }), self.addEventListener("unhandledrejection", event => {
     event.preventDefault();
-});
\ No newline at end of file
+});
diff --git a/sentience/extension/content.js b/sentience/extension/content.js
index b65cfb5..97923a2 100644
--- a/sentience/extension/content.js
+++ b/sentience/extension/content.js
@@ -82,7 +82,7 @@
                 if (!elements || !Array.isArray(elements)) return;
                 removeOverlay();
                 const host = document.createElement("div");
-                host.id = OVERLAY_HOST_ID, host.style.cssText = "\n        position: fixed !important;\n        top: 0 !important;\n        left: 0 !important;\n        width: 100vw !important;\n        height: 100vh !important;\n        pointer-events: none !important;\n        z-index: 2147483647 !important;\n        margin: 0 !important;\n        padding: 0 !important;\n    ", 
+                host.id = OVERLAY_HOST_ID, host.style.cssText = "\n        position: fixed !important;\n        top: 0 !important;\n        left: 0 !important;\n        width: 100vw !important;\n        height: 100vh !important;\n        pointer-events: none !important;\n        z-index: 2147483647 !important;\n        margin: 0 !important;\n        padding: 0 !important;\n    ",
                 document.body.appendChild(host);
                 const shadow = host.attachShadow({
                     mode: "closed"
@@ -94,15 +94,15 @@
                     let color;
                     color = isTarget ? "#FF0000" : isPrimary ? "#0066FF" : "#00FF00";
                     const importanceRatio = maxImportance > 0 ? importance / maxImportance : .5, borderOpacity = isTarget ? 1 : isPrimary ? .9 : Math.max(.4, .5 + .5 * importanceRatio), fillOpacity = .2 * borderOpacity, borderWidth = isTarget ? 2 : isPrimary ? 1.5 : Math.max(.5, Math.round(2 * importanceRatio)), hexOpacity = Math.round(255 * fillOpacity).toString(16).padStart(2, "0"), box = document.createElement("div");
-                    if (box.style.cssText = `\n            position: absolute;\n            left: ${bbox.x}px;\n            top: ${bbox.y}px;\n            width: ${bbox.width}px;\n            height: ${bbox.height}px;\n            border: ${borderWidth}px solid ${color};\n            background-color: ${color}${hexOpacity};\n            box-sizing: border-box;\n            opacity: ${borderOpacity};\n            pointer-events: none;\n        `, 
+                    if (box.style.cssText = `\n            position: absolute;\n            left: ${bbox.x}px;\n            top: ${bbox.y}px;\n            width: ${bbox.width}px;\n            height: ${bbox.height}px;\n            border: ${borderWidth}px solid ${color};\n            background-color: ${color}${hexOpacity};\n            box-sizing: border-box;\n            opacity: ${borderOpacity};\n            pointer-events: none;\n        `,
                     importance > 0 || isPrimary) {
                         const badge = document.createElement("span");
-                        badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n                position: absolute;\n                top: -18px;\n                left: 0;\n                background: ${color};\n                color: white;\n                font-size: 11px;\n                font-weight: bold;\n                padding: 2px 6px;\n                font-family: Arial, sans-serif;\n                border-radius: 3px;\n                opacity: 0.95;\n                white-space: nowrap;\n                pointer-events: none;\n            `, 
+                        badge.textContent = isPrimary ? `⭐${importance}` : `${importance}`, badge.style.cssText = `\n                position: absolute;\n                top: -18px;\n                left: 0;\n                background: ${color};\n                color: white;\n                font-size: 11px;\n                font-weight: bold;\n                padding: 2px 6px;\n                font-family: Arial, sans-serif;\n                border-radius: 3px;\n                opacity: 0.95;\n                white-space: nowrap;\n                pointer-events: none;\n            `,
                         box.appendChild(badge);
                     }
                     if (isTarget) {
                         const targetIndicator = document.createElement("span");
-                        targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n                position: absolute;\n                top: -18px;\n                right: 0;\n                font-size: 16px;\n                pointer-events: none;\n            ", 
+                        targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n                position: absolute;\n                top: -18px;\n                right: 0;\n                font-size: 16px;\n                pointer-events: none;\n            ",
                         box.appendChild(targetIndicator);
                     }
                     shadow.appendChild(box);
@@ -122,7 +122,7 @@
                 if (!grids || !Array.isArray(grids)) return;
                 removeOverlay();
                 const host = document.createElement("div");
-                host.id = OVERLAY_HOST_ID, host.style.cssText = "\n        position: fixed !important;\n        top: 0 !important;\n        left: 0 !important;\n        width: 100vw !important;\n        height: 100vh !important;\n        pointer-events: none !important;\n        z-index: 2147483647 !important;\n        margin: 0 !important;\n        padding: 0 !important;\n    ", 
+                host.id = OVERLAY_HOST_ID, host.style.cssText = "\n        position: fixed !important;\n        top: 0 !important;\n        left: 0 !important;\n        width: 100vw !important;\n        height: 100vh !important;\n        pointer-events: none !important;\n        z-index: 2147483647 !important;\n        margin: 0 !important;\n        padding: 0 !important;\n    ",
                 document.body.appendChild(host);
                 const shadow = host.attachShadow({
                     mode: "closed"
@@ -138,10 +138,10 @@
                     let labelText = grid.label ? `Grid ${grid.grid_id}: ${grid.label}` : `Grid ${grid.grid_id}`;
                     grid.is_dominant && (labelText = `⭐ ${labelText} (dominant)`);
                     const badge = document.createElement("span");
-                    if (badge.textContent = labelText, badge.style.cssText = `\n                position: absolute;\n                top: -18px;\n                left: 0;\n                background: ${color};\n                color: white;\n                font-size: 11px;\n                font-weight: bold;\n                padding: 2px 6px;\n                font-family: Arial, sans-serif;\n                border-radius: 3px;\n                opacity: 0.95;\n                white-space: nowrap;\n                pointer-events: none;\n            `, 
+                    if (badge.textContent = labelText, badge.style.cssText = `\n                position: absolute;\n                top: -18px;\n                left: 0;\n                background: ${color};\n                color: white;\n                font-size: 11px;\n                font-weight: bold;\n                padding: 2px 6px;\n                font-family: Arial, sans-serif;\n                border-radius: 3px;\n                opacity: 0.95;\n                white-space: nowrap;\n                pointer-events: none;\n            `,
                     box.appendChild(badge), isTarget) {
                         const targetIndicator = document.createElement("span");
-                        targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n                position: absolute;\n                top: -18px;\n                right: 0;\n                font-size: 16px;\n                pointer-events: none;\n            ", 
+                        targetIndicator.textContent = "🎯", targetIndicator.style.cssText = "\n                position: absolute;\n                top: -18px;\n                right: 0;\n                font-size: 16px;\n                pointer-events: none;\n            ",
                         box.appendChild(targetIndicator);
                     }
                     shadow.appendChild(box);
@@ -155,7 +155,7 @@
     let overlayTimeout = null;
     function removeOverlay() {
         const existing = document.getElementById(OVERLAY_HOST_ID);
-        existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout), 
+        existing && existing.remove(), overlayTimeout && (clearTimeout(overlayTimeout),
         overlayTimeout = null);
     }
-}();
\ No newline at end of file
+}();
diff --git a/sentience/extension/injected_api.js b/sentience/extension/injected_api.js
index 9230b8e..d8c46a4 100644
--- a/sentience/extension/injected_api.js
+++ b/sentience/extension/injected_api.js
@@ -103,9 +103,9 @@
             const iframes = document.querySelectorAll("iframe");
             for (const iframe of iframes) {
                 const src = iframe.getAttribute("src") || "", title = iframe.getAttribute("title") || "";
-                if (src) for (const [provider, hints] of Object.entries(CAPTCHA_IFRAME_HINTS)) matchHints(src, hints) && (hasIframeHit = !0, 
+                if (src) for (const [provider, hints] of Object.entries(CAPTCHA_IFRAME_HINTS)) matchHints(src, hints) && (hasIframeHit = !0,
                 providerSignals[provider] += 1, addEvidence(evidence.iframe_src_hits, truncateText(src, 120)));
-                if (title && matchHints(title, [ "captcha", "recaptcha" ]) && (hasContainerHit = !0, 
+                if (title && matchHints(title, [ "captcha", "recaptcha" ]) && (hasContainerHit = !0,
                 addEvidence(evidence.selector_hits, 'iframe[title*="captcha"]')), evidence.iframe_src_hits.length >= 5) break;
             }
         } catch (e) {}
@@ -114,14 +114,14 @@
             for (const script of scripts) {
                 const src = script.getAttribute("src") || "";
                 if (src) {
-                    for (const [provider, hints] of Object.entries(CAPTCHA_SCRIPT_HINTS)) matchHints(src, hints) && (hasScriptHit = !0, 
+                    for (const [provider, hints] of Object.entries(CAPTCHA_SCRIPT_HINTS)) matchHints(src, hints) && (hasScriptHit = !0,
                     providerSignals[provider] += 1, addEvidence(evidence.selector_hits, `script[src*="${hints[0]}"]`));
                     if (evidence.selector_hits.length >= 5) break;
                 }
             }
         } catch (e) {}
         for (const {selector: selector, provider: provider} of CAPTCHA_CONTAINER_SELECTORS) try {
-            document.querySelector(selector) && (hasContainerHit = !0, addEvidence(evidence.selector_hits, selector), 
+            document.querySelector(selector) && (hasContainerHit = !0, addEvidence(evidence.selector_hits, selector),
             "unknown" !== provider && (providerSignals[provider] += 1));
         } catch (e) {}
         const textSnippet = function() {
@@ -139,7 +139,7 @@
             } catch (e) {}
             try {
                 let bodyText = document.body?.innerText || "";
-                return !bodyText && document.body?.textContent && (bodyText = document.body.textContent), 
+                return !bodyText && document.body?.textContent && (bodyText = document.body.textContent),
                 truncateText(bodyText.replace(/\s+/g, " ").trim(), 2e3);
             } catch (e) {
                 return "";
@@ -147,21 +147,21 @@
         }();
         if (textSnippet) {
             const lowerText = textSnippet.toLowerCase();
-            for (const keyword of CAPTCHA_TEXT_KEYWORDS) lowerText.includes(keyword) && (hasKeywordHit = !0, 
+            for (const keyword of CAPTCHA_TEXT_KEYWORDS) lowerText.includes(keyword) && (hasKeywordHit = !0,
             addEvidence(evidence.text_hits, keyword));
         }
         try {
             const lowerUrl = (window.location?.href || "").toLowerCase();
-            for (const hint of CAPTCHA_URL_HINTS) lowerUrl.includes(hint) && (hasUrlHit = !0, 
+            for (const hint of CAPTCHA_URL_HINTS) lowerUrl.includes(hint) && (hasUrlHit = !0,
             addEvidence(evidence.url_hits, hint));
         } catch (e) {}
         let confidence = 0;
-        hasIframeHit && (confidence += .7), hasContainerHit && (confidence += .5), hasScriptHit && (confidence += .5), 
-        hasKeywordHit && (confidence += .3), hasUrlHit && (confidence += .2), confidence = Math.min(1, confidence), 
+        hasIframeHit && (confidence += .7), hasContainerHit && (confidence += .5), hasScriptHit && (confidence += .5),
+        hasKeywordHit && (confidence += .3), hasUrlHit && (confidence += .2), confidence = Math.min(1, confidence),
         hasIframeHit && (confidence = Math.max(confidence, .8)), !hasKeywordHit || hasIframeHit || hasContainerHit || hasScriptHit || hasUrlHit || (confidence = Math.min(confidence, .4));
         const detected = confidence >= .7;
         let providerHint = null;
-        return providerSignals.recaptcha > 0 ? providerHint = "recaptcha" : providerSignals.hcaptcha > 0 ? providerHint = "hcaptcha" : providerSignals.turnstile > 0 ? providerHint = "turnstile" : providerSignals.arkose > 0 ? providerHint = "arkose" : providerSignals.awswaf > 0 ? providerHint = "awswaf" : detected && (providerHint = "unknown"), 
+        return providerSignals.recaptcha > 0 ? providerHint = "recaptcha" : providerSignals.hcaptcha > 0 ? providerHint = "hcaptcha" : providerSignals.turnstile > 0 ? providerHint = "turnstile" : providerSignals.arkose > 0 ? providerHint = "arkose" : providerSignals.awswaf > 0 ? providerHint = "awswaf" : detected && (providerHint = "unknown"),
         {
             detected: detected,
             provider_hint: providerHint,
@@ -271,7 +271,7 @@
                     if (labelEl) {
                         let text = "";
                         try {
-                            if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()), 
+                            if (text = (labelEl.innerText || "").trim(), !text && labelEl.textContent && (text = labelEl.textContent.trim()),
                             !text && labelEl.getAttribute) {
                                 const ariaLabel = labelEl.getAttribute("aria-label");
                                 ariaLabel && (text = ariaLabel.trim());
@@ -466,7 +466,7 @@
                         });
                         const checkStable = () => {
                             const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime;
-                            timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(), 
+                            timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (observer.disconnect(),
                             resolve()) : setTimeout(checkStable, 50);
                         };
                         checkStable();
@@ -492,7 +492,7 @@
                                 });
                                 const checkQuiet = () => {
                                     const timeSinceLastChange = Date.now() - lastChange, totalWait = Date.now() - startTime;
-                                    timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(), 
+                                    timeSinceLastChange >= quietPeriod || totalWait >= maxWait ? (quietObserver.disconnect(),
                                     resolve()) : setTimeout(checkQuiet, 50);
                                 };
                                 checkQuiet();
@@ -607,7 +607,7 @@
                 }(el);
                 let safeValue = null, valueRedacted = null;
                 try {
-                    if (void 0 !== el.value || el.getAttribute && null !== el.getAttribute("value")) if (isPasswordInput) safeValue = null, 
+                    if (void 0 !== el.value || el.getAttribute && null !== el.getAttribute("value")) if (isPasswordInput) safeValue = null,
                     valueRedacted = "true"; else {
                         const rawValue = void 0 !== el.value ? String(el.value) : String(el.getAttribute("value"));
                         safeValue = rawValue.length > 200 ? rawValue.substring(0, 200) : rawValue, valueRedacted = "false";
@@ -734,8 +734,8 @@
                             const requestId = `iframe-${idx}-${Date.now()}`, timeout = setTimeout(() => {
                                 resolve(null);
                             }, 5e3), listener = event => {
-                                "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout), 
-                                window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot, 
+                                "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data, "SENTIENCE_IFRAME_SNAPSHOT_RESPONSE" === event.data?.type && event.data?.requestId === requestId && (clearTimeout(timeout),
+                                window.removeEventListener("message", listener), event.data.error ? resolve(null) : (event.data.snapshot,
                                 resolve({
                                     iframe: iframe,
                                     data: event.data.snapshot,
@@ -751,7 +751,7 @@
                                         ...options,
                                         collectIframes: !0
                                     }
-                                }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener), 
+                                }, "*") : (clearTimeout(timeout), window.removeEventListener("message", listener),
                                 resolve(null));
                             } catch (error) {
                                 clearTimeout(timeout), window.removeEventListener("message", listener), resolve(null);
@@ -801,7 +801,7 @@
                     }, 25e3), listener = e => {
                         if ("SENTIENCE_SNAPSHOT_RESULT" === e.data.type && e.data.requestId === requestId) {
                             if (resolved) return;
-                            resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), 
+                            resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener),
                             e.data.error ? reject(new Error(e.data.error)) : resolve({
                                 elements: e.data.elements,
                                 raw_elements: e.data.raw_elements,
@@ -818,7 +818,7 @@
                             options: options
                         }, "*");
                     } catch (error) {
-                        resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener), 
+                        resolved || (resolved = !0, clearTimeout(timeout), window.removeEventListener("message", listener),
                         reject(new Error(`Failed to send snapshot request: ${error.message}`)));
                     }
                 });
@@ -828,7 +828,7 @@
             options.screenshot && (screenshot = await function(options) {
                 return new Promise(resolve => {
                     const requestId = Math.random().toString(36).substring(7), listener = e => {
-                        "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener), 
+                        "SENTIENCE_SCREENSHOT_RESULT" === e.data.type && e.data.requestId === requestId && (window.removeEventListener("message", listener),
                         resolve(e.data.screenshot));
                     };
                     window.addEventListener("message", listener), window.postMessage({
@@ -888,15 +888,15 @@
                 }
                 if (node.nodeType !== Node.ELEMENT_NODE) return;
                 const tag = node.tagName.toLowerCase();
-                if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "), 
-                "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"), 
-                "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), 
-                "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), 
+                if ("h1" === tag && (markdown += "\n# "), "h2" === tag && (markdown += "\n## "),
+                "h3" === tag && (markdown += "\n### "), "li" === tag && (markdown += "\n- "), insideLink || "p" !== tag && "div" !== tag && "br" !== tag || (markdown += "\n"),
+                "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"),
+                "a" === tag && (markdown += "[", insideLink = !0), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk),
                 "a" === tag) {
                     const href = node.getAttribute("href");
                     markdown += href ? `](${href})` : "]", insideLink = !1;
                 }
-                "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"), 
+                "strong" !== tag && "b" !== tag || (markdown += "**"), "em" !== tag && "i" !== tag || (markdown += "_"),
                 insideLink || "h1" !== tag && "h2" !== tag && "h3" !== tag && "p" !== tag && "div" !== tag || (markdown += "\n");
             }(tempDiv), markdown.replace(/\n{3,}/g, "\n\n").trim();
         }(document.body) : function(root) {
@@ -909,7 +909,7 @@
                         const style = window.getComputedStyle(node);
                         if ("none" === style.display || "hidden" === style.visibility) return;
                         const isBlock = "block" === style.display || "flex" === style.display || "P" === node.tagName || "DIV" === node.tagName;
-                        isBlock && (text += " "), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk), 
+                        isBlock && (text += " "), node.shadowRoot ? Array.from(node.shadowRoot.childNodes).forEach(walk) : node.childNodes.forEach(walk),
                         isBlock && (text += "\n");
                     }
                 } else text += node.textContent;
@@ -1008,25 +1008,25 @@
     }
     function startRecording(options = {}) {
         const {highlightColor: highlightColor = "#ff0000", successColor: successColor = "#00ff00", autoDisableTimeout: autoDisableTimeout = 18e5, keyboardShortcut: keyboardShortcut = "Ctrl+Shift+I"} = options;
-        if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"), 
+        if (!window.sentience_registry || 0 === window.sentience_registry.length) return alert("Registry empty. Run `await window.sentience.snapshot()` first!"),
         () => {};
         window.sentience_registry_map = new Map, window.sentience_registry.forEach((el, idx) => {
             el && window.sentience_registry_map.set(el, idx);
         });
         let highlightBox = document.getElementById("sentience-highlight-box");
-        highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box", 
-        highlightBox.style.cssText = `\n            position: fixed;\n            pointer-events: none;\n            z-index: 2147483647;\n            border: 2px solid ${highlightColor};\n            background: rgba(255, 0, 0, 0.1);\n            display: none;\n            transition: all 0.1s ease;\n            box-sizing: border-box;\n        `, 
+        highlightBox || (highlightBox = document.createElement("div"), highlightBox.id = "sentience-highlight-box",
+        highlightBox.style.cssText = `\n            position: fixed;\n            pointer-events: none;\n            z-index: 2147483647;\n            border: 2px solid ${highlightColor};\n            background: rgba(255, 0, 0, 0.1);\n            display: none;\n            transition: all 0.1s ease;\n            box-sizing: border-box;\n        `,
         document.body.appendChild(highlightBox));
         let recordingIndicator = document.getElementById("sentience-recording-indicator");
-        recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator", 
-        recordingIndicator.style.cssText = `\n            position: fixed;\n            top: 0;\n            left: 0;\n            right: 0;\n            height: 3px;\n            background: ${highlightColor};\n            z-index: 2147483646;\n            pointer-events: none;\n        `, 
+        recordingIndicator || (recordingIndicator = document.createElement("div"), recordingIndicator.id = "sentience-recording-indicator",
+        recordingIndicator.style.cssText = `\n            position: fixed;\n            top: 0;\n            left: 0;\n            right: 0;\n            height: 3px;\n            background: ${highlightColor};\n            z-index: 2147483646;\n            pointer-events: none;\n        `,
         document.body.appendChild(recordingIndicator)), recordingIndicator.style.display = "block";
         const mouseOverHandler = e => {
             const el = e.target;
             if (!el || el === highlightBox || el === recordingIndicator) return;
             const rect = el.getBoundingClientRect();
-            highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px", 
-            highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px", 
+            highlightBox.style.display = "block", highlightBox.style.top = rect.top + window.scrollY + "px",
+            highlightBox.style.left = rect.left + window.scrollX + "px", highlightBox.style.width = rect.width + "px",
             highlightBox.style.height = rect.height + "px";
         }, clickHandler = e => {
             e.preventDefault(), e.stopPropagation();
@@ -1103,7 +1103,7 @@
                 debug_snapshot: rawData
             }, jsonString = JSON.stringify(snippet, null, 2);
             navigator.clipboard.writeText(jsonString).then(() => {
-                highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)", 
+                highlightBox.style.border = `2px solid ${successColor}`, highlightBox.style.background = "rgba(0, 255, 0, 0.2)",
                 setTimeout(() => {
                     highlightBox.style.border = `2px solid ${highlightColor}`, highlightBox.style.background = "rgba(255, 0, 0, 0.1)";
                 }, 500);
@@ -1113,15 +1113,15 @@
         };
         let timeoutId = null;
         const stopRecording = () => {
-            document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0), 
-            document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId), 
-            timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"), 
+            document.removeEventListener("mouseover", mouseOverHandler, !0), document.removeEventListener("click", clickHandler, !0),
+            document.removeEventListener("keydown", keyboardHandler, !0), timeoutId && (clearTimeout(timeoutId),
+            timeoutId = null), highlightBox && (highlightBox.style.display = "none"), recordingIndicator && (recordingIndicator.style.display = "none"),
             window.sentience_registry_map && window.sentience_registry_map.clear(), window.sentience_stopRecording === stopRecording && delete window.sentience_stopRecording;
         }, keyboardHandler = e => {
-            (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(), 
+            (e.ctrlKey || e.metaKey) && e.shiftKey && "I" === e.key && (e.preventDefault(),
             stopRecording());
         };
-        return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0), 
+        return document.addEventListener("mouseover", mouseOverHandler, !0), document.addEventListener("click", clickHandler, !0),
         document.addEventListener("keydown", keyboardHandler, !0), autoDisableTimeout > 0 && (timeoutId = setTimeout(() => {
             stopRecording();
         }, autoDisableTimeout)), window.sentience_stopRecording = stopRecording, stopRecording;
@@ -1190,4 +1190,4 @@
             }
         }), window.sentience_iframe_handler_setup = !0));
     })();
-}();
\ No newline at end of file
+}();
diff --git a/sentience/extension/pkg/sentience_core.js b/sentience/extension/pkg/sentience_core.js
index bb9cae0..c50ad61 100644
--- a/sentience/extension/pkg/sentience_core.js
+++ b/sentience/extension/pkg/sentience_core.js
@@ -25,7 +25,7 @@ function __wbg_get_imports() {
         },
         __wbg___wbindgen_bigint_get_as_i64_8fcf4ce7f1ca72a2: function(arg0, arg1) {
             const v = getObject(arg1), ret = "bigint" == typeof v ? v : void 0;
-            getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? BigInt(0) : ret, !0), 
+            getDataViewMemory0().setBigInt64(arg0 + 8, isLikeNone(ret) ? BigInt(0) : ret, !0),
             getDataViewMemory0().setInt32(arg0 + 0, !isLikeNone(ret), !0);
         },
         __wbg___wbindgen_boolean_get_bbbb1c18aa2f5e25: function(arg0) {
@@ -224,7 +224,7 @@ function getArrayU8FromWasm0(ptr, len) {
 let cachedDataViewMemory0 = null;
 
 function getDataViewMemory0() {
-    return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)), 
+    return (null === cachedDataViewMemory0 || !0 === cachedDataViewMemory0.buffer.detached || void 0 === cachedDataViewMemory0.buffer.detached && cachedDataViewMemory0.buffer !== wasm.memory.buffer) && (cachedDataViewMemory0 = new DataView(wasm.memory.buffer)),
     cachedDataViewMemory0;
 }
 
@@ -235,7 +235,7 @@ function getStringFromWasm0(ptr, len) {
 let cachedUint8ArrayMemory0 = null;
 
 function getUint8ArrayMemory0() {
-    return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)), 
+    return null !== cachedUint8ArrayMemory0 && 0 !== cachedUint8ArrayMemory0.byteLength || (cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer)),
     cachedUint8ArrayMemory0;
 }
 
@@ -264,7 +264,7 @@ function isLikeNone(x) {
 function passStringToWasm0(arg, malloc, realloc) {
     if (void 0 === realloc) {
         const buf = cachedTextEncoder.encode(arg), ptr = malloc(buf.length, 1) >>> 0;
-        return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length, 
+        return getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf), WASM_VECTOR_LEN = buf.length,
         ptr;
     }
     let len = arg.length, ptr = malloc(len, 1) >>> 0;
@@ -319,7 +319,7 @@ const cachedTextEncoder = new TextEncoder;
 let wasmModule, wasm, WASM_VECTOR_LEN = 0;
 
 function __wbg_finalize_init(instance, module) {
-    return wasm = instance.exports, wasmModule = module, cachedDataViewMemory0 = null, 
+    return wasm = instance.exports, wasmModule = module, cachedDataViewMemory0 = null,
     cachedUint8ArrayMemory0 = null, wasm;
 }
 
@@ -360,7 +360,7 @@ function initSync(module) {
 
 async function __wbg_init(module_or_path) {
     if (void 0 !== wasm) return wasm;
-    void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path), 
+    void 0 !== module_or_path && Object.getPrototypeOf(module_or_path) === Object.prototype && ({module_or_path: module_or_path} = module_or_path),
     void 0 === module_or_path && (module_or_path = new URL("sentience_core_bg.wasm", import.meta.url));
     const imports = __wbg_get_imports();
     ("string" == typeof module_or_path || "function" == typeof Request && module_or_path instanceof Request || "function" == typeof URL && module_or_path instanceof URL) && (module_or_path = fetch(module_or_path));
@@ -368,4 +368,4 @@ async function __wbg_init(module_or_path) {
     return __wbg_finalize_init(instance, module);
 }
 
-export { initSync, __wbg_init as default };
\ No newline at end of file
+export { initSync, __wbg_init as default };
diff --git a/sentience/runtime_agent.py b/sentience/runtime_agent.py
new file mode 100644
index 0000000..8e2be77
--- /dev/null
+++ b/sentience/runtime_agent.py
@@ -0,0 +1,423 @@
+"""
+AgentRuntime-backed agent with optional vision executor fallback.
+
+This module intentionally keeps the control plane verification-first:
+- Actions may be proposed by either a structured executor (DOM snapshot prompt)
+  or a vision executor (screenshot prompt).
+- Verification is always executed via AgentRuntime predicates.
+"""
+
+from __future__ import annotations
+
+import base64
+import re
+from dataclasses import dataclass, field
+from typing import Any, Literal
+
+from .agent_runtime import AgentRuntime
+from .backends import actions as backend_actions
+from .llm_interaction_handler import LLMInteractionHandler
+from .llm_provider import LLMProvider
+from .models import BBox, Snapshot
+from .verification import AssertContext, AssertOutcome, Predicate
+
+
+@dataclass(frozen=True)
+class StepVerification:
+    predicate: Predicate  # evaluated through AgentRuntime.check() / assert_()
+    label: str  # passed as label= to the runtime; also listed in the vision prompt
+    required: bool = True  # forwarded as required= to the runtime
+    eventually: bool = True  # True: check(...).eventually(...); False: one-shot assert_()
+    timeout_s: float = 10.0  # forwarded to .eventually(); unused when eventually=False
+    poll_s: float = 0.25  # forwarded to .eventually(); unused when eventually=False
+    max_snapshot_attempts: int = 3  # forwarded to .eventually(); unused when eventually=False
+    min_confidence: float | None = None  # forwarded to .eventually(); unused when eventually=False
+
+
+@dataclass(frozen=True)
+class RuntimeStep:
+    goal: str  # passed to begin_step(), snapshot(goal=...) and the executor prompts
+    intent: str | None = None  # NOTE(review): not read by RuntimeAgent in this module
+    verifications: list[StepVerification] = field(default_factory=list)  # empty => verification passes
+
+    # Snapshot quality policy (handled at agent layer; SDK core unchanged).
+    snapshot_limit_base: int = 60  # limit used for the first snapshot attempt
+    snapshot_limit_step: int = 40  # added to the limit after each rejected attempt
+    snapshot_limit_max: int = 220  # upper bound for the ramped limit
+    max_snapshot_attempts: int = 3  # values below 1 are treated as 1
+    min_confidence: float | None = None  # None disables the diagnostics.confidence gate
+    min_actionables: int | None = None  # None disables the clickable-element-count gate
+
+    # Vision executor fallback (bounded).
+    vision_executor_enabled: bool = True  # False disables both the shortcut and the fallback
+    max_vision_executor_attempts: int = 1  # <= 0: no vision fallback after a failed verification
+
+
+class RuntimeAgent:
+    """
+    A thin orchestration layer over AgentRuntime:
+    - snapshot (with limit ramp)
+    - propose action (structured executor; optionally vision executor fallback)
+    - execute action (backend-agnostic primitives)
+    - verify (AgentRuntime predicates)
+    """
+
+    def __init__(
+        self,
+        *,
+        runtime: AgentRuntime,
+        executor: LLMProvider,
+        vision_executor: LLMProvider | None = None,
+        vision_verifier: LLMProvider | None = None,
+        short_circuit_canvas: bool = True,
+    ) -> None:
+        """Store the collaborators and wrap ``executor`` in an LLMInteractionHandler."""
+        self.runtime = runtime
+        self.executor = executor
+        self.vision_executor = vision_executor  # proposes actions from a screenshot
+        self.vision_verifier = vision_verifier  # passed as vision_provider= to .eventually()
+        self.short_circuit_canvas = short_circuit_canvas  # enables the canvas-page vision shortcut
+        self._structured_llm = LLMInteractionHandler(executor)
+
+    async def run_step(
+        self,
+        *,
+        task_goal: str,
+        step: RuntimeStep,
+    ) -> bool:
+        """Run one snapshot -> act -> verify cycle; return True when verification passes.
+
+        The vision fallback runs at most ``step.max_vision_executor_attempts`` times.
+        """
+        self.runtime.begin_step(step.goal)
+        snap = await self._snapshot_with_ramp(step=step)
+        if await self._should_short_circuit_to_vision(step=step, snap=snap):
+            return await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
+
+        # 1) Structured executor attempt.
+        action = self._propose_structured_action(task_goal=task_goal, step=step, snap=snap)
+        await self._execute_action(action=action, snap=snap)
+        if await self._apply_verifications(step=step):
+            return True
+
+        # 2) Optional vision fallback, bounded by the configured count (not merely "> 0").
+        if step.vision_executor_enabled:
+            for _ in range(step.max_vision_executor_attempts):
+                if await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap):
+                    return True
+        return False
+
+    async def _snapshot_with_ramp(self, *, step: RuntimeStep) -> Snapshot:
+        """Snapshot with a growing element limit until the quality gates pass.
+
+        Falls back to the latest non-None snapshot when no attempt passes the gates;
+        raises RuntimeError only when every attempt returned None.
+        """
+        limit = step.snapshot_limit_base
+        last_good: Snapshot | None = None
+        for _attempt in range(max(1, step.max_snapshot_attempts)):
+            snap = await self.runtime.snapshot(limit=limit, goal=step.goal)
+            # Ramp for the next attempt; harmless when this snapshot is accepted.
+            limit = min(step.snapshot_limit_max, limit + step.snapshot_limit_step)
+            if snap is None:
+                continue
+            # Track separately so a later None result cannot discard a usable snapshot.
+            last_good = snap
+            if step.min_confidence is not None:
+                conf = getattr(getattr(snap, "diagnostics", None), "confidence", None)
+                if isinstance(conf, (int, float)) and conf < step.min_confidence:
+                    continue
+            if step.min_actionables is not None:
+                if self._count_actionables(snap) < step.min_actionables:
+                    continue
+            return snap
+        if last_good is None:
+            raise RuntimeError("snapshot() returned None repeatedly")
+        return last_good
+
+    def _propose_structured_action(self, *, task_goal: str, step: RuntimeStep, snap: Snapshot) -> str:
+        """Ask the structured (DOM-context) executor for the next action string."""
+        llm = self._structured_llm
+        reply = llm.query_llm(llm.build_context(snap, step.goal), f"{task_goal}\n\nSTEP: {step.goal}")
+        return llm.extract_action(reply.content)
+
+    async def _vision_executor_attempt(
+        self,
+        *,
+        task_goal: str,
+        step: RuntimeStep,
+        snap: Snapshot | None,
+    ) -> bool:
+        """Ask the vision executor for one action, execute it, then re-run verifications."""
+        if not self.vision_executor or not self.vision_executor.supports_vision():
+            return False  # no usable vision executor: report failure without acting
+        url = await self._get_url_for_prompt()
+        image_b64 = await self._screenshot_base64_png()
+        system_prompt, user_prompt = self._vision_executor_prompts(
+            task_goal=task_goal,
+            step=step,
+            url=url,
+            snap=snap,
+        )
+        # NOTE(review): generate_with_image is called without await inside this coroutine.
+        resp = self.vision_executor.generate_with_image(
+            system_prompt,
+            user_prompt,
+            image_b64,
+            temperature=0.0,
+        )
+        # The reply may carry extra text or code fences; keep only the first recognised action.
+        action = self._extract_action_from_text(resp.content)
+        await self._execute_action(action=action, snap=snap)
+        # Important: vision executor fallback is a *retry* of the same step.
+        # Clear prior step assertions so required_assertions_passed reflects the final attempt.
+        self.runtime.flush_assertions()
+        return await self._apply_verifications(step=step)
+
+    async def _apply_verifications(self, *, step: RuntimeStep) -> bool:
+        """Run every verification of ``step`` through the runtime and combine the results."""
+        if not step.verifications:
+            # No explicit verifications provided: treat as pass.
+            return True
+        all_ok = True
+        for v in step.verifications:
+            if v.eventually:
+                ok = await self.runtime.check(v.predicate, label=v.label, required=v.required).eventually(
+                    timeout_s=v.timeout_s,
+                    poll_s=v.poll_s,
+                    max_snapshot_attempts=v.max_snapshot_attempts,
+                    min_confidence=v.min_confidence,
+                    vision_provider=self.vision_verifier,
+                )
+            else:
+                ok = self.runtime.assert_(v.predicate, label=v.label, required=v.required)
+            all_ok = all_ok and ok
+        # NOTE(review): all_ok also folds in required=False results, so a failed optional check fails the step.
+        # Respect required verifications semantics.
+        return self.runtime.required_assertions_passed() and all_ok
+
+    async def _execute_action(self, *, action: str, snap: Snapshot | None) -> None:
+        """Record ``action`` on the runtime, then carry it out through the backend.
+
+        FINISH() is recorded but performs nothing. PRESS/CLICK_XY/CLICK_RECT need no
+        snapshot; CLICK(id)/TYPE(id, ...) resolve the element's bbox from ``snap``.
+        Every executed action is followed by a best-effort wait for page readiness.
+
+        Raises:
+            ValueError: the action string cannot be parsed (it has already been
+                recorded by then).
+            RuntimeError: an id-based action has no snapshot or references a missing id.
+        """
+        # Prefer the live URL; fall back to the snapshot's URL if the runtime call fails.
+        try:
+            current_url = await self.runtime.get_url()
+        except Exception:
+            current_url = getattr(snap, "url", None)
+        await self.runtime.record_action(action, url=current_url)
+
+        # Coordinate-backed execution (by snapshot id or explicit coordinates).
+        kind, args = self._parse_action(action)
+        if kind == "finish":
+            return
+
+        if kind == "press":
+            await self._press_key_best_effort(args["key"])
+        elif kind == "click_xy":
+            point = (args["x"], args["y"])
+            await backend_actions.click(self.runtime.backend, point)
+        elif kind == "click_rect":
+            rect = BBox(x=args["x"], y=args["y"], width=args["w"], height=args["h"])
+            await backend_actions.click(self.runtime.backend, rect)
+        elif snap is None:
+            raise RuntimeError("Cannot execute CLICK(id)/TYPE(id, ...) without a snapshot")
+        elif kind in ("click", "type"):
+            target = self._find_element(snap, args["id"])
+            if target is None:
+                raise RuntimeError(f"Element id {args['id']} not found in snapshot")
+            if kind == "click":
+                await backend_actions.click(self.runtime.backend, target.bbox)
+            else:
+                await backend_actions.type_text(
+                    self.runtime.backend,
+                    args["text"],
+                    target=target.bbox,
+                )
+        else:
+            raise ValueError(f"Unknown action kind: {kind}")
+
+        # Shared tail: give the page a chance to settle after any executed action.
+        await self._stabilize_best_effort()
+
+    async def _stabilize_best_effort(self) -> None:
+        try:  # wait (up to 15000 ms) for the backend to report the "interactive" ready state
+            await self.runtime.backend.wait_ready_state(state="interactive", timeout_ms=15000)
+        except Exception:  # best-effort: a failed or timed-out wait must not fail the step
+            return
+
+    async def _press_key_best_effort(self, key: str) -> None:
+        """Best-effort key press via synthetic JS events (the backend has no keypress primitive)."""
+        import json  # local import: json.dumps emits a valid JS string literal for any key
+        key_js = json.dumps(key)
+        await self.runtime.backend.eval(
+            f"""
+            (() => {{
+              const el = document.activeElement || document.body;
+              const down = new KeyboardEvent('keydown', {{key: {key_js}, bubbles: true}});
+              const up = new KeyboardEvent('keyup', {{key: {key_js}, bubbles: true}});
+              el.dispatchEvent(down); el.dispatchEvent(up);
+              return true;
+            }})()
+            """
+        )
+
+    async def _screenshot_base64_png(self) -> str:
+        """Return a PNG screenshot from the backend as a base64-encoded text string."""
+        return base64.b64encode(await self.runtime.backend.screenshot_png()).decode("utf-8")
+
+    async def _get_url_for_prompt(self) -> str | None:
+        try:  # prefer the live URL reported by the runtime
+            return await self.runtime.get_url()
+        except Exception:  # fall back to the last snapshot's url attribute (None if it has none)
+            return getattr(self.runtime.last_snapshot, "url", None)
+
+    async def _should_short_circuit_to_vision(self, *, step: RuntimeStep, snap: Snapshot | None) -> bool:
+        """Return True when the step should skip the structured executor and use vision."""
+        vision = self.vision_executor
+        if not step.vision_executor_enabled or not vision or not vision.supports_vision():
+            return False
+        if snap is None:
+            return True
+        if step.min_actionables is None or self._count_actionables(snap) >= step.min_actionables:
+            return False
+        if not self.short_circuit_canvas:
+            return False
+        try:
+            n_canvas = await self.runtime.backend.eval("document.querySelectorAll('canvas').length")
+        except Exception:
+            return False
+        # Too few actionable elements plus at least one <canvas> triggers the shortcut.
+        return isinstance(n_canvas, (int, float)) and n_canvas > 0
+
+    def _vision_executor_prompts(
+        self,
+        *,
+        task_goal: str,
+        step: RuntimeStep,
+        url: str | None,
+        snap: Snapshot | None,
+    ) -> tuple[str, str]:
+        """Build the ``(system_prompt, user_prompt)`` pair sent to the vision executor."""
+        # Include URL as text: screenshots generally don't include browser chrome reliably.
+        verify_targets = self._verification_targets_human(step.verifications)
+        snapshot_summary = ""
+        if snap is not None:
+            snapshot_summary = (
+                f"\n\nStructured snapshot summary:\n"
+                f"- url: {getattr(snap, 'url', None)}\n"
+                f"- elements: {len(getattr(snap, 'elements', []) or [])}\n"
+            )
+        # The six action forms listed under RESPONSE FORMAT are the ones _parse_action accepts.
+        system_prompt = f"""You are a vision-capable web automation executor.
+
+TASK GOAL:
+{task_goal}
+
+STEP GOAL:
+{step.goal}
+
+CURRENT URL (text):
+{url or "(unknown)"}
+
+VERIFICATION TARGETS (text):
+{verify_targets or "(none provided)"}
+{snapshot_summary}
+
+RESPONSE FORMAT:
+Return ONLY ONE of:
+- CLICK(id)
+- TYPE(id, "text")
+- CLICK_XY(x, y)
+- CLICK_RECT(x, y, w, h)
+- PRESS("key")
+- FINISH()
+
+No explanations, no markdown.
+"""
+
+        user_prompt = "From the screenshot, return the single best next action:"
+        return system_prompt, user_prompt
+
+    def _verification_targets_human(self, verifications: list[StepVerification]) -> str:
+        """Render verifications as ``- <label> (required|optional)`` lines.
+
+        Returns an empty string when there are no verifications.
+        """
+        return "\n".join(
+            f"- {v.label} ({'required' if v.required else 'optional'})" for v in verifications or []
+        )
+
+    def _count_actionables(self, snap: Snapshot) -> int:
+        """Count snapshot elements whose ``visual_cues.is_clickable`` is truthy.
+
+        Elements lacking ``visual_cues`` or the flag are not counted.
+        """
+        return sum(
+            1 for el in snap.elements or [] if getattr(getattr(el, "visual_cues", None), "is_clickable", False)
+        )
+
+    def _find_element(self, snap: Snapshot, element_id: int) -> Any | None:
+        """Return the first snapshot element whose ``id`` equals ``element_id``, else None."""
+        candidates = snap.elements or []
+        # next() with a default stops at the first hit and yields None when nothing matches.
+        return next((el for el in candidates if getattr(el, "id", None) == element_id), None)
+
+    def _parse_action(
+        self,
+        action: str,
+    ) -> tuple[Literal["click", "type", "press", "finish", "click_xy", "click_rect"], dict[str, Any]]:
+        """Parse a single action string into ``(kind, payload)``.
+
+        Keywords are case-insensitive and whitespace around tokens is tolerated. The
+        quoted argument of TYPE/PRESS may itself contain quote characters (for example
+        ``TYPE(3, "it's")``), which ``_extract_action_from_text`` already lets through.
+
+        Raises:
+            ValueError: ``action`` matches none of the supported formats.
+        """
+        action = action.strip()
+        # Shared regex fragments keep the numeric patterns readable.
+        num = r"(-?\d+(?:\.\d+)?)"  # signed integer or decimal, captured
+        sep = r"\s*,\s*"
+        quoted = re.IGNORECASE | re.DOTALL  # DOTALL: quoted text may span lines
+
+        if re.match(r"FINISH\s*\(\s*\)\s*$", action, re.IGNORECASE):
+            return "finish", {}
+
+        if m := re.match(rf"CLICK_XY\s*\(\s*{num}{sep}{num}\s*\)\s*$", action, re.IGNORECASE):
+            return "click_xy", {"x": float(m.group(1)), "y": float(m.group(2))}
+
+        rect_pat = rf"CLICK_RECT\s*\(\s*{num}{sep}{num}{sep}{num}{sep}{num}\s*\)\s*$"
+        if m := re.match(rect_pat, action, re.IGNORECASE):
+            x, y, w, h = (float(g) for g in m.groups())
+            return "click_rect", {"x": x, "y": y, "w": w, "h": h}
+
+        if m := re.match(r"CLICK\s*\(\s*(\d+)\s*\)\s*$", action, re.IGNORECASE):
+            return "click", {"id": int(m.group(1))}
+
+        # Greedy body + end anchor: only the final quote before ")" closes the argument,
+        # so embedded quotes/apostrophes no longer make a valid action unparseable.
+        if m := re.match(r'TYPE\s*\(\s*(\d+)\s*,\s*["\'](.*)["\']\s*\)\s*$', action, quoted):
+            return "type", {"id": int(m.group(1)), "text": m.group(2)}
+
+        if m := re.match(r'PRESS\s*\(\s*["\'](.+)["\']\s*\)\s*$', action, quoted):
+            return "press", {"key": m.group(1)}
+
+        raise ValueError(f"Unknown action format: {action}")
+
+    def _extract_action_from_text(self, text: str) -> str:
+        # Keep consistent with LLMInteractionHandler.extract_action, but without DOM context dependency.
+        text = re.sub(r"```[\w]*\n?", "", text).strip()  # drop Markdown code-fence markers
+        pat = r'(CLICK_XY\s*\(\s*-?\d+(?:\.\d+)?\s*,\s*-?\d+(?:\.\d+)?\s*\)|CLICK_RECT\s*\(\s*-?\d+(?:\.\d+)?\s*,\s*-?\d+(?:\.\d+)?\s*,\s*-?\d+(?:\.\d+)?\s*,\s*-?\d+(?:\.\d+)?\s*\)|CLICK\s*\(\s*\d+\s*\)|TYPE\s*\(\s*\d+\s*,\s*["\'].*?["\']\s*\)|PRESS\s*\(\s*["\'].*?["\']\s*\)|FINISH\s*\(\s*\))'
+        m = re.search(pat, text, re.IGNORECASE)  # first recognised action anywhere in the reply
+        return m.group(1) if m else text  # no match: hand back the cleaned text unchanged
+
diff --git a/tests/unit/test_runtime_agent.py b/tests/unit/test_runtime_agent.py
new file mode 100644
index 0000000..e069b76
--- /dev/null
+++ b/tests/unit/test_runtime_agent.py
@@ -0,0 +1,338 @@
+from __future__ import annotations
+
+from unittest.mock import AsyncMock
+
+import pytest
+
+from sentience.agent_runtime import AgentRuntime
+from sentience.llm_provider import LLMProvider, LLMResponse
+from sentience.models import BBox, Element, Snapshot, SnapshotDiagnostics, Viewport, VisualCues
+from sentience.runtime_agent import RuntimeAgent, RuntimeStep, StepVerification
+from sentience.verification import AssertContext, AssertOutcome
+
+
+class MockTracer:
+    def __init__(self) -> None:
+        self.events: list[dict] = []  # every emitted event, in call order
+
+    def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None:
+        self.events.append({"type": event_type, "data": data, "step_id": step_id})  # record only
+
+
+class MockBackend:
+    def __init__(self) -> None:
+        self._url = "https://example.com/start"  # value returned by get_url()
+        self.mouse_clicks: list[tuple[float, float]] = []  # (x, y) of every mouse_click call
+        self.typed: list[str] = []  # text passed to type_text, in call order
+        self.eval_calls: list[str] = []  # every expression passed to eval
+
+    async def get_url(self) -> str:
+        return self._url
+
+    async def refresh_page_info(self):
+        return None
+
+    async def eval(self, expression: str):
+        self.eval_calls.append(expression)
+        # default: no canvas
+        if "querySelectorAll('canvas')" in expression:
+            return 0
+        return None
+
+    async def call(self, function_declaration: str, args=None):
+        _ = function_declaration, args  # arguments intentionally ignored
+        return None
+
+    async def get_layout_metrics(self):
+        return None
+
+    async def screenshot_png(self) -> bytes:
+        return b"png"  # placeholder bytes, not a real image
+
+    async def screenshot_jpeg(self, quality: int | None = None) -> bytes:
+        _ = quality
+        return b"jpeg"  # placeholder bytes, not a real image
+
+    async def mouse_move(self, x: float, y: float) -> None:
+        _ = x, y
+        return None
+
+    async def mouse_click(self, x: float, y: float, button="left", click_count=1) -> None:
+        _ = button, click_count
+        self.mouse_clicks.append((float(x), float(y)))  # tests assert on this list
+
+    async def wheel(self, delta_y: float, x=None, y=None) -> None:
+        _ = delta_y, x, y
+        return None
+
+    async def type_text(self, text: str) -> None:
+        self.typed.append(text)
+
+    async def wait_ready_state(self, state="interactive", timeout_ms=15000) -> None:
+        _ = state, timeout_ms  # returns immediately: the mock is always "ready"
+        return None
+
+
+class ProviderStub(LLMProvider):
+    def __init__(self, *, model: str = "stub", responses: list[str] | None = None):
+        super().__init__(model)
+        self._responses = responses or []  # scripted replies, consumed front to back
+        self.calls: list[dict] = []  # one entry per generate() call
+
+    def generate(self, system_prompt: str, user_prompt: str, **kwargs) -> LLMResponse:
+        self.calls.append({"system": system_prompt, "user": user_prompt, "kwargs": kwargs})
+        content = self._responses.pop(0) if self._responses else "FINISH()"  # default once exhausted
+        return LLMResponse(content=content, model_name=self.model_name)
+
+    def supports_json_mode(self) -> bool:
+        return True
+
+    @property
+    def model_name(self) -> str:
+        return self._model_name  # presumably set by LLMProvider.__init__ -- TODO confirm
+
+
+class VisionProviderStub(ProviderStub):
+    def supports_vision(self) -> bool:
+        return True  # lets RuntimeAgent treat this stub as a usable vision executor
+
+    def generate_with_image(self, system_prompt: str, user_prompt: str, image_base64: str, **kwargs):
+        self.calls.append(
+            {"system": system_prompt, "user": user_prompt, "image_base64": image_base64, "kwargs": kwargs}
+        )
+        content = self._responses.pop(0) if self._responses else "FINISH()"  # default once exhausted
+        return LLMResponse(content=content, model_name=self.model_name)
+
+
+def make_snapshot(*, url: str, elements: list[Element], confidence: float | None = None) -> Snapshot:
+    diagnostics = SnapshotDiagnostics(confidence=confidence) if confidence is not None else None
+    return Snapshot(
+        status="success",
+        url=url,
+        elements=elements,
+        viewport=Viewport(width=1280, height=720),  # fixed viewport for every test snapshot
+        diagnostics=diagnostics,  # None unless a confidence value was supplied
+    )
+
+
+def make_clickable_element(element_id: int) -> Element:
+    return Element(
+        id=element_id,
+        role="button",
+        text="OK",
+        importance=100,
+        bbox=BBox(x=10, y=20, width=100, height=40),  # same bbox for every element built here
+        visual_cues=VisualCues(is_primary=True, is_clickable=True, background_color_name=None),
+        in_viewport=True,
+        is_occluded=False,
+    )
+
+
+@pytest.mark.asyncio
+async def test_runtime_agent_structured_executor_success_no_vision_used() -> None:
+    backend = MockBackend()
+    tracer = MockTracer()
+    runtime = AgentRuntime(backend=backend, tracer=tracer)
+
+    # snapshot (ramp) -> S0, then verification eventually -> S1
+    s0 = make_snapshot(url="https://example.com/start", elements=[make_clickable_element(1)])
+    s1 = make_snapshot(url="https://example.com/done", elements=[make_clickable_element(1)])
+
+    async def fake_snapshot(**_kwargs):
+        runtime.last_snapshot = snaps.pop(0)  # `snaps` is bound below, before the first call
+        return runtime.last_snapshot
+
+    snaps = [s0, s1]
+    runtime.snapshot = AsyncMock(side_effect=fake_snapshot)  # type: ignore[method-assign]
+
+    executor = ProviderStub(responses=["CLICK(1)"])
+    agent = RuntimeAgent(runtime=runtime, executor=executor, vision_executor=None)
+
+    def pred(ctx: AssertContext) -> AssertOutcome:
+        ok = (ctx.url or "").endswith("/done")  # passes only for a URL ending in /done
+        return AssertOutcome(passed=ok, reason="" if ok else "not done", details={})
+
+    step = RuntimeStep(
+        goal="Click OK",
+        verifications=[
+            StepVerification(
+                predicate=pred,
+                label="url_done",
+                required=True,
+                eventually=True,
+                timeout_s=0.1,
+                poll_s=0.0,
+                max_snapshot_attempts=1,
+            )
+        ],
+        max_snapshot_attempts=1,
+    )
+
+    ok = await agent.run_step(task_goal="test", step=step)
+    assert ok is True
+    assert len(executor.calls) == 1  # the structured executor was queried exactly once
+    assert backend.mouse_clicks  # click happened
+
+
+@pytest.mark.asyncio
+async def test_runtime_agent_vision_executor_fallback_after_verification_fail() -> None:
+    """Expects one structured call, then one vision call, when the first verification fails."""
+    backend = MockBackend()
+    runtime = AgentRuntime(backend=backend, tracer=MockTracer())
+
+    # Served in order: ramp -> /start, first verification -> /still (fail), second -> /done (pass).
+    pending = [
+        make_snapshot(url="https://example.com/start", elements=[make_clickable_element(1)]),
+        make_snapshot(url="https://example.com/still", elements=[make_clickable_element(1)]),
+        make_snapshot(url="https://example.com/done", elements=[make_clickable_element(1)]),
+    ]
+
+    async def serve_snapshot(**_kwargs):
+        runtime.last_snapshot = pending.pop(0)
+        return runtime.last_snapshot
+
+    runtime.snapshot = AsyncMock(side_effect=serve_snapshot)  # type: ignore[method-assign]
+
+    executor = ProviderStub(responses=["CLICK(1)"])
+    vision = VisionProviderStub(responses=["CLICK(1)"])
+    agent = RuntimeAgent(runtime=runtime, executor=executor, vision_executor=vision)
+
+    def pred(ctx: AssertContext) -> AssertOutcome:
+        done = (ctx.url or "").endswith("/done")
+        return AssertOutcome(passed=done, reason="" if done else "not done", details={})
+
+    url_check = StepVerification(
+        predicate=pred,
+        label="url_done",
+        required=True,
+        eventually=True,
+        timeout_s=0.0,
+        poll_s=0.0,
+        max_snapshot_attempts=1,
+    )
+    step = RuntimeStep(
+        goal="Try click; fallback if needed",
+        verifications=[url_check],
+        max_snapshot_attempts=1,
+        vision_executor_enabled=True,
+        max_vision_executor_attempts=1,
+    )
+
+    succeeded = await agent.run_step(task_goal="test", step=step)
+    assert succeeded is True
+    assert len(executor.calls) == 1
+    assert len(vision.calls) == 1
+
+
+@pytest.mark.asyncio
+async def test_snapshot_limit_ramp_increases_limit_on_low_confidence() -> None:
+    """Checks the first two requested snapshot limits are 60 then 100 after a low-confidence read."""
+    backend = MockBackend()
+    runtime = AgentRuntime(backend=backend, tracer=MockTracer())
+
+    low_conf = make_snapshot(url="https://example.com/start", elements=[make_clickable_element(1)], confidence=0.1)
+    high_conf = make_snapshot(url="https://example.com/start", elements=[make_clickable_element(1)], confidence=0.9)
+    finished = make_snapshot(url="https://example.com/done", elements=[make_clickable_element(1)])
+    # ramp tries low then high; verification uses done
+    pending = [low_conf, high_conf, finished]
+    seen_limits: list[int] = []
+
+    async def serve_snapshot(**kwargs):
+        limit = kwargs.get("limit")
+        if limit is not None:
+            seen_limits.append(int(limit))
+        runtime.last_snapshot = pending.pop(0)
+        return runtime.last_snapshot
+
+    runtime.snapshot = AsyncMock(side_effect=serve_snapshot)  # type: ignore[method-assign]
+
+    executor = ProviderStub(responses=["CLICK(1)"])
+    agent = RuntimeAgent(runtime=runtime, executor=executor)
+
+    def pred(ctx: AssertContext) -> AssertOutcome:
+        if (ctx.url or "").endswith("/done"):
+            return AssertOutcome(passed=True, reason="", details={})
+        return AssertOutcome(passed=False, reason="not done", details={})
+
+    url_check = StepVerification(
+        predicate=pred,
+        label="url_done",
+        required=True,
+        eventually=True,
+        timeout_s=0.1,
+        poll_s=0.0,
+        max_snapshot_attempts=1,
+    )
+    step = RuntimeStep(
+        goal="ramp snapshot",
+        min_confidence=0.7,
+        snapshot_limit_base=60,
+        snapshot_limit_step=40,
+        snapshot_limit_max=220,
+        max_snapshot_attempts=2,
+        verifications=[url_check],
+    )
+
+    succeeded = await agent.run_step(task_goal="test", step=step)
+    assert succeeded is True
+    assert seen_limits[:2] == [60, 100]
+
+
+@pytest.mark.asyncio
+async def test_short_circuit_to_vision_on_canvas_and_low_actionables() -> None:
+    """Expects the structured executor to be skipped and the vision executor to click (100, 200)."""
+    backend = MockBackend()
+
+    async def eval_canvas(expression: str):
+        backend.eval_calls.append(expression)
+        # Answer 1 to any script mentioning querySelectorAll('canvas'); everything else gets None.
+        return 1 if "querySelectorAll('canvas')" in expression else None
+
+    backend.eval = eval_canvas  # type: ignore[method-assign]
+
+    runtime = AgentRuntime(backend=backend, tracer=MockTracer())
+
+    # Both pages carry an empty element list, i.e. no actionables.
+    pending = [
+        make_snapshot(url="https://example.com/start", elements=[]),
+        make_snapshot(url="https://example.com/done", elements=[]),
+    ]
+
+    async def serve_snapshot(**_kwargs):
+        runtime.last_snapshot = pending.pop(0)
+        return runtime.last_snapshot
+
+    runtime.snapshot = AsyncMock(side_effect=serve_snapshot)  # type: ignore[method-assign]
+
+    executor = ProviderStub(responses=["CLICK(999)"])  # should NOT be called
+    vision = VisionProviderStub(responses=["CLICK_XY(100, 200)"])
+    agent = RuntimeAgent(runtime=runtime, executor=executor, vision_executor=vision, short_circuit_canvas=True)
+
+    def pred(ctx: AssertContext) -> AssertOutcome:
+        done = (ctx.url or "").endswith("/done")
+        return AssertOutcome(passed=done, reason="" if done else "not done", details={})
+
+    url_check = StepVerification(
+        predicate=pred,
+        label="url_done",
+        required=True,
+        eventually=True,
+        timeout_s=0.1,
+        poll_s=0.0,
+        max_snapshot_attempts=1,
+    )
+    step = RuntimeStep(
+        goal="canvas step",
+        min_actionables=1,
+        max_snapshot_attempts=1,
+        verifications=[url_check],
+        vision_executor_enabled=True,
+        max_vision_executor_attempts=1,
+    )
+
+    succeeded = await agent.run_step(task_goal="test", step=step)
+    assert succeeded is True
+    assert len(executor.calls) == 0
+    assert len(vision.calls) == 1
+    assert backend.mouse_clicks == [(100.0, 200.0)]
+

From 3582cd6d2337f560f9a04fddb8ed48ab9a7f6f93 Mon Sep 17 00:00:00 2001
From: SentienceDEV <dev@sentienceapi.com>
Date: Wed, 21 Jan 2026 18:22:37 -0800
Subject: [PATCH 2/2] Fix assert_done to call assert_ instead of assertTrue

---
 sentience/agent_runtime.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py
index bac9837..01ba2ff 100644
--- a/sentience/agent_runtime.py
+++ b/sentience/agent_runtime.py
@@ -582,7 +582,7 @@ def assert_done(
             True if task is complete (assertion passed), False otherwise
         """
         # Convenience wrapper for assert_ with required=True
-        ok = self.assertTrue(predicate, label=label, required=True)
+        ok = self.assert_(predicate, label=label, required=True)
         if ok:
             self._task_done = True
             self._task_done_label = label