Skip to content

Commit 37eb0ce

Browse files
authored
Merge pull request #189 from SentienceAPI/parity_win2
Phase 2: ToolRegistry + Filesystem + extraction
2 parents 73c5737 + 9156c44 commit 37eb0ce

18 files changed

+1582
-38
lines changed

sentience/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@
1919
click_rect,
2020
press,
2121
scroll_to,
22+
search,
23+
search_async,
2224
select_option,
25+
send_keys,
26+
send_keys_async,
2327
submit,
2428
type_text,
2529
uncheck,
@@ -102,13 +106,14 @@
102106
from .ordinal import OrdinalIntent, boost_ordinal_elements, detect_ordinal_intent, select_by_ordinal
103107
from .overlay import clear_overlay, show_overlay
104108
from .query import find, query
105-
from .read import read
109+
from .read import extract, extract_async, read
106110
from .recorder import Recorder, Trace, TraceStep, record
107111
from .runtime_agent import RuntimeAgent, RuntimeStep, StepVerification
108112
from .screenshot import screenshot
109113
from .sentience_methods import AgentAction, SentienceMethod
110114
from .snapshot import snapshot
111115
from .text_search import find_text_rect
116+
from .tools import BackendCapabilities, ToolContext, ToolRegistry, ToolSpec, register_default_tools
112117
from .tracer_factory import SENTIENCE_API_URL, create_tracer
113118
from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
114119

sentience/actions.py

Lines changed: 229 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
import asyncio
66
import time
77
from pathlib import Path
8+
from urllib.parse import quote_plus
89

910
from .browser import AsyncSentienceBrowser, SentienceBrowser
1011
from .browser_evaluator import BrowserEvaluator
1112
from .cursor_policy import CursorPolicy, build_human_cursor_path
12-
from .models import ActionResult, BBox, Snapshot
13+
from .models import ActionResult, BBox, Snapshot, SnapshotOptions
1314
from .sentience_methods import SentienceMethod
1415
from .snapshot import snapshot, snapshot_async
1516

@@ -709,6 +710,146 @@ def press(browser: SentienceBrowser, key: str, take_snapshot: bool = False) -> A
709710
)
710711

711712

713+
def _normalize_key_token(token: str) -> str:
    """
    Translate a key alias into its canonical key name.

    Matching ignores case and surrounding whitespace. A token that is not a
    known alias is returned with surrounding whitespace removed but is
    otherwise left unchanged.

    Args:
        token: Single key token such as "cmd", "CTRL" or "a"

    Returns:
        Canonical key name for known aliases, otherwise the stripped token
    """
    cleaned = token.strip()
    wanted = cleaned.upper()

    # Each canonical name is paired with every spelling that maps to it.
    alias_groups = (
        ("Meta", ("CMD", "COMMAND")),
        ("Control", ("CTRL", "CONTROL")),
        ("Alt", ("ALT", "OPTION")),
        ("Shift", ("SHIFT",)),
        ("Escape", ("ESC", "ESCAPE")),
        ("Enter", ("ENTER", "RETURN")),
        ("Tab", ("TAB",)),
        ("Space", ("SPACE",)),
    )
    for canonical, spellings in alias_groups:
        if wanted in spellings:
            return canonical
    return cleaned
731+
732+
733+
def _parse_key_sequence(sequence: str) -> list[str]:
    """
    Split a key-sequence string into the individual keys to press.

    Tokens are separated by commas and/or whitespace. A token may be wrapped
    in braces ("{ENTER}"), and may combine several keys with "+"
    ("CTRL+SHIFT+P"). Every key name is passed through
    _normalize_key_token().

    Tokens that contain no key name at all (for example "{}" or a bare "+")
    are skipped instead of being returned as an empty string.

    Args:
        sequence: Raw key sequence, e.g. "CMD+H", "{CTRL+L}, {ENTER}"

    Returns:
        List of normalized keys / key combinations in input order; empty if
        the sequence contains no usable tokens
    """
    keys: list[str] = []
    # str.split() without arguments never yields empty or whitespace-padded
    # tokens, so the tokens need no further stripping.
    for token in sequence.replace(",", " ").split():
        if token.startswith("{") and token.endswith("}"):
            token = token[1:-1]
        # A token without "+" is simply a one-part combination.
        parts = [_normalize_key_token(part) for part in token.split("+")]
        key = "+".join(part for part in parts if part)
        if key:
            keys.append(key)
    return keys
747+
748+
749+
def send_keys(
    browser: SentienceBrowser,
    sequence: str,
    take_snapshot: bool = False,
    delay_ms: int = 50,
) -> ActionResult:
    """
    Press every key (or key combination) of a sequence, in order.

    Keys are separated by commas and/or whitespace and combinations use "+"
    (e.g., "CMD+H", "CTRL+SHIFT+P"). Tokens may be wrapped in braces, such
    as "{ENTER}" or "{CTRL+L}".

    Args:
        browser: SentienceBrowser instance
        sequence: Key sequence string to send
        take_snapshot: Whether to take snapshot after the keys are sent
        delay_ms: Milliseconds to wait after each key press (no wait if <= 0)

    Returns:
        ActionResult whose outcome is "navigated" when the page URL changed
        and "dom_updated" otherwise

    Raises:
        RuntimeError: If the browser has no page
        ValueError: If the sequence contains no keys
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call browser.start() first.")

    started_at = time.time()
    initial_url = browser.page.url

    key_presses = _parse_key_sequence(sequence)
    if not key_presses:
        raise ValueError("send_keys sequence is empty")

    pause_after_key = delay_ms > 0
    for key_press in key_presses:
        browser.page.keyboard.press(key_press)
        if pause_after_key:
            browser.page.wait_for_timeout(delay_ms)

    # Duration covers only the key presses, not the optional snapshot.
    elapsed_ms = int((time.time() - started_at) * 1000)
    navigated = initial_url != browser.page.url

    page_snapshot: Snapshot | None = snapshot(browser) if take_snapshot else None

    return ActionResult(
        success=True,
        duration_ms=elapsed_ms,
        outcome="navigated" if navigated else "dom_updated",
        url_changed=navigated,
        snapshot_after=page_snapshot,
    )
791+
792+
793+
def _build_search_url(query: str, engine: str) -> str:
    """
    Build the results-page URL for a query on a supported search engine.

    Args:
        query: Search query string; it is URL-encoded with quote_plus()
        engine: Search engine name, matched case-insensitively after
            trimming whitespace (duckduckgo, ddg, google, google.com, bing)

    Returns:
        URL of the search results page for the query

    Raises:
        ValueError: If the engine is not one of the supported names
    """
    # One entry per accepted spelling; aliases share the same URL prefix.
    url_prefixes = {
        "duckduckgo": "https://duckduckgo.com/?q=",
        "ddg": "https://duckduckgo.com/?q=",
        "google": "https://www.google.com/search?q=",
        "google.com": "https://www.google.com/search?q=",
        "bing": "https://www.bing.com/search?q=",
    }
    prefix = url_prefixes.get(engine.strip().lower())
    if prefix is None:
        raise ValueError(f"unsupported search engine: {engine}")
    return prefix + quote_plus(query)
805+
806+
807+
def search(
    browser: SentienceBrowser,
    query: str,
    engine: str = "duckduckgo",
    take_snapshot: bool = False,
    snapshot_options: SnapshotOptions | None = None,
) -> ActionResult:
    """
    Open the search results page of a search engine for the given query.

    Args:
        browser: SentienceBrowser instance
        query: Search query string
        engine: Search engine name (duckduckgo, google, google.com, bing)
        take_snapshot: Whether to take snapshot after navigation
        snapshot_options: Snapshot options passed to snapshot() when take_snapshot is True.

    Returns:
        ActionResult whose outcome is "navigated" when the page URL changed
        and "dom_updated" otherwise

    Raises:
        RuntimeError: If the browser has no page
        ValueError: If the query is blank or the engine is not supported
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call browser.start() first.")
    if not query.strip():
        raise ValueError("search query is empty")

    started_at = time.time()
    initial_url = browser.page.url

    browser.goto(_build_search_url(query, engine))
    browser.page.wait_for_load_state("networkidle")

    # Duration covers navigation and load wait, not the optional snapshot.
    elapsed_ms = int((time.time() - started_at) * 1000)
    navigated = initial_url != browser.page.url

    page_snapshot: Snapshot | None = (
        snapshot(browser, snapshot_options) if take_snapshot else None
    )

    return ActionResult(
        success=True,
        duration_ms=elapsed_ms,
        outcome="navigated" if navigated else "dom_updated",
        url_changed=navigated,
        snapshot_after=page_snapshot,
    )
851+
852+
712853
def scroll_to(
713854
browser: SentienceBrowser,
714855
element_id: int,
@@ -1698,6 +1839,93 @@ async def press_async(
16981839
)
16991840

17001841

1842+
async def send_keys_async(
    browser: AsyncSentienceBrowser,
    sequence: str,
    take_snapshot: bool = False,
    delay_ms: int = 50,
) -> ActionResult:
    """
    Async version of send_keys(): press every key of a sequence, in order.

    Args:
        browser: AsyncSentienceBrowser instance
        sequence: Key sequence string to send (e.g., "CMD+H", "{ENTER}")
        take_snapshot: Whether to take snapshot after the keys are sent
        delay_ms: Milliseconds to wait after each key press (no wait if <= 0)

    Returns:
        ActionResult whose outcome is "navigated" when the page URL changed
        and "dom_updated" otherwise

    Raises:
        RuntimeError: If the browser has no page
        ValueError: If the sequence contains no keys
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    started_at = time.time()
    initial_url = browser.page.url

    key_presses = _parse_key_sequence(sequence)
    if not key_presses:
        raise ValueError("send_keys sequence is empty")

    pause_after_key = delay_ms > 0
    for key_press in key_presses:
        await browser.page.keyboard.press(key_press)
        if pause_after_key:
            await browser.page.wait_for_timeout(delay_ms)

    # Duration covers only the key presses, not the optional snapshot.
    elapsed_ms = int((time.time() - started_at) * 1000)
    navigated = initial_url != browser.page.url

    page_snapshot: Snapshot | None = None
    if take_snapshot:
        page_snapshot = await snapshot_async(browser)

    return ActionResult(
        success=True,
        duration_ms=elapsed_ms,
        outcome="navigated" if navigated else "dom_updated",
        url_changed=navigated,
        snapshot_after=page_snapshot,
    )
1881+
1882+
1883+
async def search_async(
    browser: AsyncSentienceBrowser,
    query: str,
    engine: str = "duckduckgo",
    take_snapshot: bool = False,
    snapshot_options: SnapshotOptions | None = None,
) -> ActionResult:
    """
    Async version of search(): open a search results page for the query.

    Args:
        browser: AsyncSentienceBrowser instance
        query: Search query string
        engine: Search engine name (duckduckgo, google, google.com, bing)
        take_snapshot: Whether to take snapshot after navigation
        snapshot_options: Snapshot options passed to snapshot_async() when take_snapshot is True.

    Returns:
        ActionResult whose outcome is "navigated" when the page URL changed
        and "dom_updated" otherwise

    Raises:
        RuntimeError: If the browser has no page
        ValueError: If the query is blank or the engine is not supported
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")
    if not query.strip():
        raise ValueError("search query is empty")

    started_at = time.time()
    initial_url = browser.page.url

    await browser.goto(_build_search_url(query, engine))
    await browser.page.wait_for_load_state("networkidle")

    # Duration covers navigation and load wait, not the optional snapshot.
    elapsed_ms = int((time.time() - started_at) * 1000)
    navigated = initial_url != browser.page.url

    page_snapshot: Snapshot | None = None
    if take_snapshot:
        page_snapshot = await snapshot_async(browser, snapshot_options)

    return ActionResult(
        success=True,
        duration_ms=elapsed_ms,
        outcome="navigated" if navigated else "dom_updated",
        url_changed=navigated,
        snapshot_after=page_snapshot,
    )
1927+
1928+
17011929
async def scroll_to_async(
17021930
browser: AsyncSentienceBrowser,
17031931
element_id: int,

sentience/agent.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,7 @@ def _safe_hook_call_sync(
8989
if verbose:
9090
print(f"⚠️ Hook error (non-fatal): {hook_error}")
9191
else:
92-
logging.getLogger(__name__).warning(
93-
"Hook error (non-fatal): %s", hook_error
94-
)
92+
logging.getLogger(__name__).warning("Hook error (non-fatal): %s", hook_error)
9593

9694

9795
async def _safe_hook_call_async(
@@ -109,9 +107,7 @@ async def _safe_hook_call_async(
109107
if verbose:
110108
print(f"⚠️ Hook error (non-fatal): {hook_error}")
111109
else:
112-
logging.getLogger(__name__).warning(
113-
"Hook error (non-fatal): %s", hook_error
114-
)
110+
logging.getLogger(__name__).warning("Hook error (non-fatal): %s", hook_error)
115111

116112

117113
class SentienceAgent(BaseAgent):

sentience/agent_runtime.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
TabListResult,
8282
TabOperationResult,
8383
)
84+
from .tools import BackendCapabilities, ToolRegistry
8485
from .trace_event_builder import TraceEventBuilder
8586
from .verification import AssertContext, AssertOutcome, Predicate
8687

@@ -118,6 +119,7 @@ def __init__(
118119
tracer: Tracer,
119120
snapshot_options: SnapshotOptions | None = None,
120121
sentience_api_key: str | None = None,
122+
tool_registry: ToolRegistry | None = None,
121123
):
122124
"""
123125
Initialize agent runtime with any BrowserBackend-compatible browser.
@@ -130,9 +132,11 @@ def __init__(
130132
tracer: Tracer for emitting verification events
131133
snapshot_options: Default options for snapshots
132134
sentience_api_key: API key for Pro/Enterprise tier (enables Gateway refinement)
135+
tool_registry: Optional ToolRegistry for LLM-callable tools
133136
"""
134137
self.backend = backend
135138
self.tracer = tracer
139+
self.tool_registry = tool_registry
136140

137141
# Build default snapshot options with API key if provided
138142
default_opts = snapshot_options or SnapshotOptions()
@@ -372,6 +376,33 @@ def _get_tab_backend(self):
372376
return None
373377
return backend
374378

379+
def capabilities(self) -> BackendCapabilities:
    """
    Report which optional features the backend and tool registry provide.

    Returns:
        BackendCapabilities with the tabs, evaluate_js, downloads,
        filesystem_tools and keyboard flags filled in. A default
        BackendCapabilities() is returned when no backend is set.
    """
    backend = getattr(self, "backend", None)
    if backend is None:
        return BackendCapabilities()

    can_evaluate = hasattr(backend, "eval")

    # Keyboard input counts as available when the backend exposes type_text,
    # or when its _page attribute carries a truthy keyboard object.
    if hasattr(backend, "type_text"):
        can_type = True
    else:
        page = getattr(backend, "_page", None)
        can_type = bool(getattr(page, "keyboard", None))

    supports_downloads = bool(getattr(backend, "downloads", None))

    files_available = False
    registry = self.tool_registry
    if registry is not None:
        try:
            files_available = registry.get("read_file") is not None
        except Exception:
            # Best-effort probe: a failing lookup means "no filesystem tools".
            files_available = False

    return BackendCapabilities(
        tabs=self._get_tab_backend() is not None,
        evaluate_js=can_evaluate,
        downloads=supports_downloads,
        filesystem_tools=files_available,
        # Keyboard is also reported when JS evaluation is available.
        keyboard=can_type or can_evaluate,
    )
401+
402+
def can(self, capability: str) -> bool:
    """
    Tell whether a named capability is currently available.

    Args:
        capability: Attribute name to look up on the result of capabilities()

    Returns:
        True if that attribute exists and is truthy, otherwise False
    """
    flag = getattr(self.capabilities(), capability, False)
    return True if flag else False
405+
375406
@staticmethod
376407
def _stringify_eval_value(value: Any) -> str:
377408
if value is None:

0 commit comments

Comments
 (0)