Skip to content

Commit aebd525

Browse files
authored
Merge pull request #102 from SentienceAPI/hardening
clean up & hardening
2 parents f7ab6c5 + 3ff3783 commit aebd525

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+4969
-1252
lines changed

.github/workflows/test.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,24 @@ jobs:
3232
- name: Install dependencies
3333
run: |
3434
pip install -e ".[dev]"
35+
pip install pre-commit mypy types-requests
36+
37+
- name: Lint with pre-commit
38+
continue-on-error: true
39+
run: |
40+
pre-commit run --all-files
41+
42+
- name: Type check with mypy
43+
continue-on-error: true
44+
run: |
45+
mypy sentience --ignore-missing-imports --no-strict-optional
46+
47+
- name: Check code style
48+
continue-on-error: true
49+
run: |
50+
black --check sentience tests --line-length=100
51+
isort --check-only --profile black sentience tests
52+
flake8 sentience tests --max-line-length=100 --extend-ignore=E203,W503,E501 --max-complexity=15
3553
3654
- name: Build extension (if needed)
3755
if: runner.os != 'Windows'

.pre-commit-config.yaml

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -50,20 +50,19 @@ repos:
5050
- '--max-complexity=15'
5151
exclude: ^(venv/|\.venv/|build/|dist/|tests/fixtures/)
5252

53-
# Type checking with mypy (disabled for now - too strict)
54-
# Uncomment to enable strict type checking
55-
# - repo: https://github.com/pre-commit/mirrors-mypy
56-
# rev: v1.8.0
57-
# hooks:
58-
# - id: mypy
59-
# additional_dependencies:
60-
# - pydantic>=2.0
61-
# - types-requests
62-
# args:
63-
# - '--ignore-missing-imports'
64-
# - '--no-strict-optional'
65-
# - '--warn-unused-ignores'
66-
# exclude: ^(tests/|examples/|venv/|\.venv/|build/|dist/)
53+
# Type checking with mypy
54+
- repo: https://github.com/pre-commit/mirrors-mypy
55+
rev: v1.8.0
56+
hooks:
57+
- id: mypy
58+
additional_dependencies:
59+
- pydantic>=2.0
60+
- types-requests
61+
args:
62+
- '--ignore-missing-imports'
63+
- '--no-strict-optional'
64+
- '--warn-unused-ignores'
65+
exclude: ^(tests/|examples/|venv/|\.venv/|build/|dist/)
6766

6867
# Security checks
6968
- repo: https://github.com/PyCQA/bandit

sentience/__init__.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
from .cloud_tracing import CloudTraceSink, SentienceLogger
1515
from .conversational_agent import ConversationalAgent
1616
from .expect import expect
17-
18-
# Formatting (v0.12.0+)
19-
from .formatting import format_snapshot_for_llm
2017
from .generator import ScriptGenerator, generate
2118
from .inspector import Inspector, inspect
2219
from .llm_provider import (
@@ -55,19 +52,24 @@
5552
from .read import read
5653
from .recorder import Recorder, Trace, TraceStep, record
5754
from .screenshot import screenshot
55+
from .sentience_methods import AgentAction, SentienceMethod
5856
from .snapshot import snapshot
5957
from .text_search import find_text_rect
6058
from .tracer_factory import SENTIENCE_API_URL, create_tracer
6159
from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
6260

6361
# Utilities (v0.12.0+)
62+
# Import from utils package (re-exports from submodules for backward compatibility)
6463
from .utils import (
6564
canonical_snapshot_loose,
6665
canonical_snapshot_strict,
6766
compute_snapshot_digests,
6867
save_storage_state,
6968
sha256_digest,
7069
)
70+
71+
# Formatting (v0.12.0+)
72+
from .utils.formatting import format_snapshot_for_llm
7173
from .wait import wait_for
7274

7375
__version__ = "0.91.1"
@@ -150,4 +152,7 @@
150152
"format_snapshot_for_llm",
151153
# Agent Config (v0.12.0+)
152154
"AgentConfig",
155+
# Enums
156+
"SentienceMethod",
157+
"AgentAction",
153158
]

sentience/action_executor.py

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
"""
2+
Action Executor for Sentience Agent.
3+
4+
Handles parsing and execution of action commands (CLICK, TYPE, PRESS, FINISH).
5+
This separates action execution concerns from LLM interaction.
6+
"""
7+
8+
import re
9+
from typing import Any, Union
10+
11+
from .actions import click, click_async, press, press_async, type_text, type_text_async
12+
from .browser import AsyncSentienceBrowser, SentienceBrowser
13+
from .models import Snapshot
14+
from .protocols import AsyncBrowserProtocol, BrowserProtocol
15+
16+
17+
class ActionExecutor:
    """
    Executes actions and handles parsing of action command strings.

    Supported commands (matched case-insensitively at the start of the string):

    - ``CLICK(id)``
    - ``TYPE(id, "text")``
    - ``PRESS("key")``
    - ``FINISH()``

    Parsing and result construction live in one place (``_parse_action`` and
    ``_build_result``) and are shared by the sync and async entry points, so a
    new action type only has to be added once.
    """

    # Command patterns, compiled once when the class is created.
    _CLICK_RE = re.compile(r"CLICK\s*\(\s*(\d+)\s*\)", re.IGNORECASE)
    # The text is matched lazily up to the first quote character that is
    # followed by optional whitespace and ")", so the typed text may itself
    # contain quotes or apostrophes (e.g. TYPE(5, "don't")). DOTALL keeps
    # literal newlines inside the text matchable.
    _TYPE_RE = re.compile(
        r'TYPE\s*\(\s*(\d+)\s*,\s*["\'](.*?)["\']\s*\)',
        re.IGNORECASE | re.DOTALL,
    )
    _PRESS_RE = re.compile(r'PRESS\s*\(\s*["\']([^"\']+)["\']\s*\)', re.IGNORECASE)
    _FINISH_RE = re.compile(r"FINISH\s*\(\s*\)", re.IGNORECASE)
    # A backslash-escaped quote inside TYPE text (\" or \').
    _ESCAPED_QUOTE_RE = re.compile(r'\\(["\'])')

    # NOTE: annotations that mention project classes are quoted so they are not
    # evaluated when the methods are defined.
    def __init__(
        self,
        browser: "SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol",
    ):
        """
        Initialize action executor.

        Args:
            browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance
                (for testing, can use mock objects that implement BrowserProtocol)
        """
        self.browser = browser
        # Concrete types are checked first; they are the most reliable signal.
        if isinstance(browser, AsyncSentienceBrowser):
            self._is_async = True
        elif isinstance(browser, SentienceBrowser):
            self._is_async = False
        else:
            # Protocol-style or mock browsers: treat the browser as async only
            # when its ``start`` attribute is a coroutine function. Everything
            # else (including objects without ``start``) defaults to sync.
            import inspect

            start_method = getattr(browser, "start", None)
            self._is_async = bool(start_method and inspect.iscoroutinefunction(start_method))

    @classmethod
    def _parse_action(cls, action_str: str) -> tuple[str, dict[str, Any]]:
        """
        Parse an action command string into an action name and its parameters.

        Args:
            action_str: Action string from LLM (e.g. ``CLICK(42)``)

        Returns:
            ``(action, params)`` where ``action`` is one of ``"click"``,
            ``"type"``, ``"press"``, ``"finish"`` and ``params`` holds the
            parsed arguments (``element_id``, ``text``, ``key``) in the order
            they appear in the result dictionary.

        Raises:
            ValueError: If action format is unknown
        """
        if match := cls._CLICK_RE.match(action_str):
            return "click", {"element_id": int(match.group(1))}

        if match := cls._TYPE_RE.match(action_str):
            # Turn \" and \' back into plain quote characters; text without
            # escaped quotes is passed through untouched.
            text = cls._ESCAPED_QUOTE_RE.sub(r"\1", match.group(2))
            return "type", {"element_id": int(match.group(1)), "text": text}

        if match := cls._PRESS_RE.match(action_str):
            return "press", {"key": match.group(1)}

        if cls._FINISH_RE.match(action_str):
            return "finish", {}

        raise ValueError(
            f"Unknown action format: {action_str}\n"
            f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
        )

    @staticmethod
    def _build_result(action: str, params: dict[str, Any], result: Any) -> dict[str, Any]:
        """
        Build the execution result dictionary for a parsed action.

        Args:
            action: Action name returned by ``_parse_action``
            params: Parsed parameters returned by ``_parse_action``
            result: Object returned by the SDK call (unused for ``finish``)

        Returns:
            Execution result dictionary (see ``execute`` for the keys)
        """
        if action == "finish":
            return {
                "success": True,
                "action": "finish",
                "message": "Task marked as complete",
            }

        payload: dict[str, Any] = {
            "success": result.success,
            "action": action,
            **params,
            "outcome": result.outcome,
        }
        if action == "click":
            payload["url_changed"] = result.url_changed
        return payload

    def execute(self, action_str: str, snap: "Snapshot") -> dict[str, Any]:
        """
        Parse action string and execute SDK call (synchronous).

        Args:
            action_str: Action string from LLM (e.g. ``CLICK(42)``, ``TYPE(15, "text")``)
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary with keys:
            - success: bool
            - action: str (e.g., "click", "type", "press", "finish")
            - element_id: Optional[int] (for click/type actions)
            - text: Optional[str] (for type actions)
            - key: Optional[str] (for press actions)
            - outcome: Optional[str] (action outcome)
            - url_changed: Optional[bool] (for click actions)
            - message: Optional[str] (for finish action)

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on async browser (use execute_async instead)
        """
        if self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute() called on async browser. Use execute_async() instead."
            )

        action, params = self._parse_action(action_str)

        result = None  # stays None for "finish", which makes no SDK call
        if action == "click":
            result = click(self.browser, params["element_id"])  # type: ignore
        elif action == "type":
            result = type_text(self.browser, params["element_id"], params["text"])  # type: ignore
        elif action == "press":
            result = press(self.browser, params["key"])  # type: ignore

        return self._build_result(action, params, result)

    async def execute_async(self, action_str: str, snap: "Snapshot") -> dict[str, Any]:
        """
        Parse action string and execute SDK call (asynchronous).

        Args:
            action_str: Action string from LLM (e.g. ``CLICK(42)``, ``TYPE(15, "text")``)
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary (same format as execute())

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on sync browser (use execute() instead)
        """
        if not self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute_async() called on sync browser. Use execute() instead."
            )

        action, params = self._parse_action(action_str)

        result = None  # stays None for "finish", which makes no SDK call
        if action == "click":
            result = await click_async(self.browser, params["element_id"])  # type: ignore
        elif action == "type":
            result = await type_text_async(  # type: ignore
                self.browser, params["element_id"], params["text"]
            )
        elif action == "press":
            result = await press_async(self.browser, params["key"])  # type: ignore

        return self._build_result(action, params, result)

sentience/actions.py

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
1+
from typing import Optional
2+
13
"""
24
Actions v1 - click, type, press
35
"""
46

57
import time
68

79
from .browser import AsyncSentienceBrowser, SentienceBrowser
10+
from .browser_evaluator import BrowserEvaluator
811
from .models import ActionResult, BBox, Snapshot
12+
from .sentience_methods import SentienceMethod
913
from .snapshot import snapshot, snapshot_async
1014

1115

@@ -59,41 +63,22 @@ def click( # noqa: C901
5963
else:
6064
# Fallback to JS click if element not found in snapshot
6165
try:
62-
success = browser.page.evaluate(
63-
"""
64-
(id) => {
65-
return window.sentience.click(id);
66-
}
67-
""",
68-
element_id,
66+
success = BrowserEvaluator.invoke(
67+
browser.page, SentienceMethod.CLICK, element_id
6968
)
7069
except Exception:
7170
# Navigation might have destroyed context, assume success if URL changed
7271
success = True
7372
except Exception:
7473
# Fallback to JS click on error
7574
try:
76-
success = browser.page.evaluate(
77-
"""
78-
(id) => {
79-
return window.sentience.click(id);
80-
}
81-
""",
82-
element_id,
83-
)
75+
success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
8476
except Exception:
8577
# Navigation might have destroyed context, assume success if URL changed
8678
success = True
8779
else:
8880
# Legacy JS-based click
89-
success = browser.page.evaluate(
90-
"""
91-
(id) => {
92-
return window.sentience.click(id);
93-
}
94-
""",
95-
element_id,
96-
)
81+
success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
9782

9883
# Wait a bit for navigation/DOM updates
9984
try:

0 commit comments

Comments
 (0)