Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9de198a
clean up & hardening
rcholic Jan 2, 2026
7fcf91b
docs
rcholic Jan 2, 2026
0316503
refactoring
rcholic Jan 2, 2026
aa00325
phase 2.2 and 2.3 done
rcholic Jan 2, 2026
1a2d85c
Phase 3.1 and 3.2 completed
rcholic Jan 2, 2026
ebc44d3
Phase 4: Modularize code
rcholic Jan 2, 2026
9544018
Phase 4.2 completed
rcholic Jan 2, 2026
7865dcd
Merge pull request #105 from SentienceAPI/hardening4.1
rcholic Jan 2, 2026
300ab4d
Merge pull request #104 from SentienceAPI/hardening3.1
rcholic Jan 2, 2026
5eb38cc
Merge pull request #103 from SentienceAPI/hardening2.2
rcholic Jan 2, 2026
275ad8c
Phase 5: BrowserProtocol PageProtocol for mocking more unit tests
rcholic Jan 2, 2026
4496ee8
Phase 5: BrowserProtocol PageProtocol for mocking more unit tests
rcholic Jan 2, 2026
43cfc23
Phase 5: fixed new tests
rcholic Jan 2, 2026
3bc278c
Merge pull request #106 from SentienceAPI/hardening5
rcholic Jan 2, 2026
c7b1c02
fix tests
rcholic Jan 2, 2026
1fd70cc
testing
rcholic Jan 2, 2026
360ceeb
add tests
rcholic Jan 3, 2026
62c202f
Merge pull request #107 from SentienceAPI/phase5.3
rcholic Jan 3, 2026
e06a6f5
fix tests
rcholic Jan 3, 2026
1e9fa79
fix tests
rcholic Jan 3, 2026
fbb0cca
fix tests
rcholic Jan 3, 2026
ad11076
random trace file name
rcholic Jan 3, 2026
0c6f944
fix trace name
rcholic Jan 3, 2026
1e71d91
fix trace name
rcholic Jan 3, 2026
192be38
fix tests
rcholic Jan 3, 2026
c033fce
remove doc
rcholic Jan 3, 2026
8b0e51e
resolve merge conflicts
rcholic Jan 3, 2026
f4d3151
Merge pull request #108 from SentienceAPI/fix_trace_name
rcholic Jan 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,24 @@ jobs:
- name: Install dependencies
run: |
pip install -e ".[dev]"
pip install pre-commit mypy types-requests

- name: Lint with pre-commit
continue-on-error: true
run: |
pre-commit run --all-files

- name: Type check with mypy
continue-on-error: true
run: |
mypy sentience --ignore-missing-imports --no-strict-optional

- name: Check code style
continue-on-error: true
run: |
black --check sentience tests --line-length=100
isort --check-only --profile black sentience tests
flake8 sentience tests --max-line-length=100 --extend-ignore=E203,W503,E501 --max-complexity=15

- name: Build extension (if needed)
if: runner.os != 'Windows'
Expand Down
27 changes: 13 additions & 14 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,19 @@ repos:
- '--max-complexity=15'
exclude: ^(venv/|\.venv/|build/|dist/|tests/fixtures/)

# Type checking with mypy (disabled for now - too strict)
# Uncomment to enable strict type checking
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.8.0
# hooks:
# - id: mypy
# additional_dependencies:
# - pydantic>=2.0
# - types-requests
# args:
# - '--ignore-missing-imports'
# - '--no-strict-optional'
# - '--warn-unused-ignores'
# exclude: ^(tests/|examples/|venv/|\.venv/|build/|dist/)
# Type checking with mypy
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
hooks:
- id: mypy
additional_dependencies:
- pydantic>=2.0
- types-requests
args:
- '--ignore-missing-imports'
- '--no-strict-optional'
- '--warn-unused-ignores'
exclude: ^(tests/|examples/|venv/|\.venv/|build/|dist/)

# Security checks
- repo: https://github.com/PyCQA/bandit
Expand Down
11 changes: 8 additions & 3 deletions sentience/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@
from .cloud_tracing import CloudTraceSink, SentienceLogger
from .conversational_agent import ConversationalAgent
from .expect import expect

# Formatting (v0.12.0+)
from .formatting import format_snapshot_for_llm
from .generator import ScriptGenerator, generate
from .inspector import Inspector, inspect
from .llm_provider import (
Expand Down Expand Up @@ -55,19 +52,24 @@
from .read import read
from .recorder import Recorder, Trace, TraceStep, record
from .screenshot import screenshot
from .sentience_methods import AgentAction, SentienceMethod
from .snapshot import snapshot
from .text_search import find_text_rect
from .tracer_factory import SENTIENCE_API_URL, create_tracer
from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink

# Utilities (v0.12.0+)
# Import from utils package (re-exports from submodules for backward compatibility)
from .utils import (
canonical_snapshot_loose,
canonical_snapshot_strict,
compute_snapshot_digests,
save_storage_state,
sha256_digest,
)

# Formatting (v0.12.0+)
from .utils.formatting import format_snapshot_for_llm
from .wait import wait_for

__version__ = "0.91.1"
Expand Down Expand Up @@ -150,4 +152,7 @@
"format_snapshot_for_llm",
# Agent Config (v0.12.0+)
"AgentConfig",
# Enums
"SentienceMethod",
"AgentAction",
]
215 changes: 215 additions & 0 deletions sentience/action_executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
"""
Action Executor for Sentience Agent.

Handles parsing and execution of action commands (CLICK, TYPE, PRESS, FINISH).
This separates action execution concerns from LLM interaction.
"""

import re
from typing import Any, Union

from .actions import click, click_async, press, press_async, type_text, type_text_async
from .browser import AsyncSentienceBrowser, SentienceBrowser
from .models import Snapshot
from .protocols import AsyncBrowserProtocol, BrowserProtocol


class ActionExecutor:
    """
    Parse LLM action strings and execute them against a browser.

    Supported commands (matched case-insensitively at the start of the string;
    anything after the closing parenthesis is ignored):

    - ``CLICK(id)``
    - ``TYPE(id, "text")``
    - ``PRESS("key")``
    - ``FINISH()``

    Parsing and result formatting live in private helpers shared by the
    synchronous and asynchronous entry points, so a new action type or a
    grammar fix only has to be made in one place.
    """

    # Compiled once at class creation instead of on every execute() call.
    _CLICK_RE = re.compile(r"CLICK\s*\(\s*(\d+)\s*\)", re.IGNORECASE)
    # The quoted argument runs up to the first occurrence of the *same* quote
    # character that opened it and is followed by ")". This lets the text
    # contain the other quote character (e.g. "it's") or inner quotes that are
    # not directly followed by ")". DOTALL keeps multi-line text working.
    _TYPE_RE = re.compile(
        r'TYPE\s*\(\s*(\d+)\s*,\s*(["\'])(.*?)\2\s*\)',
        re.IGNORECASE | re.DOTALL,
    )
    _PRESS_RE = re.compile(
        r'PRESS\s*\(\s*(["\'])(.+?)\1\s*\)',
        re.IGNORECASE | re.DOTALL,
    )
    _FINISH_RE = re.compile(r"FINISH\s*\(\s*\)", re.IGNORECASE)

    def __init__(
        self,
        browser: "SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol",
    ):
        """
        Initialize action executor.

        Args:
            browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance
                (for testing, can use mock objects that implement BrowserProtocol)
        """
        self.browser = browser
        # Concrete types are checked first because they are unambiguous.
        if isinstance(browser, AsyncSentienceBrowser):
            self._is_async = True
        elif isinstance(browser, SentienceBrowser):
            self._is_async = False
        else:
            # Protocol-style or mock browsers: an object is treated as async only
            # when its ``start`` attribute is a coroutine function. Anything else
            # (including objects without ``start``) is treated as sync.
            import inspect

            self._is_async = inspect.iscoroutinefunction(getattr(browser, "start", None))

    @classmethod
    def _parse_action(cls, action_str: str) -> tuple[str, tuple[Any, ...]]:
        """
        Turn an action string into ``(action_name, args)``.

        Returns:
            One of ``("click", (element_id,))``, ``("type", (element_id, text))``,
            ``("press", (key,))`` or ``("finish", ())``.

        Raises:
            ValueError: If the string matches none of the supported commands.
        """
        if match := cls._CLICK_RE.match(action_str):
            return "click", (int(match.group(1)),)
        if match := cls._TYPE_RE.match(action_str):
            # group(2) is the quote delimiter; group(3) is the text itself.
            return "type", (int(match.group(1)), match.group(3))
        if match := cls._PRESS_RE.match(action_str):
            return "press", (match.group(2),)
        if cls._FINISH_RE.match(action_str):
            return "finish", ()
        raise ValueError(
            f"Unknown action format: {action_str}\n"
            f'Expected: CLICK(id), TYPE(id, "text"), PRESS("key"), or FINISH()'
        )

    @staticmethod
    def _build_result(action: str, args: tuple[Any, ...], result: Any = None) -> dict[str, Any]:
        """
        Build the result dictionary returned by execute()/execute_async().

        Args:
            action: Action name as returned by ``_parse_action``.
            args: Parsed arguments as returned by ``_parse_action``.
            result: Object returned by the SDK call; its ``success`` and ``outcome``
                attributes (and ``url_changed`` for clicks) are read. Not used for
                the "finish" action.
        """
        if action == "finish":
            return {
                "success": True,
                "action": "finish",
                "message": "Task marked as complete",
            }

        payload: dict[str, Any] = {"success": result.success, "action": action}
        if action == "click":
            payload["element_id"] = args[0]
            payload["outcome"] = result.outcome
            payload["url_changed"] = result.url_changed
        elif action == "type":
            payload["element_id"], payload["text"] = args
            payload["outcome"] = result.outcome
        else:  # "press"
            payload["key"] = args[0]
            payload["outcome"] = result.outcome
        return payload

    def execute(self, action_str: str, snap: "Snapshot") -> dict[str, Any]:
        """
        Parse action string and execute SDK call (synchronous).

        Args:
            action_str: Action string from LLM (e.g., 'CLICK(42)', 'TYPE(15, "text")')
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary with keys:
                - success: bool
                - action: str (e.g., "click", "type", "press", "finish")
                - element_id: Optional[int] (for click/type actions)
                - text: Optional[str] (for type actions)
                - key: Optional[str] (for press actions)
                - outcome: Optional[str] (for click/type/press actions)
                - url_changed: Optional[bool] (for click actions)
                - message: Optional[str] (for finish action)

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on async browser (use execute_async instead)
        """
        if self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute() called on async browser. Use execute_async() instead."
            )

        action, args = self._parse_action(action_str)
        if action == "finish":
            return self._build_result(action, args)

        # SDK functions are looked up by module-level name at call time.
        if action == "click":
            result = click(self.browser, *args)  # type: ignore
        elif action == "type":
            result = type_text(self.browser, *args)  # type: ignore
        else:  # "press"
            result = press(self.browser, *args)  # type: ignore
        return self._build_result(action, args, result)

    async def execute_async(self, action_str: str, snap: "Snapshot") -> dict[str, Any]:
        """
        Parse action string and execute SDK call (asynchronous).

        Args:
            action_str: Action string from LLM (e.g., 'CLICK(42)', 'TYPE(15, "text")')
            snap: Current snapshot (for context, currently unused but kept for API consistency)

        Returns:
            Execution result dictionary (same format as execute())

        Raises:
            ValueError: If action format is unknown
            RuntimeError: If called on sync browser (use execute() instead)
        """
        if not self._is_async:
            raise RuntimeError(
                "ActionExecutor.execute_async() called on sync browser. Use execute() instead."
            )

        action, args = self._parse_action(action_str)
        if action == "finish":
            return self._build_result(action, args)

        if action == "click":
            result = await click_async(self.browser, *args)  # type: ignore
        elif action == "type":
            result = await type_text_async(self.browser, *args)  # type: ignore
        else:  # "press"
            result = await press_async(self.browser, *args)  # type: ignore
        return self._build_result(action, args, result)
31 changes: 8 additions & 23 deletions sentience/actions.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
from typing import Optional

"""
Actions v1 - click, type, press
"""

import time

from .browser import AsyncSentienceBrowser, SentienceBrowser
from .browser_evaluator import BrowserEvaluator
from .models import ActionResult, BBox, Snapshot
from .sentience_methods import SentienceMethod
from .snapshot import snapshot, snapshot_async


Expand Down Expand Up @@ -59,41 +63,22 @@ def click( # noqa: C901
else:
# Fallback to JS click if element not found in snapshot
try:
success = browser.page.evaluate(
"""
(id) => {
return window.sentience.click(id);
}
""",
element_id,
success = BrowserEvaluator.invoke(
browser.page, SentienceMethod.CLICK, element_id
)
except Exception:
# Navigation might have destroyed context, assume success if URL changed
success = True
except Exception:
# Fallback to JS click on error
try:
success = browser.page.evaluate(
"""
(id) => {
return window.sentience.click(id);
}
""",
element_id,
)
success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
except Exception:
# Navigation might have destroyed context, assume success if URL changed
success = True
else:
# Legacy JS-based click
success = browser.page.evaluate(
"""
(id) => {
return window.sentience.click(id);
}
""",
element_id,
)
success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)

# Wait a bit for navigation/DOM updates
try:
Expand Down
Loading
Loading