Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sentience/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
# Agent Layer (Phase 1 & 2)
from .base_agent import BaseAgent
from .browser import AsyncSentienceBrowser, SentienceBrowser
from .permissions import PermissionPolicy
from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
from .captcha_strategies import ExternalSolver, HumanHandoffSolver, VisionSolver

Expand Down
16 changes: 16 additions & 0 deletions sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,21 @@ def capabilities(self) -> BackendCapabilities:
getattr(getattr(backend, "_page", None), "keyboard", None)
)
has_downloads = bool(getattr(backend, "downloads", None))
has_permissions = False
try:
context = None
legacy_browser = getattr(self, "_legacy_browser", None)
if legacy_browser is not None:
context = getattr(legacy_browser, "context", None)
if context is None:
page = getattr(backend, "_page", None) or getattr(backend, "page", None)
context = getattr(page, "context", None) if page is not None else None
if context is not None:
has_permissions = bool(
hasattr(context, "clear_permissions") and hasattr(context, "grant_permissions")
)
except Exception:
has_permissions = False
has_files = False
if self.tool_registry is not None:
try:
Expand All @@ -397,6 +412,7 @@ def capabilities(self) -> BackendCapabilities:
downloads=has_downloads,
filesystem_tools=has_files,
keyboard=bool(has_keyboard or has_eval),
permissions=has_permissions,
)

def can(self, capability: str) -> bool:
Expand Down
57 changes: 57 additions & 0 deletions sentience/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from sentience._extension_loader import find_extension_path
from sentience.constants import SENTIENCE_API_URL
from sentience.models import ProxyConfig, StorageState, Viewport
from sentience.permissions import PermissionPolicy

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -97,6 +98,7 @@ def __init__(
allowed_domains: list[str] | None = None,
prohibited_domains: list[str] | None = None,
keep_alive: bool = False,
permission_policy: PermissionPolicy | dict | None = None,
):
"""
Initialize Sentience browser
Expand Down Expand Up @@ -134,6 +136,7 @@ def __init__(
Viewport(width=1920, height=1080) (Full HD)
{"width": 1280, "height": 800} (dict also supported)
If None, defaults to Viewport(width=1280, height=800).
permission_policy: Optional permission policy to apply on context creation.
"""
self.api_key = api_key
# Only set api_url if api_key is provided, otherwise None (free tier)
Expand Down Expand Up @@ -165,6 +168,7 @@ def __init__(
self.allowed_domains = allowed_domains or []
self.prohibited_domains = prohibited_domains or []
self.keep_alive = keep_alive
self.permission_policy = self._coerce_permission_policy(permission_policy)

# Viewport configuration - convert dict to Viewport if needed
if viewport is None:
Expand Down Expand Up @@ -231,6 +235,28 @@ def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
)
return None

def _coerce_permission_policy(
    self, policy: PermissionPolicy | dict | None
) -> PermissionPolicy | None:
    """
    Normalize the ``permission_policy`` constructor argument.

    Args:
        policy: A PermissionPolicy, a dict of PermissionPolicy keyword
            arguments, or None.

    Returns:
        The PermissionPolicy to store, or None when no policy was supplied.

    Raises:
        TypeError: If ``policy`` is none of the accepted types.
    """
    if policy is None or isinstance(policy, PermissionPolicy):
        # Absent or already a policy object: hand it back unchanged.
        return policy
    if not isinstance(policy, dict):
        raise TypeError("permission_policy must be PermissionPolicy, dict, or None")
    # Dict entries are forwarded as PermissionPolicy keyword arguments.
    return PermissionPolicy(**policy)

def apply_permission_policy(self, context: BrowserContext) -> None:
    """
    Apply ``self.permission_policy`` to a browser context.

    Does nothing when no policy is configured. Otherwise, in order:
    clears permissions when the policy default is "clear" or "deny",
    sets the geolocation when one is configured, and grants the
    ``auto_grant`` permissions (scoped to ``policy.origin``).

    Args:
        context: Browser context exposing ``clear_permissions``,
            ``set_geolocation`` and ``grant_permissions``.
    """
    active_policy = self.permission_policy
    if active_policy is None:
        return

    # Any other default (e.g. "grant") leaves existing permissions untouched.
    reset_first = active_policy.default in ("clear", "deny")
    if reset_first:
        context.clear_permissions()

    location = active_policy.geolocation
    if location:
        context.set_geolocation(location)

    granted = active_policy.auto_grant
    if granted:
        context.grant_permissions(granted, origin=active_policy.origin)

def start(self) -> None:
"""Launch browser with extension loaded"""
# Get extension source path using shared utility
Expand Down Expand Up @@ -338,6 +364,9 @@ def start(self) -> None:
# headless mode via the --headless=new arg above. This is a Playwright workaround.
self.context = self.playwright.chromium.launch_persistent_context(**launch_params)

if self.context is not None:
self.apply_permission_policy(self.context)

self.page = self.context.pages[0] if self.context.pages else self.context.new_page()

# Inject storage state if provided (must be after context creation)
Expand Down Expand Up @@ -712,6 +741,7 @@ def __init__(
allowed_domains: list[str] | None = None,
prohibited_domains: list[str] | None = None,
keep_alive: bool = False,
permission_policy: PermissionPolicy | dict | None = None,
):
"""
Initialize Async Sentience browser
Expand Down Expand Up @@ -740,6 +770,7 @@ def __init__(
this specific browser binary instead of Playwright's managed browser.
Useful to guarantee Chromium (not Chrome for Testing) on macOS.
Example: "/path/to/playwright/chromium-1234/chrome-mac/Chromium.app/Contents/MacOS/Chromium"
permission_policy: Optional permission policy to apply on context creation.
"""
self.api_key = api_key
# Only set api_url if api_key is provided, otherwise None (free tier)
Expand Down Expand Up @@ -770,6 +801,7 @@ def __init__(
self.allowed_domains = allowed_domains or []
self.prohibited_domains = prohibited_domains or []
self.keep_alive = keep_alive
self.permission_policy = self._coerce_permission_policy(permission_policy)

# Viewport configuration - convert dict to Viewport if needed
if viewport is None:
Expand Down Expand Up @@ -836,6 +868,28 @@ def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None:
)
return None

def _coerce_permission_policy(
    self, policy: PermissionPolicy | dict | None
) -> PermissionPolicy | None:
    """
    Convert the ``permission_policy`` argument into a PermissionPolicy.

    Args:
        policy: Either a ready PermissionPolicy, a dict whose items are
            used as PermissionPolicy keyword arguments, or None.

    Returns:
        A PermissionPolicy instance, or None if ``policy`` is None.

    Raises:
        TypeError: If ``policy`` is not a PermissionPolicy, dict, or None.
    """
    passthrough = policy is None or isinstance(policy, PermissionPolicy)
    if passthrough:
        return policy
    if isinstance(policy, dict):
        # Build the policy from the mapping's keyword arguments.
        return PermissionPolicy(**policy)
    raise TypeError("permission_policy must be PermissionPolicy, dict, or None")

async def apply_permission_policy(self, context: AsyncBrowserContext) -> None:
    """
    Apply ``self.permission_policy`` to an async browser context.

    Does nothing when no policy is configured. Otherwise, in order:
    awaits ``clear_permissions`` when the policy default is "clear" or
    "deny", awaits ``set_geolocation`` when a geolocation is configured,
    and awaits ``grant_permissions`` for the ``auto_grant`` list (scoped
    to ``policy.origin``).

    Args:
        context: Async browser context exposing awaitable
            ``clear_permissions``, ``set_geolocation`` and
            ``grant_permissions``.
    """
    active_policy = self.permission_policy
    if active_policy is None:
        return

    # Any other default (e.g. "grant") leaves existing permissions untouched.
    reset_first = active_policy.default in ("clear", "deny")
    if reset_first:
        await context.clear_permissions()

    location = active_policy.geolocation
    if location:
        await context.set_geolocation(location)

    granted = active_policy.auto_grant
    if granted:
        await context.grant_permissions(granted, origin=active_policy.origin)

async def start(self) -> None:
"""Launch browser with extension loaded (async)"""
# Get extension source path using shared utility
Expand Down Expand Up @@ -939,6 +993,9 @@ async def start(self) -> None:
# Launch persistent context
self.context = await self.playwright.chromium.launch_persistent_context(**launch_params)

if self.context is not None:
await self.apply_permission_policy(self.context)

self.page = self.context.pages[0] if self.context.pages else await self.context.new_page()

# Inject storage state if provided
Expand Down
18 changes: 18 additions & 0 deletions sentience/permissions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Literal

PermissionDefault = Literal["clear", "deny", "grant"]


@dataclass
class PermissionPolicy:
    """
    Browser permission handling policy applied on context creation.

    Attributes:
        default: Baseline handling mode; one of "clear", "deny" or "grant".
        auto_grant: Permission names to grant on the browser context.
        geolocation: Optional geolocation dict to set on the browser context.
        origin: Optional origin the ``auto_grant`` permissions are scoped to.

    Raises:
        ValueError: If ``default`` is not one of the supported modes.
    """

    default: PermissionDefault = "clear"
    auto_grant: list[str] = field(default_factory=list)
    geolocation: dict | None = None
    origin: str | None = None

    def __post_init__(self) -> None:
        # Literal annotations are not enforced at runtime, so a typo (for
        # example one arriving through a plain dict) would otherwise yield a
        # policy whose default silently matches no handling branch.
        supported = ("clear", "deny", "grant")
        if self.default not in supported:
            raise ValueError(
                f"PermissionPolicy.default must be one of {supported}, got {self.default!r}"
            )
1 change: 1 addition & 0 deletions sentience/tools/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class BackendCapabilities(BaseModel):
downloads: bool = False
filesystem_tools: bool = False
keyboard: bool = False
permissions: bool = False


class UnsupportedCapabilityError(RuntimeError):
Expand Down
88 changes: 87 additions & 1 deletion sentience/tools/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ..agent_runtime import AgentRuntime
from ..backends import actions as backend_actions
from ..models import ActionResult, BBox, EvaluateJsRequest, Snapshot
from .context import ToolContext
from .context import ToolContext, UnsupportedCapabilityError
from .registry import ToolRegistry


Expand Down Expand Up @@ -54,6 +54,21 @@ class EvaluateJsToolInput(BaseModel):
truncate: bool = Field(True, description="Truncate output when too long.")


class GrantPermissionsInput(BaseModel):
    # Input payload for the "grant_permissions" tool.

    # Required; must contain at least one permission name.
    permissions: list[str] = Field(
        default=...,
        min_length=1,
        description="Permissions to grant.",
    )
    # When omitted, no origin is passed along with the grant.
    origin: str | None = Field(
        default=None,
        description="Optional origin to apply permissions.",
    )


class ClearPermissionsInput(BaseModel):
    # Input payload for the "clear_permissions" tool; it declares no fields.
    ...


class SetGeolocationInput(BaseModel):
    # Input payload for the "set_geolocation" tool.
    #
    # The bounds mirror what Playwright documents for a geolocation:
    # latitude in [-90, 90], longitude in [-180, 180], accuracy non-negative.
    # Validating here rejects impossible coordinates with a clear model error
    # instead of failing later inside the browser call.

    latitude: float = Field(..., ge=-90, le=90, description="Latitude in decimal degrees.")
    longitude: float = Field(..., ge=-180, le=180, description="Longitude in decimal degrees.")
    accuracy: float | None = Field(None, ge=0, description="Optional accuracy in meters.")


def register_default_tools(
registry: ToolRegistry, runtime: ToolContext | "AgentRuntime" | None = None
) -> ToolRegistry:
Expand All @@ -68,6 +83,17 @@ def _get_runtime(ctx: ToolContext | None):
return runtime
raise RuntimeError("ToolContext with runtime is required")

def _get_permission_context(runtime_ref):
legacy_browser = getattr(runtime_ref, "_legacy_browser", None)
if legacy_browser is not None:
context = getattr(legacy_browser, "context", None)
if context is not None:
return context
backend = getattr(runtime_ref, "backend", None)
page = getattr(backend, "_page", None) or getattr(backend, "page", None)
context = getattr(page, "context", None) if page is not None else None
return context

@registry.tool(
name="snapshot_state",
input_model=SnapshotToolInput,
Expand Down Expand Up @@ -229,4 +255,64 @@ async def evaluate_js_tool(ctx, params: EvaluateJsToolInput) -> ActionResult:
outcome="dom_updated",
)

@registry.tool(
    name="grant_permissions",
    input_model=GrantPermissionsInput,
    output_model=ActionResult,
    description="Grant browser permissions for the current context.",
)
async def grant_permissions_tool(ctx, params: GrantPermissionsInput) -> ActionResult:
    """
    Grant the requested permissions on the runtime's browser context.

    Raises:
        UnsupportedCapabilityError: If no ToolContext is given and the
            runtime reports it cannot handle "permissions".
        RuntimeError: If no browser context can be located.
    """
    active_runtime = _get_runtime(ctx)
    if ctx is None:
        # No ToolContext to delegate to: ask the runtime directly.
        if not active_runtime.can("permissions"):
            raise UnsupportedCapabilityError("permissions")
    else:
        ctx.require("permissions")

    browser_context = _get_permission_context(active_runtime)
    if browser_context is None:
        raise RuntimeError("Permission context unavailable")

    await browser_context.grant_permissions(params.permissions, origin=params.origin)
    return ActionResult(success=True, duration_ms=0, outcome="dom_updated")

@registry.tool(
    name="clear_permissions",
    input_model=ClearPermissionsInput,
    output_model=ActionResult,
    description="Clear browser permissions for the current context.",
)
async def clear_permissions_tool(ctx, _params: ClearPermissionsInput) -> ActionResult:
    """
    Clear permissions on the runtime's browser context.

    Raises:
        UnsupportedCapabilityError: If no ToolContext is given and the
            runtime reports it cannot handle "permissions".
        RuntimeError: If no browser context can be located.
    """
    active_runtime = _get_runtime(ctx)
    if ctx is None:
        # No ToolContext to delegate to: ask the runtime directly.
        if not active_runtime.can("permissions"):
            raise UnsupportedCapabilityError("permissions")
    else:
        ctx.require("permissions")

    browser_context = _get_permission_context(active_runtime)
    if browser_context is None:
        raise RuntimeError("Permission context unavailable")

    await browser_context.clear_permissions()
    return ActionResult(success=True, duration_ms=0, outcome="dom_updated")

@registry.tool(
    name="set_geolocation",
    input_model=SetGeolocationInput,
    output_model=ActionResult,
    description="Set geolocation for the current browser context.",
)
async def set_geolocation_tool(ctx, params: SetGeolocationInput) -> ActionResult:
    """
    Set the geolocation on the runtime's browser context.

    The payload always carries ``latitude`` and ``longitude``; ``accuracy``
    is included only when the caller supplied one.

    Raises:
        UnsupportedCapabilityError: If no ToolContext is given and the
            runtime reports it cannot handle "permissions".
        RuntimeError: If no browser context can be located.
    """
    runtime_ref = _get_runtime(ctx)
    if ctx is not None:
        ctx.require("permissions")
    elif not runtime_ref.can("permissions"):
        raise UnsupportedCapabilityError("permissions")
    context = _get_permission_context(runtime_ref)
    if context is None:
        raise RuntimeError("Permission context unavailable")

    geolocation = {
        "latitude": params.latitude,
        "longitude": params.longitude,
    }
    # Leave the optional key out rather than sending an explicit None:
    # Playwright documents accuracy as an optional float, not a nullable one,
    # so a null value may be rejected by the context.
    if params.accuracy is not None:
        geolocation["accuracy"] = params.accuracy

    await context.set_geolocation(geolocation)
    return ActionResult(success=True, duration_ms=0, outcome="dom_updated")

return registry
66 changes: 66 additions & 0 deletions tests/unit/test_permission_policy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import pytest

from sentience import AsyncSentienceBrowser, SentienceBrowser
from sentience.permissions import PermissionPolicy


class SyncContextStub:
def __init__(self) -> None:
self.calls: list[tuple | str] = []

def clear_permissions(self) -> None:
self.calls.append("clear")

def set_geolocation(self, geolocation: dict) -> None:
self.calls.append(("geolocation", geolocation))

def grant_permissions(self, permissions: list[str], origin: str | None = None) -> None:
self.calls.append(("grant", permissions, origin))


class AsyncContextStub:
def __init__(self) -> None:
self.calls: list[tuple | str] = []

async def clear_permissions(self) -> None:
self.calls.append("clear")

async def set_geolocation(self, geolocation: dict) -> None:
self.calls.append(("geolocation", geolocation))

async def grant_permissions(self, permissions: list[str], origin: str | None = None) -> None:
self.calls.append(("grant", permissions, origin))


def test_apply_permission_policy_sync() -> None:
    """The sync browser clears, sets geolocation, then grants, in that order."""
    browser = SentienceBrowser(
        permission_policy=PermissionPolicy(
            default="clear",
            auto_grant=["geolocation"],
            geolocation={"latitude": 37.77, "longitude": -122.41},
            origin="https://example.com",
        )
    )
    recorder = SyncContextStub()

    browser.apply_permission_policy(recorder)

    expected_calls = [
        "clear",
        ("geolocation", {"latitude": 37.77, "longitude": -122.41}),
        ("grant", ["geolocation"], "https://example.com"),
    ]
    assert recorder.calls == expected_calls


@pytest.mark.asyncio
async def test_apply_permission_policy_async() -> None:
    """The async browser clears, sets geolocation, then grants with no origin."""
    browser = AsyncSentienceBrowser(
        permission_policy=PermissionPolicy(
            default="clear",
            auto_grant=["notifications"],
            geolocation={"latitude": 40.71, "longitude": -74.0, "accuracy": 10},
        )
    )
    recorder = AsyncContextStub()

    await browser.apply_permission_policy(recorder)

    expected_calls = [
        "clear",
        ("geolocation", {"latitude": 40.71, "longitude": -74.0, "accuracy": 10}),
        # No origin was configured on the policy, so None is forwarded.
        ("grant", ["notifications"], None),
    ]
    assert recorder.calls == expected_calls
Loading
Loading