From 38007eb37976762d5d72812a4939050399a7d46e Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Fri, 23 Jan 2026 22:25:55 -0800 Subject: [PATCH] solution for handle Chrome permission popup --- sentience/__init__.py | 1 + sentience/agent_runtime.py | 16 +++++ sentience/browser.py | 57 ++++++++++++++++++ sentience/permissions.py | 18 ++++++ sentience/tools/context.py | 1 + sentience/tools/defaults.py | 88 +++++++++++++++++++++++++++- tests/unit/test_permission_policy.py | 66 +++++++++++++++++++++ tests/unit/test_tool_registry.py | 22 +++++-- 8 files changed, 264 insertions(+), 5 deletions(-) create mode 100644 sentience/permissions.py create mode 100644 tests/unit/test_permission_policy.py diff --git a/sentience/__init__.py b/sentience/__init__.py index f2e4b58..31ccf6a 100644 --- a/sentience/__init__.py +++ b/sentience/__init__.py @@ -56,6 +56,7 @@ # Agent Layer (Phase 1 & 2) from .base_agent import BaseAgent from .browser import AsyncSentienceBrowser, SentienceBrowser +from .permissions import PermissionPolicy from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution from .captcha_strategies import ExternalSolver, HumanHandoffSolver, VisionSolver diff --git a/sentience/agent_runtime.py b/sentience/agent_runtime.py index bebe547..e983204 100644 --- a/sentience/agent_runtime.py +++ b/sentience/agent_runtime.py @@ -385,6 +385,21 @@ def capabilities(self) -> BackendCapabilities: getattr(getattr(backend, "_page", None), "keyboard", None) ) has_downloads = bool(getattr(backend, "downloads", None)) + has_permissions = False + try: + context = None + legacy_browser = getattr(self, "_legacy_browser", None) + if legacy_browser is not None: + context = getattr(legacy_browser, "context", None) + if context is None: + page = getattr(backend, "_page", None) or getattr(backend, "page", None) + context = getattr(page, "context", None) if page is not None else None + if context is not None: + has_permissions = bool( + hasattr(context, "clear_permissions") and hasattr(context, "grant_permissions") + ) + except Exception: + has_permissions = False has_files = False if self.tool_registry is not None: try: @@ -397,6 +412,7 @@ def capabilities(self) -> BackendCapabilities: downloads=has_downloads, filesystem_tools=has_files, keyboard=bool(has_keyboard or has_eval), + permissions=has_permissions, ) def can(self, capability: str) -> bool: diff --git a/sentience/browser.py b/sentience/browser.py index 2191963..1422598 100644 --- a/sentience/browser.py +++ b/sentience/browser.py @@ -22,6 +22,7 @@ from sentience._extension_loader import find_extension_path from sentience.constants import SENTIENCE_API_URL from sentience.models import ProxyConfig, StorageState, Viewport +from sentience.permissions import PermissionPolicy logger = logging.getLogger(__name__) @@ -97,6 +98,7 @@ def __init__( allowed_domains: list[str] | None = None, prohibited_domains: list[str] | None = None, keep_alive: bool = False, + permission_policy: PermissionPolicy | dict | None = None, ): """ Initialize Sentience browser @@ -134,6 +136,7 @@ def __init__( Viewport(width=1920, height=1080) (Full HD) {"width": 1280, "height": 800} (dict also supported) If None, defaults to Viewport(width=1280, height=800). + permission_policy: Optional permission policy to apply on context creation. """ self.api_key = api_key # Only set api_url if api_key is provided, otherwise None (free tier) @@ -165,6 +168,7 @@ def __init__( self.allowed_domains = allowed_domains or [] self.prohibited_domains = prohibited_domains or [] self.keep_alive = keep_alive + self.permission_policy = self._coerce_permission_policy(permission_policy) # Viewport configuration - convert dict to Viewport if needed if viewport is None: @@ -231,6 +235,28 @@ def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None: ) return None + def _coerce_permission_policy( + self, policy: PermissionPolicy | dict | None + ) -> PermissionPolicy | None: + if policy is None: + return None + if isinstance(policy, PermissionPolicy): + return policy + if isinstance(policy, dict): + return PermissionPolicy(**policy) + raise TypeError("permission_policy must be PermissionPolicy, dict, or None") + + def apply_permission_policy(self, context: BrowserContext) -> None: + policy = self.permission_policy + if policy is None: + return + if policy.default in ("clear", "deny"): + context.clear_permissions() + if policy.geolocation: + context.set_geolocation(policy.geolocation) + if policy.auto_grant: + context.grant_permissions(policy.auto_grant, origin=policy.origin) + def start(self) -> None: """Launch browser with extension loaded""" # Get extension source path using shared utility @@ -338,6 +364,9 @@ def start(self) -> None: # headless mode via the --headless=new arg above. This is a Playwright workaround. self.context = self.playwright.chromium.launch_persistent_context(**launch_params) + if self.context is not None: + self.apply_permission_policy(self.context) + self.page = self.context.pages[0] if self.context.pages else self.context.new_page() # Inject storage state if provided (must be after context creation) @@ -712,6 +741,7 @@ def __init__( allowed_domains: list[str] | None = None, prohibited_domains: list[str] | None = None, keep_alive: bool = False, + permission_policy: PermissionPolicy | dict | None = None, ): """ Initialize Async Sentience browser @@ -740,6 +770,7 @@ def __init__( this specific browser binary instead of Playwright's managed browser. Useful to guarantee Chromium (not Chrome for Testing) on macOS. Example: "/path/to/playwright/chromium-1234/chrome-mac/Chromium.app/Contents/MacOS/Chromium" + permission_policy: Optional permission policy to apply on context creation. """ self.api_key = api_key # Only set api_url if api_key is provided, otherwise None (free tier) @@ -770,6 +801,7 @@ def __init__( self.allowed_domains = allowed_domains or [] self.prohibited_domains = prohibited_domains or [] self.keep_alive = keep_alive + self.permission_policy = self._coerce_permission_policy(permission_policy) # Viewport configuration - convert dict to Viewport if needed if viewport is None: @@ -836,6 +868,28 @@ def _parse_proxy(self, proxy_string: str) -> ProxyConfig | None: ) return None + def _coerce_permission_policy( + self, policy: PermissionPolicy | dict | None + ) -> PermissionPolicy | None: + if policy is None: + return None + if isinstance(policy, PermissionPolicy): + return policy + if isinstance(policy, dict): + return PermissionPolicy(**policy) + raise TypeError("permission_policy must be PermissionPolicy, dict, or None") + + async def apply_permission_policy(self, context: AsyncBrowserContext) -> None: + policy = self.permission_policy + if policy is None: + return + if policy.default in ("clear", "deny"): + await context.clear_permissions() + if policy.geolocation: + await context.set_geolocation(policy.geolocation) + if policy.auto_grant: + await context.grant_permissions(policy.auto_grant, origin=policy.origin) + async def start(self) -> None: """Launch browser with extension loaded (async)""" # Get extension source path using shared utility @@ -939,6 +993,9 @@ async def start(self) -> None: # Launch persistent context self.context = await self.playwright.chromium.launch_persistent_context(**launch_params) + if self.context is not None: + await self.apply_permission_policy(self.context) + self.page = self.context.pages[0] if self.context.pages else await self.context.new_page() # Inject storage state if provided diff --git a/sentience/permissions.py b/sentience/permissions.py new file mode 100644 index 0000000..f7d4c30 --- /dev/null +++ b/sentience/permissions.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Literal + +PermissionDefault = Literal["clear", "deny", "grant"] + + +@dataclass +class PermissionPolicy: + """ + Browser permission handling policy applied on context creation. + """ + + default: PermissionDefault = "clear" + auto_grant: list[str] = field(default_factory=list) + geolocation: dict | None = None + origin: str | None = None diff --git a/sentience/tools/context.py b/sentience/tools/context.py index dfcd562..2148de3 100644 --- a/sentience/tools/context.py +++ b/sentience/tools/context.py @@ -18,6 +18,7 @@ class BackendCapabilities(BaseModel): downloads: bool = False filesystem_tools: bool = False keyboard: bool = False + permissions: bool = False class UnsupportedCapabilityError(RuntimeError): diff --git a/sentience/tools/defaults.py b/sentience/tools/defaults.py index 5b319dd..d84c669 100644 --- a/sentience/tools/defaults.py +++ b/sentience/tools/defaults.py @@ -7,7 +7,7 @@ from ..agent_runtime import AgentRuntime from ..backends import actions as backend_actions from ..models import ActionResult, BBox, EvaluateJsRequest, Snapshot -from .context import ToolContext +from .context import ToolContext, UnsupportedCapabilityError from .registry import ToolRegistry @@ -54,6 +54,21 @@ class EvaluateJsToolInput(BaseModel): truncate: bool = Field(True, description="Truncate output when too long.") +class GrantPermissionsInput(BaseModel): + permissions: list[str] = Field(..., min_length=1, description="Permissions to grant.") + origin: str | None = Field(None, description="Optional origin to apply permissions.") + + +class ClearPermissionsInput(BaseModel): + pass + + +class SetGeolocationInput(BaseModel): + latitude: float = Field(..., description="Latitude in decimal degrees.") + longitude: float = Field(..., description="Longitude in decimal degrees.") + accuracy: float | None = Field(None, description="Optional accuracy in meters.") + + def register_default_tools( registry: ToolRegistry, runtime: ToolContext | "AgentRuntime" | None = None ) -> ToolRegistry: @@ -68,6 +83,17 @@ def _get_runtime(ctx: ToolContext | None): return runtime raise RuntimeError("ToolContext with runtime is required") + def _get_permission_context(runtime_ref): + legacy_browser = getattr(runtime_ref, "_legacy_browser", None) + if legacy_browser is not None: + context = getattr(legacy_browser, "context", None) + if context is not None: + return context + backend = getattr(runtime_ref, "backend", None) + page = getattr(backend, "_page", None) or getattr(backend, "page", None) + context = getattr(page, "context", None) if page is not None else None + return context + @registry.tool( name="snapshot_state", input_model=SnapshotToolInput, @@ -229,4 +255,64 @@ async def evaluate_js_tool(ctx, params: EvaluateJsToolInput) -> ActionResult: outcome="dom_updated", ) + @registry.tool( + name="grant_permissions", + input_model=GrantPermissionsInput, + output_model=ActionResult, + description="Grant browser permissions for the current context.", + ) + async def grant_permissions_tool(ctx, params: GrantPermissionsInput) -> ActionResult: + runtime_ref = _get_runtime(ctx) + if ctx is not None: + ctx.require("permissions") + elif not runtime_ref.can("permissions"): + raise UnsupportedCapabilityError("permissions") + context = _get_permission_context(runtime_ref) + if context is None: + raise RuntimeError("Permission context unavailable") + await context.grant_permissions(params.permissions, origin=params.origin) + return ActionResult(success=True, duration_ms=0, outcome="dom_updated") + + @registry.tool( + name="clear_permissions", + input_model=ClearPermissionsInput, + output_model=ActionResult, + description="Clear browser permissions for the current context.", + ) + async def clear_permissions_tool(ctx, _params: ClearPermissionsInput) -> ActionResult: + runtime_ref = _get_runtime(ctx) + if ctx is not None: + ctx.require("permissions") + elif not runtime_ref.can("permissions"): + raise UnsupportedCapabilityError("permissions") + context = _get_permission_context(runtime_ref) + if context is None: + raise RuntimeError("Permission context unavailable") + await context.clear_permissions() + return ActionResult(success=True, duration_ms=0, outcome="dom_updated") + + @registry.tool( + name="set_geolocation", + input_model=SetGeolocationInput, + output_model=ActionResult, + description="Set geolocation for the current browser context.", + ) + async def set_geolocation_tool(ctx, params: SetGeolocationInput) -> ActionResult: + runtime_ref = _get_runtime(ctx) + if ctx is not None: + ctx.require("permissions") + elif not runtime_ref.can("permissions"): + raise UnsupportedCapabilityError("permissions") + context = _get_permission_context(runtime_ref) + if context is None: + raise RuntimeError("Permission context unavailable") + await context.set_geolocation( + { + "latitude": params.latitude, + "longitude": params.longitude, + "accuracy": params.accuracy, + } + ) + return ActionResult(success=True, duration_ms=0, outcome="dom_updated") + return registry diff --git a/tests/unit/test_permission_policy.py b/tests/unit/test_permission_policy.py new file mode 100644 index 0000000..0b068fc --- /dev/null +++ b/tests/unit/test_permission_policy.py @@ -0,0 +1,66 @@ +import pytest + +from sentience import AsyncSentienceBrowser, SentienceBrowser +from sentience.permissions import PermissionPolicy + + +class SyncContextStub: + def __init__(self) -> None: + self.calls: list[tuple | str] = [] + + def clear_permissions(self) -> None: + self.calls.append("clear") + + def set_geolocation(self, geolocation: dict) -> None: + self.calls.append(("geolocation", geolocation)) + + def grant_permissions(self, permissions: list[str], origin: str | None = None) -> None: + self.calls.append(("grant", permissions, origin)) + + +class AsyncContextStub: + def __init__(self) -> None: + self.calls: list[tuple | str] = [] + + async def clear_permissions(self) -> None: + self.calls.append("clear") + + async def set_geolocation(self, geolocation: dict) -> None: + self.calls.append(("geolocation", geolocation)) + + async def grant_permissions(self, permissions: list[str], origin: str | None = None) -> None: + self.calls.append(("grant", permissions, origin)) + + +def test_apply_permission_policy_sync() -> None: + policy = PermissionPolicy( + default="clear", + auto_grant=["geolocation"], + geolocation={"latitude": 37.77, "longitude": -122.41}, + origin="https://example.com", + ) + browser = SentienceBrowser(permission_policy=policy) + context = SyncContextStub() + browser.apply_permission_policy(context) + assert context.calls == [ + "clear", + ("geolocation", {"latitude": 37.77, "longitude": -122.41}), + ("grant", ["geolocation"], "https://example.com"), + ] + + +@pytest.mark.asyncio +async def test_apply_permission_policy_async() -> None: + policy = PermissionPolicy( + default="clear", + auto_grant=["notifications"], + geolocation={"latitude": 40.71, "longitude": -74.0, "accuracy": 10}, + ) + browser = AsyncSentienceBrowser(permission_policy=policy) + context = AsyncContextStub() + await browser.apply_permission_policy(context) + assert context.calls == [ + "clear", + ("geolocation", {"latitude": 40.71, "longitude": -74.0, "accuracy": 10}), + ("grant", ["notifications"], None), + ] diff --git a/tests/unit/test_tool_registry.py b/tests/unit/test_tool_registry.py index bcb9929..31588dd 100644 --- a/tests/unit/test_tool_registry.py +++ b/tests/unit/test_tool_registry.py @@ -96,6 +96,9 @@ async def snapshot(self, **_kwargs): "click_rect", "press", "evaluate_js", + "grant_permissions", + "clear_permissions", + "set_geolocation", } <= names @@ -149,8 +152,9 @@ def can(self, _name: str) -> bool: return False ctx = ToolContext(RuntimeStub()) - with pytest.raises(UnsupportedCapabilityError, match="unsupported_capability"): + with pytest.raises(UnsupportedCapabilityError) as excinfo: ctx.require("tabs") + assert excinfo.value.error == "unsupported_capability" @pytest.mark.asyncio @@ -208,7 +212,7 @@ def capabilities(self): return None def can(self, name: str) -> bool: - return name != "keyboard" and name != "evaluate_js" + return name not in {"keyboard", "evaluate_js", "permissions"} async def snapshot(self, **_kwargs): return None @@ -219,12 +223,22 @@ async def snapshot(self, **_kwargs): ctx = ToolContext(RuntimeStub()) register_default_tools(registry, ctx) - with pytest.raises(UnsupportedCapabilityError, match="unsupported_capability"): + with pytest.raises(UnsupportedCapabilityError) as excinfo: await registry.execute("press", {"key": "Enter"}, ctx=ctx) + assert excinfo.value.error == "unsupported_capability" - with pytest.raises(UnsupportedCapabilityError, match="unsupported_capability"): + with pytest.raises(UnsupportedCapabilityError) as excinfo: await registry.execute( "scroll_to_element", {"element_id": 1, "behavior": "instant", "block": "center"}, ctx=ctx, ) + assert excinfo.value.error == "unsupported_capability" + + with pytest.raises(UnsupportedCapabilityError) as excinfo: + await registry.execute( + "grant_permissions", + {"permissions": ["geolocation"]}, + ctx=ctx, + ) + assert excinfo.value.error == "unsupported_capability"