diff --git a/README.md b/README.md
index ad84c89..d6a8d92 100644
--- a/README.md
+++ b/README.md
@@ -396,6 +396,36 @@ browser = SentienceBrowser(headless=True)
 browser = SentienceBrowser() # headless=True if CI=true, else False
 ```
 
+### Residential Proxy Support
+
+Route browser traffic through a residential proxy to hide your real IP address. HTTP, HTTPS, and SOCKS5 proxies are supported; while a proxy is configured, HTTPS certificate errors are ignored so that proxies using self-signed certificates work:
+
+```python
+# Method 1: Direct configuration
+browser = SentienceBrowser(proxy="http://user:pass@proxy.example.com:8080")
+
+# Method 2: Environment variable
+# export SENTIENCE_PROXY="http://user:pass@proxy.example.com:8080"
+browser = SentienceBrowser()
+
+# Works with agents
+llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
+agent = SentienceAgent(browser, llm)
+
+with browser:
+    browser.page.goto("https://example.com")
+    agent.act("Search for products")
+    # All traffic routed through proxy with WebRTC leak protection
+```
+
+**Features:**
+- HTTP, HTTPS, SOCKS5 proxy support
+- Username/password authentication
+- Self-signed proxy certificates accepted (HTTPS errors are ignored only while a proxy is configured)
+- WebRTC IP leak protection (automatic)
+
+See `examples/residential_proxy_agent.py` for complete examples.
+
 ## Best Practices
 
 ### 1. Wait for Dynamic Content
diff --git a/examples/residential_proxy_agent.py b/examples/residential_proxy_agent.py
new file mode 100644
index 0000000..83e1693
--- /dev/null
+++ b/examples/residential_proxy_agent.py
@@ -0,0 +1,366 @@
+"""
+Example: Using Residential Proxies with SentienceAgent
+
+Demonstrates how to configure and use residential proxies with SentienceBrowser
+for web automation while protecting your real IP address.
+
+Proxy Support:
+- HTTP, HTTPS, and SOCKS5 proxies
+- Authentication (username/password)
+- Environment variable configuration
+- WebRTC leak protection (automatic)
+- Self-signed SSL certificate handling (automatic for proxies)
+
+HTTPS Certificate Handling:
+When using a proxy, the SDK automatically sets `ignore_https_errors=True` to handle
+residential proxies that use self-signed SSL certificates. This is common with proxy
+providers and prevents `ERR_CERT_AUTHORITY_INVALID` errors.
+
+Note: HTTPS errors are ONLY ignored when a proxy is configured - normal browsing
+(without proxy) maintains full SSL certificate validation for security.
+
+Usage:
+    # Method 1: Direct proxy argument
+    python examples/residential_proxy_agent.py
+
+    # Method 2: Environment variable
+    export SENTIENCE_PROXY="http://user:pass@proxy.example.com:8080"
+    python examples/residential_proxy_agent.py
+
+Requirements:
+- OpenAI API key (OPENAI_API_KEY) for LLM
+- Optional: Sentience API key (SENTIENCE_API_KEY) for Pro/Enterprise features
+- Optional: Proxy server credentials
+"""
+
+import os
+
+from sentience import SentienceAgent, SentienceBrowser
+from sentience.agent_config import AgentConfig
+from sentience.llm_provider import OpenAIProvider
+
+
+def example_proxy_direct_argument():
+    """Example 1: Configure proxy via direct argument"""
+    print("=" * 60)
+    print("Example 1: Proxy via Direct Argument")
+    print("=" * 60)
+
+    # Configure your proxy credentials here
+    # Supported formats:
+    # - HTTP: http://user:pass@proxy.example.com:8080
+    # - HTTPS: https://user:pass@proxy.example.com:8443
+    # - SOCKS5: socks5://user:pass@proxy.example.com:1080
+    # - No auth: http://proxy.example.com:8080
+
+    proxy_url = "http://user:pass@proxy.example.com:8080"
+
+    # Create browser with proxy
+    browser = SentienceBrowser(
+        proxy=proxy_url,  # Direct proxy configuration
+        headless=False,  # Set to True for production
+    )
+
+    openai_key = os.environ.get("OPENAI_API_KEY")
+    if not openai_key:
+        print("❌ Error: OPENAI_API_KEY not set")
+        return
+
+    llm = OpenAIProvider(api_key=openai_key, model="gpt-4o-mini")
+    agent = SentienceAgent(browser, llm, verbose=True)
+
+    try:
+        print("\n🚀 Starting browser with proxy...")
+        browser.start()
+
+        print("🌐 Navigating to IP check service...")
+        browser.page.goto("https://api.ipify.org?format=json")
+        browser.page.wait_for_load_state("networkidle")
+
+        # Verify proxy is working by checking IP
+        print("\n✅ Browser started successfully with proxy!")
+        print(" You should see the proxy's IP address in the browser\n")
+
+        # Example: Use agent to interact with pages through proxy
+        browser.page.goto("https://www.google.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        agent.act("Click the search box")
+        agent.act('Type "my ip address" into the search field')
+        agent.act("Press Enter key")
+
+        import time
+
+        time.sleep(2)
+
+        print("\n✅ Agent execution complete!")
+        print(" All traffic was routed through the proxy")
+
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        raise
+
+    finally:
+        browser.close()
+
+
+def example_proxy_environment_variable():
+    """Example 2: Configure proxy via environment variable"""
+    print("=" * 60)
+    print("Example 2: Proxy via Environment Variable")
+    print("=" * 60)
+
+    # Check if SENTIENCE_PROXY is set
+    proxy_from_env = os.environ.get("SENTIENCE_PROXY")
+
+    if not proxy_from_env:
+        print("\n⚠️ SENTIENCE_PROXY environment variable not set")
+        print(" Set it with:")
+        print(' export SENTIENCE_PROXY="http://user:pass@proxy.example.com:8080"')
+        print("\n Skipping this example...\n")
+        return
+
+    print(f"\n🔧 Using proxy from environment (auth hidden): {proxy_from_env.rsplit('@', 1)[-1]}")
+
+    # Create browser without explicit proxy argument
+    # It will automatically use SENTIENCE_PROXY from environment
+    browser = SentienceBrowser(headless=False)
+
+    openai_key = os.environ.get("OPENAI_API_KEY")
+    if not openai_key:
+        print("❌ Error: OPENAI_API_KEY not set")
+        return
+
+    llm = OpenAIProvider(api_key=openai_key, model="gpt-4o-mini")
+    agent = SentienceAgent(browser, llm, verbose=True)
+
+    try:
+        print("\n🚀 Starting browser with proxy from environment...")
+        browser.start()
+
+        print("🌐 Navigating to IP check service...")
+        browser.page.goto("https://api.ipify.org?format=json")
+        browser.page.wait_for_load_state("networkidle")
+
+        print("\n✅ Browser started successfully with environment proxy!")
+
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        raise
+
+    finally:
+        browser.close()
+
+
+def example_proxy_types():
+    """Example 3: Different proxy types (HTTP, HTTPS, SOCKS5)"""
+    print("=" * 60)
+    print("Example 3: Different Proxy Types")
+    print("=" * 60)
+
+    proxy_examples = {
+        "HTTP": "http://user:pass@proxy.example.com:8080",
+        "HTTPS": "https://user:pass@secure-proxy.example.com:8443",
+        "SOCKS5": "socks5://user:pass@socks-proxy.example.com:1080",
+        "No Auth": "http://proxy.example.com:8080",  # Without credentials
+    }
+
+    print("\n📋 Supported proxy formats:")
+    for proxy_type, example_url in proxy_examples.items():
+        # Hide credentials in output
+        display_url = example_url.replace("user:pass@", "user:***@")
+        print(f" {proxy_type:10s}: {display_url}")
+
+    print("\n💡 To use a specific proxy type, pass it to SentienceBrowser:")
+    print(' browser = SentienceBrowser(proxy="socks5://user:pass@proxy.com:1080")')
+
+
+def example_webrtc_leak_protection():
+    """Example 4: WebRTC leak protection (automatic)"""
+    print("=" * 60)
+    print("Example 4: WebRTC Leak Protection (Automatic)")
+    print("=" * 60)
+
+    print("\n🔒 WebRTC leak protection is AUTOMATICALLY enabled for all users!")
+    print(" This prevents your real IP from leaking via WebRTC when using proxies.")
+    print("\n Browser flags applied:")
+    print(" - --disable-features=WebRtcHideLocalIpsWithMdns")
+    print(" - --force-webrtc-ip-handling-policy=disable_non_proxied_udp")
+    print("\n🔒 HTTPS certificate handling (when using proxy):")
+    print(" - ignore_https_errors=True (automatically set)")
+    print(" - Handles residential proxies with self-signed SSL certificates")
+    print(" - Prevents ERR_CERT_AUTHORITY_INVALID errors")
+    print(" - Only active when proxy is configured (normal browsing unaffected)")
+    print("\n No additional configuration needed - it just works!\n")
+
+
+def example_proxy_with_cloud_tracing():
+    """Example 5: Combine proxy with cloud tracing"""
+    print("=" * 60)
+    print("Example 5: Proxy + Cloud Tracing")
+    print("=" * 60)
+
+    sentience_key = os.environ.get("SENTIENCE_API_KEY")
+    openai_key = os.environ.get("OPENAI_API_KEY")
+
+    if not sentience_key:
+        print("\n⚠️ SENTIENCE_API_KEY not set")
+        print(" Cloud tracing requires Pro or Enterprise tier")
+        print(" Get your API key at: https://sentience.studio")
+        print("\n Skipping this example...\n")
+        return
+
+    if not openai_key:
+        print("❌ Error: OPENAI_API_KEY not set")
+        return
+
+    from sentience.tracer_factory import create_tracer
+
+    # Create tracer for cloud upload
+    run_id = "proxy-with-tracing-demo"
+    tracer = create_tracer(api_key=sentience_key, run_id=run_id)
+
+    # Configure proxy
+    proxy_url = "http://user:pass@proxy.example.com:8080"
+
+    # Create browser with BOTH proxy and API key
+    browser = SentienceBrowser(api_key=sentience_key, proxy=proxy_url, headless=False)
+
+    llm = OpenAIProvider(api_key=openai_key, model="gpt-4o-mini")
+
+    # Configure agent with screenshots + tracer
+    config = AgentConfig(
+        snapshot_limit=50,
+        capture_screenshots=True,
+        screenshot_format="jpeg",
+        screenshot_quality=80,
+    )
+
+    agent = SentienceAgent(browser, llm, tracer=tracer, config=config)
+
+    try:
+        print("\n🚀 Starting browser with proxy + cloud tracing...")
+        browser.start()
+
+        print("🌐 Executing agent actions (all traced)...")
+        browser.page.goto("https://www.google.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        agent.act("Click the search box")
+        agent.act('Type "sentience AI SDK" into the search field')
+        agent.act("Press Enter key")
+
+        print("\n✅ Agent execution complete!")
+
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        raise
+
+    finally:
+        # Upload trace to cloud
+        print("\n📤 Uploading trace to cloud...")
+        try:
+            tracer.close(blocking=True)
+            print("✅ Trace uploaded successfully!")
+            print(f" View at: https://studio.sentienceapi.com (run_id: {run_id})")
+        except Exception as e:
+            print(f"⚠️ Upload failed: {e}")
+
+        browser.close()
+
+
+def example_no_proxy_baseline():
+    """Example 0: Baseline - Run agent without proxy to show it works"""
+    print("=" * 60)
+    print("Example 0: Baseline (No Proxy)")
+    print("=" * 60)
+
+    openai_key = os.environ.get("OPENAI_API_KEY")
+    if not openai_key:
+        print("❌ Error: OPENAI_API_KEY not set")
+        print(" Set it with: export OPENAI_API_KEY='your-key-here'")
+        return
+
+    # Create browser WITHOUT proxy (baseline)
+    browser = SentienceBrowser(headless=False)
+    llm = OpenAIProvider(api_key=openai_key, model="gpt-4o-mini")
+    agent = SentienceAgent(browser, llm, verbose=True)
+
+    try:
+        print("\n🚀 Starting browser (without proxy)...")
+        browser.start()
+
+        print("🌐 Navigating to Google...")
+        browser.page.goto("https://www.google.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        print("\n🤖 Running agent actions...")
+        agent.act("Click the search box")
+        agent.act('Type "what is my ip" into the search field')
+        agent.act("Press Enter key")
+
+        import time
+
+        time.sleep(3)
+
+        print("\n✅ Agent execution complete!")
+        print(" This shows your REAL IP address (no proxy)")
+        print("\n💡 To use a proxy:")
+        print(" 1. Get proxy credentials from your provider")
+        print(" 2. Run example_proxy_direct_argument() or set SENTIENCE_PROXY")
+        print(" You should see a DIFFERENT IP (the proxy's IP)")
+
+        stats = agent.get_token_stats()
+        print("\n📊 Token Usage:")
+        print(f" Prompt tokens: {stats.total_prompt_tokens}")
+        print(f" Completion tokens: {stats.total_completion_tokens}")
+        print(f" Total tokens: {stats.total_tokens}")
+
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        raise
+
+    finally:
+        browser.close()
+
+
+def main():
+    """Run all proxy examples"""
+    print("\n" + "=" * 60)
+    print("Sentience SDK - Residential Proxy Examples")
+    print("=" * 60 + "\n")
+
+    # Run examples
+    # Note: Uncomment the examples you want to run
+
+    # Example 0: Baseline (no proxy) - WORKS OUT OF THE BOX
+    example_no_proxy_baseline()
+
+    # Example 1: Direct argument (configure proxy_url first)
+    # example_proxy_direct_argument()
+
+    # Example 2: Environment variable (set SENTIENCE_PROXY first)
+    # example_proxy_environment_variable()
+
+    # Example 3: Show supported proxy types
+    # example_proxy_types()
+
+    # Example 4: WebRTC leak protection info
+    # example_webrtc_leak_protection()
+
+    # Example 5: Proxy + Cloud Tracing (requires API key)
+    # example_proxy_with_cloud_tracing()
+
+    print("\n" + "=" * 60)
+    print("Examples Complete!")
+    print("=" * 60)
+    print("\n💡 Tips:")
+    print(" - Example 0 shows baseline (no proxy) - works immediately")
+    print(" - Uncomment other examples after configuring proxy credentials")
+    print(" - Set SENTIENCE_PROXY environment variable for easy configuration")
+    print(" - WebRTC leak protection is automatic - no configuration needed")
+    print(" - Combine with cloud tracing for full visibility\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index 7722855..49e132a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "sentience-python"
-version = "0.12.1"
+version = "0.90.0"
 description = "Python SDK for Sentience AI Agent Browser Automation"
 readme = "README.md"
 requires-python = ">=3.11"
diff --git a/screenshot.png b/screenshot.png
index e96bc42..b6b7e62 100644
Binary files a/screenshot.png and b/screenshot.png differ
diff --git a/sentience/__init__.py b/sentience/__init__.py
index c4b7d03..6b7a2f2 100644
--- a/sentience/__init__.py
+++ b/sentience/__init__.py
@@ -58,7 +58,7 @@
 )
 from .wait import wait_for
 
-__version__ = "0.12.1"
+__version__ = "0.90.0"
 
 __all__ = [
     # Core SDK
diff --git a/sentience/browser.py b/sentience/browser.py
index cbeff56..e465d4d 100644
--- a/sentience/browser.py
+++ b/sentience/browser.py
@@ -7,9 +7,12 @@
 import tempfile
 import time
 from pathlib import Path
+from urllib.parse import urlparse
 
 from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright
 
+from sentience.models import ProxyConfig
+
 # Import stealth for bot evasion (optional - graceful fallback if not available)
 try:
     from playwright_stealth import stealth_sync
@@ -27,6 +30,7 @@ def __init__(
         api_key: str | None = None,
         api_url: str | None = None,
         headless: bool | None = None,
+        proxy: str | None = None,
     ):
         """
         Initialize Sentience browser
@@ -39,6 +43,9 @@ def __init__(
                      If None and no api_key, uses free tier (local extension only)
                      If 'local' or Docker sidecar URL, uses Enterprise tier
             headless: Whether to run in headless mode. If None, defaults to True in CI, False otherwise
+            proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
+                   Supports HTTP, HTTPS, and SOCKS5 proxies
+                   Falls back to SENTIENCE_PROXY environment variable if not provided
         """
         self.api_key = api_key
         # Only set api_url if api_key is provided, otherwise None (free tier)
@@ -55,11 +62,60 @@ def __init__(
         else:
             self.headless = headless
 
+        # Support proxy from argument or environment variable
+        self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")
+
         self.playwright: Playwright | None = None
         self.context: BrowserContext | None = None
         self.page: Page | None = None
         self._extension_path: str | None = None
 
+    def _parse_proxy(self, proxy_string: str | None) -> ProxyConfig | None:
+        """
+        Parse proxy connection string into ProxyConfig.
+
+        Args:
+            proxy_string: Proxy URL (e.g., 'http://user:pass@proxy.example.com:8080')
+
+        Returns:
+            ProxyConfig object or None if invalid
+
+        Note:
+            Never raises: prints a warning and returns None (start() then runs unproxied)
+        """
+        if not proxy_string:
+            return None
+
+        try:
+            parsed = urlparse(proxy_string)
+
+            # Validate scheme
+            if parsed.scheme not in ("http", "https", "socks5"):
+                print(f"⚠️ [Sentience] Unsupported proxy scheme: {parsed.scheme}")
+                print(" Supported: http, https, socks5")
+                return None
+
+            # Validate host and port
+            if not parsed.hostname or not parsed.port:
+                print("⚠️ [Sentience] Proxy URL must include hostname and port")
+                print(" Expected format: http://username:password@host:port")
+                return None
+
+            # Build server URL
+            server = f"{parsed.scheme}://{parsed.hostname}:{parsed.port}"
+
+            # Create ProxyConfig with optional credentials
+            return ProxyConfig(
+                server=server,
+                username=parsed.username if parsed.username else None,
+                password=parsed.password if parsed.password else None,
+            )
+
+        except Exception as e:
+            print(f"⚠️ [Sentience] Invalid proxy configuration: {e}")
+            print(" Expected format: http://username:password@host:port")
+            return None
+
     def start(self) -> None:
         """Launch browser with extension loaded"""
         # Get extension source path (relative to project root/package)
@@ -99,6 +155,9 @@ def start(self) -> None:
             "--disable-blink-features=AutomationControlled",  # Hides 'navigator.webdriver'
             "--no-sandbox",
             "--disable-infobars",
+            # WebRTC leak protection (prevents real IP exposure when using proxies/VPNs)
+            "--disable-features=WebRtcHideLocalIpsWithMdns",
+            "--force-webrtc-ip-handling-policy=disable_non_proxied_udp",
         ]
 
         # Handle headless mode correctly for extensions
@@ -108,17 +167,30 @@ def start(self) -> None:
         if self.headless:
             args.append("--headless=new")  # Use new headless mode via args
 
+        # Parse proxy configuration if provided
+        proxy_config = self._parse_proxy(self.proxy) if self.proxy else None
+
+        # Build launch_persistent_context parameters
+        launch_params = {
+            "user_data_dir": "",  # Ephemeral temp dir
+            "headless": False,  # IMPORTANT: See note above
+            "args": args,
+            "viewport": {"width": 1280, "height": 800},
+            # Remove "HeadlessChrome" from User Agent automatically
+            "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
+        }
+
+        # Add proxy if configured
+        if proxy_config:
+            launch_params["proxy"] = proxy_config.to_playwright_dict()
+            # Ignore HTTPS errors when using proxy (many residential proxies use self-signed certs)
+            launch_params["ignore_https_errors"] = True
+            print(f"🌐 [Sentience] Using proxy: {proxy_config.server}")
+
         # Launch persistent context (required for extensions)
         # Note: We pass headless=False to launch_persistent_context because we handle
         # headless mode via the --headless=new arg above. This is a Playwright workaround.
-        self.context = self.playwright.chromium.launch_persistent_context(
-            user_data_dir="",  # Ephemeral temp dir
-            headless=False,  # IMPORTANT: See note above
-            args=args,
-            viewport={"width": 1280, "height": 800},
-            # Remove "HeadlessChrome" from User Agent automatically
-            user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
-        )
+        self.context = self.playwright.chromium.launch_persistent_context(**launch_params)
 
         self.page = self.context.pages[0] if self.context.pages else self.context.new_page()
 
diff --git a/sentience/models.py b/sentience/models.py
index d613828..6d617c1 100644
--- a/sentience/models.py
+++ b/sentience/models.py
@@ -182,3 +182,38 @@ class ActionHistory(BaseModel):
     success: bool
     attempt: int
     duration_ms: int
+
+
+class ProxyConfig(BaseModel):
+    """
+    Proxy configuration for browser networking.
+
+    Supports HTTP, HTTPS, and SOCKS5 proxies with optional authentication.
+    """
+
+    server: str = Field(
+        ...,
+        description="Proxy server URL including scheme and port (e.g., 'http://proxy.example.com:8080')",
+    )
+    username: str | None = Field(
+        None,
+        description="Username for proxy authentication (optional)",
+    )
+    password: str | None = Field(
+        None,
+        description="Password for proxy authentication (optional)",
+    )
+
+    def to_playwright_dict(self) -> dict:
+        """
+        Convert to Playwright proxy configuration format.
+
+        Returns:
+            Dict compatible with Playwright's proxy parameter
+        """
+        config = {"server": self.server}
+        if self.username:
+            config["username"] = self.username
+        if self.password:
+            config["password"] = self.password
+        return config
diff --git a/tests/test_proxy.py b/tests/test_proxy.py
new file mode 100644
index 0000000..63e4366
--- /dev/null
+++ b/tests/test_proxy.py
@@ -0,0 +1,324 @@
+"""Tests for proxy support in SentienceBrowser"""
+
+import os
+from unittest.mock import MagicMock, patch
+
+from sentience.browser import SentienceBrowser
+from sentience.models import ProxyConfig
+
+
+class TestProxyConfig:
+    """Test ProxyConfig Pydantic model"""
+
+    def test_proxy_config_basic(self):
+        """Test basic proxy config creation"""
+        config = ProxyConfig(
+            server="http://proxy.example.com:8080",
+        )
+        assert config.server == "http://proxy.example.com:8080"
+        assert config.username is None
+        assert config.password is None
+
+    def test_proxy_config_with_auth(self):
+        """Test proxy config with authentication"""
+        config = ProxyConfig(
+            server="http://proxy.example.com:8080",
+            username="testuser",
+            password="testpass",
+        )
+        assert config.server == "http://proxy.example.com:8080"
+        assert config.username == "testuser"
+        assert config.password == "testpass"
+
+    def test_proxy_config_to_playwright_dict_without_auth(self):
+        """Test conversion to Playwright dict without authentication"""
+        config = ProxyConfig(server="http://proxy.example.com:8080")
+        playwright_dict = config.to_playwright_dict()
+
+        assert playwright_dict == {"server": "http://proxy.example.com:8080"}
+
+    def test_proxy_config_to_playwright_dict_with_auth(self):
+        """Test conversion to Playwright dict with authentication"""
+        config = ProxyConfig(
+            server="http://proxy.example.com:8080",
+            username="testuser",
+            password="testpass",
+        )
+        playwright_dict = config.to_playwright_dict()
+
+        assert playwright_dict == {
+            "server": "http://proxy.example.com:8080",
+            "username": "testuser",
+            "password": "testpass",
+        }
+
+
+class TestBrowserProxyParsing:
+    """Test SentienceBrowser proxy parsing functionality"""
+
+    def test_parse_proxy_http_no_auth(self):
+        """Test parsing HTTP proxy without authentication"""
+        browser = SentienceBrowser()
+        config = browser._parse_proxy("http://proxy.example.com:8080")
+
+        assert config is not None
+        assert config.server == "http://proxy.example.com:8080"
+        assert config.username is None
+        assert config.password is None
+
+    def test_parse_proxy_http_with_auth(self):
+        """Test parsing HTTP proxy with authentication"""
+        browser = SentienceBrowser()
+        config = browser._parse_proxy("http://user:pass@proxy.example.com:8080")
+
+        assert config is not None
+        assert config.server == "http://proxy.example.com:8080"
+        assert config.username == "user"
+        assert config.password == "pass"
+
+    def test_parse_proxy_https(self):
+        """Test parsing HTTPS proxy"""
+        browser = SentienceBrowser()
+        config = browser._parse_proxy("https://user:pass@secure-proxy.example.com:8443")
+
+        assert config is not None
+        assert config.server == "https://secure-proxy.example.com:8443"
+        assert config.username == "user"
+        assert config.password == "pass"
+
+    def test_parse_proxy_socks5(self):
+        """Test parsing SOCKS5 proxy"""
+        browser = SentienceBrowser()
+        config = browser._parse_proxy("socks5://user:pass@socks-proxy.example.com:1080")
+
+        assert config is not None
+        assert config.server == "socks5://socks-proxy.example.com:1080"
+        assert config.username == "user"
+        assert config.password == "pass"
+
+    def test_parse_proxy_invalid_scheme(self, capsys):
+        """Test parsing proxy with invalid scheme"""
+        browser = SentienceBrowser()
+        config = browser._parse_proxy("ftp://proxy.example.com:8080")
+
+        assert config is None
+        captured = capsys.readouterr()
+        assert "Unsupported proxy scheme: ftp" in captured.out
+        assert "Supported: http, https, socks5" in captured.out
+
+    def test_parse_proxy_missing_port(self, capsys):
+        """Test parsing proxy without port"""
+        browser = SentienceBrowser()
+        config = browser._parse_proxy("http://proxy.example.com")
+
+        assert config is None
+        captured = capsys.readouterr()
+        assert "Proxy URL must include hostname and port" in captured.out
+
+    def test_parse_proxy_missing_host(self, capsys):
+        """Test parsing proxy without hostname"""
+        browser = SentienceBrowser()
+        config = browser._parse_proxy("http://:8080")
+
+        assert config is None
+        captured = capsys.readouterr()
+        assert "Proxy URL must include hostname and port" in captured.out
+
+    def test_parse_proxy_empty_string(self):
+        """Test parsing empty proxy string"""
+        browser = SentienceBrowser()
+        config = browser._parse_proxy("")
+
+        assert config is None
+
+    def test_parse_proxy_none(self):
+        """Test parsing None proxy"""
+        browser = SentienceBrowser()
+        config = browser._parse_proxy(None)
+
+        assert config is None
+
+
+class TestBrowserProxyInitialization:
+    """Test SentienceBrowser initialization with proxy"""
+
+    def test_browser_init_with_proxy_arg(self):
+        """Test browser initialization with proxy argument"""
+        browser = SentienceBrowser(proxy="http://proxy.example.com:8080")
+        assert browser.proxy == "http://proxy.example.com:8080"
+
+    def test_browser_init_with_env_var(self, monkeypatch):
+        """Test browser initialization with SENTIENCE_PROXY env var"""
+        monkeypatch.setenv("SENTIENCE_PROXY", "http://env-proxy.example.com:8080")
+        browser = SentienceBrowser()
+        assert browser.proxy == "http://env-proxy.example.com:8080"
+
+    def test_browser_init_arg_overrides_env(self, monkeypatch):
+        """Test that proxy argument overrides environment variable"""
+        monkeypatch.setenv("SENTIENCE_PROXY", "http://env-proxy.example.com:8080")
+        browser = SentienceBrowser(proxy="http://arg-proxy.example.com:8080")
+        assert browser.proxy == "http://arg-proxy.example.com:8080"
+
+    def test_browser_init_without_proxy(self, monkeypatch):
+        """Test browser initialization without proxy"""
+        # Ensure env var is not set; monkeypatch restores it after the test
+        # (a bare `del os.environ[...]` would leak into every later test)
+        monkeypatch.delitem(os.environ, "SENTIENCE_PROXY", raising=False)
+
+        browser = SentienceBrowser()
+        assert browser.proxy is None
+
+
+class TestBrowserProxyIntegration:
+    """Test proxy integration in browser start() method"""
+
+    @patch("sentience.browser.shutil.copytree")
+    @patch("sentience.browser.sync_playwright")
+    def test_start_without_proxy(self, mock_playwright, mock_copytree):
+        """Test browser start without proxy"""
+        # Mock Playwright
+        mock_pw_instance = MagicMock()
+        mock_context = MagicMock()
+        mock_context.pages = []
+        mock_page = MagicMock()
+        mock_context.new_page.return_value = mock_page
+        mock_pw_instance.chromium.launch_persistent_context.return_value = mock_context
+        mock_playwright.return_value.start.return_value = mock_pw_instance
+
+        # Mock extension path check
+        with patch("sentience.browser.Path") as mock_path:
+            mock_ext_path = MagicMock()
+            mock_ext_path.exists.return_value = True
+            (mock_ext_path / "manifest.json").exists.return_value = True
+            mock_path.return_value.parent.parent.parent = MagicMock()
+            mock_path.return_value.parent.parent.parent.__truediv__.return_value = mock_ext_path
+
+            browser = SentienceBrowser()
+            browser.start()
+
+            # Verify proxy was not passed to launch_persistent_context
+            call_kwargs = mock_pw_instance.chromium.launch_persistent_context.call_args[1]
+            assert "proxy" not in call_kwargs
+
+    @patch("sentience.browser.shutil.copytree")
+    @patch("sentience.browser.sync_playwright")
+    def test_start_with_proxy(self, mock_playwright, mock_copytree, capsys):
+        """Test browser start with proxy"""
+        # Mock Playwright
+        mock_pw_instance = MagicMock()
+        mock_context = MagicMock()
+        mock_context.pages = []
+        mock_page = MagicMock()
+        mock_context.new_page.return_value = mock_page
+        mock_pw_instance.chromium.launch_persistent_context.return_value = mock_context
+        mock_playwright.return_value.start.return_value = mock_pw_instance
+
+        # Mock extension path check
+        with patch("sentience.browser.Path") as mock_path:
+            mock_ext_path = MagicMock()
+            mock_ext_path.exists.return_value = True
+            (mock_ext_path / "manifest.json").exists.return_value = True
+            mock_path.return_value.parent.parent.parent = MagicMock()
+            mock_path.return_value.parent.parent.parent.__truediv__.return_value = mock_ext_path
+
+            browser = SentienceBrowser(proxy="http://user:pass@proxy.example.com:8080")
+            browser.start()
+
+            # Verify proxy was passed to launch_persistent_context
+            call_kwargs = mock_pw_instance.chromium.launch_persistent_context.call_args[1]
+            assert "proxy" in call_kwargs
+            assert call_kwargs["proxy"] == {
+                "server": "http://proxy.example.com:8080",
+                "username": "user",
+                "password": "pass",
+            }
+
+            # Verify console message
+            captured = capsys.readouterr()
+            assert "Using proxy: http://proxy.example.com:8080" in captured.out
+
+    @patch("sentience.browser.shutil.copytree")
+    @patch("sentience.browser.sync_playwright")
+    def test_start_with_webrtc_flags(self, mock_playwright, mock_copytree):
+        """Test that WebRTC leak protection flags are always included"""
+        # Mock Playwright
+        mock_pw_instance = MagicMock()
+        mock_context = MagicMock()
+        mock_context.pages = []
+        mock_page = MagicMock()
+        mock_context.new_page.return_value = mock_page
+        mock_pw_instance.chromium.launch_persistent_context.return_value = mock_context
+        mock_playwright.return_value.start.return_value = mock_pw_instance
+
+        # Mock extension path check
+        with patch("sentience.browser.Path") as mock_path:
+            mock_ext_path = MagicMock()
+            mock_ext_path.exists.return_value = True
+            (mock_ext_path / "manifest.json").exists.return_value = True
+            mock_path.return_value.parent.parent.parent = MagicMock()
+            mock_path.return_value.parent.parent.parent.__truediv__.return_value = mock_ext_path
+
+            browser = SentienceBrowser()
+            browser.start()
+
+            # Verify WebRTC flags are included in args
+            call_kwargs = mock_pw_instance.chromium.launch_persistent_context.call_args[1]
+            args = call_kwargs["args"]
+            assert "--disable-features=WebRtcHideLocalIpsWithMdns" in args
+            assert "--force-webrtc-ip-handling-policy=disable_non_proxied_udp" in args
+
+    @patch("sentience.browser.shutil.copytree")
+    @patch("sentience.browser.sync_playwright")
+    def test_start_with_proxy_ignores_https_errors(self, mock_playwright, mock_copytree):
+        """Test that ignore_https_errors is set when using proxy (for self-signed certs)"""
+        # Mock Playwright
+        mock_pw_instance = MagicMock()
+        mock_context = MagicMock()
+        mock_context.pages = []
+        mock_page = MagicMock()
+        mock_context.new_page.return_value = mock_page
+        mock_pw_instance.chromium.launch_persistent_context.return_value = mock_context
+        mock_playwright.return_value.start.return_value = mock_pw_instance
+
+        # Mock extension path check
+        with patch("sentience.browser.Path") as mock_path:
+            mock_ext_path = MagicMock()
+            mock_ext_path.exists.return_value = True
+            (mock_ext_path / "manifest.json").exists.return_value = True
+            mock_path.return_value.parent.parent.parent = MagicMock()
+            mock_path.return_value.parent.parent.parent.__truediv__.return_value = mock_ext_path
+
+            browser = SentienceBrowser(proxy="http://user:pass@proxy.example.com:8080")
+            browser.start()
+
+            # Verify ignore_https_errors is set when using proxy
+            call_kwargs = mock_pw_instance.chromium.launch_persistent_context.call_args[1]
+            assert call_kwargs["ignore_https_errors"] is True
+
+    @patch("sentience.browser.shutil.copytree")
+    @patch("sentience.browser.sync_playwright")
+    def test_start_without_proxy_does_not_ignore_https_errors(self, mock_playwright, mock_copytree):
+        """Test that ignore_https_errors is NOT set when not using proxy"""
+        # Mock Playwright
+        mock_pw_instance = MagicMock()
+        mock_context = MagicMock()
+        mock_context.pages = []
+        mock_page = MagicMock()
+        mock_context.new_page.return_value = mock_page
+        mock_pw_instance.chromium.launch_persistent_context.return_value = mock_context
+        mock_playwright.return_value.start.return_value = mock_pw_instance
+
+        # Mock extension path check
+        with patch("sentience.browser.Path") as mock_path:
+            mock_ext_path = MagicMock()
+            mock_ext_path.exists.return_value = True
+            (mock_ext_path / "manifest.json").exists.return_value = True
+            mock_path.return_value.parent.parent.parent = MagicMock()
+            mock_path.return_value.parent.parent.parent.__truediv__.return_value = mock_ext_path
+
+            browser = SentienceBrowser()  # No proxy
+            browser.start()
+
+            # Verify ignore_https_errors is NOT set (maintains default security)
+            call_kwargs = mock_pw_instance.chromium.launch_persistent_context.call_args[1]
+            assert "ignore_https_errors" not in call_kwargs