From 3ce9bb38c5133bcfb88795bc67fe5e678bd5a0c5 Mon Sep 17 00:00:00 2001 From: mutnale_sushant Date: Mon, 16 Mar 2026 09:13:23 +0530 Subject: [PATCH] fix(playwright): filter unsupported context options in persistent browser This addresses issue #1784 by dynamically filtering options passed to launch_persistent_context and providing a warning log for ignored options like storage_state. --- pyproject.toml | 3 +++ src/crawlee/browsers/_playwright_browser.py | 16 +++++++++++++++- tests/unit/browsers/test_playwright_browser.py | 10 ++++++++++ uv.lock | 6 ++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2a3d21fb42..f4fb299805 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,11 +34,14 @@ keywords = [ "scraping", ] dependencies = [ + "apify-fingerprint-datapoints>=0.11.0", "async-timeout>=5.0.1", + "browserforge>=1.2.4", "cachetools>=5.5.0", "colorama>=0.4.0", "impit>=0.8.0", "more-itertools>=10.2.0", + "playwright>=1.58.0", "protego>=0.5.0", "psutil>=6.0.0", "pydantic-settings>=2.12.0", diff --git a/src/crawlee/browsers/_playwright_browser.py b/src/crawlee/browsers/_playwright_browser.py index 8ce19bfd26..c2a4ff9ddf 100644 --- a/src/crawlee/browsers/_playwright_browser.py +++ b/src/crawlee/browsers/_playwright_browser.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import inspect import shutil import tempfile from logging import getLogger @@ -67,8 +68,21 @@ async def new_context(self, **context_options: Any) -> BrowserContext: user_data_dir = tempfile.mkdtemp(prefix=self._TMP_DIR_PREFIX) self._temp_dir = Path(user_data_dir) + launch_persistent_context_sig = inspect.signature(self._browser_type.launch_persistent_context) + filtered_launch_options = { + key: value for key, value in launch_options.items() if key in launch_persistent_context_sig.parameters + } + + removed_options = set(launch_options.keys()) - set(filtered_launch_options.keys()) + if removed_options: + logger.warning( + f"The following options are not supported by Playwright's launch_persistent_context " + f'and will be ignored: {removed_options}. ' + 'To use these options, consider using incognito pages (use_incognito_pages=True).' + ) + self._context = await self._browser_type.launch_persistent_context( - user_data_dir=user_data_dir, **launch_options + user_data_dir=user_data_dir, **filtered_launch_options ) if self._temp_dir: diff --git a/tests/unit/browsers/test_playwright_browser.py b/tests/unit/browsers/test_playwright_browser.py index 120b886c59..56b8f15c1f 100644 --- a/tests/unit/browsers/test_playwright_browser.py +++ b/tests/unit/browsers/test_playwright_browser.py @@ -42,3 +42,13 @@ async def test_delete_temp_folder_with_close_browser(playwright: Playwright) -> assert current_temp_dir.exists() await persist_browser.close() assert not current_temp_dir.exists() + + +async def test_new_context_unsupported_options(playwright: Playwright) -> None: + persist_browser = PlaywrightPersistentBrowser( + playwright.chromium, user_data_dir=None, browser_launch_options={'headless': True} + ) + # This should not crash despite 'storage_state' being invalid for launch_persistent_context + context = await persist_browser.new_context(storage_state={'cookies': [], 'origins': []}) + assert context is not None + await persist_browser.close() diff --git a/uv.lock b/uv.lock index b57b9aabf0..1de293799e 100644 --- a/uv.lock +++ b/uv.lock @@ -787,11 +787,14 @@ name = "crawlee" version = "1.5.0" source = { editable = "." } dependencies = [ + { name = "apify-fingerprint-datapoints" }, { name = "async-timeout" }, + { name = "browserforge" }, { name = "cachetools" }, { name = "colorama" }, { name = "impit" }, { name = "more-itertools" }, + { name = "playwright" }, { name = "protego" }, { name = "psutil" }, { name = "pydantic" }, @@ -924,12 +927,14 @@ dev = [ requires-dist = [ { name = "aiomysql", marker = "extra == 'sql-mysql'", specifier = ">=0.3.2" }, { name = "aiosqlite", marker = "extra == 'sql-sqlite'", specifier = ">=0.21.0" }, + { name = "apify-fingerprint-datapoints", specifier = ">=0.11.0" }, { name = "apify-fingerprint-datapoints", marker = "extra == 'adaptive-crawler'", specifier = ">=0.0.3" }, { name = "apify-fingerprint-datapoints", marker = "extra == 'httpx'", specifier = ">=0.0.2" }, { name = "apify-fingerprint-datapoints", marker = "extra == 'playwright'", specifier = ">=0.0.2" }, { name = "async-timeout", specifier = ">=5.0.1" }, { name = "asyncpg", marker = "extra == 'sql-postgres'", specifier = ">=0.24.0" }, { name = "beautifulsoup4", extras = ["lxml"], marker = "extra == 'beautifulsoup'", specifier = ">=4.12.0" }, + { name = "browserforge", specifier = ">=1.2.4" }, { name = "browserforge", marker = "extra == 'adaptive-crawler'", specifier = ">=1.2.4" }, { name = "browserforge", marker = "extra == 'httpx'", specifier = ">=1.2.3" }, { name = "browserforge", marker = "extra == 'playwright'", specifier = ">=1.2.3" }, @@ -952,6 +957,7 @@ requires-dist = [ { name = "opentelemetry-sdk", marker = "extra == 'otel'", specifier = ">=1.34.1" }, { name = "opentelemetry-semantic-conventions", marker = "extra == 'otel'", specifier = ">=0.54" }, { name = "parsel", marker = "extra == 'parsel'", specifier = ">=1.10.0" }, + { name = "playwright", specifier = ">=1.58.0" }, { name = "playwright", marker = "extra == 'adaptive-crawler'", specifier = ">=1.27.0" }, { name = "playwright", marker = "extra == 'playwright'", specifier = ">=1.27.0" }, { name = "protego", specifier = ">=0.5.0" },