|
| 1 | +""" |
| 2 | +Debug helper: investigate empty `read_async()` results. |
| 3 | +
|
| 4 | +This script is meant to reproduce situations where the extension-backed `read_async()` |
| 5 | +returns `status="success"` but `content` is empty / near-empty (e.g. length=1). |
| 6 | +
|
| 7 | +It prints the final `ReadResult` for: |
| 8 | +- a stable control page (example.com) |
| 9 | +- an AceHardware product details page (often triggers the empty-read symptom) |
| 10 | +
|
| 11 | +Tip: |
| 12 | + SENTIENCE_DEBUG_READ=1 python sdk-python/examples/debug_read_async_empty_content.py |
| 13 | +""" |
| 14 | + |
| 15 | +import asyncio |
| 16 | +import os |
| 17 | + |
| 18 | +from sentience.async_api import AsyncSentienceBrowser, read_async |
| 19 | + |
| 20 | + |
# Product details page used as the second URL read by this script (after the
# example.com control page).
ACE_PDP_URL = (
    "https://www.acehardware.com/departments/tools/power-tools/"
    "combo-power-tool-sets/2026525"
)
| 22 | + |
| 23 | + |
async def dump_read(browser: AsyncSentienceBrowser, url: str) -> None:
    """Navigate to *url* and print a summary of `read_async()` for two formats.

    The page is read first as enhanced markdown, then as raw output. For each
    read, one line with the result's status, length, url and error is printed,
    followed by a short preview of the content (or ``<empty>``).

    Args:
        browser: Browser instance used for navigation and reading.
        url: Page to load before reading.
    """
    print(f"\n=== URL: {url} ===")
    await browser.goto(url, wait_until="domcontentloaded")

    # (output format, extra read_async keyword arguments, preview length)
    variants = (
        ("markdown", {"enhance_markdown": True}, 400),
        ("raw", {}, 200),
    )
    for output_format, extra_kwargs, preview_len in variants:
        result = await read_async(browser, output_format=output_format, **extra_kwargs)
        print(
            f"[{output_format}] status={result.status!r} length={result.length} "
            f"url={result.url!r} error={result.error!r}"
        )
        # Strip BEFORE truncating: otherwise leading whitespace can fill the
        # whole preview window and a non-empty read is reported as "<empty>".
        # NOTE(review): `or ""` is a defensive guard in case `content` can be
        # None on failed reads — confirm against the ReadResult definition.
        text = (result.content or "").strip()
        print(text[:preview_len].rstrip() or "<empty>")
| 39 | + |
| 40 | + |
async def main() -> None:
    """Open a browser session and dump reads for the control and Ace pages.

    Environment variables:
        SENTIENCE_API_KEY: Passed through as ``api_key`` (``None`` when unset).
        HEADLESS: The browser runs headless unless this is ``0`` or ``false``
            (surrounding whitespace and letter case are ignored). Defaults to
            ``1``.
    """
    api_key = os.environ.get("SENTIENCE_API_KEY")

    # Compare case-insensitively so values such as "FALSE" also disable
    # headless mode instead of being silently treated as "enabled".
    headless_flag = os.environ.get("HEADLESS", "1").strip().lower()
    headless = headless_flag not in {"0", "false"}

    async with AsyncSentienceBrowser(api_key=api_key, headless=headless) as browser:
        await dump_read(browser, "https://example.com")
        await dump_read(browser, ACE_PDP_URL)
| 48 | + |
| 49 | + |
# Run the async entry point only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
| 52 | + |
0 commit comments