From 7ba546f5ce85579b04f4d5a6a7c0fb7924fe0956 Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Thu, 12 Mar 2026 11:39:46 -0400 Subject: [PATCH 1/4] add --query flag for query format support --- .agents/skills/firecrawl-scrape/SKILL.md | 67 ++++++++++++++++++++++++ src/commands/scrape.ts | 13 ++++- src/index.ts | 4 ++ src/types/scrape.ts | 2 + src/utils/options.ts | 1 + 5 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 .agents/skills/firecrawl-scrape/SKILL.md diff --git a/.agents/skills/firecrawl-scrape/SKILL.md b/.agents/skills/firecrawl-scrape/SKILL.md new file mode 100644 index 0000000..4a33683 --- /dev/null +++ b/.agents/skills/firecrawl-scrape/SKILL.md @@ -0,0 +1,67 @@ +--- +name: firecrawl-scrape +description: | + Extract clean markdown from any URL, including JavaScript-rendered SPAs. Use this skill whenever the user provides a URL and wants its content, says "scrape", "grab", "fetch", "pull", "get the page", "extract from this URL", or "read this webpage". Handles JS-rendered pages, multiple concurrent URLs, and returns LLM-optimized markdown. Use this instead of WebFetch for any webpage content extraction. +allowed-tools: + - Bash(firecrawl *) + - Bash(npx firecrawl *) +--- + +# firecrawl scrape + +Scrape one or more URLs. Returns clean, LLM-optimized markdown. Multiple URLs are scraped concurrently. 
+ +## When to use + +- You have a specific URL and want its content +- The page is static or JS-rendered (SPA) +- Step 2 in the [workflow escalation pattern](firecrawl-cli): search → **scrape** → map → crawl → browser + +## Quick start + +```bash +# Basic markdown extraction +firecrawl scrape "<url>" -o .firecrawl/page.md + +# Main content only, no nav/footer +firecrawl scrape "<url>" --only-main-content -o .firecrawl/page.md + +# Wait for JS to render, then scrape +firecrawl scrape "<url>" --wait-for 3000 -o .firecrawl/page.md + +# Multiple URLs (each saved to .firecrawl/) +firecrawl scrape https://example.com https://example.com/blog https://example.com/docs + +# Get markdown and links together +firecrawl scrape "<url>" --format markdown,links -o .firecrawl/page.json + +# Ask a question about the page +firecrawl scrape "https://example.com/pricing" --query "What is the enterprise plan price?" +``` + +## Options + +| Option | Description | +| ------------------------ | ---------------------------------------------------------------- | +| `-f, --format <formats>` | Output formats: markdown, html, rawHtml, links, screenshot, json | +| `-Q, --query <prompt>` | Ask a question about the page content (5 credits) | +| `-H` | Include HTTP headers in output | +| `--only-main-content` | Strip nav, footer, sidebar — main content only | +| `--wait-for <ms>` | Wait for JS rendering before scraping | +| `--include-tags <tags>` | Only include these HTML tags | +| `--exclude-tags <tags>` | Exclude these HTML tags | +| `-o, --output <path>` | Output file path | + +## Tips + +- **Try scrape before browser.** Scrape handles static pages and JS-rendered SPAs. Only escalate to browser when you need interaction (clicks, form fills, pagination). +- Multiple URLs are scraped concurrently — check `firecrawl --status` for your concurrency limit. +- Single format outputs raw content. Multiple formats (e.g., `--format markdown,links`) output JSON. +- Always quote URLs — shell interprets `?` and `&` as special characters. 
+- Naming convention: `.firecrawl/{site}-{path}.md` + +## See also + +- [firecrawl-search](../firecrawl-search/SKILL.md) — find pages when you don't have a URL +- [firecrawl-browser](../firecrawl-browser/SKILL.md) — when scrape can't get the content (interaction needed) +- [firecrawl-download](../firecrawl-download/SKILL.md) — bulk download an entire site to local files diff --git a/src/commands/scrape.ts b/src/commands/scrape.ts index 96b652b..7f68b5e 100644 --- a/src/commands/scrape.ts +++ b/src/commands/scrape.ts @@ -10,7 +10,7 @@ import type { ScrapeLocation, } from '../types/scrape'; import { getClient } from '../utils/client'; -import { handleScrapeOutput } from '../utils/output'; +import { handleScrapeOutput, writeOutput } from '../utils/output'; import { getOrigin } from '../utils/url'; import { executeMap } from './map'; import { getStatus } from './status'; @@ -71,6 +71,11 @@ export async function executeScrape( formats.push('screenshot'); } + // Inject query format if --query was provided + if (options.query) { + formats.push({ type: 'query', prompt: options.query } as any); + } + // If no formats specified, default to markdown if (formats.length === 0) { formats.push('markdown'); @@ -136,6 +141,12 @@ export async function handleScrapeCommand( ): Promise { const result = await executeScrape(options); + // Query mode: output answer directly + if (options.query && result.success && result.data?.answer) { + writeOutput(result.data.answer, options.output, !!options.output); + return; + } + // Determine effective formats for output handling const effectiveFormats: ScrapeFormat[] = options.formats && options.formats.length > 0 diff --git a/src/index.ts b/src/index.ts index 2ceb242..072aa29 100644 --- a/src/index.ts +++ b/src/index.ts @@ -158,6 +158,10 @@ function createScrapeCommand(): Command { '--languages ', 'Comma-separated language codes for scraping (e.g., en,es)' ) + .option( + '-Q, --query ', + 'Ask a question about the page content (query format)' + ) 
.action(async (positionalArgs, options) => { // Collect URLs from positional args and --url option diff --git a/src/types/scrape.ts b/src/types/scrape.ts index eaa553d..345b797 100644 --- a/src/types/scrape.ts +++ b/src/types/scrape.ts @@ -55,6 +55,8 @@ export interface ScrapeOptions { maxAge?: number; /** Location settings for geo-targeted scraping */ location?: ScrapeLocation; + /** Question to ask about the page content (query format) */ + query?: string; } export interface ScrapeResult { diff --git a/src/utils/options.ts b/src/utils/options.ts index 290c665..52a3a95 100644 --- a/src/utils/options.ts +++ b/src/utils/options.ts @@ -111,5 +111,6 @@ export function parseScrapeOptions(options: any): ScrapeOptions { timing: options.timing, maxAge: options.maxAge, location, + query: options.query, }; } From 549efd3d763297b5a49a458812d1829ce1f92c4c Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Thu, 12 Mar 2026 11:44:33 -0400 Subject: [PATCH 2/4] bump version to 1.10.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 0d803b9..ae708d7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "firecrawl-cli", - "version": "1.9.8", + "version": "1.10.0", "description": "Command-line interface for Firecrawl. Scrape, crawl, and extract data from any website directly from your terminal.", "main": "dist/index.js", "bin": { From 4627961cb08f8a45a1f645232fbfdddcbb2048f7 Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Thu, 12 Mar 2026 12:38:56 -0400 Subject: [PATCH 3/4] move query docs to tracked skill file, remove .agents/ duplicate the --query scrape docs were accidentally committed to .agents/skills/firecrawl-scrape/ instead of skills/firecrawl-scrape/. 
--- .agents/skills/firecrawl-scrape/SKILL.md | 67 ------------------------ skills/firecrawl-scrape/SKILL.md | 4 ++ 2 files changed, 4 insertions(+), 67 deletions(-) delete mode 100644 .agents/skills/firecrawl-scrape/SKILL.md diff --git a/.agents/skills/firecrawl-scrape/SKILL.md b/.agents/skills/firecrawl-scrape/SKILL.md deleted file mode 100644 index 4a33683..0000000 --- a/.agents/skills/firecrawl-scrape/SKILL.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -name: firecrawl-scrape -description: | - Extract clean markdown from any URL, including JavaScript-rendered SPAs. Use this skill whenever the user provides a URL and wants its content, says "scrape", "grab", "fetch", "pull", "get the page", "extract from this URL", or "read this webpage". Handles JS-rendered pages, multiple concurrent URLs, and returns LLM-optimized markdown. Use this instead of WebFetch for any webpage content extraction. -allowed-tools: - - Bash(firecrawl *) - - Bash(npx firecrawl *) ---- - -# firecrawl scrape - -Scrape one or more URLs. Returns clean, LLM-optimized markdown. Multiple URLs are scraped concurrently. - -## When to use - -- You have a specific URL and want its content -- The page is static or JS-rendered (SPA) -- Step 2 in the [workflow escalation pattern](firecrawl-cli): search → **scrape** → map → crawl → browser - -## Quick start - -```bash -# Basic markdown extraction -firecrawl scrape "<url>" -o .firecrawl/page.md - -# Main content only, no nav/footer -firecrawl scrape "<url>" --only-main-content -o .firecrawl/page.md - -# Wait for JS to render, then scrape -firecrawl scrape "<url>" --wait-for 3000 -o .firecrawl/page.md - -# Multiple URLs (each saved to .firecrawl/) -firecrawl scrape https://example.com https://example.com/blog https://example.com/docs - -# Get markdown and links together -firecrawl scrape "<url>" --format markdown,links -o .firecrawl/page.json - -# Ask a question about the page -firecrawl scrape "https://example.com/pricing" --query "What is the enterprise plan price?" 
-``` - -## Options - -| Option | Description | -| ------------------------ | ---------------------------------------------------------------- | -| `-f, --format <formats>` | Output formats: markdown, html, rawHtml, links, screenshot, json | -| `-Q, --query <prompt>` | Ask a question about the page content (5 credits) | -| `-H` | Include HTTP headers in output | -| `--only-main-content` | Strip nav, footer, sidebar — main content only | -| `--wait-for <ms>` | Wait for JS rendering before scraping | -| `--include-tags <tags>` | Only include these HTML tags | -| `--exclude-tags <tags>` | Exclude these HTML tags | -| `-o, --output <path>` | Output file path | - -## Tips - -- **Try scrape before browser.** Scrape handles static pages and JS-rendered SPAs. Only escalate to browser when you need interaction (clicks, form fills, pagination). -- Multiple URLs are scraped concurrently — check `firecrawl --status` for your concurrency limit. -- Single format outputs raw content. Multiple formats (e.g., `--format markdown,links`) output JSON. -- Always quote URLs — shell interprets `?` and `&` as special characters. -- Naming convention: `.firecrawl/{site}-{path}.md` - -## See also - -- [firecrawl-search](../firecrawl-search/SKILL.md) — find pages when you don't have a URL -- [firecrawl-browser](../firecrawl-browser/SKILL.md) — when scrape can't get the content (interaction needed) -- [firecrawl-download](../firecrawl-download/SKILL.md) — bulk download an entire site to local files diff --git a/skills/firecrawl-scrape/SKILL.md b/skills/firecrawl-scrape/SKILL.md index a090f48..4a33683 100644 --- a/skills/firecrawl-scrape/SKILL.md +++ b/skills/firecrawl-scrape/SKILL.md @@ -34,6 +34,9 @@ firecrawl scrape https://example.com https://example.com/blog https://example.co # Get markdown and links together firecrawl scrape "<url>" --format markdown,links -o .firecrawl/page.json + +# Ask a question about the page +firecrawl scrape "https://example.com/pricing" --query "What is the enterprise plan price?" 
``` ## Options @@ -41,6 +44,7 @@ firecrawl scrape "<url>" --format markdown,links -o .firecrawl/page.json | Option | Description | | ------------------------ | ---------------------------------------------------------------- | | `-f, --format <formats>` | Output formats: markdown, html, rawHtml, links, screenshot, json | +| `-Q, --query <prompt>` | Ask a question about the page content (5 credits) | | `-H` | Include HTTP headers in output | | `--only-main-content` | Strip nav, footer, sidebar — main content only | | `--wait-for <ms>` | Wait for JS rendering before scraping | From 18858ba23a62b80c5da4a4b88aa92218725906ac Mon Sep 17 00:00:00 2001 From: Developers Digest <124798203+developersdigest@users.noreply.github.com> Date: Thu, 12 Mar 2026 12:42:33 -0400 Subject: [PATCH 4/4] prefer plain scrape over --query in skill docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scrape + grep/read is the default for coding agents — full content stays local for agentic reasoning. --query is for quick targeted answers without saving the page. --- skills/firecrawl-scrape/SKILL.md | 1 + 1 file changed, 1 insertion(+) diff --git a/skills/firecrawl-scrape/SKILL.md b/skills/firecrawl-scrape/SKILL.md index 4a33683..fc313d3 100644 --- a/skills/firecrawl-scrape/SKILL.md +++ b/skills/firecrawl-scrape/SKILL.md @@ -54,6 +54,7 @@ firecrawl scrape "https://example.com/pricing" --query "What is the enterprise p ## Tips +- **Prefer plain scrape over `--query`.** Scrape to a file, then use `grep`, `head`, or read the markdown directly — you can search and reason over the full content yourself. Use `--query` only when you want a single targeted answer without saving the page (costs 5 extra credits). - **Try scrape before browser.** Scrape handles static pages and JS-rendered SPAs. Only escalate to browser when you need interaction (clicks, form fills, pagination). - Multiple URLs are scraped concurrently — check `firecrawl --status` for your concurrency limit. 
- Single format outputs raw content. Multiple formats (e.g., `--format markdown,links`) output JSON.