diff --git a/package.json b/package.json index 0d803b9..ae708d7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "firecrawl-cli", - "version": "1.9.8", + "version": "1.10.0", "description": "Command-line interface for Firecrawl. Scrape, crawl, and extract data from any website directly from your terminal.", "main": "dist/index.js", "bin": { diff --git a/skills/firecrawl-scrape/SKILL.md b/skills/firecrawl-scrape/SKILL.md index a090f48..fc313d3 100644 --- a/skills/firecrawl-scrape/SKILL.md +++ b/skills/firecrawl-scrape/SKILL.md @@ -34,6 +34,9 @@ firecrawl scrape https://example.com https://example.com/blog https://example.co # Get markdown and links together firecrawl scrape "<url>" --format markdown,links -o .firecrawl/page.json + +# Ask a question about the page +firecrawl scrape "https://example.com/pricing" --query "What is the enterprise plan price?" ``` ## Options @@ -41,6 +44,7 @@ firecrawl scrape "<url>" --format markdown,links -o .firecrawl/page.json | Option | Description | | ------------------------ | ---------------------------------------------------------------- | | `-f, --format <formats>` | Output formats: markdown, html, rawHtml, links, screenshot, json | +| `-Q, --query <prompt>` | Ask a question about the page content (5 credits) | | `-H` | Include HTTP headers in output | | `--only-main-content` | Strip nav, footer, sidebar — main content only | | `--wait-for <ms>` | Wait for JS rendering before scraping | @@ -50,6 +54,7 @@ firecrawl scrape "<url>" --format markdown,links -o .firecrawl/page.json ## Tips +- **Prefer plain scrape over `--query`.** Scrape to a file, then use `grep`, `head`, or read the markdown directly — you can search and reason over the full content yourself. Use `--query` only when you want a single targeted answer without saving the page (costs 5 extra credits). - **Try scrape before browser.** Scrape handles static pages and JS-rendered SPAs. Only escalate to browser when you need interaction (clicks, form fills, pagination). 
- Multiple URLs are scraped concurrently — check `firecrawl --status` for your concurrency limit. - Single format outputs raw content. Multiple formats (e.g., `--format markdown,links`) output JSON. diff --git a/src/commands/scrape.ts b/src/commands/scrape.ts index 96b652b..7f68b5e 100644 --- a/src/commands/scrape.ts +++ b/src/commands/scrape.ts @@ -10,7 +10,7 @@ import type { ScrapeLocation, } from '../types/scrape'; import { getClient } from '../utils/client'; -import { handleScrapeOutput } from '../utils/output'; +import { handleScrapeOutput, writeOutput } from '../utils/output'; import { getOrigin } from '../utils/url'; import { executeMap } from './map'; import { getStatus } from './status'; @@ -71,6 +71,11 @@ export async function executeScrape( formats.push('screenshot'); } + // Inject query format if --query was provided + if (options.query) { + formats.push({ type: 'query', prompt: options.query } as any); + } + // If no formats specified, default to markdown if (formats.length === 0) { formats.push('markdown'); @@ -136,6 +141,12 @@ export async function handleScrapeCommand( ): Promise { const result = await executeScrape(options); + // Query mode: output answer directly + if (options.query && result.success && result.data?.answer) { + writeOutput(result.data.answer, options.output, !!options.output); + return; + } + // Determine effective formats for output handling const effectiveFormats: ScrapeFormat[] = options.formats && options.formats.length > 0 diff --git a/src/index.ts b/src/index.ts index 2ceb242..072aa29 100644 --- a/src/index.ts +++ b/src/index.ts @@ -158,6 +158,10 @@ function createScrapeCommand(): Command { '--languages ', 'Comma-separated language codes for scraping (e.g., en,es)' ) + .option( + '-Q, --query ', + 'Ask a question about the page content (query format)' + ) .action(async (positionalArgs, options) => { // Collect URLs from positional args and --url option diff --git a/src/types/scrape.ts b/src/types/scrape.ts index 
eaa553d..345b797 100644 --- a/src/types/scrape.ts +++ b/src/types/scrape.ts @@ -55,6 +55,8 @@ export interface ScrapeOptions { maxAge?: number; /** Location settings for geo-targeted scraping */ location?: ScrapeLocation; + /** Question to ask about the page content (query format) */ + query?: string; } export interface ScrapeResult { diff --git a/src/utils/options.ts b/src/utils/options.ts index 290c665..52a3a95 100644 --- a/src/utils/options.ts +++ b/src/utils/options.ts @@ -111,5 +111,6 @@ export function parseScrapeOptions(options: any): ScrapeOptions { timing: options.timing, maxAge: options.maxAge, location, + query: options.query, }; }