From 65177405d016d9c81b46fd67baef78a4ec73d560 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 28 Jan 2026 15:13:12 +0000 Subject: [PATCH 01/12] Implement high level Rust API --- CLAUDE.md | 143 ++++++-- ggsql-jupyter/src/executor.rs | 27 +- src/api.rs | 606 ++++++++++++++++++++++++++++++++++ src/cli.rs | 106 +++--- src/doc/API.md | 520 +++++++++++++++++++++++++++++ src/execute.rs | 158 ++++++--- src/lib.rs | 23 +- src/reader/duckdb.rs | 50 +-- src/reader/mod.rs | 58 +++- src/rest.rs | 98 +++--- 10 files changed, 1560 insertions(+), 229 deletions(-) create mode 100644 src/api.rs create mode 100644 src/doc/API.md diff --git a/CLAUDE.md b/CLAUDE.md index bdc93293..b5bafd99 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -147,6 +147,79 @@ DRAW line MAPPING month AS x, total AS y --- +## Public API (`src/api.rs`) + +### Quick Start + +```rust +use ggsql::{prepare, reader::DuckDBReader, writer::VegaLiteWriter}; + +// Create a reader +let reader = DuckDBReader::from_connection_string("duckdb://memory")?; + +// Prepare the visualization +let prepared = ggsql::prepare( + "SELECT x, y FROM data VISUALISE x, y DRAW point", + &reader +)?; + +// Render to Vega-Lite JSON +let writer = VegaLiteWriter::new(); +let json = prepared.render(&writer)?; +``` + +### Core Functions + +| Function | Purpose | +| ------------------------ | ------------------------------------------------------ | +| `prepare(query, reader)` | Main entry point: parse, execute SQL, resolve mappings | +| `render(writer)` | Generate output (Vega-Lite JSON) from prepared data | +| `validate(query)` | Validate syntax + semantics, inspect query structure | + +### Key Types + +**`Validated`** - Result of `validate()`: + +- `has_visual()` - Whether query has VISUALISE clause +- `sql()` - The SQL portion (before VISUALISE) +- `visual()` - The VISUALISE portion (raw text) +- `tree()` - CST for advanced inspection +- `valid()` - Whether query is valid +- `errors()` - Validation errors +- `warnings()` - Validation 
warnings + +**`Prepared`** - Result of `prepare()`, ready for rendering: + +- `render(writer)` - Generate output (Vega-Lite JSON) +- `plot()` - Resolved plot specification +- `metadata()` - Rows, columns, layer count +- `warnings()` - Validation warnings from preparation +- `data()` / `layer_data(i)` / `stat_data(i)` - Access DataFrames +- `sql()` / `visual()` / `layer_sql(i)` / `stat_sql(i)` - Query introspection + +**`Metadata`**: + +- `rows` - Number of rows in primary data +- `columns` - Column names +- `layer_count` - Number of layers + +### Reader & Writer + +**Reader trait** (data source abstraction): + +- `execute(sql)` - Run SQL, return DataFrame +- `register(name, df)` - Register DataFrame as table +- Implementation: `DuckDBReader` + +**Writer trait** (output format abstraction): + +- `write(spec, data)` - Generate output string +- Implementation: `VegaLiteWriter` (Vega-Lite v6 JSON) + +For detailed API documentation, see [`src/doc/API.md`](src/doc/API.md). + +--- + ## Component Breakdown ### 1. Parser Module (`src/parser/`) @@ -462,7 +535,6 @@ pub fn parse_connection_string(uri: &str) -> Result { The codebase includes connection string parsing and feature flags for additional readers, but they are not yet implemented: - **PostgreSQL Reader** (`postgres://...`) - - Feature flag: `postgres` - Connection string parsing exists in `connection.rs` - Reader implementation: Not yet available @@ -800,7 +872,9 @@ When running in Positron IDE, the extension provides enhanced functionality: - Works with any narwhals-compatible DataFrame (polars, pandas, etc.) 
- LazyFrames are collected automatically - Returns native `altair.Chart` objects for easy display and customization -- Query splitting to separate SQL from VISUALISE portions +- Two-stage API: `prepare()` → `render()` +- DuckDB reader with DataFrame registration +- Query introspection (SQL, layer queries, stat queries) **Installation**: @@ -817,26 +891,41 @@ maturin develop import ggsql import polars as pl -# Split a ggSQL query into SQL and VISUALISE portions -sql, viz = ggsql.split_query(""" - SELECT date, revenue FROM sales - VISUALISE date AS x, revenue AS y - DRAW line -""") - -# Execute SQL and render to Altair chart +# Create reader and register data +reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +reader.register("data", df) + +# Prepare visualization +prepared = ggsql.prepare( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) + +# Inspect +print(f"Rows: {prepared.metadata()['rows']}") +print(f"SQL: {prepared.sql()}") -# Display or save -chart.display() # In Jupyter -chart.save("chart.html") +# Render to Vega-Lite JSON +writer = ggsql.VegaLiteWriter() +json_output = prepared.render(writer) ``` +**Classes**: + +| Class | Description | +| -------------------------- | ---------------------------- | +| `DuckDBReader(connection)` | Database reader | +| `VegaLiteWriter()` | Vega-Lite JSON output writer | +| `Validated` | Result of `validate()` | + **Functions**: -- `split_query(query: str) -> tuple[str, str]` - Split ggSQL query into SQL and VISUALISE portions -- `render_altair(df, viz, **kwargs) -> altair.Chart` - Render DataFrame with VISUALISE spec to Altair chart +| Function | Description | +| ------------------------ | ------------------------------------------------ | +| `validate(query)` | Syntax/semantic validation with query inspection | +| `prepare(query, reader)` | Full preparation pipeline | +| `render_altair(df, viz)` | 
Render DataFrame to Altair chart | **Dependencies**: @@ -920,22 +1009,23 @@ cargo build --all-features ``` Where `` can be: + - Empty: `VISUALISE` (layers must define all mappings) - Mappings: `VISUALISE x, y, date AS x` (mixed implicit/explicit) - Wildcard: `VISUALISE *` (map all columns) ### Clause Types -| Clause | Repeatable | Purpose | Example | -| -------------- | ---------- | ------------------ | ------------------------------------ | -| `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | -| `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | -| `SCALE` | ✅ Yes | Configure scales | `SCALE x SETTING type => 'date'` | -| `FACET` | ❌ No | Small multiples | `FACET WRAP region` | -| `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` | -| `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` | -| `GUIDE` | ✅ Yes | Legend/axis config | `GUIDE color SETTING position => 'right'` | -| `THEME` | ❌ No | Visual styling | `THEME minimal` | +| Clause | Repeatable | Purpose | Example | +| ----------- | ---------- | ------------------ | ----------------------------------------- | +| `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | +| `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | +| `SCALE` | ✅ Yes | Configure scales | `SCALE x SETTING type => 'date'` | +| `FACET` | ❌ No | Small multiples | `FACET WRAP region` | +| `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` | +| `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` | +| `GUIDE` | ✅ Yes | Legend/axis config | `GUIDE color SETTING position => 'right'` | +| `THEME` | ❌ No | Visual styling | `THEME minimal` | ### DRAW Clause (Layers) @@ -1201,7 +1291,6 @@ COORD cartesian SETTING xlim => [0, 100], ylim => [0, 200] LABEL x => 'Category', y => 'Count' ``` - ### LABEL Clause **Syntax**: diff --git a/ggsql-jupyter/src/executor.rs 
b/ggsql-jupyter/src/executor.rs index 1c38e3ae..0f523ebb 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -5,10 +5,9 @@ use anyhow::Result; use ggsql::{ - execute::prepare_data, - parser, + prepare, validate, reader::{DuckDBReader, Reader}, - writer::{VegaLiteWriter, Writer}, + writer::VegaLiteWriter, }; use polars::frame::DataFrame; @@ -54,11 +53,11 @@ impl QueryExecutor { pub fn execute(&self, code: &str) -> Result { tracing::debug!("Executing query: {} chars", code.len()); - // 1. Split query to check if there's a visualization - let (_sql_part, viz_part) = parser::split_query(code)?; + // 1. Validate to check if there's a visualization + let validated = validate(code)?; // 2. Check if there's a visualization - if viz_part.is_empty() { + if !validated.has_visual() { // Pure SQL query - execute directly and return DataFrame let df = self.reader.execute(code)?; tracing::info!( @@ -69,17 +68,21 @@ impl QueryExecutor { return Ok(ExecutionResult::DataFrame(df)); } - // 3. Prepare data using shared execution logic (handles layer sources) - let prepared = prepare_data(code, &self.reader)?; + // 3. Prepare data using the new API + let prepared = prepare(code, &self.reader)?; - tracing::info!("Data sources prepared: {} sources", prepared.data.len()); + tracing::info!( + "Data prepared: {} rows, {} layers", + prepared.metadata().rows, + prepared.metadata().layer_count + ); - // 4. Generate Vega-Lite spec (use first spec if multiple) - let vega_json = self.writer.write(&prepared.specs[0], &prepared.data)?; + // 4. Render to Vega-Lite + let vega_json = prepared.render(&self.writer)?; tracing::debug!("Generated Vega-Lite spec: {} chars", vega_json.len()); - // 6. Return result + // 5. Return result Ok(ExecutionResult::Visualization { spec: vega_json }) } } diff --git a/src/api.rs b/src/api.rs new file mode 100644 index 00000000..ecfbdeaf --- /dev/null +++ b/src/api.rs @@ -0,0 +1,606 @@ +//! High-level ggsql API. +//! +//! 
Two-stage API: `prepare()` → `render()`. + +use crate::naming; +use crate::parser; +use crate::plot::Plot; +use crate::{DataFrame, Result}; +use std::collections::HashMap; + +#[cfg(feature = "duckdb")] +use crate::execute::prepare_data_with_executor; +#[cfg(feature = "duckdb")] +use crate::reader::Reader; + +#[cfg(feature = "vegalite")] +use crate::writer::Writer; + +// ============================================================================ +// Core Types +// ============================================================================ + +/// Result of `prepare()`, ready for rendering. +pub struct Prepared { + /// Single resolved plot specification + plot: Plot, + /// Internal data map (global + layer-specific DataFrames) + data: HashMap, + /// Cached metadata about the prepared visualization + metadata: Metadata, + /// The main SQL query that was executed + sql: String, + /// The raw VISUALISE portion text + visual: String, + /// Per-layer filter/source queries (None = uses global data directly) + layer_sql: Vec>, + /// Per-layer stat transform queries (None = no stat transform) + stat_sql: Vec>, + /// Validation warnings from preparation + warnings: Vec, +} + +impl Prepared { + /// Create a new Prepared from PreparedData + pub(crate) fn new( + plot: Plot, + data: HashMap, + sql: String, + visual: String, + layer_sql: Vec>, + stat_sql: Vec>, + warnings: Vec, + ) -> Self { + // Compute metadata from data + let (rows, columns) = if let Some(df) = data.get(naming::GLOBAL_DATA_KEY) { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else if let Some(df) = data.values().next() { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else { + (0, Vec::new()) + }; + + let layer_count = plot.layers.len(); + let metadata = Metadata { + rows, + columns, + layer_count, + }; + + Self { + plot, + data, + metadata, + sql, + visual, + 
layer_sql, + stat_sql, + warnings, + } + } + + /// Render to output format (e.g., Vega-Lite JSON). + #[cfg(feature = "vegalite")] + pub fn render(&self, writer: &dyn Writer) -> Result { + writer.write(&self.plot, &self.data) + } + + /// Get the resolved plot specification. + pub fn plot(&self) -> &Plot { + &self.plot + } + + /// Get visualization metadata. + pub fn metadata(&self) -> &Metadata { + &self.metadata + } + + /// Number of layers. + pub fn layer_count(&self) -> usize { + self.plot.layers.len() + } + + /// Get global data (main query result). + pub fn data(&self) -> Option<&DataFrame> { + self.data.get(naming::GLOBAL_DATA_KEY) + } + + /// Get layer-specific data (from FILTER or FROM clause). + pub fn layer_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.data.get(&naming::layer_key(layer_index)) + } + + /// Get stat transform data (e.g., histogram bins, density estimates). + pub fn stat_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.layer_data(layer_index) + } + + /// Get internal data map (all DataFrames by key). + pub fn data_map(&self) -> &HashMap { + &self.data + } + + /// The main SQL query that was executed. + pub fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + pub fn visual(&self) -> &str { + &self.visual + } + + /// Layer filter/source query, or `None` if using global data. + pub fn layer_sql(&self, layer_index: usize) -> Option<&str> { + self.layer_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Stat transform query, or `None` if no stat transform. + pub fn stat_sql(&self, layer_index: usize) -> Option<&str> { + self.stat_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Validation warnings from preparation. + pub fn warnings(&self) -> &[ValidationWarning] { + &self.warnings + } +} + +/// Metadata about the prepared visualization. 
+#[derive(Debug, Clone)] +pub struct Metadata { + pub rows: usize, + pub columns: Vec, + pub layer_count: usize, +} + +/// Result of `validate()` - query inspection and validation without SQL execution. +pub struct Validated { + sql: String, + visual: String, + has_visual: bool, + tree: Option, + valid: bool, + errors: Vec, + warnings: Vec, +} + +impl Validated { + /// Whether the query contains a VISUALISE clause. + pub fn has_visual(&self) -> bool { + self.has_visual + } + + /// The SQL portion (before VISUALISE). + pub fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + pub fn visual(&self) -> &str { + &self.visual + } + + /// CST for advanced inspection. + pub fn tree(&self) -> Option<&tree_sitter::Tree> { + self.tree.as_ref() + } + + /// Whether the query is valid (no errors). + pub fn valid(&self) -> bool { + self.valid + } + + /// Validation errors. + pub fn errors(&self) -> &[ValidationError] { + &self.errors + } + + /// Validation warnings. + pub fn warnings(&self) -> &[ValidationWarning] { + &self.warnings + } +} + +/// A validation error (fatal). +#[derive(Debug, Clone)] +pub struct ValidationError { + pub message: String, + pub location: Option, +} + +/// A validation warning (non-fatal). +#[derive(Debug, Clone)] +pub struct ValidationWarning { + pub message: String, + pub location: Option, +} + +/// Location within a query string (0-based). +#[derive(Debug, Clone)] +pub struct Location { + pub line: usize, + pub column: usize, +} + +// ============================================================================ +// High-Level API Functions +// ============================================================================ + +/// Prepare a query for visualization. Main entry point for the two-stage API. 
+#[cfg(feature = "duckdb")] +pub fn prepare(query: &str, reader: &dyn Reader) -> Result { + // Run validation first to capture warnings + let validated = validate(query)?; + let warnings: Vec = validated.warnings().to_vec(); + + // Prepare data (this also validates, but we want the warnings from above) + let prepared_data = prepare_data_with_executor(query, |sql| reader.execute(sql))?; + + Ok(Prepared::new( + prepared_data.spec, + prepared_data.data, + prepared_data.sql, + prepared_data.visual, + prepared_data.layer_sql, + prepared_data.stat_sql, + warnings, + )) +} + +/// Validate query syntax and semantics without executing SQL. +pub fn validate(query: &str) -> Result { + let mut errors = Vec::new(); + let warnings = Vec::new(); + + // Split to determine if there's a viz portion + let (sql_part, viz_part) = match parser::split_query(query) { + Ok((sql, viz)) => (sql, viz), + Err(e) => { + // Split error - return as validation error + errors.push(ValidationError { + message: e.to_string(), + location: None, + }); + return Ok(Validated { + sql: String::new(), + visual: String::new(), + has_visual: false, + tree: None, + valid: false, + errors, + warnings, + }); + } + }; + + let has_visual = !viz_part.trim().is_empty(); + + // Parse the full query to get the CST + let tree = if has_visual { + let mut ts_parser = tree_sitter::Parser::new(); + ts_parser + .set_language(&tree_sitter_ggsql::language()) + .map_err(|e| { + crate::GgsqlError::InternalError(format!("Failed to set language: {}", e)) + })?; + ts_parser.parse(query, None) + } else { + None + }; + + // If no visualization, just syntax check passed + if !has_visual { + return Ok(Validated { + sql: sql_part, + visual: viz_part, + has_visual, + tree, + valid: true, + errors, + warnings, + }); + } + + // Parse to get plot specifications for validation + let plots = match parser::parse_query(query) { + Ok(p) => p, + Err(e) => { + errors.push(ValidationError { + message: e.to_string(), + location: None, + }); + 
return Ok(Validated { + sql: sql_part, + visual: viz_part, + has_visual, + tree, + valid: false, + errors, + warnings, + }); + } + }; + + // Validate the single plot (we only support one VISUALISE statement) + if let Some(plot) = plots.first() { + // Validate each layer + for (layer_idx, layer) in plot.layers.iter().enumerate() { + let context = format!("Layer {}", layer_idx + 1); + + // Check required aesthetics + // Note: Without schema data, we can only check if mappings exist, + // not if the columns are valid. We skip this check for wildcards. + if !layer.mappings.wildcard { + if let Err(e) = layer.validate_required_aesthetics() { + errors.push(ValidationError { + message: format!("{}: {}", context, e), + location: None, + }); + } + } + + // Validate SETTING parameters + if let Err(e) = layer.validate_settings() { + errors.push(ValidationError { + message: format!("{}: {}", context, e), + location: None, + }); + } + } + } + + Ok(Validated { + sql: sql_part, + visual: viz_part, + has_visual, + tree, + valid: errors.is_empty(), + errors, + warnings, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validate_with_visual() { + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y").unwrap(); + assert!(validated.has_visual()); + assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y"); + assert!(validated.visual().starts_with("VISUALISE")); + assert!(validated.tree().is_some()); + assert!(validated.valid()); + } + + #[test] + fn test_validate_without_visual() { + let validated = validate("SELECT 1 as x, 2 as y").unwrap(); + assert!(!validated.has_visual()); + assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y"); + assert!(validated.visual().is_empty()); + assert!(validated.tree().is_none()); + assert!(validated.valid()); + } + + #[test] + fn test_validate_valid_query() { + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y").unwrap(); + assert!( + 
validated.valid(), + "Expected valid query: {:?}", + validated.errors() + ); + assert!(validated.errors().is_empty()); + } + + #[test] + fn test_validate_missing_required_aesthetic() { + // Point requires x and y, but we only provide x + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x").unwrap(); + assert!(!validated.valid()); + assert!(!validated.errors().is_empty()); + assert!(validated.errors()[0].message.contains("y")); + } + + #[test] + fn test_validate_syntax_error() { + let validated = validate("SELECT 1 VISUALISE DRAW invalidgeom").unwrap(); + assert!(!validated.valid()); + assert!(!validated.errors().is_empty()); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn test_prepare_and_render() { + use crate::reader::DuckDBReader; + use crate::writer::VegaLiteWriter; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let prepared = prepare("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point", &reader).unwrap(); + + assert_eq!(prepared.plot().layers.len(), 1); + assert_eq!(prepared.metadata().layer_count, 1); + assert!(prepared.data().is_some()); + + let writer = VegaLiteWriter::new(); + let result = prepared.render(&writer).unwrap(); + assert!(result.contains("point")); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn test_prepare_metadata() { + use crate::reader::DuckDBReader; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let prepared = prepare( + "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) VISUALISE x, y DRAW point", + &reader, + ) + .unwrap(); + + let metadata = prepared.metadata(); + assert_eq!(metadata.rows, 3); + assert_eq!(metadata.columns.len(), 2); + assert!(metadata.columns.contains(&"x".to_string())); + assert!(metadata.columns.contains(&"y".to_string())); + assert_eq!(metadata.layer_count, 1); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn 
test_prepare_with_cte() { + use crate::reader::DuckDBReader; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + WITH data AS ( + SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) + ) + SELECT * FROM data + VISUALISE x, y DRAW point + "#; + + let prepared = prepare(query, &reader).unwrap(); + + assert_eq!(prepared.plot().layers.len(), 1); + assert!(prepared.data().is_some()); + let df = prepared.data().unwrap(); + assert_eq!(df.height(), 2); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn test_render_multi_layer() { + use crate::reader::DuckDBReader; + use crate::writer::VegaLiteWriter; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) + VISUALISE + DRAW point MAPPING x AS x, y AS y + DRAW line MAPPING x AS x, y AS y + "#; + + let prepared = prepare(query, &reader).unwrap(); + let writer = VegaLiteWriter::new(); + let result = prepared.render(&writer).unwrap(); + + assert!(result.contains("layer")); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn test_register_and_query() { + use crate::reader::{DuckDBReader, Reader}; + use crate::writer::VegaLiteWriter; + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let df = df! 
{ + "x" => [1i32, 2, 3], + "y" => [10i32, 20, 30], + } + .unwrap(); + + reader.register("my_data", df).unwrap(); + + let query = "SELECT * FROM my_data VISUALISE x, y DRAW point"; + let prepared = prepare(query, &reader).unwrap(); + + assert_eq!(prepared.metadata().rows, 3); + assert!(prepared.metadata().columns.contains(&"x".to_string())); + + let writer = VegaLiteWriter::new(); + let result = prepared.render(&writer).unwrap(); + assert!(result.contains("point")); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn test_register_and_join() { + use crate::reader::{DuckDBReader, Reader}; + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let sales = df! { + "id" => [1i32, 2, 3], + "amount" => [100i32, 200, 300], + "product_id" => [1i32, 1, 2], + } + .unwrap(); + + let products = df! { + "id" => [1i32, 2], + "name" => ["Widget", "Gadget"], + } + .unwrap(); + + reader.register("sales", sales).unwrap(); + reader.register("products", products).unwrap(); + + let query = r#" + SELECT s.id, s.amount, p.name + FROM sales s + JOIN products p ON s.product_id = p.id + VISUALISE id AS x, amount AS y + DRAW bar + "#; + + let prepared = prepare(query, &reader).unwrap(); + assert_eq!(prepared.metadata().rows, 3); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_prepare_no_viz_fails() { + use crate::reader::DuckDBReader; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = "SELECT 1 as x, 2 as y"; + + let result = prepare(query, &reader); + assert!(result.is_err()); + } + + #[test] + fn test_validate_sql_and_visual_content() { + let query = "SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y DRAW line MAPPING x AS x, y AS y"; + let validated = validate(query).unwrap(); + + assert!(validated.has_visual()); + assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y"); + assert!(validated.visual().contains("DRAW point")); + 
assert!(validated.visual().contains("DRAW line")); + assert!(validated.valid()); + } + + #[test] + fn test_validate_sql_only() { + let query = "SELECT 1 as x, 2 as y"; + let validated = validate(query).unwrap(); + + // SQL-only queries should be valid (just syntax check) + assert!(validated.valid()); + assert!(validated.errors().is_empty()); + } +} diff --git a/src/cli.rs b/src/cli.rs index 80dec31e..1844ff01 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -9,13 +9,13 @@ use ggsql::parser::extract_sql; use ggsql::{parser, VERSION}; use std::path::PathBuf; -#[cfg(feature = "duckdb")] -use ggsql::execute::prepare_data; #[cfg(feature = "duckdb")] use ggsql::reader::{DuckDBReader, Reader}; +#[cfg(feature = "duckdb")] +use ggsql::{prepare, validate}; #[cfg(feature = "vegalite")] -use ggsql::writer::{VegaLiteWriter, Writer}; +use ggsql::writer::VegaLiteWriter; #[derive(Parser)] #[command(name = "ggsql")] @@ -169,15 +169,16 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option v, + Err(e) => { + eprintln!("Failed to validate query: {}", e); + std::process::exit(1); + } + }; - if viz_part.is_empty() { + if !validated.has_visual() { if verbose { eprintln!("Visualisation is empty. 
Printing table instead."); } @@ -185,28 +186,27 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option p, + Err(e) => { + eprintln!("Failed to prepare data: {}", e); + std::process::exit(1); + } + }; if verbose { - eprintln!("\nData sources loaded:"); - for (key, df) in &prepared.data { - eprintln!(" {}: {:?}", key, df.shape()); - } - eprintln!("\nParsed {} visualisation spec(s)", prepared.specs.len()); + let metadata = prepared.metadata(); + eprintln!("\nData prepared:"); + eprintln!(" Rows: {}", metadata.rows); + eprintln!(" Columns: {}", metadata.columns.join(", ")); + eprintln!(" Layers: {}", metadata.layer_count); } - let first_spec = prepared.specs.first(); - if first_spec.is_none() { + if prepared.plot().layers.is_empty() { eprintln!("No visualization specifications found"); std::process::exit(1); } - let first_spec = first_spec.unwrap(); // Check writer if writer != "vegalite" { @@ -220,14 +220,15 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option r, + Err(e) => { + eprintln!("Failed to generate Vega-Lite output: {}", e); + std::process::exit(1); + } + }; if output.is_none() { // Empty output location, write to stdout @@ -237,7 +238,7 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option { if verbose { eprintln!("\nVega-Lite JSON written to: {}", output.display()); @@ -291,13 +292,38 @@ fn cmd_parse(query: String, format: String) { } } -fn cmd_validate(query: String, reader: Option) { - println!("Validating query: {}", query); - if let Some(reader) = reader { - println!("Reader: {}", reader); +fn cmd_validate(query: String, _reader: Option) { + #[cfg(feature = "duckdb")] + { + match validate(&query) { + Ok(validated) if validated.valid() => { + println!("✓ Query syntax is valid"); + } + Ok(validated) => { + println!("✗ Validation errors:"); + for err in validated.errors() { + println!(" - {}", err.message); + } + if !validated.warnings().is_empty() { + println!("\nWarnings:"); + for 
warning in validated.warnings() { + println!(" - {}", warning.message); + } + } + std::process::exit(1); + } + Err(e) => { + eprintln!("Error during validation: {}", e); + std::process::exit(1); + } + } + } + + #[cfg(not(feature = "duckdb"))] + { + eprintln!("Validation requires the duckdb feature"); + std::process::exit(1); } - // TODO: Implement validation logic - println!("Validation not yet implemented"); } // Prints a CSV-like output to stdout with aligned columns diff --git a/src/doc/API.md b/src/doc/API.md new file mode 100644 index 00000000..5ccd70e6 --- /dev/null +++ b/src/doc/API.md @@ -0,0 +1,520 @@ +# ggsql API Reference + +This document provides a comprehensive reference for the ggsql public API. + +## Overview + +- **Stage 1: `prepare()`** - Parse query, execute SQL, resolve mappings, prepare data +- **Stage 2: `render()`** - Generate output (Vega-Lite JSON, etc.) + +### API Functions + +| Function | Use Case | +| ------------ | ---------------------------------------------------- | +| `prepare()` | Main entry point - full visualization pipeline | +| `render()` | Generate output from prepared data | +| `validate()` | Validate syntax + semantics, inspect query structure | + +--- + +## Core Functions + +### `prepare` + +```rust +pub fn prepare(query: &str, reader: &dyn Reader) -> Result +``` + +Prepare a ggsql query for visualization. This is the main entry point for the two-stage API. + +**What happens during preparation:** + +1. Parses the query (SQL + VISUALISE portions) +2. Executes the main SQL query using the provided reader +3. Resolves wildcards (`VISUALISE *`) against actual columns +4. Merges global mappings into each layer +5. Executes layer-specific queries (filters, stats) +6. Injects constant values as synthetic columns +7. 
Computes aesthetic labels from column names + +**Arguments:** + +- `query` - The full ggsql query string +- `reader` - A reader implementing the `Reader` trait + +**Returns:** + +- `Ok(Prepared)` - Ready for rendering +- `Err(GgsqlError)` - Parse, validation, or execution error + +**Example:** + +```rust +use ggsql::{prepare, reader::DuckDBReader, writer::VegaLiteWriter}; + +let reader = DuckDBReader::from_connection_string("duckdb://memory")?; +let prepared = prepare( + "SELECT x, y FROM data VISUALISE x, y DRAW point", + &reader +)?; + +// Access metadata +println!("Rows: {}", prepared.metadata().rows); +println!("Columns: {:?}", prepared.metadata().columns); + +// Render to Vega-Lite +let writer = VegaLiteWriter::new(); +let result = prepared.render(&writer)?; +``` + +**Error Conditions:** + +- Parse error in SQL or VISUALISE portion +- SQL execution failure +- Missing required aesthetics +- Invalid geom type +- Multiple VISUALISE statements (not yet supported) + +--- + +### `validate` + +```rust +pub fn validate(query: &str) -> Result +``` + +Validate query syntax and semantics without executing SQL. This function combines query parsing and validation into a single operation. + +**What is validated:** + +- Syntax (parsing) +- Required aesthetics for each geom type +- Valid scale types (linear, log10, date, etc.) 
+- Valid coord types and properties +- Valid geom types +- Valid aesthetic names +- Valid SETTING parameters + +**Arguments:** + +- `query` - The full ggsql query string (SQL + VISUALISE) + +**Returns:** + +- `Ok(Validated)` - Validation results with query inspection methods +- `Err(GgsqlError)` - Internal error + +**Example:** + +```rust +use ggsql::validate; + +let validated = validate("SELECT x, y FROM data VISUALISE x, y DRAW point")?; + +// Check validity +if !validated.valid() { + for error in validated.errors() { + eprintln!("Error: {}", error.message); + } +} + +// Inspect query structure +if validated.has_visual() { + println!("SQL: {}", validated.sql()); + println!("Visual: {}", validated.visual()); +} +``` + +**Notes:** + +- Does not execute SQL +- Does not resolve wildcards or global mappings +- Cannot validate column existence (requires data) +- Returns all errors, not just the first one +- CST available via `tree()` for advanced inspection + +--- + +## Type Reference + +### `Validated` + +Result of validating a query (syntax + semantics, no SQL execution). 
+ +```rust +pub struct Validated { + // All fields private +} +``` + +**Methods:** + +| Method | Signature | Description | +| ------------ | -------------------------------------------- | ---------------------------------- | +| `has_visual` | `fn has_visual(&self) -> bool` | Whether query contains VISUALISE | +| `sql` | `fn sql(&self) -> &str` | The SQL portion (before VISUALISE) | +| `visual` | `fn visual(&self) -> &str` | The VISUALISE portion (raw text) | +| `tree` | `fn tree(&self) -> Option<&Tree>` | CST for advanced inspection | +| `valid` | `fn valid(&self) -> bool` | Whether query is valid | +| `errors` | `fn errors(&self) -> &[ValidationError]` | Validation errors | +| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings | + +**Example:** + +```rust +let validated = ggsql::validate("SELECT 1 as x VISUALISE DRAW point MAPPING x AS x, y AS y")?; + +// Check validity +if !validated.valid() { + for error in validated.errors() { + eprintln!("Error: {}", error.message); + } +} + +// Inspect query structure +assert!(validated.has_visual()); +assert_eq!(validated.sql(), "SELECT 1 as x"); +assert!(validated.visual().starts_with("VISUALISE")); + +// CST access for advanced use cases +if let Some(tree) = validated.tree() { + println!("Root node: {}", tree.root_node().kind()); +} +``` + +--- + +### `Prepared` + +Result of preparing a visualization, ready for rendering. 
+ +#### Rendering Methods + +| Method | Signature | Description | +| -------- | --------------------------------------------------------- | ----------------------- | +| `render` | `fn render(&self, writer: &dyn Writer) -> Result` | Render to output format | + +**Example:** + +```rust +let writer = VegaLiteWriter::new(); +let json = prepared.render(&writer)?; +println!("{}", json); +``` + +#### Plot Access Methods + +| Method | Signature | Description | +| ------------- | -------------------------------- | ------------------------------- | +| `plot` | `fn plot(&self) -> &Plot` | Get resolved plot specification | +| `layer_count` | `fn layer_count(&self) -> usize` | Number of layers | + +**Example:** + +```rust +println!("Layers: {}", prepared.layer_count()); + +let plot = prepared.plot(); +for (i, layer) in plot.layers.iter().enumerate() { + println!("Layer {}: {:?}", i, layer.geom); +} +``` + +#### Metadata Methods + +| Method | Signature | Description | +| ---------- | --------------------------------- | -------------------------- | +| `metadata` | `fn metadata(&self) -> &Metadata` | Get visualization metadata | + +**Example:** + +```rust +let meta = prepared.metadata(); +println!("Rows: {}", meta.rows); +println!("Columns: {:?}", meta.columns); +println!("Layer count: {}", meta.layer_count); +``` + +#### Data Access Methods + +| Method | Signature | Description | +| ------------ | ------------------------------------------------------ | ------------------------------- | +| `data` | `fn data(&self) -> Option<&DataFrame>` | Global data (main query result) | +| `layer_data` | `fn layer_data(&self, i: usize) -> Option<&DataFrame>` | Layer-specific data | +| `stat_data` | `fn stat_data(&self, i: usize) -> Option<&DataFrame>` | Stat transform results | +| `data_map` | `fn data_map(&self) -> &HashMap` | Raw data map access | + +**Example:** + +```rust +// Global data +if let Some(df) = prepared.data() { + println!("Global data: {} rows", df.height()); +} + +// 
Layer-specific data (from FILTER or FROM clause) +if let Some(df) = prepared.layer_data(0) { + println!("Layer 0 has filtered data: {} rows", df.height()); +} + +// Stat data (histogram bins, density estimates, etc.) +if let Some(df) = prepared.stat_data(1) { + println!("Layer 1 stat data: {} rows", df.height()); +} +``` + +#### Query Introspection Methods + +| Method | Signature | Description | +| ----------- | ----------------------------------------------- | -------------------------------- | +| `sql` | `fn sql(&self) -> &str` | Main SQL query that was executed | +| `visual` | `fn visual(&self) -> &str` | Raw VISUALISE text | +| `layer_sql` | `fn layer_sql(&self, i: usize) -> Option<&str>` | Layer filter/source query | +| `stat_sql` | `fn stat_sql(&self, i: usize) -> Option<&str>` | Stat transform query | + +**Example:** + +```rust +// Main query +println!("SQL: {}", prepared.sql()); +println!("Visual: {}", prepared.visual()); + +// Per-layer queries +for i in 0..prepared.layer_count() { + if let Some(sql) = prepared.layer_sql(i) { + println!("Layer {} filter: {}", i, sql); + } + if let Some(sql) = prepared.stat_sql(i) { + println!("Layer {} stat: {}", i, sql); + } +} +``` + +#### Warnings Method + +| Method | Signature | Description | +| ---------- | -------------------------------------------- | ------------------------------------ | +| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings from preparation | + +**Example:** + +```rust +let prepared = ggsql::prepare(query, &reader)?; + +// Check for warnings +if !prepared.warnings().is_empty() { + for warning in prepared.warnings() { + eprintln!("Warning: {}", warning.message); + } +} + +// Continue with rendering +let json = prepared.render(&writer)?; +``` + +--- + +### `Metadata` + +Information about the prepared visualization. 
+ +```rust +pub struct Metadata { + pub rows: usize, // Rows in primary data source + pub columns: Vec, // Column names + pub layer_count: usize, // Number of layers in the plot +} +``` + +--- + +### `ValidationError` + +A validation error (fatal issue). + +```rust +pub struct ValidationError { + pub message: String, + pub location: Option, +} +``` + +--- + +### `ValidationWarning` + +A validation warning (non-fatal issue). + +```rust +pub struct ValidationWarning { + pub message: String, + pub location: Option, +} +``` + +--- + +### `Location` + +Location within a query string. + +```rust +pub struct Location { + pub line: usize, // 0-based line number + pub column: usize, // 0-based column number +} +``` + +--- + +## Reader Trait & Implementations + +### `Reader` Trait + +```rust +pub trait Reader { + /// Execute a SQL query and return a DataFrame + fn execute(&self, sql: &str) -> Result; + + /// Register a DataFrame as a queryable table + fn register(&mut self, name: &str, df: DataFrame) -> Result<()>; + + /// Check if this reader supports DataFrame registration + fn supports_register(&self) -> bool; +} +``` + +--- + +## Writer Trait & Implementations + +### `Writer` Trait + +```rust +pub trait Writer { + /// Render a plot specification to output format + fn write(&self, spec: &Plot, data: &HashMap) -> Result; + + /// Get the file extension for this writer's output + fn file_extension(&self) -> &str; +} +``` + +## Python Bindings + +The Python bindings provide the same two-stage API with Pythonic conventions. + +### Classes + +#### `DuckDBReader` + +```python +class DuckDBReader: + def __init__(self, connection: str) -> None: + """Create a DuckDB reader. + + Args: + connection: Connection string (e.g., "duckdb://memory") + """ + + def register(self, name: str, df: Any) -> None: + """Register a DataFrame as a queryable table. 
+ + Args: + name: Table name + df: Polars DataFrame or narwhals-compatible DataFrame + """ + + def execute(self, sql: str) -> polars.DataFrame: + """Execute SQL and return a Polars DataFrame.""" + + def supports_register(self) -> bool: + """Check if registration is supported.""" +``` + +#### `VegaLiteWriter` + +```python +class VegaLiteWriter: + def __init__(self) -> None: + """Create a Vega-Lite writer.""" +``` + +#### `Validated` + +```python +class Validated: + def has_visual(self) -> bool: + """Check if query has VISUALISE clause.""" + + def sql(self) -> str: + """Get the SQL portion.""" + + def visual(self) -> str: + """Get the VISUALISE portion.""" + + def valid(self) -> bool: + """Check if query is valid.""" + + def errors(self) -> list[dict]: + """Get validation errors as list of dicts with 'message', 'location'.""" + + def warnings(self) -> list[dict]: + """Get validation warnings as list of dicts with 'message', 'location'.""" + + # Note: tree() not exposed (tree-sitter nodes are Rust-only) +``` + +#### `Prepared` + +```python +class Prepared: + def render(self, writer: VegaLiteWriter) -> str: + """Render to output format.""" + + def metadata(self) -> dict: + """Get metadata as dict with keys: rows, columns, layer_count.""" + + def sql(self) -> str: + """Get the main SQL query.""" + + def visual(self) -> str: + """Get the VISUALISE text.""" + + def layer_count(self) -> int: + """Get number of layers.""" + + def warnings(self) -> list[dict]: + """Get validation warnings as list of dicts with 'message', 'location'.""" + + def data(self) -> polars.DataFrame | None: + """Get global data.""" + + def layer_data(self, index: int) -> polars.DataFrame | None: + """Get layer-specific data.""" + + def stat_data(self, index: int) -> polars.DataFrame | None: + """Get stat transform data.""" + + def layer_sql(self, index: int) -> str | None: + """Get layer filter query.""" + + def stat_sql(self, index: int) -> str | None: + """Get stat transform query.""" +``` + +### 
Functions + +```python +def validate(query: str) -> Validated: + """Validate query syntax and semantics. + + Returns Validated object with query inspection and validation methods. + """ + +def prepare(query: str, reader: DuckDBReader) -> Prepared: + """Prepare a query for visualization.""" + +def split_query(query: str) -> tuple[str, str]: + """Split query into (sql, visualise) portions.""" +``` diff --git a/src/execute.rs b/src/execute.rs index 33116ceb..e79bb196 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -531,6 +531,23 @@ fn transform_global_sql(sql: &str, materialized_ctes: &HashSet) -> Optio } } +/// Result of building a layer query +/// +/// Contains information about the queries executed for a layer, +/// distinguishing between base filter queries and stat transform queries. +#[derive(Debug, Default)] +pub struct LayerQueryResult { + /// The final query to execute (if any) + /// None means layer uses global data directly + pub query: Option, + /// The base query before stat transform (filter/source only) + /// None if layer uses global data directly without filter + pub layer_sql: Option, + /// The stat transform query (if a stat transform was applied) + /// None if no stat transform was needed + pub stat_sql: Option, +} + /// Build a layer query handling all source types /// /// Handles: @@ -544,8 +561,7 @@ fn transform_global_sql(sql: &str, materialized_ctes: &HashSet) -> Optio /// (e.g., histogram binning, bar counting). 
/// /// Returns: -/// - `Ok(Some(query))` - execute this query and store result -/// - `Ok(None)` - layer uses `__global__` directly (no source, no filter, no constants, no stat transform) +/// - `Ok(LayerQueryResult)` with information about queries executed /// - `Err(...)` - validation error (e.g., filter without global data) /// /// Note: This function takes `&mut Layer` because stat transforms may add new aesthetic mappings @@ -559,7 +575,7 @@ fn build_layer_query( facet: Option<&Facet>, constants: &[(String, LiteralValue)], execute_query: &F, -) -> Result> +) -> Result where F: Fn(&str) -> Result, { @@ -603,7 +619,7 @@ where naming::global_table() } else { // No source, no filter, no constants, no stat transform - use __global__ data directly - return Ok(None); + return Ok(LayerQueryResult::default()); } } }; @@ -635,6 +651,9 @@ where query = format!("{} WHERE {}", query, f); } + // Save the base query (with filter) before stat transform + let base_query = query.clone(); + // Apply statistical transformation (after filter, uses combined group_by) // Returns StatResult::Identity for no transformation, StatResult::Transformed for transformed query let stat_result = layer.geom.apply_stat_transform( @@ -692,11 +711,15 @@ where } // Use the transformed query - let mut final_query = transformed_query; + let mut final_query = transformed_query.clone(); if let Some(o) = order_by { final_query = format!("{} ORDER BY {}", final_query, o); } - Ok(Some(final_query)) + Ok(LayerQueryResult { + query: Some(final_query), + layer_sql: Some(base_query), + stat_sql: Some(transformed_query), + }) } StatResult::Identity => { // Identity - no stat transformation @@ -707,14 +730,18 @@ where && order_by.is_none() && constants.is_empty() { - Ok(None) + Ok(LayerQueryResult::default()) } else { // Layer has filter, order_by, or constants - still need the query let mut final_query = query; if let Some(o) = order_by { final_query = format!("{} ORDER BY {}", final_query, o); } - 
Ok(Some(final_query)) + Ok(LayerQueryResult { + query: Some(final_query.clone()), + layer_sql: Some(final_query), + stat_sql: None, + }) } } } @@ -860,8 +887,16 @@ fn split_color_aesthetic(layers: &mut Vec) { pub struct PreparedData { /// Data map with global and layer-specific DataFrames pub data: HashMap, - /// Parsed and resolved visualization specifications - pub specs: Vec, + /// Parsed and resolved visualization specification + pub spec: Plot, + /// The main SQL query that was executed + pub sql: String, + /// The raw VISUALISE portion text + pub visual: String, + /// Per-layer filter/source queries (None = uses global data directly) + pub layer_sql: Vec>, + /// Per-layer stat transform queries (None = no stat transform) + pub stat_sql: Vec>, } /// Build data map from a query using a custom query executor function @@ -888,6 +923,13 @@ where )); } + // TODO: Support multiple VISUALISE statements in future + if specs.len() > 1 { + return Err(GgsqlError::ValidationError( + "Multiple VISUALISE statements are not yet supported. Please use a single VISUALISE statement.".to_string(), + )); + } + // Check if we have any visualization content if viz_part.trim().is_empty() { return Err(GgsqlError::ValidationError( @@ -1054,6 +1096,10 @@ where // - Layer with no source, no filter, no order_by → returns None (use global directly, constants already injected) let facet = specs[0].facet.clone(); + // Track layer and stat queries for introspection + let mut layer_sql_vec: Vec> = Vec::new(); + let mut stat_sql_vec: Vec> = Vec::new(); + for (idx, layer) in specs[0].layers.iter_mut().enumerate() { // For layers using global data without filter, constants are already in global data // (injected with layer-indexed names). For other layers, extract constants for injection. 
@@ -1064,7 +1110,7 @@ where }; // Get mutable reference to layer for stat transform to update aesthetics - if let Some(layer_query) = build_layer_query( + let query_result = build_layer_query( layer, &layer_schemas[idx], &materialized_ctes, @@ -1073,7 +1119,14 @@ where facet.as_ref(), &constants, &execute_query, - )? { + )?; + + // Store query information for introspection + layer_sql_vec.push(query_result.layer_sql); + stat_sql_vec.push(query_result.stat_sql); + + // Execute the query if one was generated + if let Some(layer_query) = query_result.query { let df = execute_query(&layer_query).map_err(|e| { GgsqlError::ReaderError(format!( "Failed to fetch data for layer {}: {}", @@ -1105,20 +1158,24 @@ where )); } - // Post-process specs: replace literals with column references and compute labels - for spec in &mut specs { - // Replace literal aesthetic values with column references to synthetic constant columns - replace_literals_with_columns(spec); - // Compute aesthetic labels (uses first non-constant column, respects user-specified labels) - spec.compute_aesthetic_labels(); - // Divide 'color' over 'stroke' and 'fill'. This needs to happens after - // literals have associated columns. - split_color_aesthetic(&mut spec.layers); - } + let mut spec = specs.into_iter().next().unwrap(); + + // Post-process spec: replace literals with column references and compute labels + // Replace literal aesthetic values with column references to synthetic constant columns + replace_literals_with_columns(&mut spec); + // Compute aesthetic labels (uses first non-constant column, respects user-specified labels) + spec.compute_aesthetic_labels(); + // Divide 'color' over 'stroke' and 'fill'. This needs to happens after + // literals have associated columns. 
+ split_color_aesthetic(&mut spec.layers); Ok(PreparedData { data: data_map, - specs, + spec, + sql: sql_part, + visual: viz_part, + layer_sql: layer_sql_vec, + stat_sql: stat_sql_vec, }) } @@ -1146,7 +1203,7 @@ mod tests { let result = prepare_data(query, &reader).unwrap(); assert!(result.data.contains_key(naming::GLOBAL_DATA_KEY)); - assert_eq!(result.specs.len(), 1); + assert_eq!(result.spec.layers.len(), 1); } #[cfg(feature = "duckdb")] @@ -1373,7 +1430,8 @@ mod tests { ); // Should use temp table name with session UUID - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_cte_sales_")); assert!(query.ends_with("__")); assert!(query.contains(naming::session_id())); @@ -1401,7 +1459,8 @@ mod tests { ); // Should use temp table name with session UUID and filter - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("__ggsql_cte_sales_")); assert!(query.ends_with(" WHERE year = 2024")); assert!(query.contains(naming::session_id())); @@ -1427,8 +1486,9 @@ mod tests { ); // Should use table name directly + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table".to_string()) ); } @@ -1453,8 +1513,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table WHERE value > 100".to_string()) ); } @@ -1479,8 +1540,9 @@ mod tests { ); // File paths should be wrapped in single quotes + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM 'data/sales.csv'".to_string()) ); } @@ -1505,8 +1567,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM 'data.parquet' WHERE x > 
10".to_string()) ); } @@ -1531,7 +1594,8 @@ mod tests { ); // Should query global table with session UUID and filter - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_global_")); assert!(query.ends_with("__ WHERE category = 'A'")); assert!(query.contains(naming::session_id())); @@ -1555,8 +1619,11 @@ mod tests { &mock_execute, ); - // Should return None - layer uses __global__ directly - assert_eq!(result.unwrap(), None); + // Should return empty result - layer uses __global__ directly + let query_result = result.unwrap(); + assert!(query_result.query.is_none()); + assert!(query_result.layer_sql.is_none()); + assert!(query_result.stat_sql.is_none()); } #[test] @@ -1605,8 +1672,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table ORDER BY date ASC".to_string()) ); } @@ -1632,8 +1700,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some( "SELECT * FROM some_table WHERE year = 2024 ORDER BY date DESC, value ASC" .to_string() @@ -1661,7 +1730,8 @@ mod tests { ); // Should query global table with session UUID and order_by - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_global_")); assert!(query.ends_with("__ ORDER BY x ASC")); assert!(query.contains(naming::session_id())); @@ -1697,7 +1767,8 @@ mod tests { ); // Should inject constants as columns - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("SELECT *")); assert!(query.contains("'value' AS __ggsql_const_color__")); assert!(query.contains("'value2' AS __ggsql_const_size__")); @@ -1727,7 +1798,8 @@ mod tests { 
&mock_execute, ); - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("FROM __ggsql_global_")); assert!(query.contains(naming::session_id())); assert!(query.contains("'value' AS __ggsql_const_fill__")); @@ -2259,8 +2331,8 @@ mod tests { assert_eq!(global_df.height(), 3); // Verify spec has x and y aesthetics merged into layer - assert_eq!(result.specs.len(), 1); - let layer = &result.specs[0].layers[0]; + assert_eq!(result.spec.layers.len(), 1); + let layer = &result.spec.layers[0]; assert!( layer.mappings.contains_key("x"), "Layer should have x from global mapping" @@ -2721,7 +2793,7 @@ mod tests { let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; assert!(aes.contains_key("stroke")); assert!(aes.contains_key("fill")); @@ -2739,7 +2811,7 @@ mod tests { "#; let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; let stroke = aes.get("stroke").unwrap(); assert_eq!(stroke.column_name().unwrap(), "island"); @@ -2754,7 +2826,7 @@ mod tests { "#; let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; let stroke = aes.get("stroke").unwrap(); assert_eq!(stroke.column_name().unwrap(), "__ggsql_const_color_0__"); diff --git a/src/lib.rs b/src/lib.rs index 9eec2d49..cf13aaa5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,10 +27,10 @@ ggsql splits queries at the `VISUALISE` boundary: ## Core Components +- [`api`] - High-level API (prepare, parse, validate) - [`parser`] - Query parsing and AST generation -- [`engine`] - Core execution engine -- [`readers`] - Data source abstraction layer -- [`writers`] - Output format abstraction layer +- [`reader`] 
- Data source abstraction layer +- [`writer`] - Output format abstraction layer */ pub mod naming; @@ -46,14 +46,17 @@ pub mod writer; #[cfg(feature = "duckdb")] pub mod execute; +pub mod api; + // Re-export key types for convenience pub use plot::{ AestheticValue, DataSource, Facet, Geom, Layer, Mappings, Plot, Scale, SqlExpression, }; -// Future modules - not yet implemented -// #[cfg(feature = "engine")] -// pub mod engine; +// Re-export API types and functions +pub use api::{ + prepare, validate, Location, Metadata, Prepared, Validated, ValidationError, ValidationWarning, +}; // DataFrame abstraction (wraps Polars) pub use polars::prelude::DataFrame; @@ -547,7 +550,7 @@ mod integration_tests { !prepared.data.contains_key(&naming::layer_key(1)), "Layer 1 should use global data, not layer-specific data" ); - assert_eq!(prepared.specs.len(), 1); + assert_eq!(prepared.spec.layers.len(), 2); // Verify global data contains layer-indexed constant columns let global_df = prepared.data.get(naming::GLOBAL_DATA_KEY).unwrap(); @@ -565,7 +568,7 @@ mod integration_tests { // Generate Vega-Lite let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Verify we have two layers @@ -685,7 +688,7 @@ mod integration_tests { // Generate Vega-Lite and verify faceting structure let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Should have facet structure (row and column) @@ -750,7 +753,7 @@ mod integration_tests { // Generate Vega-Lite and verify it works let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let 
json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Both layers should have color field-mapped to their indexed constant columns diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 8ee13ebb..113ceea7 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -6,6 +6,7 @@ use crate::reader::data::init_builtin_data; use crate::reader::{connection::ConnectionInfo, Reader}; use crate::{DataFrame, GgsqlError, Result}; use duckdb::{params, Connection}; +use polars::prelude::*; /// DuckDB database reader /// @@ -413,30 +414,13 @@ impl Reader for DuckDBReader { Ok(df) } - fn validate_columns(&self, sql: &str, columns: &[String]) -> Result<()> { - // Execute the query to get the schema - let df = self.execute(sql)?; - - // Get column names from the DataFrame - let schema_columns: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - - // Check if all required columns exist - for col in columns { - if !schema_columns.contains(col) { - return Err(GgsqlError::ValidationError(format!( - "Column '{}' not found in query result. 
Available columns: {}", - col, - schema_columns.join(", ") - ))); - } - } - + fn register(&mut self, _name: &str, _df: DataFrame) -> Result<()> { Ok(()) } + + fn supports_register(&self) -> bool { + false + } } #[cfg(test)] @@ -481,28 +465,6 @@ mod tests { assert_eq!(df.get_column_names(), vec!["x", "y"]); } - #[test] - fn test_validate_columns_success() { - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let sql = "SELECT 1 as x, 2 as y"; - - let result = reader.validate_columns(sql, &["x".to_string(), "y".to_string()]); - assert!(result.is_ok()); - } - - #[test] - fn test_validate_columns_missing() { - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let sql = "SELECT 1 as x, 2 as y"; - - let result = reader.validate_columns(sql, &["z".to_string()]); - assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("Column 'z' not found")); - } - #[test] fn test_invalid_sql() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 7f3f403a..762c0319 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -7,7 +7,7 @@ //! //! All readers implement the `Reader` trait, which provides: //! - SQL query execution → DataFrame conversion -//! - Column validation for query introspection +//! - Optional DataFrame registration for queryable tables //! - Connection management and error handling //! //! # Example @@ -15,11 +15,17 @@ //! ```rust,ignore //! use ggsql::reader::{Reader, DuckDBReader}; //! +//! // Basic usage //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; //! let df = reader.execute("SELECT * FROM table")?; +//! +//! // With DataFrame registration +//! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; +//! reader.register("my_table", some_dataframe)?; +//! let result = reader.execute("SELECT * FROM my_table")?; //! 
``` -use crate::{DataFrame, Result}; +use crate::{DataFrame, GgsqlError, Result}; #[cfg(feature = "duckdb")] pub mod duckdb; @@ -35,6 +41,20 @@ pub use duckdb::DuckDBReader; /// /// Readers execute SQL queries and return Polars DataFrames. /// They provide a uniform interface for different database backends. +/// +/// # DataFrame Registration +/// +/// Some readers support registering DataFrames as queryable tables using +/// the [`register`](Reader::register) method. This allows you to query +/// in-memory DataFrames with SQL, join them with other tables, etc. +/// +/// ```rust,ignore +/// // Register a DataFrame (takes ownership) +/// reader.register("sales", sales_df)?; +/// +/// // Now you can query it +/// let result = reader.execute("SELECT * FROM sales WHERE amount > 100")?; +/// ``` pub trait Reader { /// Execute a SQL query and return the result as a DataFrame /// @@ -54,18 +74,38 @@ pub trait Reader { /// - The table or columns don't exist fn execute(&self, sql: &str) -> Result; - /// Validate that specified columns exist in a query result + /// Register a DataFrame as a queryable table (takes ownership) /// - /// This is useful for checking column names before visualization - /// to provide better error messages. + /// After registration, the DataFrame can be queried by name in SQL: + /// ```sql + /// SELECT * FROM WHERE ... + /// ``` /// /// # Arguments /// - /// * `sql` - The SQL query to introspect - /// * `columns` - Column names to validate + /// * `name` - The table name to register under + /// * `df` - The DataFrame to register (ownership is transferred) + /// + /// # Returns + /// + /// `Ok(())` on success, error if registration fails or isn't supported. + /// + /// # Default Implementation + /// + /// Returns an error by default. Override for readers that support registration. 
+ fn register(&mut self, name: &str, _df: DataFrame) -> Result<()> { + Err(GgsqlError::ReaderError(format!( + "This reader does not support DataFrame registration for table '{}'", + name + ))) + } + + /// Check if this reader supports DataFrame registration /// /// # Returns /// - /// Ok(()) if all columns exist, otherwise an error - fn validate_columns(&self, sql: &str, columns: &[String]) -> Result<()>; + /// `true` if [`register`](Reader::register) is implemented, `false` otherwise. + fn supports_register(&self) -> bool { + false + } } diff --git a/src/rest.rs b/src/rest.rs index 88fb61a6..45d21963 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -34,12 +34,12 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use ggsql::{parser, GgsqlError, VERSION}; #[cfg(feature = "duckdb")] -use ggsql::execute::prepare_data_with_executor; +use ggsql::reader::DuckDBReader; #[cfg(feature = "duckdb")] -use ggsql::reader::{DuckDBReader, Reader}; +use ggsql::{parse, prepare}; #[cfg(feature = "vegalite")] -use ggsql::writer::{VegaLiteWriter, Writer}; +use ggsql::writer::VegaLiteWriter; /// CLI arguments for the REST API server #[derive(Parser)] @@ -442,61 +442,38 @@ async fn query_handler( #[cfg(feature = "duckdb")] if request.reader.starts_with("duckdb://") { - // Create query executor that handles shared state vs new reader - let execute_query = |sql: &str| -> Result { - if request.reader == "duckdb://memory" && state.reader.is_some() { - let reader_mutex = state.reader.as_ref().unwrap(); - let reader = reader_mutex.lock().map_err(|e| { - GgsqlError::InternalError(format!("Failed to lock reader: {}", e)) - })?; - reader.execute(sql) - } else { - let reader = DuckDBReader::from_connection_string(&request.reader)?; - reader.execute(sql) - } - }; - - // Prepare data using shared execution logic - let prepared = prepare_data_with_executor(&request.query, execute_query)?; - - // Get metadata from available data - let (rows, columns) = if let Some(df) = 
prepared.data.get("__global__") { - let (r, _) = df.shape(); - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (r, cols) + // Use shared reader or create new one + let prepared = if request.reader == "duckdb://memory" && state.reader.is_some() { + let reader_mutex = state.reader.as_ref().unwrap(); + let reader = reader_mutex.lock().map_err(|e| { + GgsqlError::InternalError(format!("Failed to lock reader: {}", e)) + })?; + prepare(&request.query, &*reader)? } else { - // Use first available data for metadata - let df = prepared.data.values().next().unwrap(); - let (r, _) = df.shape(); - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (r, cols) + let reader = DuckDBReader::from_connection_string(&request.reader)?; + prepare(&request.query, &reader)? }; - let first_spec = &prepared.specs[0]; + // Get metadata + let metadata = prepared.metadata(); // Generate visualization output using writer #[cfg(feature = "vegalite")] if request.writer == "vegalite" { let writer = VegaLiteWriter::new(); - let json_output = writer.write(first_spec, &prepared.data)?; + let json_output = prepared.render(&writer)?; let spec_value: serde_json::Value = serde_json::from_str(&json_output) .map_err(|e| GgsqlError::WriterError(format!("Failed to parse JSON: {}", e)))?; + let plot = prepared.plot(); + let result = QueryResult { spec: spec_value, metadata: QueryMetadata { - rows, - columns, - global_mappings: format!("{:?}", first_spec.global_mappings), - layers: first_spec.layers.len(), + rows: metadata.rows, + columns: metadata.columns.clone(), + global_mappings: format!("{:?}", plot.global_mappings), + layers: plot.layers.len(), }, }; @@ -525,6 +502,39 @@ async fn query_handler( } /// POST /api/v1/parse - Parse a ggsql query +#[cfg(feature = "duckdb")] +async fn parse_handler( + Json(request): Json, +) -> Result>, ApiErrorResponse> { + info!("Parsing query: {} chars", request.query.len()); + + // Split 
query (for backwards compatibility) + let (sql_part, viz_part) = parser::split_query(&request.query)?; + + // Parse using new API + let parsed = parse(&request.query)?; + + // Convert specs to JSON + let specs_json: Vec = parsed + .plots() + .iter() + .map(|spec| serde_json::to_value(spec).unwrap_or(serde_json::Value::Null)) + .collect(); + + let result = ParseResult { + sql_portion: sql_part, + viz_portion: viz_part, + specs: specs_json, + }; + + Ok(Json(ApiSuccess { + status: "success".to_string(), + data: result, + })) +} + +/// POST /api/v1/parse - Parse a ggsql query +#[cfg(not(feature = "duckdb"))] async fn parse_handler( Json(request): Json, ) -> Result>, ApiErrorResponse> { From b760081fb550a10a527fc53a8da3610bc9121892 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 29 Jan 2026 09:48:41 +0000 Subject: [PATCH 02/12] Add arrow table registration for duckdb --- Cargo.toml | 3 +- src/Cargo.toml | 3 +- src/reader/duckdb.rs | 210 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 212 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cd5b672c..0339c104 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,8 @@ csscolorparser = "0.8.1" polars = { version = "0.52", features = ["lazy", "sql", "ipc"] } # Readers -duckdb = { version = "1.1", features = ["bundled"] } +duckdb = { version = "1.1", features = ["bundled", "vtab-arrow"] } +arrow = { version = "56", default-features = false, features = ["ipc"] } postgres = "0.19" sqlx = { version = "0.8", features = ["postgres", "runtime-tokio-rustls"] } rusqlite = "0.32" diff --git a/src/Cargo.toml b/src/Cargo.toml index 75cbd1f6..dd60aac1 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -32,6 +32,7 @@ polars.workspace = true # Readers duckdb = { workspace = true, optional = true } +arrow = { workspace = true, optional = true } postgres = { workspace = true, optional = true } sqlx = { workspace = true, optional = true } rusqlite = { workspace = true, optional = true } @@ -69,7 +70,7 
@@ proptest.workspace = true
 
 [features]
 default = ["duckdb", "sqlite", "vegalite"]
-duckdb = ["dep:duckdb"]
+duckdb = ["dep:duckdb", "dep:arrow"]
 postgres = ["dep:postgres"]
 sqlite = ["dep:rusqlite"]
 vegalite = []
diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs
index 113ceea7..b3cf46d7 100644
--- a/src/reader/duckdb.rs
+++ b/src/reader/duckdb.rs
@@ -5,8 +5,12 @@ use crate::reader::data::init_builtin_data;
 use crate::reader::{connection::ConnectionInfo, Reader};
 use crate::{DataFrame, GgsqlError, Result};
 
+use arrow::ipc::reader::FileReader;
+use duckdb::vtab::arrow::{arrow_recordbatch_to_query_params, ArrowVTab};
 use duckdb::{params, Connection};
+use polars::io::SerWriter;
 use polars::prelude::*;
+use std::io::Cursor;
 
 /// DuckDB database reader
 ///
@@ -65,6 +69,12 @@ impl DuckDBReader {
             }
         };
 
+        // Register Arrow virtual table function for DataFrame registration
+        conn.register_table_function::<ArrowVTab>("arrow")
+            .map_err(|e| {
+                GgsqlError::ReaderError(format!("Failed to register arrow function: {}", e))
+            })?;
+
         Ok(Self { conn })
     }
 
@@ -74,6 +84,81 @@ impl DuckDBReader {
     pub fn connection(&self) -> &Connection {
         &self.conn
     }
+
+    /// Check if a table exists in the database
+    fn table_exists(&self, name: &str) -> Result<bool> {
+        let sql = "SELECT COUNT(*) FROM information_schema.tables WHERE table_name = ?";
+        let count: i64 = self
+            .conn
+            .query_row(sql, [name], |row| row.get(0))
+            .unwrap_or(0);
+        Ok(count > 0)
+    }
+}
+
+/// Validate a table name
+fn validate_table_name(name: &str) -> Result<()> {
+    if name.is_empty() {
+        return Err(GgsqlError::ReaderError("Table name cannot be empty".into()));
+    }
+
+    // Reject characters that could break double-quoted identifiers or cause issues
+    let forbidden = ['"', '\0', '\n', '\r'];
+    for ch in forbidden {
+        if name.contains(ch) {
+            return Err(GgsqlError::ReaderError(format!(
+                "Table name '{}' contains invalid character '{}'",
+                name,
+                ch.escape_default()
+            )));
+        }
+    }
+
+    // Reasonable length limit
+    if name.len() > 128 {
+ return Err(GgsqlError::ReaderError(format!( + "Table name '{}' exceeds maximum length of 128 characters", + name + ))); + } + + Ok(()) +} + +/// Convert a Polars DataFrame to DuckDB Arrow query parameters via IPC serialization +fn dataframe_to_arrow_params(df: DataFrame) -> Result<[usize; 2]> { + // Serialize DataFrame to IPC format + let mut buffer = Vec::new(); + { + let mut writer = IpcWriter::new(&mut buffer); + writer.finish(&mut df.clone()).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to serialize DataFrame: {}", e)) + })?; + } + + // Read IPC into arrow crate's RecordBatch + let cursor = Cursor::new(buffer); + let reader = FileReader::try_new(cursor, None) + .map_err(|e| GgsqlError::ReaderError(format!("Failed to read IPC: {}", e)))?; + + // Collect all batches and concatenate if needed + let batches: Vec<_> = reader.filter_map(|r| r.ok()).collect(); + + if batches.is_empty() { + return Err(GgsqlError::ReaderError( + "DataFrame produced no Arrow batches".into(), + )); + } + + // For single batch, use directly; for multiple, concatenate + let rb = if batches.len() == 1 { + batches.into_iter().next().unwrap() + } else { + arrow::compute::concat_batches(&batches[0].schema(), &batches) + .map_err(|e| GgsqlError::ReaderError(format!("Failed to concat batches: {}", e)))? + }; + + Ok(arrow_recordbatch_to_query_params(rb)) } /// Helper struct for building typed columns from rows @@ -414,12 +499,35 @@ impl Reader for DuckDBReader { Ok(df) } - fn register(&mut self, _name: &str, _df: DataFrame) -> Result<()> { + fn register(&mut self, name: &str, df: DataFrame) -> Result<()> { + // Validate table name + validate_table_name(name)?; + + // Check for duplicates + if self.table_exists(name)? 
{ + return Err(GgsqlError::ReaderError(format!( + "Table '{}' already exists", + name + ))); + } + + // Convert DataFrame to Arrow query params + let params = dataframe_to_arrow_params(df)?; + + // Create temp table from Arrow data + let sql = format!( + "CREATE TEMP TABLE \"{}\" AS SELECT * FROM arrow(?, ?)", + name + ); + self.conn.execute(&sql, params).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to register table '{}': {}", name, e)) + })?; + Ok(()) } fn supports_register(&self) -> bool { - false + true } } @@ -496,4 +604,102 @@ mod tests { assert_eq!(df.shape(), (2, 2)); assert_eq!(df.get_column_names(), vec!["region", "total"]); } + + #[test] + fn test_register_and_query() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create a DataFrame + let df = DataFrame::new(vec![ + Column::new("x".into(), vec![1i32, 2, 3]), + Column::new("y".into(), vec![10i32, 20, 30]), + ]) + .unwrap(); + + // Register the DataFrame + reader.register("my_table", df).unwrap(); + + // Query the registered table + let result = reader.execute("SELECT * FROM my_table ORDER BY x").unwrap(); + assert_eq!(result.shape(), (3, 2)); + assert_eq!(result.get_column_names(), vec!["x", "y"]); + } + + #[test] + fn test_register_duplicate_name_errors() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let df1 = DataFrame::new(vec![Column::new("a".into(), vec![1i32])]).unwrap(); + let df2 = DataFrame::new(vec![Column::new("b".into(), vec![2i32])]).unwrap(); + + // First registration should succeed + reader.register("dup_table", df1).unwrap(); + + // Second registration with same name should fail + let result = reader.register("dup_table", df2); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("already exists")); + } + + #[test] + fn test_register_invalid_table_names() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let 
df = DataFrame::new(vec![Column::new("a".into(), vec![1i32])]).unwrap(); + + // Empty name + let result = reader.register("", df.clone()); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("cannot be empty")); + + // Name with double quote + let result = reader.register("bad\"name", df.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid character")); + + // Name with null byte + let result = reader.register("bad\0name", df.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid character")); + + // Name too long + let long_name = "a".repeat(200); + let result = reader.register(&long_name, df); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("exceeds maximum length")); + } + + #[test] + fn test_supports_register() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + assert!(reader.supports_register()); + } + + #[test] + fn test_register_empty_dataframe() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create an empty DataFrame with schema + let df = DataFrame::new(vec![ + Column::new("x".into(), Vec::::new()), + Column::new("y".into(), Vec::::new()), + ]) + .unwrap(); + + reader.register("empty_table", df).unwrap(); + + // Query should return empty result with correct schema + let result = reader.execute("SELECT * FROM empty_table").unwrap(); + assert_eq!(result.shape(), (0, 2)); + assert_eq!(result.get_column_names(), vec!["x", "y"]); + } } From 8181fea5cb1518411d36ee9af25e9b2b2190a805 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 29 Jan 2026 12:55:53 +0000 Subject: [PATCH 03/12] Rework Python bindings --- CLAUDE.md | 108 +++- README.md | 43 +- ggsql-python/Cargo.toml | 2 +- ggsql-python/README.md | 249 +++++++-- ggsql-python/python/ggsql/__init__.py | 38 +- ggsql-python/src/lib.rs | 737 
+++++++++++++++++++++++--- ggsql-python/tests/test_ggsql.py | 414 ++++++++++++++- src/doc/API.md | 3 - src/rest.rs | 27 +- 9 files changed, 1440 insertions(+), 181 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index b5bafd99..aa093221 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -864,17 +864,18 @@ When running in Positron IDE, the extension provides enhanced functionality: ### 8. Python Bindings (`ggsql-python/`) -**Responsibility**: Python bindings for ggsql, enabling Python users to render Altair charts using ggsql's VISUALISE syntax. +**Responsibility**: Python bindings for ggsql, enabling Python users to create visualizations using ggsql's VISUALISE syntax. **Features**: - PyO3-based Rust bindings compiled to a native Python extension +- Two-stage API mirroring the Rust API: `prepare()` → `render()` +- DuckDB reader with DataFrame registration +- Custom Python reader support: any object with `execute(sql) -> DataFrame` method - Works with any narwhals-compatible DataFrame (polars, pandas, etc.) 
- LazyFrames are collected automatically -- Returns native `altair.Chart` objects for easy display and customization -- Two-stage API: `prepare()` → `render()` -- DuckDB reader with DataFrame registration -- Query introspection (SQL, layer queries, stat queries) +- Returns native `altair.Chart` objects via `render_altair()` convenience function +- Query validation and introspection (SQL, layer queries, stat queries) **Installation**: @@ -902,8 +903,9 @@ prepared = ggsql.prepare( reader ) -# Inspect +# Inspect metadata print(f"Rows: {prepared.metadata()['rows']}") +print(f"Columns: {prepared.metadata()['columns']}") print(f"SQL: {prepared.sql()}") # Render to Vega-Lite JSON @@ -911,21 +913,95 @@ writer = ggsql.VegaLiteWriter() json_output = prepared.render(writer) ``` +**Convenience Function** (`render_altair`): + +For quick visualizations without explicit reader setup: + +```python +import ggsql +import polars as pl + +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + +# Render DataFrame to Altair chart in one call +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +chart.display() # In Jupyter +``` + +**Query Validation**: + +```python +# Validate syntax without execution +validated = ggsql.validate( + "SELECT x, y FROM data VISUALISE x, y DRAW point" +) +print(f"Valid: {validated.valid()}") +print(f"Has VISUALISE: {validated.has_visual()}") +print(f"SQL portion: {validated.sql()}") +print(f"Errors: {validated.errors()}") +``` + **Classes**: -| Class | Description | -| -------------------------- | ---------------------------- | -| `DuckDBReader(connection)` | Database reader | -| `VegaLiteWriter()` | Vega-Lite JSON output writer | -| `Validated` | Result of `validate()` | +| Class | Description | +| -------------------------- | -------------------------------------------- | +| `DuckDBReader(connection)` | Database reader with DataFrame registration | +| `VegaLiteWriter()` | Vega-Lite JSON output writer | +| `Validated` | Result of `validate()` 
with query inspection | +| `Prepared` | Result of `prepare()`, ready for rendering | **Functions**: -| Function | Description | -| ------------------------ | ------------------------------------------------ | -| `validate(query)` | Syntax/semantic validation with query inspection | -| `prepare(query, reader)` | Full preparation pipeline | -| `render_altair(df, viz)` | Render DataFrame to Altair chart | +| Function | Description | +| ------------------------ | ------------------------------------------------- | +| `validate(query)` | Syntax/semantic validation with query inspection | +| `prepare(query, reader)` | Full preparation (reader can be native or custom) | +| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | + +**Prepared Object Methods**: + +| Method | Description | +| ---------------- | -------------------------------------------- | +| `render(writer)` | Generate Vega-Lite JSON | +| `metadata()` | Get rows, columns, layer_count | +| `sql()` | Get the SQL portion | +| `visual()` | Get the VISUALISE portion | +| `layer_count()` | Number of DRAW layers | +| `data()` | Get the main DataFrame | +| `layer_data(i)` | Get layer-specific DataFrame (if filtered) | +| `stat_data(i)` | Get stat transform DataFrame (if applicable) | +| `layer_sql(i)` | Get layer filter SQL (if applicable) | +| `stat_sql(i)` | Get stat transform SQL (if applicable) | +| `warnings()` | Get validation warnings | + +**Custom Python Readers**: + +Any Python object with an `execute(sql: str) -> polars.DataFrame` method can be used as a reader: + +```python +import ggsql +import polars as pl + +class MyReader: + """Custom reader that returns static data.""" + + def execute(self, sql: str) -> pl.DataFrame: + return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + +# Use custom reader with prepare() +reader = MyReader() +prepared = ggsql.prepare( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) +``` + +Optional methods for custom readers: + +- 
`supports_register() -> bool` - Return `True` if registration is supported +- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a table + +Native readers (e.g., `DuckDBReader`) use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. **Dependencies**: diff --git a/README.md b/README.md index 43d70847..46ff2b50 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ THEME minimal - ✅ REST API server (`ggsql-rest`) with CORS support - ✅ Jupyter kernel (`ggsql-jupyter`) with inline Vega-Lite visualizations - ✅ VS Code extension (`ggsql-vscode`) with syntax highlighting and Positron IDE integration +- ✅ Python bindings (`ggsql-python`) with Altair chart output **Planned:** @@ -93,7 +94,9 @@ ggsql/ │ ├── ggsql-jupyter/ # Jupyter kernel │ -└── ggsql-vscode/ # VS Code extension +├── ggsql-vscode/ # VS Code extension +│ +└── ggsql-python/ # Python bindings ``` ## Development Workflow @@ -297,6 +300,44 @@ When running in Positron IDE, the extension provides additional features: - **Language runtime registration** for executing ggsql queries directly within Positron - **Plot pane integration** - visualizations are automatically routed to Positron's Plots pane +## Python Bindings + +The `ggsql-python` package provides Python bindings for using ggsql with DataFrames. 
+ +### Installation + +```bash +cd ggsql-python +pip install maturin +maturin develop +``` + +### Usage + +```python +import ggsql +import polars as pl + +# Simple usage with render_altair +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +chart.display() + +# Two-stage API for full control +reader = ggsql.DuckDBReader("duckdb://memory") +reader.register("data", df) + +prepared = ggsql.prepare( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) + +writer = ggsql.VegaLiteWriter() +json_output = prepared.render(writer) +``` + +See the [ggsql-python README](ggsql-python/README.md) for complete API documentation. + ## CLI ### Installation diff --git a/ggsql-python/Cargo.toml b/ggsql-python/Cargo.toml index 62229afd..8f73e6f8 100644 --- a/ggsql-python/Cargo.toml +++ b/ggsql-python/Cargo.toml @@ -12,7 +12,7 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.26", features = ["extension-module"] } polars = { workspace = true, features = ["ipc"] } -ggsql = { path = "../src", default-features = false, features = ["vegalite"] } +ggsql = { path = "../src", default-features = false, features = ["duckdb", "vegalite"] } [features] default = [] diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 0d97bbee..3ea2c603 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -2,7 +2,7 @@ Python bindings for [ggsql](https://github.com/georgestagg/ggsql), a SQL extension for declarative data visualization. -This package provides a thin wrapper around the Rust `ggsql` crate, enabling Python users to render Altair charts from DataFrames using ggsql's VISUALISE syntax. +This package provides Python bindings to the Rust `ggsql` crate, enabling Python users to create visualizations using ggsql's VISUALISE syntax with native Altair chart output. 
## Installation @@ -15,6 +15,7 @@ pip install ggsql ### From source Building from source requires: + - Rust toolchain (install via [rustup](https://rustup.rs/)) - Python 3.10+ - [maturin](https://github.com/PyO3/maturin) @@ -39,35 +40,183 @@ maturin build --release pip install target/wheels/ggsql-*.whl ``` -## Usage +## Quick Start + +### Simple Usage with `render_altair` + +For quick visualizations, use the `render_altair` convenience function: ```python import ggsql -import duckdb +import polars as pl + +# Create a DataFrame +df = pl.DataFrame({ + "x": [1, 2, 3, 4, 5], + "y": [10, 20, 15, 30, 25], + "category": ["A", "B", "A", "B", "A"] +}) + +# Render to Altair chart +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + +# Display or save +chart.display() # In Jupyter +chart.save("chart.html") # Save to file +``` + +### Two-Stage API -# Split a ggSQL query into SQL and VISUALISE portions -sql, viz = ggsql.split_query(""" - SELECT date, revenue, region FROM sales - WHERE year = 2024 +For more control, use the two-stage API with explicit reader and writer: + +```python +import ggsql +import polars as pl + +# 1. Create a DuckDB reader +reader = ggsql.DuckDBReader("duckdb://memory") + +# 2. Register your DataFrame as a table +df = pl.DataFrame({ + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "revenue": [100, 150, 120], + "region": ["North", "South", "North"] +}) +reader.register("sales", df) + +# 3. Prepare the visualization +prepared = ggsql.prepare( + """ + SELECT * FROM sales VISUALISE date AS x, revenue AS y, region AS color DRAW line - LABEL title => 'Sales Trends' -""") + LABEL title => 'Sales by Region' + """, + reader +) + +# 4. Inspect metadata +print(f"Rows: {prepared.metadata()['rows']}") +print(f"Columns: {prepared.metadata()['columns']}") +print(f"Layers: {prepared.layer_count()}") + +# 5. 
Inspect SQL/VISUALISE portions and data +print(f"SQL: {prepared.sql()}") +print(f"Visual: {prepared.visual()}") +print(prepared.data()) # Returns polars DataFrame + +# 6. Render to Vega-Lite JSON +writer = ggsql.VegaLiteWriter() +vegalite_json = prepared.render(writer) +print(vegalite_json) +``` -# Execute SQL with DuckDB -df = duckdb.sql(sql).pl() +## API Reference -# Render DataFrame + VISUALISE spec to Altair chart -chart = ggsql.render_altair(df, viz) +### Classes -# Display or save the chart -chart.display() # In Jupyter -chart.save("chart.html") # Save to file +#### `DuckDBReader(connection: str)` + +Database reader that executes SQL and manages DataFrames. + +```python +reader = ggsql.DuckDBReader("duckdb://memory") # In-memory database +reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database ``` -### Mapping styles +**Methods:** + +- `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table +- `execute(sql: str) -> polars.DataFrame` - Execute SQL and return results +- `supports_register() -> bool` - Check if registration is supported -The `render_altair()` function supports various mapping styles: +#### `VegaLiteWriter()` + +Writer that generates Vega-Lite v6 JSON specifications. + +```python +writer = ggsql.VegaLiteWriter() +json_output = prepared.render(writer) +``` + +#### `Validated` + +Result of `validate()` containing query analysis without SQL execution. + +**Methods:** + +- `valid() -> bool` - Whether the query is syntactically and semantically valid +- `has_visual() -> bool` - Whether the query contains a VISUALISE clause +- `sql() -> str` - The SQL portion (before VISUALISE) +- `visual() -> str` - The VISUALISE portion +- `errors() -> list[dict]` - Validation errors with messages and locations +- `warnings() -> list[dict]` - Validation warnings + +#### `Prepared` + +Result of `prepare()`, containing resolved visualization ready for rendering. 
+ +**Methods:** + +- `render(writer: VegaLiteWriter) -> str` - Generate Vega-Lite JSON +- `metadata() -> dict` - Get `{"rows": int, "columns": list[str], "layer_count": int}` +- `sql() -> str` - The executed SQL query +- `visual() -> str` - The VISUALISE clause +- `layer_count() -> int` - Number of DRAW layers +- `data() -> polars.DataFrame | None` - Main query result DataFrame +- `layer_data(index: int) -> polars.DataFrame | None` - Layer-specific data (if filtered) +- `stat_data(index: int) -> polars.DataFrame | None` - Statistical transform data +- `layer_sql(index: int) -> str | None` - Layer filter SQL +- `stat_sql(index: int) -> str | None` - Stat transform SQL +- `warnings() -> list[dict]` - Validation warnings from preparation + +### Functions + +#### `validate(query: str) -> Validated` + +Validate query syntax and semantics without executing SQL. + +```python +validated = ggsql.validate("SELECT x, y FROM data VISUALISE x, y DRAW point") +if validated.valid(): + print("Query is valid!") +else: + for error in validated.errors(): + print(f"Error: {error['message']}") +``` + +#### `prepare(query: str, reader: DuckDBReader) -> Prepared` + +Parse, validate, and execute a ggsql query. + +```python +reader = ggsql.DuckDBReader("duckdb://memory") +prepared = ggsql.prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +``` + +#### `render_altair(df, viz: str, **kwargs) -> altair.Chart` + +Convenience function to render a DataFrame with a VISUALISE spec to an Altair chart. + +**Parameters:** + +- `df` - Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. +- `viz` - The VISUALISE specification string +- `**kwargs` - Additional arguments passed to `altair.Chart.from_json()` (e.g., `validate=False`) + +**Returns:** An Altair chart object (Chart, LayerChart, FacetChart, etc.) 
+ +```python +import polars as pl +import ggsql + +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +``` + +## Examples + +### Mapping Styles ```python df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30], "category": ["A", "B", "A"]}) @@ -85,41 +234,65 @@ ggsql.render_altair(df, "VISUALISE * DRAW point") ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") ``` -## API +### Custom Readers -### `split_query(query: str) -> tuple[str, str]` +You can use any Python object with an `execute(sql: str) -> polars.DataFrame` method as a reader. This enables integration with any data source. -Split a ggSQL query into SQL and VISUALISE portions. +```python +import ggsql +import polars as pl + +class CSVReader: + """Custom reader that loads data from CSV files.""" + + def __init__(self, data_dir: str): + self.data_dir = data_dir + + def execute(self, sql: str) -> pl.DataFrame: + # Simple implementation: ignore SQL and return fixed data + # A real implementation would parse SQL to determine which file to load + return pl.read_csv(f"{self.data_dir}/data.csv") + +# Use custom reader with prepare() +reader = CSVReader("/path/to/data") +prepared = ggsql.prepare( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) +writer = ggsql.VegaLiteWriter() +json_output = prepared.render(writer) +``` -**Parameters:** -- `query`: The full ggSQL query string +**Optional methods** for custom readers: -**Returns:** -- Tuple of `(sql_portion, visualise_portion)` +- `supports_register() -> bool` - Return `True` if your reader supports DataFrame registration +- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a queryable table -**Raises:** -- `ValueError`: If the query cannot be parsed +```python +class AdvancedReader: + """Custom reader with registration support.""" -### `render_altair(df, viz, **kwargs) -> altair.Chart` + def __init__(self): + self.tables = {} -Render a 
DataFrame with a VISUALISE specification to an Altair chart. + def execute(self, sql: str) -> pl.DataFrame: + # Your SQL execution logic here + ... -**Parameters:** -- `df`: Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. -- `viz`: The VISUALISE specification string -- `**kwargs`: Additional keyword arguments passed to `altair.Chart.from_json()`. Common options include `validate=False` to skip schema validation. + def supports_register(self) -> bool: + return True -**Returns:** -- An `altair.Chart` object that can be displayed, saved, or further customized + def register(self, name: str, df: pl.DataFrame) -> None: + self.tables[name] = df +``` -**Raises:** -- `ValueError`: If the spec cannot be parsed or rendered +Native readers like `DuckDBReader` use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. ## Development ### Keeping in sync with the monorepo -The `ggsql-python` package is part of the [ggsql monorepo](https://github.com/georgestagg/ggsql) and depends on the Rust `ggsql` crate via a path dependency. When the Rust crate is updated, you may need to rebuild: +The `ggsql-python` package is part of the [ggsql monorepo](https://github.com/posit-dev/ggsql) and depends on the Rust `ggsql` crate via a path dependency. 
When the Rust crate is updated, you may need to rebuild: ```bash cd ggsql-python diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py index dbbb5afb..06b5f720 100644 --- a/ggsql-python/python/ggsql/__init__.py +++ b/ggsql-python/python/ggsql/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations -import io import json from typing import Any, Union @@ -8,9 +7,26 @@ import narwhals as nw from narwhals.typing import IntoFrame -from ggsql._ggsql import split_query, render as _render +from ggsql._ggsql import ( + DuckDBReader, + VegaLiteWriter, + Validated, + Prepared, + validate, + prepare, +) -__all__ = ["split_query", "render_altair"] +__all__ = [ + # Classes + "DuckDBReader", + "VegaLiteWriter", + "Validated", + "Prepared", + # Functions + "validate", + "prepare", + "render_altair", +] __version__ = "0.1.0" # Type alias for any Altair chart type @@ -56,13 +72,19 @@ def render_altair( if not isinstance(df, nw.DataFrame): raise TypeError("df must be a narwhals DataFrame or compatible type") - # Convert to polars and serialize to IPC bytes pl_df = df.to_polars() - buffer = io.BytesIO() - pl_df.write_ipc(buffer) - ipc_bytes = buffer.getvalue() - vegalite_json = _render(ipc_bytes, viz, writer="vegalite") + # Create temporary reader and register data + reader = DuckDBReader("duckdb://memory") + reader.register("__data__", pl_df) + + # Build full query: SELECT * FROM __data__ + VISUALISE clause + query = f"SELECT * FROM __data__ {viz}" + + # Prepare and render + prepared = prepare(query, reader) + writer = VegaLiteWriter() + vegalite_json = prepared.render(writer) # Parse to determine the correct Altair class spec = json.loads(vegalite_json) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 7c472c35..b9d6496d 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -3,100 +3,693 @@ #![allow(clippy::useless_conversion)] use pyo3::prelude::*; -use pyo3::types::PyBytes; -use 
std::collections::{HashMap, HashSet}; +use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::naming::GLOBAL_DATA_KEY; -use ggsql::parser::parse_query; -use ggsql::writer::{VegaLiteWriter, Writer}; -use ggsql::AestheticValue; +use ggsql::api::{prepare as rust_prepare, validate as rust_validate, Prepared, ValidationWarning}; +use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; +use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; +use ggsql::GgsqlError; -use polars::prelude::{DataFrame, IpcReader, SerReader}; +use polars::prelude::{DataFrame, IpcReader, IpcWriter, SerReader, SerWriter}; -#[pyfunction] -fn split_query(query: &str) -> PyResult<(String, String)> { - ggsql::parser::split_query(query) - .map_err(|e| PyErr::new::(e.to_string())) +// ============================================================================ +// Helper Functions for DataFrame Conversion +// ============================================================================ + +/// Convert a Polars DataFrame to a Python polars DataFrame via IPC serialization +fn polars_to_py(py: Python<'_>, df: &DataFrame) -> PyResult> { + let mut buffer = Vec::new(); + IpcWriter::new(&mut buffer) + .finish(&mut df.clone()) + .map_err(|e| { + PyErr::new::(format!( + "Failed to serialize DataFrame: {}", + e + )) + })?; + + let io = py.import("io")?; + let bytes_io = io.call_method1("BytesIO", (PyBytes::new(py, &buffer),))?; + + let polars = py.import("polars")?; + polars + .call_method1("read_ipc", (bytes_io,)) + .map(|obj| obj.into()) } -#[pyfunction] -#[pyo3(signature = (ipc_bytes, viz, *, writer = "vegalite"))] -fn render(ipc_bytes: &Bound<'_, PyBytes>, viz: &str, writer: &str) -> PyResult { - // Read DataFrame from IPC bytes - let bytes = ipc_bytes.as_bytes(); - let cursor = Cursor::new(bytes); - let df: DataFrame = IpcReader::new(cursor).finish().map_err(|e| { - PyErr::new::(format!("Failed to read IPC data: {}", e)) - })?; +/// Convert a Python polars DataFrame to a Rust 
Polars DataFrame via IPC serialization +fn py_to_polars(py: Python<'_>, df: &Bound<'_, PyAny>) -> PyResult { + let io = py.import("io")?; + let bytes_io = io.call_method0("BytesIO")?; + df.call_method1("write_ipc", (&bytes_io,))?; + bytes_io.call_method1("seek", (0i64,))?; - // Parse the visualization spec - // The viz string should be a complete VISUALISE statement - let specs = parse_query(viz) - .map_err(|e| PyErr::new::(e.to_string()))?; + let ipc_bytes: Vec = bytes_io.call_method0("read")?.extract()?; + let cursor = Cursor::new(ipc_bytes); + + IpcReader::new(cursor).finish().map_err(|e| { + PyErr::new::(format!("Failed to read DataFrame: {}", e)) + }) +} - let mut spec = specs.into_iter().next().ok_or_else(|| { - PyErr::new::("No visualization spec found") +/// Convert a Python polars DataFrame to Rust DataFrame - for use inside Python::attach +/// This variant is used by PyReaderBridge where we already hold the GIL. +fn py_to_polars_inner(df: &Bound<'_, PyAny>) -> PyResult { + let py = df.py(); + let io = py.import("io")?; + let bytes_io = io.call_method0("BytesIO")?; + + df.call_method1("write_ipc", (&bytes_io,)).map_err(|_| { + PyErr::new::( + "Reader.execute() must return a polars.DataFrame", + ) })?; - // Get column names for resolving global mappings - let column_names: HashSet<&str> = df.get_column_names().iter().map(|s| s.as_str()).collect(); - - // Merge global mappings into layers and handle wildcards - for layer in &mut spec.layers { - let supported_aesthetics = layer.geom.aesthetics().supported; - - // 1. 
Merge explicit global aesthetics into layer (layer takes precedence) - for (aesthetic, value) in &spec.global_mappings.aesthetics { - if supported_aesthetics.contains(&aesthetic.as_str()) { - layer - .mappings - .aesthetics - .entry(aesthetic.clone()) - .or_insert_with(|| value.clone()); - } + bytes_io.call_method1("seek", (0i64,))?; + let ipc_bytes: Vec = bytes_io.call_method0("read")?.extract()?; + let cursor = Cursor::new(ipc_bytes); + + IpcReader::new(cursor).finish().map_err(|e| { + PyErr::new::(format!( + "Failed to deserialize DataFrame: {}", + e + )) + }) +} + +/// Convert validation errors/warnings to a Python list of dicts +fn errors_to_pylist( + py: Python<'_>, + items: &[(String, Option<(usize, usize)>)], +) -> PyResult> { + let list = PyList::empty(py); + for (message, location) in items { + let dict = PyDict::new(py); + dict.set_item("message", message)?; + if let Some((line, column)) = location { + let loc_dict = PyDict::new(py); + loc_dict.set_item("line", line)?; + loc_dict.set_item("column", column)?; + dict.set_item("location", loc_dict)?; + } else { + dict.set_item("location", py.None())?; } + list.append(dict)?; + } + Ok(list.into()) +} + +/// Convert ValidationWarning slice to Python list format +fn warnings_to_pylist(py: Python<'_>, warnings: &[ValidationWarning]) -> PyResult> { + let items: Vec<_> = warnings + .iter() + .map(|w| { + ( + w.message.clone(), + w.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(); + errors_to_pylist(py, &items) +} + +// ============================================================================ +// PyReaderBridge - Bridges Python reader objects to Rust Reader trait +// ============================================================================ + +/// Bridges a Python reader object to the Rust Reader trait. +/// +/// This allows any Python object with an `execute(sql: str) -> polars.DataFrame` +/// method to be used as a ggsql reader. 
+struct PyReaderBridge { + obj: Py, +} + +impl Reader for PyReaderBridge { + fn execute(&self, sql: &str) -> ggsql::Result { + Python::attach(|py| { + let bound = self.obj.bind(py); + let result = bound + .call_method1("execute", (sql,)) + .map_err(|e| GgsqlError::ReaderError(format!("Reader.execute() failed: {}", e)))?; + py_to_polars_inner(&result).map_err(|e| GgsqlError::ReaderError(e.to_string())) + }) + } + + fn supports_register(&self) -> bool { + Python::attach(|py| { + self.obj + .bind(py) + .call_method0("supports_register") + .and_then(|r| r.extract::()) + .unwrap_or(false) + }) + } + + fn register(&mut self, name: &str, df: DataFrame) -> ggsql::Result<()> { + Python::attach(|py| { + let py_df = + polars_to_py(py, &df).map_err(|e| GgsqlError::ReaderError(e.to_string()))?; + self.obj + .bind(py) + .call_method1("register", (name, py_df)) + .map_err(|e| GgsqlError::ReaderError(format!("Reader.register() failed: {}", e)))?; + Ok(()) + }) + } +} - // 2. Handle wildcard expansion: map columns to aesthetics with matching names - let has_wildcard = layer.mappings.wildcard || spec.global_mappings.wildcard; - if has_wildcard { - for &aes in supported_aesthetics { - // Only create mapping if column exists in the dataframe - if column_names.contains(aes) { - layer - .mappings - .aesthetics - .entry(aes.to_string()) - .or_insert_with(|| AestheticValue::standard_column(aes)); - } +// ============================================================================ +// Native Reader Detection Macro +// ============================================================================ + +/// Macro to try native readers and fall back to bridge. +/// Adding new native readers = add to the macro invocation list. +macro_rules! 
try_native_readers { + ($query:expr, $reader:expr, $($native_type:ty),*) => {{ + $( + if let Ok(native) = $reader.downcast::<$native_type>() { + return rust_prepare($query, &native.borrow().inner) + .map(|p| PyPrepared { inner: p }) + .map_err(|e| PyErr::new::(e.to_string())); } - } + )* + }}; +} + +// ============================================================================ +// PyDuckDBReader +// ============================================================================ + +/// DuckDB database reader for executing SQL queries. +/// +/// Creates an in-memory or file-based DuckDB connection that can execute +/// SQL queries and register DataFrames as queryable tables. +/// +/// Examples +/// -------- +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> df = reader.execute("SELECT 1 as x, 2 as y") +/// +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> reader.register("data", pl.DataFrame({"x": [1, 2, 3]})) +/// >>> df = reader.execute("SELECT * FROM data WHERE x > 1") +#[pyclass(name = "DuckDBReader", unsendable)] +struct PyDuckDBReader { + inner: RustDuckDBReader, +} + +#[pymethods] +impl PyDuckDBReader { + /// Create a new DuckDB reader from a connection string. + /// + /// Parameters + /// ---------- + /// connection : str + /// Connection string. Use "duckdb://memory" for in-memory database + /// or "duckdb://path/to/file.db" for file-based database. + /// + /// Returns + /// ------- + /// DuckDBReader + /// A configured DuckDB reader instance. + /// + /// Raises + /// ------ + /// ValueError + /// If the connection string is invalid or the database cannot be opened. + #[new] + fn new(connection: &str) -> PyResult { + let inner = RustDuckDBReader::from_connection_string(connection) + .map_err(|e| PyErr::new::(e.to_string()))?; + Ok(Self { inner }) } - // Compute aesthetic labels from column names - spec.compute_aesthetic_labels(); + /// Register a DataFrame as a queryable table. 
+ /// + /// After registration, the DataFrame can be queried by name in SQL. + /// + /// Parameters + /// ---------- + /// name : str + /// The table name to register under. + /// df : polars.DataFrame + /// The DataFrame to register. Must be a polars DataFrame. + /// + /// Raises + /// ------ + /// ValueError + /// If registration fails or the table name is invalid. + fn register(&mut self, py: Python<'_>, name: &str, df: &Bound<'_, PyAny>) -> PyResult<()> { + let rust_df = py_to_polars(py, df)?; + self.inner + .register(name, rust_df) + .map_err(|e| PyErr::new::(e.to_string())) + } - // Create data map with the DataFrame as global data - let mut data_map: HashMap = HashMap::new(); - data_map.insert(GLOBAL_DATA_KEY.to_string(), df); + /// Execute a SQL query and return the result as a DataFrame. + /// + /// Parameters + /// ---------- + /// sql : str + /// The SQL query to execute. + /// + /// Returns + /// ------- + /// polars.DataFrame + /// The query result as a polars DataFrame. + /// + /// Raises + /// ------ + /// ValueError + /// If the SQL is invalid or execution fails. + fn execute(&self, py: Python<'_>, sql: &str) -> PyResult> { + let df = self + .inner + .execute(sql) + .map_err(|e| PyErr::new::(e.to_string()))?; + polars_to_py(py, &df) + } - // Write using the specified writer - match writer { - "vegalite" => { - let w = VegaLiteWriter::new(); - w.write(&spec, &data_map) - .map_err(|e| PyErr::new::(e.to_string())) + /// Check if this reader supports DataFrame registration. + /// + /// Returns + /// ------- + /// bool + /// True if register() is supported, False otherwise. + fn supports_register(&self) -> bool { + self.inner.supports_register() + } +} + +// ============================================================================ +// PyVegaLiteWriter +// ============================================================================ + +/// Vega-Lite JSON output writer. +/// +/// Converts prepared visualization specifications to Vega-Lite v6 JSON. 
+/// +/// Examples +/// -------- +/// >>> writer = VegaLiteWriter() +/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> json_output = prepared.render(writer) +#[pyclass(name = "VegaLiteWriter")] +struct PyVegaLiteWriter { + inner: RustVegaLiteWriter, +} + +#[pymethods] +impl PyVegaLiteWriter { + /// Create a new Vega-Lite writer. + /// + /// Returns + /// ------- + /// VegaLiteWriter + /// A configured Vega-Lite writer instance. + #[new] + fn new() -> Self { + Self { + inner: RustVegaLiteWriter::new(), } - _ => Err(PyErr::new::(format!( - "Unknown writer: {}", - writer - ))), } } +// ============================================================================ +// PyValidated +// ============================================================================ + +/// Result of validate() - query inspection and validation without SQL execution. +/// +/// Contains information about query structure and any validation errors/warnings. +/// The tree() method from Rust is not exposed as it's not useful in Python. +#[pyclass(name = "Validated")] +struct PyValidated { + sql: String, + visual: String, + has_visual: bool, + valid: bool, + errors: Vec<(String, Option<(usize, usize)>)>, + warnings: Vec<(String, Option<(usize, usize)>)>, +} + +#[pymethods] +impl PyValidated { + /// Whether the query contains a VISUALISE clause. + /// + /// Returns + /// ------- + /// bool + /// True if the query has a VISUALISE clause. + fn has_visual(&self) -> bool { + self.has_visual + } + + /// The SQL portion (before VISUALISE). + /// + /// Returns + /// ------- + /// str + /// The SQL part of the query. + fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + /// + /// Returns + /// ------- + /// str + /// The VISUALISE part of the query. + fn visual(&self) -> &str { + &self.visual + } + + /// Whether the query is valid (no errors). 
+ /// + /// Returns + /// ------- + /// bool + /// True if the query is syntactically and semantically valid. + fn valid(&self) -> bool { + self.valid + } + + /// Validation errors (fatal issues). + /// + /// Returns + /// ------- + /// list[dict] + /// List of error dictionaries with 'message' and optional 'location' keys. + fn errors(&self, py: Python<'_>) -> PyResult> { + errors_to_pylist(py, &self.errors) + } + + /// Validation warnings (non-fatal issues). + /// + /// Returns + /// ------- + /// list[dict] + /// List of warning dictionaries with 'message' and optional 'location' keys. + fn warnings(&self, py: Python<'_>) -> PyResult> { + errors_to_pylist(py, &self.warnings) + } +} + +// ============================================================================ +// PyPrepared +// ============================================================================ + +/// Result of prepare(), ready for rendering. +/// +/// Contains the resolved plot specification, data, and metadata. +/// Use render() to generate Vega-Lite JSON output. +/// +/// Examples +/// -------- +/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> print(f"Rows: {prepared.metadata()['rows']}") +/// >>> json_output = prepared.render(VegaLiteWriter()) +#[pyclass(name = "Prepared")] +struct PyPrepared { + inner: Prepared, +} + +#[pymethods] +impl PyPrepared { + /// Render to output format (Vega-Lite JSON). + /// + /// Parameters + /// ---------- + /// writer : VegaLiteWriter + /// The writer to use for rendering. + /// + /// Returns + /// ------- + /// str + /// The Vega-Lite JSON specification as a string. + /// + /// Raises + /// ------ + /// ValueError + /// If rendering fails. + fn render(&self, writer: &PyVegaLiteWriter) -> PyResult { + self.inner + .render(&writer.inner) + .map_err(|e| PyErr::new::(e.to_string())) + } + + /// Get visualization metadata. 
+ /// + /// Returns + /// ------- + /// dict + /// Dictionary with 'rows', 'columns', and 'layer_count' keys. + fn metadata(&self, py: Python<'_>) -> PyResult> { + let m = self.inner.metadata(); + let dict = PyDict::new(py); + dict.set_item("rows", m.rows)?; + dict.set_item("columns", m.columns.clone())?; + dict.set_item("layer_count", m.layer_count)?; + Ok(dict.into()) + } + + /// The main SQL query that was executed. + /// + /// Returns + /// ------- + /// str + /// The SQL query string. + fn sql(&self) -> &str { + self.inner.sql() + } + + /// The VISUALISE portion (raw text). + /// + /// Returns + /// ------- + /// str + /// The VISUALISE clause text. + fn visual(&self) -> &str { + self.inner.visual() + } + + /// Number of layers. + /// + /// Returns + /// ------- + /// int + /// The number of DRAW clauses in the visualization. + fn layer_count(&self) -> usize { + self.inner.layer_count() + } + + /// Get global data (main query result). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The main query result DataFrame, or None if not available. + fn data(&self, py: Python<'_>) -> PyResult>> { + self.inner.data().map(|df| polars_to_py(py, df)).transpose() + } + + /// Get layer-specific data (from FILTER or FROM clause). + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The layer-specific DataFrame, or None if the layer uses global data. + fn layer_data(&self, py: Python<'_>, index: usize) -> PyResult>> { + self.inner + .layer_data(index) + .map(|df| polars_to_py(py, df)) + .transpose() + } + + /// Get stat transform data (e.g., histogram bins, density estimates). + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The stat transform DataFrame, or None if no stat transform. 
+ fn stat_data(&self, py: Python<'_>, index: usize) -> PyResult>> { + self.inner + .stat_data(index) + .map(|df| polars_to_py(py, df)) + .transpose() + } + + /// Layer filter/source query, or None if using global data. + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// str | None + /// The filter SQL query, or None if the layer uses global data directly. + fn layer_sql(&self, index: usize) -> Option { + self.inner.layer_sql(index).map(|s| s.to_string()) + } + + /// Stat transform query, or None if no stat transform. + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// str | None + /// The stat transform SQL query, or None if no stat transform. + fn stat_sql(&self, index: usize) -> Option { + self.inner.stat_sql(index).map(|s| s.to_string()) + } + + /// Validation warnings from preparation. + /// + /// Returns + /// ------- + /// list[dict] + /// List of warning dictionaries with 'message' and optional 'location' keys. + fn warnings(&self, py: Python<'_>) -> PyResult> { + warnings_to_pylist(py, self.inner.warnings()) + } +} + +// ============================================================================ +// Module Functions +// ============================================================================ + +/// Validate query syntax and semantics without executing SQL. +/// +/// Parameters +/// ---------- +/// query : str +/// The ggsql query to validate. +/// +/// Returns +/// ------- +/// Validated +/// Validation result with query inspection methods. +/// +/// Raises +/// ------ +/// ValueError +/// If validation fails unexpectedly (not for syntax errors, which are captured). 
+#[pyfunction] +fn validate(query: &str) -> PyResult { + let v = rust_validate(query) + .map_err(|e| PyErr::new::(e.to_string()))?; + + Ok(PyValidated { + sql: v.sql().to_string(), + visual: v.visual().to_string(), + has_visual: v.has_visual(), + valid: v.valid(), + errors: v + .errors() + .iter() + .map(|e| { + ( + e.message.clone(), + e.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(), + warnings: v + .warnings() + .iter() + .map(|w| { + ( + w.message.clone(), + w.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(), + }) +} + +/// Prepare a query for visualization. Main entry point for the Rust API. +/// +/// Parameters +/// ---------- +/// query : str +/// The ggsql query to prepare. +/// reader : DuckDBReader | object +/// The database reader to execute SQL against. Can be a native DuckDBReader +/// for optimal performance, or any Python object with an +/// `execute(sql: str) -> polars.DataFrame` method. +/// +/// Returns +/// ------- +/// Prepared +/// A prepared visualization ready for rendering. +/// +/// Raises +/// ------ +/// ValueError +/// If parsing, validation, or SQL execution fails. +/// +/// Examples +/// -------- +/// >>> # Using native reader (fast path) +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> json_output = prepared.render(VegaLiteWriter()) +/// +/// >>> # Using custom Python reader +/// >>> class MyReader: +/// ... def execute(self, sql: str) -> pl.DataFrame: +/// ... 
return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +/// >>> reader = MyReader() +/// >>> prepared = prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) +#[pyfunction] +fn prepare(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { + // Fast path: try all known native reader types + // Add new native readers to this list as they're implemented + try_native_readers!(query, reader, PyDuckDBReader); + + // Bridge path: wrap Python object as Reader + let bridge = PyReaderBridge { + obj: reader.clone().unbind(), + }; + rust_prepare(query, &bridge) + .map(|p| PyPrepared { inner: p }) + .map_err(|e| PyErr::new::(e.to_string())) +} + +// ============================================================================ +// Module Registration +// ============================================================================ + #[pymodule] fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_function(wrap_pyfunction!(split_query, m)?)?; - m.add_function(wrap_pyfunction!(render, m)?)?; + // Classes + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + // Functions + m.add_function(wrap_pyfunction!(validate, m)?)?; + m.add_function(wrap_pyfunction!(prepare, m)?)?; + Ok(()) } diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 9df03779..970dcf5a 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -3,10 +3,13 @@ These tests focus on Python-specific logic: - DataFrame conversion via narwhals - Return type handling +- Two-stage API (prepare -> render) Rust logic (parsing, Vega-Lite generation) is tested in the Rust test suite. 
""" +import json + import pytest import polars as pl import altair @@ -14,20 +17,188 @@ import ggsql -class TestSplitQuery: - """Tests for split_query() function.""" +class TestValidate: + """Tests for validate() function.""" + + def test_valid_query_with_visualise(self): + validated = ggsql.validate( + "SELECT 1 AS x, 2 AS y VISUALISE DRAW point MAPPING x AS x, y AS y" + ) + assert validated.has_visual() + assert validated.valid() + assert "SELECT" in validated.sql() + assert "VISUALISE" in validated.visual() + assert len(validated.errors()) == 0 + + def test_valid_query_without_visualise(self): + validated = ggsql.validate("SELECT 1 AS x, 2 AS y") + assert not validated.has_visual() + assert validated.valid() + assert validated.sql() == "SELECT 1 AS x, 2 AS y" + assert validated.visual() == "" + + def test_invalid_query_has_errors(self): + validated = ggsql.validate("SELECT 1 VISUALISE DRAW invalid_geom") + assert not validated.valid() + assert len(validated.errors()) > 0 + + def test_missing_required_aesthetic(self): + # Point requires x and y, only providing x + validated = ggsql.validate( + "SELECT 1 AS x, 2 AS y VISUALISE DRAW point MAPPING x AS x" + ) + assert not validated.valid() + errors = validated.errors() + assert len(errors) > 0 + assert any("y" in e["message"] for e in errors) + + +class TestDuckDBReader: + """Tests for DuckDBReader class.""" + + def test_create_in_memory(self): + reader = ggsql.DuckDBReader("duckdb://memory") + assert reader is not None + + def test_execute_simple_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = reader.execute("SELECT 1 AS x, 2 AS y") + assert isinstance(df, pl.DataFrame) + assert df.shape == (1, 2) + assert list(df.columns) == ["x", "y"] + + def test_register_and_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("my_data", df) + + result = reader.execute("SELECT * FROM my_data WHERE x > 1") + assert 
isinstance(result, pl.DataFrame) + assert result.shape == (2, 2) + + def test_supports_register(self): + reader = ggsql.DuckDBReader("duckdb://memory") + assert reader.supports_register() is True + + def test_invalid_connection_string(self): + with pytest.raises(ValueError): + ggsql.DuckDBReader("invalid://connection") + + +class TestVegaLiteWriter: + """Tests for VegaLiteWriter class.""" + + def test_create_writer(self): + writer = ggsql.VegaLiteWriter() + assert writer is not None + + +class TestPrepare: + """Tests for prepare() function.""" + + def test_prepare_simple_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + assert prepared is not None + assert prepared.layer_count() == 1 + + def test_prepare_with_registered_data(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("data", df) + + prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + assert prepared.metadata()["rows"] == 3 + + def test_prepare_metadata(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) " + "VISUALISE x, y DRAW point", + reader, + ) + + metadata = prepared.metadata() + assert metadata["rows"] == 3 + assert "x" in metadata["columns"] + assert "y" in metadata["columns"] + assert metadata["layer_count"] == 1 + + def test_prepare_sql_accessor(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + assert "SELECT" in prepared.sql() + + def test_prepare_visual_accessor(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + assert "VISUALISE" in prepared.visual() - def test_splits_sql_and_visualise(self): 
- sql, viz = ggsql.split_query( - "SELECT x, y FROM data VISUALISE x, y DRAW point" + def test_prepare_data_accessor(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader ) - assert "SELECT" in sql - assert "VISUALISE" in viz + data = prepared.data() + assert isinstance(data, pl.DataFrame) + assert data.shape == (1, 2) + + def test_prepare_without_visualise_fails(self): + reader = ggsql.DuckDBReader("duckdb://memory") + with pytest.raises(ValueError): + ggsql.prepare("SELECT 1 AS x, 2 AS y", reader) + - def test_no_visualise_returns_empty_viz(self): - sql, viz = ggsql.split_query("SELECT * FROM data") - assert sql == "SELECT * FROM data" - assert viz == "" +class TestPreparedRender: + """Tests for Prepared.render() method.""" + + def test_render_to_vegalite(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + writer = ggsql.VegaLiteWriter() + + result = prepared.render(writer) + assert isinstance(result, str) + + spec = json.loads(result) + assert "$schema" in spec + assert "vega-lite" in spec["$schema"] + + def test_render_contains_data(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("data", df) + + prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + writer = ggsql.VegaLiteWriter() + + result = prepared.render(writer) + spec = json.loads(result) + # Data should be in the spec (either inline or in datasets) + assert "data" in spec or "datasets" in spec + + def test_render_multi_layer(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) " + "VISUALISE " + "DRAW point MAPPING x AS x, y AS y " + "DRAW line MAPPING x AS x, y AS y", + reader, + ) + writer = ggsql.VegaLiteWriter() + + result 
= prepared.render(writer) + spec = json.loads(result) + assert "layer" in spec class TestRenderAltairDataFrameConversion: @@ -112,23 +283,31 @@ def test_layered_chart_can_round_trip(self): def test_faceted_chart_returns_facet_chart(self): """FACET WRAP specs produce FacetChart.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4, 5, 6], - "y": [10, 20, 30, 40, 50, 60], - "group": ["A", "A", "A", "B", "B", "B"], - }) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4, 5, 6], + "y": [10, 20, 30, 40, 50, 60], + "group": ["A", "A", "A", "B", "B", "B"], + } + ) # Need validate=False because ggsql produces v6 specs - chart = ggsql.render_altair(df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False) + chart = ggsql.render_altair( + df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + ) assert isinstance(chart, altair.FacetChart) def test_faceted_chart_can_round_trip(self): """FacetChart can be converted to dict and back.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4, 5, 6], - "y": [10, 20, 30, 40, 50, 60], - "group": ["A", "A", "A", "B", "B", "B"], - }) - chart = ggsql.render_altair(df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4, 5, 6], + "y": [10, 20, 30, 40, 50, 60], + "group": ["A", "A", "A", "B", "B", "B"], + } + ) + chart = ggsql.render_altair( + df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + ) # Convert to dict (skip validation for ggsql specs) spec = chart.to_dict(validate=False) @@ -140,11 +319,13 @@ def test_faceted_chart_can_round_trip(self): def test_chart_with_color_encoding(self): """Charts with color encoding still return correct type.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4], - "y": [10, 20, 30, 40], - "category": ["A", "B", "A", "B"], - }) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4], + "y": [10, 20, 30, 40], + "category": ["A", "B", "A", "B"], + } + ) chart = ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") # Should still be a LayerChart 
(ggsql wraps in layer) assert isinstance(chart, altair.LayerChart) @@ -157,3 +338,180 @@ def test_invalid_viz_raises(self): df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) with pytest.raises(ValueError): ggsql.render_altair(df, "NOT VALID SYNTAX") + + +class TestTwoStageAPIIntegration: + """Integration tests for the two-stage prepare -> render API.""" + + def test_end_to_end_workflow(self): + """Complete workflow: create reader, register data, prepare, render.""" + # Create reader + reader = ggsql.DuckDBReader("duckdb://memory") + + # Register data + df = pl.DataFrame( + { + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "value": [10, 20, 30], + "region": ["North", "South", "North"], + } + ) + reader.register("sales", df) + + # Prepare visualization + prepared = ggsql.prepare( + "SELECT * FROM sales VISUALISE date AS x, value AS y, region AS color DRAW line", + reader, + ) + + # Verify metadata + assert prepared.metadata()["rows"] == 3 + assert prepared.layer_count() == 1 + + # Render to Vega-Lite + writer = ggsql.VegaLiteWriter() + result = prepared.render(writer) + + # Verify output + spec = json.loads(result) + assert "$schema" in spec + assert "line" in json.dumps(spec) + + def test_can_introspect_prepared(self): + """Test all introspection methods on Prepared.""" + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + + # All these should work without error + assert prepared.sql() is not None + assert prepared.visual() is not None + assert prepared.layer_count() >= 1 + assert prepared.metadata() is not None + assert prepared.data() is not None + assert prepared.warnings() is not None + + # Layer-specific accessors (may return None) + _ = prepared.layer_data(0) + _ = prepared.stat_data(0) + _ = prepared.layer_sql(0) + _ = prepared.stat_sql(0) + + +class TestCustomReader: + """Tests for custom Python reader support.""" + + def test_simple_custom_reader(self): + 
"""Custom reader with execute() method works.""" + + class SimpleReader: + def execute(self, sql: str) -> pl.DataFrame: + return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + + reader = SimpleReader() + prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + assert prepared.metadata()["rows"] == 3 + + def test_custom_reader_with_register(self): + """Custom reader with register() support.""" + + class RegisterReader: + def __init__(self): + self.tables = {} + + def execute(self, sql: str) -> pl.DataFrame: + # Simple: just return the first registered table + if self.tables: + return next(iter(self.tables.values())) + return pl.DataFrame({"x": [1], "y": [2]}) + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.tables[name] = df + + reader = RegisterReader() + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + assert prepared is not None + + def test_custom_reader_error_handling(self): + """Custom reader errors are propagated.""" + + class ErrorReader: + def execute(self, sql: str) -> pl.DataFrame: + raise ValueError("Custom reader error") + + reader = ErrorReader() + with pytest.raises(ValueError, match="Custom reader error"): + ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) + + def test_custom_reader_wrong_return_type(self): + """Custom reader returning wrong type raises TypeError.""" + + class WrongTypeReader: + def execute(self, sql: str): + return {"x": [1, 2, 3]} # dict, not DataFrame + + reader = WrongTypeReader() + with pytest.raises((ValueError, TypeError)): + ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) + + def test_native_reader_fast_path(self): + """Native DuckDBReader still works (fast path).""" + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + assert prepared.metadata()["rows"] == 1 + + def 
test_custom_reader_can_render(self): + """Custom reader result can be rendered to Vega-Lite.""" + + class StaticReader: + def execute(self, sql: str) -> pl.DataFrame: + return pl.DataFrame( + { + "x": [1, 2, 3, 4, 5], + "y": [10, 40, 20, 50, 30], + "category": ["A", "B", "A", "B", "A"], + } + ) + + reader = StaticReader() + prepared = ggsql.prepare( + "SELECT * FROM data VISUALISE x, y, category AS color DRAW point", + reader, + ) + + writer = ggsql.VegaLiteWriter() + result = prepared.render(writer) + + spec = json.loads(result) + assert "$schema" in spec + assert "vega-lite" in spec["$schema"] + + def test_custom_reader_execute_called(self): + """Verify execute() is called on the custom reader.""" + + class RecordingReader: + def __init__(self): + self.execute_calls = [] + + def execute(self, sql: str) -> pl.DataFrame: + self.execute_calls.append(sql) + return pl.DataFrame({"x": [1], "y": [2]}) + + reader = RecordingReader() + ggsql.prepare( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader, + ) + + # execute() should have been called at least once + assert len(reader.execute_calls) > 0 + # All calls should be valid SQL strings + assert all(isinstance(sql, str) for sql in reader.execute_calls) diff --git a/src/doc/API.md b/src/doc/API.md index 5ccd70e6..a0f97ab6 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -514,7 +514,4 @@ def validate(query: str) -> Validated: def prepare(query: str, reader: DuckDBReader) -> Prepared: """Prepare a query for visualization.""" - -def split_query(query: str) -> tuple[str, str]: - """Split query into (sql, visualise) portions.""" ``` diff --git a/src/rest.rs b/src/rest.rs index 45d21963..3dba85fe 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -31,12 +31,12 @@ use tower_http::cors::{Any, CorsLayer}; use tracing::info; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; -use ggsql::{parser, GgsqlError, VERSION}; +use ggsql::{parser, validate, GgsqlError, VERSION}; #[cfg(feature = "duckdb")] use 
ggsql::reader::DuckDBReader; #[cfg(feature = "duckdb")] -use ggsql::{parse, prepare}; +use ggsql::prepare; #[cfg(feature = "vegalite")] use ggsql::writer::VegaLiteWriter; @@ -508,22 +508,21 @@ async fn parse_handler( ) -> Result>, ApiErrorResponse> { info!("Parsing query: {} chars", request.query.len()); - // Split query (for backwards compatibility) - let (sql_part, viz_part) = parser::split_query(&request.query)?; + // Validate query to get sql/viz portions + let validated = validate(&request.query)?; - // Parse using new API - let parsed = parse(&request.query)?; + // Parse ggsql portion + let specs = parser::parse_query(&request.query)?; // Convert specs to JSON - let specs_json: Vec = parsed - .plots() + let specs_json: Vec = specs .iter() .map(|spec| serde_json::to_value(spec).unwrap_or(serde_json::Value::Null)) .collect(); let result = ParseResult { - sql_portion: sql_part, - viz_portion: viz_part, + sql_portion: validated.sql().to_string(), + viz_portion: validated.visual().to_string(), specs: specs_json, }; @@ -540,8 +539,8 @@ async fn parse_handler( ) -> Result>, ApiErrorResponse> { info!("Parsing query: {} chars", request.query.len()); - // Split query - let (sql_part, viz_part) = parser::split_query(&request.query)?; + // Validate query to get sql/viz portions + let validated = validate(&request.query)?; // Parse ggsql portion let specs = parser::parse_query(&request.query)?; @@ -553,8 +552,8 @@ async fn parse_handler( .collect(); let result = ParseResult { - sql_portion: sql_part, - viz_portion: viz_part, + sql_portion: validated.sql().to_string(), + viz_portion: validated.visual().to_string(), specs: specs_json, }; From 6a60e7121d8606b52dc0c3c333f3594c4ad44a8c Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 29 Jan 2026 13:34:51 +0000 Subject: [PATCH 04/12] Keep clippy happy --- Cargo.toml | 2 +- src/execute.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0339c104..5e98f8aa 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,7 @@ csscolorparser = "0.8.1" polars = { version = "0.52", features = ["lazy", "sql", "ipc"] } # Readers -duckdb = { version = "1.1", features = ["bundled", "vtab-arrow"] } +duckdb = { version = "1.4", features = ["bundled", "vtab-arrow"] } arrow = { version = "56", default-features = false, features = ["ipc"] } postgres = "0.19" sqlx = { version = "0.8", features = ["postgres", "runtime-tokio-rustls"] } diff --git a/src/execute.rs b/src/execute.rs index e79bb196..b7683f56 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -566,6 +566,7 @@ pub struct LayerQueryResult { /// /// Note: This function takes `&mut Layer` because stat transforms may add new aesthetic mappings /// (e.g., mapping y to `__ggsql_stat__count` for histogram or bar count). +#[allow(clippy::too_many_arguments)] fn build_layer_query( layer: &mut Layer, schema: &Schema, From 3bc364590b5a169754899689fadb4c4a2a90741d Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 29 Jan 2026 13:58:53 +0000 Subject: [PATCH 05/12] cargo fmt --- ggsql-jupyter/src/executor.rs | 3 ++- src/rest.rs | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index 0f523ebb..40f74f72 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -5,8 +5,9 @@ use anyhow::Result; use ggsql::{ - prepare, validate, + prepare, reader::{DuckDBReader, Reader}, + validate, writer::VegaLiteWriter, }; use polars::frame::DataFrame; diff --git a/src/rest.rs b/src/rest.rs index 3dba85fe..e87a14f9 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -33,10 +33,10 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use ggsql::{parser, validate, GgsqlError, VERSION}; -#[cfg(feature = "duckdb")] -use ggsql::reader::DuckDBReader; #[cfg(feature = "duckdb")] use ggsql::prepare; +#[cfg(feature = "duckdb")] +use ggsql::reader::DuckDBReader; #[cfg(feature = "vegalite")] use 
ggsql::writer::VegaLiteWriter; @@ -445,9 +445,9 @@ async fn query_handler( // Use shared reader or create new one let prepared = if request.reader == "duckdb://memory" && state.reader.is_some() { let reader_mutex = state.reader.as_ref().unwrap(); - let reader = reader_mutex.lock().map_err(|e| { - GgsqlError::InternalError(format!("Failed to lock reader: {}", e)) - })?; + let reader = reader_mutex + .lock() + .map_err(|e| GgsqlError::InternalError(format!("Failed to lock reader: {}", e)))?; prepare(&request.query, &*reader)? } else { let reader = DuckDBReader::from_connection_string(&request.reader)?; From c1aff572a44f8ac8a16609e2e4ecd2193622ec10 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 11:32:52 +0000 Subject: [PATCH 06/12] Rename reader.execute to reader.execute_sql --- CLAUDE.md | 10 +++++----- ggsql-jupyter/src/executor.rs | 2 +- ggsql-python/README.md | 8 ++++---- ggsql-python/src/lib.rs | 16 ++++++++-------- ggsql-python/tests/test_ggsql.py | 16 ++++++++-------- src/api.rs | 2 +- src/cli.rs | 2 +- src/doc/API.md | 4 ++-- src/execute.rs | 2 +- src/lib.rs | 22 +++++++++++----------- src/reader/duckdb.rs | 18 +++++++++--------- src/reader/mod.rs | 8 ++++---- 12 files changed, 55 insertions(+), 55 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index aa093221..1ac04a87 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -207,7 +207,7 @@ let json = prepared.render(&writer)?; **Reader trait** (data source abstraction): -- `execute(sql)` - Run SQL, return DataFrame +- `execute_sql(sql)` - Run SQL, return DataFrame - `register(name, df)` - Register DataFrame as table - Implementation: `DuckDBReader` @@ -505,7 +505,7 @@ pub type Result = std::result::Result; ```rust pub trait Reader { - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; fn supports_query(&self, sql: &str) -> bool; } ``` @@ -871,7 +871,7 @@ When running in Positron IDE, the extension provides enhanced functionality: - PyO3-based Rust bindings compiled to 
a native Python extension - Two-stage API mirroring the Rust API: `prepare()` → `render()` - DuckDB reader with DataFrame registration -- Custom Python reader support: any object with `execute(sql) -> DataFrame` method +- Custom Python reader support: any object with `execute_sql(sql) -> DataFrame` method - Works with any narwhals-compatible DataFrame (polars, pandas, etc.) - LazyFrames are collected automatically - Returns native `altair.Chart` objects via `render_altair()` convenience function @@ -976,7 +976,7 @@ print(f"Errors: {validated.errors()}") **Custom Python Readers**: -Any Python object with an `execute(sql: str) -> polars.DataFrame` method can be used as a reader: +Any Python object with an `execute_sql(sql: str) -> polars.DataFrame` method can be used as a reader: ```python import ggsql @@ -985,7 +985,7 @@ import polars as pl class MyReader: """Custom reader that returns static data.""" - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) # Use custom reader with prepare() diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index 40f74f72..b2415228 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -60,7 +60,7 @@ impl QueryExecutor { // 2. 
Check if there's a visualization if !validated.has_visual() { // Pure SQL query - execute directly and return DataFrame - let df = self.reader.execute(code)?; + let df = self.reader.execute_sql(code)?; tracing::info!( "Pure SQL executed: {} rows, {} cols", df.height(), diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 3ea2c603..22b5fb8b 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -127,7 +127,7 @@ reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database **Methods:** - `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table -- `execute(sql: str) -> polars.DataFrame` - Execute SQL and return results +- `execute_sql(sql: str) -> polars.DataFrame` - Execute SQL and return results - `supports_register() -> bool` - Check if registration is supported #### `VegaLiteWriter()` @@ -236,7 +236,7 @@ ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") ### Custom Readers -You can use any Python object with an `execute(sql: str) -> polars.DataFrame` method as a reader. This enables integration with any data source. +You can use any Python object with an `execute_sql(sql: str) -> polars.DataFrame` method as a reader. This enables integration with any data source. ```python import ggsql @@ -248,7 +248,7 @@ class CSVReader: def __init__(self, data_dir: str): self.data_dir = data_dir - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: # Simple implementation: ignore SQL and return fixed data # A real implementation would parse SQL to determine which file to load return pl.read_csv(f"{self.data_dir}/data.csv") @@ -275,7 +275,7 @@ class AdvancedReader: def __init__(self): self.tables = {} - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: # Your SQL execution logic here ... 
diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index b9d6496d..726db016 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -62,7 +62,7 @@ fn py_to_polars_inner(df: &Bound<'_, PyAny>) -> PyResult { df.call_method1("write_ipc", (&bytes_io,)).map_err(|_| { PyErr::new::( - "Reader.execute() must return a polars.DataFrame", + "Reader.execute_sql() must return a polars.DataFrame", ) })?; @@ -127,12 +127,12 @@ struct PyReaderBridge { } impl Reader for PyReaderBridge { - fn execute(&self, sql: &str) -> ggsql::Result { + fn execute_sql(&self, sql: &str) -> ggsql::Result { Python::attach(|py| { let bound = self.obj.bind(py); let result = bound - .call_method1("execute", (sql,)) - .map_err(|e| GgsqlError::ReaderError(format!("Reader.execute() failed: {}", e)))?; + .call_method1("execute_sql", (sql,)) + .map_err(|e| GgsqlError::ReaderError(format!("Reader.execute_sql() failed: {}", e)))?; py_to_polars_inner(&result).map_err(|e| GgsqlError::ReaderError(e.to_string())) }) } @@ -190,11 +190,11 @@ macro_rules! try_native_readers { /// Examples /// -------- /// >>> reader = DuckDBReader("duckdb://memory") -/// >>> df = reader.execute("SELECT 1 as x, 2 as y") +/// >>> df = reader.execute_sql("SELECT 1 as x, 2 as y") /// /// >>> reader = DuckDBReader("duckdb://memory") /// >>> reader.register("data", pl.DataFrame({"x": [1, 2, 3]})) -/// >>> df = reader.execute("SELECT * FROM data WHERE x > 1") +/// >>> df = reader.execute_sql("SELECT * FROM data WHERE x > 1") #[pyclass(name = "DuckDBReader", unsendable)] struct PyDuckDBReader { inner: RustDuckDBReader, @@ -264,10 +264,10 @@ impl PyDuckDBReader { /// ------ /// ValueError /// If the SQL is invalid or execution fails. 
- fn execute(&self, py: Python<'_>, sql: &str) -> PyResult> { + fn execute_sql(&self, py: Python<'_>, sql: &str) -> PyResult> { let df = self .inner - .execute(sql) + .execute_sql(sql) .map_err(|e| PyErr::new::(e.to_string()))?; polars_to_py(py, &df) } diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 970dcf5a..b8614d0a 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -62,7 +62,7 @@ def test_create_in_memory(self): def test_execute_simple_query(self): reader = ggsql.DuckDBReader("duckdb://memory") - df = reader.execute("SELECT 1 AS x, 2 AS y") + df = reader.execute_sql("SELECT 1 AS x, 2 AS y") assert isinstance(df, pl.DataFrame) assert df.shape == (1, 2) assert list(df.columns) == ["x", "y"] @@ -72,7 +72,7 @@ def test_register_and_query(self): df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader.register("my_data", df) - result = reader.execute("SELECT * FROM my_data WHERE x > 1") + result = reader.execute_sql("SELECT * FROM my_data WHERE x > 1") assert isinstance(result, pl.DataFrame) assert result.shape == (2, 2) @@ -406,7 +406,7 @@ def test_simple_custom_reader(self): """Custom reader with execute() method works.""" class SimpleReader: - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader = SimpleReader() @@ -420,7 +420,7 @@ class RegisterReader: def __init__(self): self.tables = {} - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: # Simple: just return the first registered table if self.tables: return next(iter(self.tables.values())) @@ -442,7 +442,7 @@ def test_custom_reader_error_handling(self): """Custom reader errors are propagated.""" class ErrorReader: - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: raise ValueError("Custom reader error") reader = ErrorReader() @@ 
-453,7 +453,7 @@ def test_custom_reader_wrong_return_type(self): """Custom reader returning wrong type raises TypeError.""" class WrongTypeReader: - def execute(self, sql: str): + def execute_sql(self, sql: str): return {"x": [1, 2, 3]} # dict, not DataFrame reader = WrongTypeReader() @@ -472,7 +472,7 @@ def test_custom_reader_can_render(self): """Custom reader result can be rendered to Vega-Lite.""" class StaticReader: - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: return pl.DataFrame( { "x": [1, 2, 3, 4, 5], @@ -501,7 +501,7 @@ class RecordingReader: def __init__(self): self.execute_calls = [] - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: self.execute_calls.append(sql) return pl.DataFrame({"x": [1], "y": [2]}) diff --git a/src/api.rs b/src/api.rs index ecfbdeaf..1158f5ef 100644 --- a/src/api.rs +++ b/src/api.rs @@ -245,7 +245,7 @@ pub fn prepare(query: &str, reader: &dyn Reader) -> Result { let warnings: Vec = validated.warnings().to_vec(); // Prepare data (this also validates, but we want the warnings from above) - let prepared_data = prepare_data_with_executor(query, |sql| reader.execute(sql))?; + let prepared_data = prepare_data_with_executor(query, |sql| reader.execute_sql(sql))?; Ok(Prepared::new( prepared_data.spec, diff --git a/src/cli.rs b/src/cli.rs index 1844ff01..c8ad4e5c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -335,7 +335,7 @@ fn print_table_fallback(query: &str, reader: &DuckDBReader, max_rows: usize) { } let parsed = parsed.unwrap(); - let data = reader.execute(&parsed); + let data = reader.execute_sql(&parsed); if let Err(e) = data { eprintln!("Failed to execute SQL query: {}", e); std::process::exit(1) diff --git a/src/doc/API.md b/src/doc/API.md index a0f97ab6..ed5af4fe 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -374,7 +374,7 @@ pub struct Location { ```rust pub trait Reader { /// Execute a SQL query and return a DataFrame - fn 
execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; /// Register a DataFrame as a queryable table fn register(&mut self, name: &str, df: DataFrame) -> Result<()>; @@ -425,7 +425,7 @@ class DuckDBReader: df: Polars DataFrame or narwhals-compatible DataFrame """ - def execute(self, sql: str) -> polars.DataFrame: + def execute_sql(self, sql: str) -> polars.DataFrame: """Execute SQL and return a Polars DataFrame.""" def supports_register(self) -> bool: diff --git a/src/execute.rs b/src/execute.rs index b7683f56..3bf2be33 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -1185,7 +1185,7 @@ where /// Convenience wrapper around `prepare_data_with_executor` for direct DuckDB reader usage. #[cfg(feature = "duckdb")] pub fn prepare_data(query: &str, reader: &DuckDBReader) -> Result { - prepare_data_with_executor(query, |sql| reader.execute(sql)) + prepare_data_with_executor(query, |sql| reader.execute_sql(sql)) } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index cf13aaa5..c9128f54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -116,7 +116,7 @@ mod integration_tests { FROM generate_series(0, 4) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has temporal type (DuckDB returns Datetime for DATE + INTERVAL) assert_eq!(df.get_column_names(), vec!["date", "revenue"]); @@ -176,7 +176,7 @@ mod integration_tests { FROM generate_series(0, 3) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has Datetime type let timestamp_col = df.column("timestamp").unwrap(); @@ -224,7 +224,7 @@ mod integration_tests { // Real SQL that users would write let sql = "SELECT 1 as int_col, 2.5 as float_col, true as bool_col"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types are preserved // DuckDB treats numeric literals as DECIMAL, which we convert to Float64 @@ -279,7 +279,7 @@ 
mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES (1, 2.5, 'a'), (2, NULL, 'b'), (NULL, 3.5, NULL)) AS t(int_col, float_col, str_col)"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -329,7 +329,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES ('A', 10), ('B', 20), ('A', 15), ('C', 30)) AS t(category, value)"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); let mut spec = Plot::new(); let layer = Layer::new(Geom::bar()) @@ -375,7 +375,7 @@ mod integration_tests { GROUP BY day "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify temporal type is preserved through aggregation // DATE_TRUNC returns Date type (not Datetime) @@ -413,7 +413,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT 0.1 as small, 123.456 as medium, 999999.999999 as large"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // All should be Float64 assert!(matches!( @@ -465,7 +465,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT CAST(1 AS TINYINT) as tiny, CAST(1000 AS SMALLINT) as small, CAST(1000000 AS INTEGER) as int, CAST(1000000000000 AS BIGINT) as big"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -533,7 +533,7 @@ mod integration_tests { // Prepare data - this parses, injects constants into global data, and replaces literals with columns let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| 
reader.execute_sql(sql)).unwrap(); // Verify constants were injected into global data (not layer-specific data) // Both layers share __global__ data for faceting compatibility @@ -641,7 +641,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // All layers should use global data for faceting to work assert!( @@ -729,7 +729,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // Should have global data with the constant injected assert!( diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index b3cf46d7..f67c39f2 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -24,11 +24,11 @@ use std::io::Cursor; /// /// // In-memory database /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -/// let df = reader.execute("SELECT 1 as x, 2 as y")?; +/// let df = reader.execute_sql("SELECT 1 as x, 2 as y")?; /// /// // File-based database /// let reader = DuckDBReader::from_connection_string("duckdb://data.db")?; -/// let df = reader.execute("SELECT * FROM sales")?; +/// let df = reader.execute_sql("SELECT * FROM sales")?; /// ``` pub struct DuckDBReader { conn: Connection, @@ -380,7 +380,7 @@ impl ColumnBuilder { } impl Reader for DuckDBReader { - fn execute(&self, sql: &str) -> Result { + fn execute_sql(&self, sql: &str) -> Result { use polars::prelude::*; // Check if this is a DDL statement (CREATE, DROP, INSERT, UPDATE, DELETE, ALTER) @@ -544,7 +544,7 @@ mod tests { #[test] fn test_simple_query() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let df = reader.execute("SELECT 1 as x, 2 as y").unwrap(); + let df = reader.execute_sql("SELECT 1 as x, 2 as y").unwrap(); 
assert_eq!(df.shape(), (1, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); @@ -567,7 +567,7 @@ mod tests { .unwrap(); // Query data - let df = reader.execute("SELECT * FROM test").unwrap(); + let df = reader.execute_sql("SELECT * FROM test").unwrap(); assert_eq!(df.shape(), (2, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); @@ -576,7 +576,7 @@ mod tests { #[test] fn test_invalid_sql() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let result = reader.execute("INVALID SQL SYNTAX"); + let result = reader.execute_sql("INVALID SQL SYNTAX"); assert!(result.is_err()); } @@ -598,7 +598,7 @@ mod tests { .unwrap(); let df = reader - .execute("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") + .execute_sql("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") .unwrap(); assert_eq!(df.shape(), (2, 2)); @@ -620,7 +620,7 @@ mod tests { reader.register("my_table", df).unwrap(); // Query the registered table - let result = reader.execute("SELECT * FROM my_table ORDER BY x").unwrap(); + let result = reader.execute_sql("SELECT * FROM my_table ORDER BY x").unwrap(); assert_eq!(result.shape(), (3, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } @@ -698,7 +698,7 @@ mod tests { reader.register("empty_table", df).unwrap(); // Query should return empty result with correct schema - let result = reader.execute("SELECT * FROM empty_table").unwrap(); + let result = reader.execute_sql("SELECT * FROM empty_table").unwrap(); assert_eq!(result.shape(), (0, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 762c0319..ed41a62c 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -17,12 +17,12 @@ //! //! // Basic usage //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -//! let df = reader.execute("SELECT * FROM table")?; +//! let df = reader.execute_sql("SELECT * FROM table")?; //! //! 
// With DataFrame registration //! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; //! reader.register("my_table", some_dataframe)?; -//! let result = reader.execute("SELECT * FROM my_table")?; +//! let result = reader.execute_sql("SELECT * FROM my_table")?; //! ``` use crate::{DataFrame, GgsqlError, Result}; @@ -53,7 +53,7 @@ pub use duckdb::DuckDBReader; /// reader.register("sales", sales_df)?; /// /// // Now you can query it -/// let result = reader.execute("SELECT * FROM sales WHERE amount > 100")?; +/// let result = reader.execute_sql("SELECT * FROM sales WHERE amount > 100")?; /// ``` pub trait Reader { /// Execute a SQL query and return the result as a DataFrame @@ -72,7 +72,7 @@ pub trait Reader { /// - The SQL is invalid /// - The connection fails /// - The table or columns don't exist - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; /// Register a DataFrame as a queryable table (takes ownership) /// From d74bca3a269efb2e89e92763d38abdb12c02943c Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 13:35:28 +0000 Subject: [PATCH 07/12] Switch from api.prepare() to reader.execute() --- CLAUDE.md | 66 +++++----- ggsql-jupyter/src/executor.rs | 13 +- ggsql-python/README.md | 41 +++--- ggsql-python/python/ggsql/__init__.py | 14 +- ggsql-python/src/lib.rs | 109 ++++++++++------ ggsql-python/tests/test_ggsql.py | 177 +++++++++++++------------- src/api.rs | 95 +++++--------- src/cli.rs | 18 +-- src/doc/API.md | 94 +++++++------- src/lib.rs | 4 +- src/reader/mod.rs | 69 +++++++++- src/rest.rs | 16 +-- 12 files changed, 392 insertions(+), 324 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 1ac04a87..8c0cba03 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -152,28 +152,28 @@ DRAW line MAPPING month AS x, total AS y ### Quick Start ```rust -use ggsql::{prepare, reader::DuckDBReader, writer::VegaLiteWriter}; +use ggsql::reader::{DuckDBReader, Reader}; +use ggsql::writer::VegaLiteWriter; // 
Create a reader let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -// Prepare the visualization -let prepared = ggsql::prepare( - "SELECT x, y FROM data VISUALISE x, y DRAW point", - &reader +// Execute the ggsql query +let spec = reader.execute( + "SELECT x, y FROM data VISUALISE x, y DRAW point" )?; // Render to Vega-Lite JSON let writer = VegaLiteWriter::new(); -let json = prepared.render(&writer)?; +let json = spec.render(&writer)?; ``` ### Core Functions | Function | Purpose | | ------------------------ | ------------------------------------------------------ | -| `prepare(query, reader)` | Main entry point: parse, execute SQL, resolve mappings | -| `render(writer)` | Generate output (Vega-Lite JSON) from prepared data | +| `reader.execute(query)` | Main entry point: parse, execute SQL, resolve mappings | +| `spec.render(writer)` | Generate output (Vega-Lite JSON) from Spec | | `validate(query)` | Validate syntax + semantics, inspect query structure | ### Key Types @@ -188,12 +188,12 @@ let json = prepared.render(&writer)?; - `errors()` - Validation errors - `warnings()` - Validation warnings -**`Prepared`** - Result of `prepare()`, ready for rendering: +**`Spec`** - Result of `reader.execute()`, ready for rendering: - `render(writer)` - Generate output (Vega-Lite JSON) - `plot()` - Resolved plot specification - `metadata()` - Rows, columns, layer count -- `warnings()` - Validation warnings from preparation +- `warnings()` - Validation warnings from execution - `data()` / `layer_data(i)` / `stat_data(i)` - Access DataFrames - `sql()` / `visual()` / `layer_sql(i)` / `stat_sql(i)` - Query introspection @@ -869,7 +869,7 @@ When running in Positron IDE, the extension provides enhanced functionality: **Features**: - PyO3-based Rust bindings compiled to a native Python extension -- Two-stage API mirroring the Rust API: `prepare()` → `render()` +- Two-stage API mirroring the Rust API: `reader.execute()` → `render()` - DuckDB reader with DataFrame 
registration - Custom Python reader support: any object with `execute_sql(sql) -> DataFrame` method - Works with any narwhals-compatible DataFrame (polars, pandas, etc.) @@ -897,20 +897,19 @@ reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader.register("data", df) -# Prepare visualization -prepared = ggsql.prepare( - "SELECT * FROM data VISUALISE x, y DRAW point", - reader +# Execute visualization +spec = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point" ) # Inspect metadata -print(f"Rows: {prepared.metadata()['rows']}") -print(f"Columns: {prepared.metadata()['columns']}") -print(f"SQL: {prepared.sql()}") +print(f"Rows: {spec.metadata()['rows']}") +print(f"Columns: {spec.metadata()['columns']}") +print(f"SQL: {spec.sql()}") # Render to Vega-Lite JSON writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +json_output = spec.render(writer) ``` **Convenience Function** (`render_altair`): @@ -943,22 +942,23 @@ print(f"Errors: {validated.errors()}") **Classes**: -| Class | Description | -| -------------------------- | -------------------------------------------- | -| `DuckDBReader(connection)` | Database reader with DataFrame registration | -| `VegaLiteWriter()` | Vega-Lite JSON output writer | -| `Validated` | Result of `validate()` with query inspection | -| `Prepared` | Result of `prepare()`, ready for rendering | +| Class | Description | +| -------------------------- | ------------------------------------------------ | +| `DuckDBReader(connection)` | Database reader with DataFrame registration | +| `VegaLiteWriter()` | Vega-Lite JSON output writer | +| `Validated` | Result of `validate()` with query inspection | +| `Spec` | Result of `reader.execute()`, ready for rendering | **Functions**: -| Function | Description | -| ------------------------ | ------------------------------------------------- | -| `validate(query)` | Syntax/semantic validation with query inspection | -| 
`prepare(query, reader)` | Full preparation (reader can be native or custom) | -| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | +| Function | Description | +| -------------------------- | ------------------------------------------------- | +| `validate(query)` | Syntax/semantic validation with query inspection | +| `reader.execute(query)` | Execute ggsql query, return Spec | +| `execute(query, reader)` | Execute with custom reader (bridge path) | +| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | -**Prepared Object Methods**: +**Spec Methods**: | Method | Description | | ---------------- | -------------------------------------------- | @@ -988,9 +988,9 @@ class MyReader: def execute_sql(self, sql: str) -> pl.DataFrame: return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -# Use custom reader with prepare() +# Use custom reader with ggsql.execute() reader = MyReader() -prepared = ggsql.prepare( +spec = ggsql.execute( "SELECT * FROM data VISUALISE x, y DRAW point", reader ) diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index b2415228..42b541d8 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -5,7 +5,6 @@ use anyhow::Result; use ggsql::{ - prepare, reader::{DuckDBReader, Reader}, validate, writer::VegaLiteWriter, @@ -69,17 +68,17 @@ impl QueryExecutor { return Ok(ExecutionResult::DataFrame(df)); } - // 3. Prepare data using the new API - let prepared = prepare(code, &self.reader)?; + // 3. Execute ggsql query using reader + let spec = self.reader.execute(code)?; tracing::info!( - "Data prepared: {} rows, {} layers", - prepared.metadata().rows, - prepared.metadata().layer_count + "Query executed: {} rows, {} layers", + spec.metadata().rows, + spec.metadata().layer_count ); // 4. 
Render to Vega-Lite - let vega_json = prepared.render(&self.writer)?; + let vega_json = spec.render(&self.writer)?; tracing::debug!("Generated Vega-Lite spec: {} chars", vega_json.len()); diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 22b5fb8b..08e9b848 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -84,30 +84,29 @@ df = pl.DataFrame({ }) reader.register("sales", df) -# 3. Prepare the visualization -prepared = ggsql.prepare( +# 3. Execute the ggsql query +spec = reader.execute( """ SELECT * FROM sales VISUALISE date AS x, revenue AS y, region AS color DRAW line LABEL title => 'Sales by Region' - """, - reader + """ ) # 4. Inspect metadata -print(f"Rows: {prepared.metadata()['rows']}") -print(f"Columns: {prepared.metadata()['columns']}") -print(f"Layers: {prepared.layer_count()}") +print(f"Rows: {spec.metadata()['rows']}") +print(f"Columns: {spec.metadata()['columns']}") +print(f"Layers: {spec.layer_count()}") # 5. Inspect SQL/VISUALISE portions and data -print(f"SQL: {prepared.sql()}") -print(f"Visual: {prepared.visual()}") -print(prepared.data()) # Returns polars DataFrame +print(f"SQL: {spec.sql()}") +print(f"Visual: {spec.visual()}") +print(spec.data()) # Returns polars DataFrame # 6. Render to Vega-Lite JSON writer = ggsql.VegaLiteWriter() -vegalite_json = prepared.render(writer) +vegalite_json = spec.render(writer) print(vegalite_json) ``` @@ -136,7 +135,7 @@ Writer that generates Vega-Lite v6 JSON specifications. ```python writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +json_output = spec.render(writer) ``` #### `Validated` @@ -152,9 +151,9 @@ Result of `validate()` containing query analysis without SQL execution. - `errors() -> list[dict]` - Validation errors with messages and locations - `warnings() -> list[dict]` - Validation warnings -#### `Prepared` +#### `Spec` -Result of `prepare()`, containing resolved visualization ready for rendering. 
+Result of `reader.execute()`, containing resolved visualization ready for rendering. **Methods:** @@ -168,7 +167,7 @@ Result of `prepare()`, containing resolved visualization ready for rendering. - `stat_data(index: int) -> polars.DataFrame | None` - Statistical transform data - `layer_sql(index: int) -> str | None` - Layer filter SQL - `stat_sql(index: int) -> str | None` - Stat transform SQL -- `warnings() -> list[dict]` - Validation warnings from preparation +- `warnings() -> list[dict]` - Validation warnings from execution ### Functions @@ -185,13 +184,13 @@ else: print(f"Error: {error['message']}") ``` -#### `prepare(query: str, reader: DuckDBReader) -> Prepared` +#### `reader.execute(query: str) -> Spec` -Parse, validate, and execute a ggsql query. +Execute a ggsql query and return the visualization specification. ```python reader = ggsql.DuckDBReader("duckdb://memory") -prepared = ggsql.prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") ``` #### `render_altair(df, viz: str, **kwargs) -> altair.Chart` @@ -253,14 +252,14 @@ class CSVReader: # A real implementation would parse SQL to determine which file to load return pl.read_csv(f"{self.data_dir}/data.csv") -# Use custom reader with prepare() +# Use custom reader with ggsql.execute() reader = CSVReader("/path/to/data") -prepared = ggsql.prepare( +spec = ggsql.execute( "SELECT * FROM data VISUALISE x, y DRAW point", reader ) writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +json_output = spec.render(writer) ``` **Optional methods** for custom readers: diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py index 06b5f720..de159d2a 100644 --- a/ggsql-python/python/ggsql/__init__.py +++ b/ggsql-python/python/ggsql/__init__.py @@ -11,9 +11,9 @@ DuckDBReader, VegaLiteWriter, Validated, - Prepared, + Spec, validate, - prepare, + execute, ) __all__ = [ @@ -21,10 +21,10 
@@ "DuckDBReader", "VegaLiteWriter", "Validated", - "Prepared", + "Spec", # Functions "validate", - "prepare", + "execute", "render_altair", ] __version__ = "0.1.0" @@ -81,10 +81,10 @@ def render_altair( # Build full query: SELECT * FROM __data__ + VISUALISE clause query = f"SELECT * FROM __data__ {viz}" - # Prepare and render - prepared = prepare(query, reader) + # Execute and render + spec = reader.execute(query) writer = VegaLiteWriter() - vegalite_json = prepared.render(writer) + vegalite_json = spec.render(writer) # Parse to determine the correct Altair class spec = json.loads(vegalite_json) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 726db016..5be68c42 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -6,7 +6,7 @@ use pyo3::prelude::*; use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::api::{prepare as rust_prepare, validate as rust_validate, Prepared, ValidationWarning}; +use ggsql::api::{validate as rust_validate, Spec, ValidationWarning}; use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; use ggsql::GgsqlError; @@ -120,7 +120,7 @@ fn warnings_to_pylist(py: Python<'_>, warnings: &[ValidationWarning]) -> PyResul /// Bridges a Python reader object to the Rust Reader trait. /// -/// This allows any Python object with an `execute(sql: str) -> polars.DataFrame` +/// This allows any Python object with an `execute_sql(sql: str) -> polars.DataFrame` /// method to be used as a ggsql reader. 
struct PyReaderBridge { obj: Py, @@ -130,9 +130,9 @@ impl Reader for PyReaderBridge { fn execute_sql(&self, sql: &str) -> ggsql::Result { Python::attach(|py| { let bound = self.obj.bind(py); - let result = bound - .call_method1("execute_sql", (sql,)) - .map_err(|e| GgsqlError::ReaderError(format!("Reader.execute_sql() failed: {}", e)))?; + let result = bound.call_method1("execute_sql", (sql,)).map_err(|e| { + GgsqlError::ReaderError(format!("Reader.execute_sql() failed: {}", e)) + })?; py_to_polars_inner(&result).map_err(|e| GgsqlError::ReaderError(e.to_string())) }) } @@ -170,8 +170,8 @@ macro_rules! try_native_readers { ($query:expr, $reader:expr, $($native_type:ty),*) => {{ $( if let Ok(native) = $reader.downcast::<$native_type>() { - return rust_prepare($query, &native.borrow().inner) - .map(|p| PyPrepared { inner: p }) + return native.borrow().inner.execute($query) + .map(|s| PySpec { inner: s }) .map_err(|e| PyErr::new::(e.to_string())); } )* @@ -281,6 +281,39 @@ impl PyDuckDBReader { fn supports_register(&self) -> bool { self.inner.supports_register() } + + /// Execute a ggsql query and return the visualization specification. + /// + /// This is the main entry point for creating visualizations. It parses + /// the query, executes the SQL portion, and returns a Spec ready + /// for rendering. + /// + /// Parameters + /// ---------- + /// query : str + /// The ggsql query (SQL + VISUALISE clause). + /// + /// Returns + /// ------- + /// Spec + /// The resolved visualization specification ready for rendering. + /// + /// Raises + /// ------ + /// ValueError + /// If the query syntax is invalid, has no VISUALISE clause, or SQL execution fails. 
+ /// + /// Examples + /// -------- + /// >>> reader = DuckDBReader("duckdb://memory") + /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + /// >>> json_output = spec.render(VegaLiteWriter()) + fn execute(&self, query: &str) -> PyResult { + self.inner + .execute(query) + .map(|s| PySpec { inner: s }) + .map_err(|e| PyErr::new::(e.to_string())) + } } // ============================================================================ @@ -289,13 +322,13 @@ impl PyDuckDBReader { /// Vega-Lite JSON output writer. /// -/// Converts prepared visualization specifications to Vega-Lite v6 JSON. +/// Converts visualization specifications to Vega-Lite v6 JSON. /// /// Examples /// -------- /// >>> writer = VegaLiteWriter() -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> json_output = prepared.render(writer) +/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") +/// >>> json_output = spec.render(writer) #[pyclass(name = "VegaLiteWriter")] struct PyVegaLiteWriter { inner: RustVegaLiteWriter, @@ -399,26 +432,26 @@ impl PyValidated { } // ============================================================================ -// PyPrepared +// PySpec // ============================================================================ -/// Result of prepare(), ready for rendering. +/// Result of reader.execute(), ready for rendering. /// /// Contains the resolved plot specification, data, and metadata. /// Use render() to generate Vega-Lite JSON output. 
/// /// Examples /// -------- -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> print(f"Rows: {prepared.metadata()['rows']}") -/// >>> json_output = prepared.render(VegaLiteWriter()) -#[pyclass(name = "Prepared")] -struct PyPrepared { - inner: Prepared, +/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") +/// >>> print(f"Rows: {spec.metadata()['rows']}") +/// >>> json_output = spec.render(VegaLiteWriter()) +#[pyclass(name = "Spec")] +struct PySpec { + inner: Spec, } #[pymethods] -impl PyPrepared { +impl PySpec { /// Render to output format (Vega-Lite JSON). /// /// Parameters @@ -626,21 +659,24 @@ fn validate(query: &str) -> PyResult { }) } -/// Prepare a query for visualization. Main entry point for the Rust API. +/// Execute a ggsql query using a custom Python reader. +/// +/// This is a convenience function for custom readers. For native readers, +/// prefer using `reader.execute()` directly. /// /// Parameters /// ---------- /// query : str -/// The ggsql query to prepare. -/// reader : DuckDBReader | object -/// The database reader to execute SQL against. Can be a native DuckDBReader +/// The ggsql query to execute. +/// reader : Reader | object +/// The database reader to execute SQL against. Can be a native Reader /// for optimal performance, or any Python object with an -/// `execute(sql: str) -> polars.DataFrame` method. +/// `execute_sql(sql: str) -> polars.DataFrame` method. /// /// Returns /// ------- -/// Prepared -/// A prepared visualization ready for rendering. +/// Spec +/// The resolved visualization specification ready for rendering. 
/// /// Raises /// ------ @@ -649,19 +685,19 @@ fn validate(query: &str) -> PyResult { /// /// Examples /// -------- -/// >>> # Using native reader (fast path) +/// >>> # Using native reader (prefer reader.execute() instead) /// >>> reader = DuckDBReader("duckdb://memory") -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> json_output = prepared.render(VegaLiteWriter()) +/// >>> spec = execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> json_output = spec.render(VegaLiteWriter()) /// /// >>> # Using custom Python reader /// >>> class MyReader: -/// ... def execute(self, sql: str) -> pl.DataFrame: +/// ... def execute_sql(self, sql: str) -> pl.DataFrame: /// ... return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) /// >>> reader = MyReader() -/// >>> prepared = prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) +/// >>> spec = execute("SELECT * FROM data VISUALISE x, y DRAW point", reader) #[pyfunction] -fn prepare(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { +fn execute(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { // Fast path: try all known native reader types // Add new native readers to this list as they're implemented try_native_readers!(query, reader, PyDuckDBReader); @@ -670,8 +706,9 @@ fn prepare(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { let bridge = PyReaderBridge { obj: reader.clone().unbind(), }; - rust_prepare(query, &bridge) - .map(|p| PyPrepared { inner: p }) + bridge + .execute(query) + .map(|s| PySpec { inner: s }) .map_err(|e| PyErr::new::(e.to_string())) } @@ -685,11 +722,11 @@ fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; + m.add_class::()?; // Functions m.add_function(wrap_pyfunction!(validate, m)?)?; - m.add_function(wrap_pyfunction!(prepare, m)?)?; + m.add_function(wrap_pyfunction!(execute, m)?)?; Ok(()) } diff --git a/ggsql-python/tests/test_ggsql.py 
b/ggsql-python/tests/test_ggsql.py index b8614d0a..e54df2e8 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -3,7 +3,7 @@ These tests focus on Python-specific logic: - DataFrame conversion via narwhals - Return type handling -- Two-stage API (prepare -> render) +- Two-stage API (reader.execute() -> render) Rust logic (parsing, Vega-Lite generation) is tested in the Rust test suite. """ @@ -93,112 +93,110 @@ def test_create_writer(self): assert writer is not None -class TestPrepare: - """Tests for prepare() function.""" +class TestExecute: + """Tests for reader.execute() method.""" - def test_prepare_simple_query(self): + def test_execute_simple_query(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) - assert prepared is not None - assert prepared.layer_count() == 1 + assert spec is not None + assert spec.layer_count() == 1 - def test_prepare_with_registered_data(self): + def test_execute_with_registered_data(self): reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader.register("data", df) - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) - assert prepared.metadata()["rows"] == 3 + spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") + assert spec.metadata()["rows"] == 3 - def test_prepare_metadata(self): + def test_execute_metadata(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( + spec = reader.execute( "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) " "VISUALISE x, y DRAW point", - reader, ) - metadata = prepared.metadata() + metadata = spec.metadata() assert metadata["rows"] == 3 assert "x" in metadata["columns"] assert "y" in metadata["columns"] assert metadata["layer_count"] == 1 - def 
test_prepare_sql_accessor(self): + def test_execute_sql_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) - assert "SELECT" in prepared.sql() + assert "SELECT" in spec.sql() - def test_prepare_visual_accessor(self): + def test_execute_visual_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) - assert "VISUALISE" in prepared.visual() + assert "VISUALISE" in spec.visual() - def test_prepare_data_accessor(self): + def test_execute_data_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) - data = prepared.data() + data = spec.data() assert isinstance(data, pl.DataFrame) assert data.shape == (1, 2) - def test_prepare_without_visualise_fails(self): + def test_execute_without_visualise_fails(self): reader = ggsql.DuckDBReader("duckdb://memory") with pytest.raises(ValueError): - ggsql.prepare("SELECT 1 AS x, 2 AS y", reader) + reader.execute("SELECT 1 AS x, 2 AS y") -class TestPreparedRender: - """Tests for Prepared.render() method.""" +class TestSpecRender: + """Tests for Spec.render() method.""" def test_render_to_vegalite(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) + result = spec.render(writer) assert isinstance(result, str) - spec = json.loads(result) - assert "$schema" in spec - assert "vega-lite" in 
spec["$schema"] + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "vega-lite" in spec_dict["$schema"] def test_render_contains_data(self): reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader.register("data", df) - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) - spec = json.loads(result) + result = spec.render(writer) + spec_dict = json.loads(result) # Data should be in the spec (either inline or in datasets) - assert "data" in spec or "datasets" in spec + assert "data" in spec_dict or "datasets" in spec_dict def test_render_multi_layer(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( + spec = reader.execute( "SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) " "VISUALISE " "DRAW point MAPPING x AS x, y AS y " "DRAW line MAPPING x AS x, y AS y", - reader, ) writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) - spec = json.loads(result) - assert "layer" in spec + result = spec.render(writer) + spec_dict = json.loads(result) + assert "layer" in spec_dict class TestRenderAltairDataFrameConversion: @@ -341,10 +339,10 @@ def test_invalid_viz_raises(self): class TestTwoStageAPIIntegration: - """Integration tests for the two-stage prepare -> render API.""" + """Integration tests for the two-stage reader.execute() -> render API.""" def test_end_to_end_workflow(self): - """Complete workflow: create reader, register data, prepare, render.""" + """Complete workflow: create reader, register data, execute, render.""" # Create reader reader = ggsql.DuckDBReader("duckdb://memory") @@ -358,60 +356,59 @@ def test_end_to_end_workflow(self): ) reader.register("sales", df) - # Prepare visualization - prepared = ggsql.prepare( + # Execute visualization + spec = reader.execute( "SELECT * 
FROM sales VISUALISE date AS x, value AS y, region AS color DRAW line", - reader, ) # Verify metadata - assert prepared.metadata()["rows"] == 3 - assert prepared.layer_count() == 1 + assert spec.metadata()["rows"] == 3 + assert spec.layer_count() == 1 # Render to Vega-Lite writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) + result = spec.render(writer) # Verify output - spec = json.loads(result) - assert "$schema" in spec - assert "line" in json.dumps(spec) + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "line" in json.dumps(spec_dict) - def test_can_introspect_prepared(self): - """Test all introspection methods on Prepared.""" + def test_can_introspect_spec(self): + """Test all introspection methods on Spec.""" reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) # All these should work without error - assert prepared.sql() is not None - assert prepared.visual() is not None - assert prepared.layer_count() >= 1 - assert prepared.metadata() is not None - assert prepared.data() is not None - assert prepared.warnings() is not None + assert spec.sql() is not None + assert spec.visual() is not None + assert spec.layer_count() >= 1 + assert spec.metadata() is not None + assert spec.data() is not None + assert spec.warnings() is not None # Layer-specific accessors (may return None) - _ = prepared.layer_data(0) - _ = prepared.stat_data(0) - _ = prepared.layer_sql(0) - _ = prepared.stat_sql(0) + _ = spec.layer_data(0) + _ = spec.stat_data(0) + _ = spec.layer_sql(0) + _ = spec.stat_sql(0) class TestCustomReader: """Tests for custom Python reader support.""" def test_simple_custom_reader(self): - """Custom reader with execute() method works.""" + """Custom reader with execute_sql() method works.""" class SimpleReader: def execute_sql(self, sql: str) -> pl.DataFrame: return 
pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader = SimpleReader() - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) - assert prepared.metadata()["rows"] == 3 + spec = ggsql.execute("SELECT * FROM data VISUALISE x, y DRAW point", reader) + assert spec.metadata()["rows"] == 3 def test_custom_reader_with_register(self): """Custom reader with register() support.""" @@ -433,10 +430,10 @@ def register(self, name: str, df: pl.DataFrame) -> None: self.tables[name] = df reader = RegisterReader() - prepared = ggsql.prepare( + spec = ggsql.execute( "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader ) - assert prepared is not None + assert spec is not None def test_custom_reader_error_handling(self): """Custom reader errors are propagated.""" @@ -447,7 +444,7 @@ def execute_sql(self, sql: str) -> pl.DataFrame: reader = ErrorReader() with pytest.raises(ValueError, match="Custom reader error"): - ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) + ggsql.execute("SELECT 1 VISUALISE x, y DRAW point", reader) def test_custom_reader_wrong_return_type(self): """Custom reader returning wrong type raises TypeError.""" @@ -458,15 +455,15 @@ def execute_sql(self, sql: str): reader = WrongTypeReader() with pytest.raises((ValueError, TypeError)): - ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) + ggsql.execute("SELECT 1 VISUALISE x, y DRAW point", reader) def test_native_reader_fast_path(self): """Native DuckDBReader still works (fast path).""" reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) - assert prepared.metadata()["rows"] == 1 + assert spec.metadata()["rows"] == 1 def test_custom_reader_can_render(self): """Custom reader result can be rendered to Vega-Lite.""" @@ -482,20 +479,20 @@ def execute_sql(self, sql: str) -> pl.DataFrame: ) reader = 
StaticReader() - prepared = ggsql.prepare( + spec = ggsql.execute( "SELECT * FROM data VISUALISE x, y, category AS color DRAW point", reader, ) writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) + result = spec.render(writer) - spec = json.loads(result) - assert "$schema" in spec - assert "vega-lite" in spec["$schema"] + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "vega-lite" in spec_dict["$schema"] - def test_custom_reader_execute_called(self): - """Verify execute() is called on the custom reader.""" + def test_custom_reader_execute_sql_called(self): + """Verify execute_sql() is called on the custom reader.""" class RecordingReader: def __init__(self): @@ -506,12 +503,12 @@ def execute_sql(self, sql: str) -> pl.DataFrame: return pl.DataFrame({"x": [1], "y": [2]}) reader = RecordingReader() - ggsql.prepare( + ggsql.execute( "SELECT * FROM data VISUALISE x, y DRAW point", reader, ) - # execute() should have been called at least once + # execute_sql() should have been called at least once assert len(reader.execute_calls) > 0 # All calls should be valid SQL strings assert all(isinstance(sql, str) for sql in reader.execute_calls) diff --git a/src/api.rs b/src/api.rs index 1158f5ef..b715c459 100644 --- a/src/api.rs +++ b/src/api.rs @@ -1,6 +1,6 @@ //! High-level ggsql API. //! -//! Two-stage API: `prepare()` → `render()`. +//! Two-stage API: `reader.execute()` → `render()`. use crate::naming; use crate::parser; @@ -8,11 +8,6 @@ use crate::plot::Plot; use crate::{DataFrame, Result}; use std::collections::HashMap; -#[cfg(feature = "duckdb")] -use crate::execute::prepare_data_with_executor; -#[cfg(feature = "duckdb")] -use crate::reader::Reader; - #[cfg(feature = "vegalite")] use crate::writer::Writer; @@ -20,8 +15,8 @@ use crate::writer::Writer; // Core Types // ============================================================================ -/// Result of `prepare()`, ready for rendering. 
-pub struct Prepared { +/// Result of `reader.execute()`, ready for rendering. +pub struct Spec { /// Single resolved plot specification plot: Plot, /// Internal data map (global + layer-specific DataFrames) @@ -40,8 +35,8 @@ pub struct Prepared { warnings: Vec, } -impl Prepared { - /// Create a new Prepared from PreparedData +impl Spec { + /// Create a new Spec from PreparedData pub(crate) fn new( plot: Plot, data: HashMap, @@ -237,27 +232,6 @@ pub struct Location { // High-Level API Functions // ============================================================================ -/// Prepare a query for visualization. Main entry point for the two-stage API. -#[cfg(feature = "duckdb")] -pub fn prepare(query: &str, reader: &dyn Reader) -> Result { - // Run validation first to capture warnings - let validated = validate(query)?; - let warnings: Vec = validated.warnings().to_vec(); - - // Prepare data (this also validates, but we want the warnings from above) - let prepared_data = prepare_data_with_executor(query, |sql| reader.execute_sql(sql))?; - - Ok(Prepared::new( - prepared_data.spec, - prepared_data.data, - prepared_data.sql, - prepared_data.visual, - prepared_data.layer_sql, - prepared_data.stat_sql, - warnings, - )) -} - /// Validate query syntax and semantics without executing SQL. 
pub fn validate(query: &str) -> Result { let mut errors = Vec::new(); @@ -427,35 +401,34 @@ mod tests { #[cfg(all(feature = "duckdb", feature = "vegalite"))] #[test] - fn test_prepare_and_render() { - use crate::reader::DuckDBReader; + fn test_execute_and_render() { + use crate::reader::{DuckDBReader, Reader}; use crate::writer::VegaLiteWriter; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let prepared = prepare("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point", &reader).unwrap(); + let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point").unwrap(); - assert_eq!(prepared.plot().layers.len(), 1); - assert_eq!(prepared.metadata().layer_count, 1); - assert!(prepared.data().is_some()); + assert_eq!(spec.plot().layers.len(), 1); + assert_eq!(spec.metadata().layer_count, 1); + assert!(spec.data().is_some()); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = spec.render(&writer).unwrap(); assert!(result.contains("point")); } #[cfg(all(feature = "duckdb", feature = "vegalite"))] #[test] - fn test_prepare_metadata() { - use crate::reader::DuckDBReader; + fn test_execute_metadata() { + use crate::reader::{DuckDBReader, Reader}; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let prepared = prepare( + let spec = reader.execute( "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) VISUALISE x, y DRAW point", - &reader, ) .unwrap(); - let metadata = prepared.metadata(); + let metadata = spec.metadata(); assert_eq!(metadata.rows, 3); assert_eq!(metadata.columns.len(), 2); assert!(metadata.columns.contains(&"x".to_string())); @@ -465,8 +438,8 @@ mod tests { #[cfg(all(feature = "duckdb", feature = "vegalite"))] #[test] - fn test_prepare_with_cte() { - use crate::reader::DuckDBReader; + fn test_execute_with_cte() { + use crate::reader::{DuckDBReader, Reader}; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); 
let query = r#" @@ -477,18 +450,18 @@ mod tests { VISUALISE x, y DRAW point "#; - let prepared = prepare(query, &reader).unwrap(); + let spec = reader.execute(query).unwrap(); - assert_eq!(prepared.plot().layers.len(), 1); - assert!(prepared.data().is_some()); - let df = prepared.data().unwrap(); + assert_eq!(spec.plot().layers.len(), 1); + assert!(spec.data().is_some()); + let df = spec.data().unwrap(); assert_eq!(df.height(), 2); } #[cfg(all(feature = "duckdb", feature = "vegalite"))] #[test] fn test_render_multi_layer() { - use crate::reader::DuckDBReader; + use crate::reader::{DuckDBReader, Reader}; use crate::writer::VegaLiteWriter; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); @@ -499,9 +472,9 @@ mod tests { DRAW line MAPPING x AS x, y AS y "#; - let prepared = prepare(query, &reader).unwrap(); + let spec = reader.execute(query).unwrap(); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = spec.render(&writer).unwrap(); assert!(result.contains("layer")); } @@ -524,13 +497,13 @@ mod tests { reader.register("my_data", df).unwrap(); let query = "SELECT * FROM my_data VISUALISE x, y DRAW point"; - let prepared = prepare(query, &reader).unwrap(); + let spec = reader.execute(query).unwrap(); - assert_eq!(prepared.metadata().rows, 3); - assert!(prepared.metadata().columns.contains(&"x".to_string())); + assert_eq!(spec.metadata().rows, 3); + assert!(spec.metadata().columns.contains(&"x".to_string())); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = spec.render(&writer).unwrap(); assert!(result.contains("point")); } @@ -566,19 +539,19 @@ mod tests { DRAW bar "#; - let prepared = prepare(query, &reader).unwrap(); - assert_eq!(prepared.metadata().rows, 3); + let spec = reader.execute(query).unwrap(); + assert_eq!(spec.metadata().rows, 3); } #[cfg(feature = "duckdb")] #[test] - fn test_prepare_no_viz_fails() { - use 
crate::reader::DuckDBReader; + fn test_execute_no_viz_fails() { + use crate::reader::{DuckDBReader, Reader}; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let query = "SELECT 1 as x, 2 as y"; - let result = prepare(query, &reader); + let result = reader.execute(query); assert!(result.is_err()); } diff --git a/src/cli.rs b/src/cli.rs index c8ad4e5c..ee97eca1 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -12,7 +12,7 @@ use std::path::PathBuf; #[cfg(feature = "duckdb")] use ggsql::reader::{DuckDBReader, Reader}; #[cfg(feature = "duckdb")] -use ggsql::{prepare, validate}; +use ggsql::validate; #[cfg(feature = "vegalite")] use ggsql::writer::VegaLiteWriter; @@ -186,24 +186,24 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option p, + // Execute ggsql query + let spec = match db_reader.execute(&query) { + Ok(s) => s, Err(e) => { - eprintln!("Failed to prepare data: {}", e); + eprintln!("Failed to execute query: {}", e); std::process::exit(1); } }; if verbose { - let metadata = prepared.metadata(); - eprintln!("\nData prepared:"); + let metadata = spec.metadata(); + eprintln!("\nQuery executed:"); eprintln!(" Rows: {}", metadata.rows); eprintln!(" Columns: {}", metadata.columns.join(", ")); eprintln!(" Layers: {}", metadata.layer_count); } - if prepared.plot().layers.is_empty() { + if spec.plot().layers.is_empty() { eprintln!("No visualization specifications found"); std::process::exit(1); } @@ -222,7 +222,7 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option r, Err(e) => { eprintln!("Failed to generate Vega-Lite output: {}", e); diff --git a/src/doc/API.md b/src/doc/API.md index ed5af4fe..3cbf9c71 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -4,33 +4,33 @@ This document provides a comprehensive reference for the ggsql public API. 
## Overview -- **Stage 1: `prepare()`** - Parse query, execute SQL, resolve mappings, prepare data -- **Stage 2: `render()`** - Generate output (Vega-Lite JSON, etc.) +- **Stage 1: `reader.execute()`** - Parse query, execute SQL, resolve mappings, create Spec +- **Stage 2: `spec.render()`** - Generate output (Vega-Lite JSON, etc.) ### API Functions -| Function | Use Case | -| ------------ | ---------------------------------------------------- | -| `prepare()` | Main entry point - full visualization pipeline | -| `render()` | Generate output from prepared data | -| `validate()` | Validate syntax + semantics, inspect query structure | +| Function | Use Case | +| ------------------ | ---------------------------------------------------- | +| `reader.execute()` | Main entry point - full visualization pipeline | +| `spec.render()` | Generate output from Spec | +| `validate()` | Validate syntax + semantics, inspect query structure | --- ## Core Functions -### `prepare` +### `Reader::execute` ```rust -pub fn prepare(query: &str, reader: &dyn Reader) -> Result +fn execute(&self, query: &str) -> Result ``` -Prepare a ggsql query for visualization. This is the main entry point for the two-stage API. +Execute a ggsql query for visualization. This is the main entry point - a default method on the Reader trait. -**What happens during preparation:** +**What happens during execution:** 1. Parses the query (SQL + VISUALISE portions) -2. Executes the main SQL query using the provided reader +2. Executes the main SQL query using the reader 3. Resolves wildcards (`VISUALISE *`) against actual columns 4. Merges global mappings into each layer 5. Executes layer-specific queries (filters, stats) @@ -40,31 +40,30 @@ Prepare a ggsql query for visualization. 
This is the main entry point for the tw **Arguments:** - `query` - The full ggsql query string -- `reader` - A reader implementing the `Reader` trait **Returns:** -- `Ok(Prepared)` - Ready for rendering +- `Ok(Spec)` - Ready for rendering - `Err(GgsqlError)` - Parse, validation, or execution error **Example:** ```rust -use ggsql::{prepare, reader::DuckDBReader, writer::VegaLiteWriter}; +use ggsql::reader::{DuckDBReader, Reader}; +use ggsql::writer::VegaLiteWriter; let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -let prepared = prepare( - "SELECT x, y FROM data VISUALISE x, y DRAW point", - &reader +let spec = reader.execute( + "SELECT x, y FROM data VISUALISE x, y DRAW point" )?; // Access metadata -println!("Rows: {}", prepared.metadata().rows); -println!("Columns: {:?}", prepared.metadata().columns); +println!("Rows: {}", spec.metadata().rows); +println!("Columns: {:?}", spec.metadata().columns); // Render to Vega-Lite let writer = VegaLiteWriter::new(); -let result = prepared.render(&writer)?; +let result = spec.render(&writer)?; ``` **Error Conditions:** @@ -184,9 +183,9 @@ if let Some(tree) = validated.tree() { --- -### `Prepared` +### `Spec` -Result of preparing a visualization, ready for rendering. +Result of executing a ggsql query, ready for rendering. #### Rendering Methods @@ -198,7 +197,7 @@ Result of preparing a visualization, ready for rendering. 
```rust let writer = VegaLiteWriter::new(); -let json = prepared.render(&writer)?; +let json = spec.render(&writer)?; println!("{}", json); ``` @@ -212,9 +211,9 @@ println!("{}", json); **Example:** ```rust -println!("Layers: {}", prepared.layer_count()); +println!("Layers: {}", spec.layer_count()); -let plot = prepared.plot(); +let plot = spec.plot(); for (i, layer) in plot.layers.iter().enumerate() { println!("Layer {}: {:?}", i, layer.geom); } @@ -229,7 +228,7 @@ for (i, layer) in plot.layers.iter().enumerate() { **Example:** ```rust -let meta = prepared.metadata(); +let meta = spec.metadata(); println!("Rows: {}", meta.rows); println!("Columns: {:?}", meta.columns); println!("Layer count: {}", meta.layer_count); @@ -248,17 +247,17 @@ println!("Layer count: {}", meta.layer_count); ```rust // Global data -if let Some(df) = prepared.data() { +if let Some(df) = spec.data() { println!("Global data: {} rows", df.height()); } // Layer-specific data (from FILTER or FROM clause) -if let Some(df) = prepared.layer_data(0) { +if let Some(df) = spec.layer_data(0) { println!("Layer 0 has filtered data: {} rows", df.height()); } // Stat data (histogram bins, density estimates, etc.) 
-if let Some(df) = prepared.stat_data(1) { +if let Some(df) = spec.stat_data(1) { println!("Layer 1 stat data: {} rows", df.height()); } ``` @@ -276,15 +275,15 @@ if let Some(df) = prepared.stat_data(1) { ```rust // Main query -println!("SQL: {}", prepared.sql()); -println!("Visual: {}", prepared.visual()); +println!("SQL: {}", spec.sql()); +println!("Visual: {}", spec.visual()); // Per-layer queries -for i in 0..prepared.layer_count() { - if let Some(sql) = prepared.layer_sql(i) { +for i in 0..spec.layer_count() { + if let Some(sql) = spec.layer_sql(i) { println!("Layer {} filter: {}", i, sql); } - if let Some(sql) = prepared.stat_sql(i) { + if let Some(sql) = spec.stat_sql(i) { println!("Layer {} stat: {}", i, sql); } } @@ -292,24 +291,24 @@ for i in 0..prepared.layer_count() { #### Warnings Method -| Method | Signature | Description | -| ---------- | -------------------------------------------- | ------------------------------------ | -| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings from preparation | +| Method | Signature | Description | +| ---------- | -------------------------------------------- | ---------------------------------- | +| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings from execution | **Example:** ```rust -let prepared = ggsql::prepare(query, &reader)?; +let spec = reader.execute(query)?; // Check for warnings -if !prepared.warnings().is_empty() { - for warning in prepared.warnings() { +if !spec.warnings().is_empty() { + for warning in spec.warnings() { eprintln!("Warning: {}", warning.message); } } // Continue with rendering -let json = prepared.render(&writer)?; +let json = spec.render(&writer)?; ``` --- @@ -465,10 +464,10 @@ class Validated: # Note: tree() not exposed (tree-sitter nodes are Rust-only) ``` -#### `Prepared` +#### `Spec` ```python -class Prepared: +class Spec: def render(self, writer: VegaLiteWriter) -> str: """Render to output format.""" @@ -512,6 +511,9 @@ def 
validate(query: str) -> Validated: Returns Validated object with query inspection and validation methods. """ -def prepare(query: str, reader: DuckDBReader) -> Prepared: - """Prepare a query for visualization.""" +def execute(query: str, reader: Any) -> Spec: + """Execute a ggsql query with a custom Python reader. + + For native readers, use reader.execute() method instead. + """ ``` diff --git a/src/lib.rs b/src/lib.rs index c9128f54..15523596 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,7 +27,7 @@ ggsql splits queries at the `VISUALISE` boundary: ## Core Components -- [`api`] - High-level API (prepare, parse, validate) +- [`api`] - High-level API (validate, Spec) - [`parser`] - Query parsing and AST generation - [`reader`] - Data source abstraction layer - [`writer`] - Output format abstraction layer @@ -55,7 +55,7 @@ pub use plot::{ // Re-export API types and functions pub use api::{ - prepare, validate, Location, Metadata, Prepared, Validated, ValidationError, ValidationWarning, + validate, Location, Metadata, Spec, Validated, ValidationError, ValidationWarning, }; // DataFrame abstraction (wraps Polars) diff --git a/src/reader/mod.rs b/src/reader/mod.rs index ed41a62c..0ed80949 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -7,6 +7,7 @@ //! //! All readers implement the `Reader` trait, which provides: //! - SQL query execution → DataFrame conversion +//! - Visualization query execution → Spec //! - Optional DataFrame registration for queryable tables //! - Connection management and error handling //! @@ -14,19 +15,29 @@ //! //! ```rust,ignore //! use ggsql::reader::{Reader, DuckDBReader}; +//! use ggsql::writer::VegaLiteWriter; //! -//! // Basic usage +//! // Execute a ggsql query //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -//! let df = reader.execute_sql("SELECT * FROM table")?; +//! let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; +//! +//! // Render to Vega-Lite JSON +//! 
let writer = VegaLiteWriter::new(); +//! let json = spec.render(&writer)?; //! //! // With DataFrame registration //! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; //! reader.register("my_table", some_dataframe)?; -//! let result = reader.execute_sql("SELECT * FROM my_table")?; +//! let spec = reader.execute("SELECT * FROM my_table VISUALISE x, y DRAW point")?; //! ``` use crate::{DataFrame, GgsqlError, Result}; +#[cfg(feature = "duckdb")] +use crate::api::{validate, Spec, ValidationWarning}; +#[cfg(feature = "duckdb")] +use crate::execute::prepare_data_with_executor; + #[cfg(feature = "duckdb")] pub mod duckdb; @@ -108,4 +119,56 @@ pub trait Reader { fn supports_register(&self) -> bool { false } + + /// Execute a ggsql query and return the visualization specification. + /// + /// This is the main entry point for creating visualizations. It parses the query, + /// executes the SQL portion, and returns a `Spec` ready for rendering. + /// + /// # Arguments + /// + /// * `query` - The ggsql query (SQL + VISUALISE clause) + /// + /// # Returns + /// + /// A `Spec` containing the resolved visualization specification and data. 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The query syntax is invalid + /// - The query has no VISUALISE clause + /// - The SQL execution fails + /// + /// # Example + /// + /// ```rust,ignore + /// use ggsql::reader::{Reader, DuckDBReader}; + /// use ggsql::writer::VegaLiteWriter; + /// + /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; + /// let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; + /// + /// let writer = VegaLiteWriter::new(); + /// let json = spec.render(&writer)?; + /// ``` + #[cfg(feature = "duckdb")] + fn execute(&self, query: &str) -> Result { + // Run validation first to capture warnings + let validated = validate(query)?; + let warnings: Vec = validated.warnings().to_vec(); + + // Prepare data (this also validates, but we want the warnings from above) + let prepared_data = prepare_data_with_executor(query, |sql| self.execute_sql(sql))?; + + Ok(Spec::new( + prepared_data.spec, + prepared_data.data, + prepared_data.sql, + prepared_data.visual, + prepared_data.layer_sql, + prepared_data.stat_sql, + warnings, + )) + } } diff --git a/src/rest.rs b/src/rest.rs index e87a14f9..68d59a54 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -34,9 +34,7 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use ggsql::{parser, validate, GgsqlError, VERSION}; #[cfg(feature = "duckdb")] -use ggsql::prepare; -#[cfg(feature = "duckdb")] -use ggsql::reader::DuckDBReader; +use ggsql::reader::{DuckDBReader, Reader}; #[cfg(feature = "vegalite")] use ggsql::writer::VegaLiteWriter; @@ -443,29 +441,29 @@ async fn query_handler( #[cfg(feature = "duckdb")] if request.reader.starts_with("duckdb://") { // Use shared reader or create new one - let prepared = if request.reader == "duckdb://memory" && state.reader.is_some() { + let spec = if request.reader == "duckdb://memory" && state.reader.is_some() { let reader_mutex = state.reader.as_ref().unwrap(); let reader = reader_mutex 
.lock() .map_err(|e| GgsqlError::InternalError(format!("Failed to lock reader: {}", e)))?; - prepare(&request.query, &*reader)? + reader.execute(&request.query)? } else { let reader = DuckDBReader::from_connection_string(&request.reader)?; - prepare(&request.query, &reader)? + reader.execute(&request.query)? }; // Get metadata - let metadata = prepared.metadata(); + let metadata = spec.metadata(); // Generate visualization output using writer #[cfg(feature = "vegalite")] if request.writer == "vegalite" { let writer = VegaLiteWriter::new(); - let json_output = prepared.render(&writer)?; + let json_output = spec.render(&writer)?; let spec_value: serde_json::Value = serde_json::from_str(&json_output) .map_err(|e| GgsqlError::WriterError(format!("Failed to parse JSON: {}", e)))?; - let plot = prepared.plot(); + let plot = spec.plot(); let result = QueryResult { spec: spec_value, From f652acce4d0bbb0491302ff2595895a95c633057 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 13:53:45 +0000 Subject: [PATCH 08/12] Move Spec out of api.rs --- ggsql-python/src/lib.rs | 3 +- src/api.rs | 154 +--------------------------------------- src/lib.rs | 9 +-- src/reader/mod.rs | 46 ++++++++++-- src/reader/spec.rs | 126 ++++++++++++++++++++++++++++++++ 5 files changed, 176 insertions(+), 162 deletions(-) create mode 100644 src/reader/spec.rs diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 5be68c42..1a4a8922 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -6,7 +6,8 @@ use pyo3::prelude::*; use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::api::{validate as rust_validate, Spec, ValidationWarning}; +use ggsql::api::{validate as rust_validate, ValidationWarning}; +use ggsql::reader::Spec; use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; use ggsql::GgsqlError; diff --git a/src/api.rs b/src/api.rs index b715c459..ab613f2c 100644 --- 
a/src/api.rs +++ b/src/api.rs @@ -1,164 +1,14 @@ //! High-level ggsql API. //! -//! Two-stage API: `reader.execute()` → `render()`. +//! Validation and query inspection without SQL execution. -use crate::naming; use crate::parser; -use crate::plot::Plot; -use crate::{DataFrame, Result}; -use std::collections::HashMap; - -#[cfg(feature = "vegalite")] -use crate::writer::Writer; +use crate::Result; // ============================================================================ // Core Types // ============================================================================ -/// Result of `reader.execute()`, ready for rendering. -pub struct Spec { - /// Single resolved plot specification - plot: Plot, - /// Internal data map (global + layer-specific DataFrames) - data: HashMap, - /// Cached metadata about the prepared visualization - metadata: Metadata, - /// The main SQL query that was executed - sql: String, - /// The raw VISUALISE portion text - visual: String, - /// Per-layer filter/source queries (None = uses global data directly) - layer_sql: Vec>, - /// Per-layer stat transform queries (None = no stat transform) - stat_sql: Vec>, - /// Validation warnings from preparation - warnings: Vec, -} - -impl Spec { - /// Create a new Spec from PreparedData - pub(crate) fn new( - plot: Plot, - data: HashMap, - sql: String, - visual: String, - layer_sql: Vec>, - stat_sql: Vec>, - warnings: Vec, - ) -> Self { - // Compute metadata from data - let (rows, columns) = if let Some(df) = data.get(naming::GLOBAL_DATA_KEY) { - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (df.height(), cols) - } else if let Some(df) = data.values().next() { - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (df.height(), cols) - } else { - (0, Vec::new()) - }; - - let layer_count = plot.layers.len(); - let metadata = Metadata { - rows, - columns, - layer_count, - }; - - Self { - plot, - data, - metadata, - sql, 
- visual, - layer_sql, - stat_sql, - warnings, - } - } - - /// Render to output format (e.g., Vega-Lite JSON). - #[cfg(feature = "vegalite")] - pub fn render(&self, writer: &dyn Writer) -> Result { - writer.write(&self.plot, &self.data) - } - - /// Get the resolved plot specification. - pub fn plot(&self) -> &Plot { - &self.plot - } - - /// Get visualization metadata. - pub fn metadata(&self) -> &Metadata { - &self.metadata - } - - /// Number of layers. - pub fn layer_count(&self) -> usize { - self.plot.layers.len() - } - - /// Get global data (main query result). - pub fn data(&self) -> Option<&DataFrame> { - self.data.get(naming::GLOBAL_DATA_KEY) - } - - /// Get layer-specific data (from FILTER or FROM clause). - pub fn layer_data(&self, layer_index: usize) -> Option<&DataFrame> { - self.data.get(&naming::layer_key(layer_index)) - } - - /// Get stat transform data (e.g., histogram bins, density estimates). - pub fn stat_data(&self, layer_index: usize) -> Option<&DataFrame> { - self.layer_data(layer_index) - } - - /// Get internal data map (all DataFrames by key). - pub fn data_map(&self) -> &HashMap { - &self.data - } - - /// The main SQL query that was executed. - pub fn sql(&self) -> &str { - &self.sql - } - - /// The VISUALISE portion (raw text). - pub fn visual(&self) -> &str { - &self.visual - } - - /// Layer filter/source query, or `None` if using global data. - pub fn layer_sql(&self, layer_index: usize) -> Option<&str> { - self.layer_sql.get(layer_index).and_then(|s| s.as_deref()) - } - - /// Stat transform query, or `None` if no stat transform. - pub fn stat_sql(&self, layer_index: usize) -> Option<&str> { - self.stat_sql.get(layer_index).and_then(|s| s.as_deref()) - } - - /// Validation warnings from preparation. - pub fn warnings(&self) -> &[ValidationWarning] { - &self.warnings - } -} - -/// Metadata about the prepared visualization. 
-#[derive(Debug, Clone)] -pub struct Metadata { - pub rows: usize, - pub columns: Vec, - pub layer_count: usize, -} - /// Result of `validate()` - query inspection and validation without SQL execution. pub struct Validated { sql: String, diff --git a/src/lib.rs b/src/lib.rs index 15523596..a0dd8cb7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,7 +27,7 @@ ggsql splits queries at the `VISUALISE` boundary: ## Core Components -- [`api`] - High-level API (validate, Spec) +- [`api`] - Validation API (validate, Validated) - [`parser`] - Query parsing and AST generation - [`reader`] - Data source abstraction layer - [`writer`] - Output format abstraction layer @@ -54,9 +54,10 @@ pub use plot::{ }; // Re-export API types and functions -pub use api::{ - validate, Location, Metadata, Spec, Validated, ValidationError, ValidationWarning, -}; +pub use api::{validate, Location, Validated, ValidationError, ValidationWarning}; + +// Re-export reader types +pub use reader::{Metadata, Spec}; // DataFrame abstraction (wraps Polars) pub use polars::prelude::DataFrame; diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 0ed80949..7da9130b 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -31,23 +31,59 @@ //! let spec = reader.execute("SELECT * FROM my_table VISUALISE x, y DRAW point")?; //! 
``` -use crate::{DataFrame, GgsqlError, Result}; +use std::collections::HashMap; -#[cfg(feature = "duckdb")] -use crate::api::{validate, Spec, ValidationWarning}; -#[cfg(feature = "duckdb")] +use crate::api::{validate, ValidationWarning}; use crate::execute::prepare_data_with_executor; +use crate::plot::Plot; +use crate::{DataFrame, GgsqlError, Result}; #[cfg(feature = "duckdb")] pub mod duckdb; pub mod connection; - pub mod data; +mod spec; #[cfg(feature = "duckdb")] pub use duckdb::DuckDBReader; +// ============================================================================ +// Spec - Result of reader.execute() +// ============================================================================ + +/// Result of executing a ggsql query, ready for rendering. +pub struct Spec { + /// Single resolved plot specification + pub(crate) plot: Plot, + /// Internal data map (global + layer-specific DataFrames) + pub(crate) data: HashMap, + /// Cached metadata about the prepared visualization + pub(crate) metadata: Metadata, + /// The main SQL query that was executed + pub(crate) sql: String, + /// The raw VISUALISE portion text + pub(crate) visual: String, + /// Per-layer filter/source queries (None = uses global data directly) + pub(crate) layer_sql: Vec>, + /// Per-layer stat transform queries (None = no stat transform) + pub(crate) stat_sql: Vec>, + /// Validation warnings from preparation + pub(crate) warnings: Vec, +} + +/// Metadata about the prepared visualization. +#[derive(Debug, Clone)] +pub struct Metadata { + pub rows: usize, + pub columns: Vec, + pub layer_count: usize, +} + +// ============================================================================ +// Reader Trait +// ============================================================================ + /// Trait for data source readers /// /// Readers execute SQL queries and return Polars DataFrames. 
diff --git a/src/reader/spec.rs b/src/reader/spec.rs new file mode 100644 index 00000000..a8c08914 --- /dev/null +++ b/src/reader/spec.rs @@ -0,0 +1,126 @@ +//! Implementation of Spec methods. + +use std::collections::HashMap; + +use crate::api::ValidationWarning; +use crate::naming; +use crate::plot::Plot; +use crate::writer::Writer; +use crate::{DataFrame, Result}; + +use super::{Metadata, Spec}; + +impl Spec { + /// Create a new Spec from PreparedData + pub(crate) fn new( + plot: Plot, + data: HashMap, + sql: String, + visual: String, + layer_sql: Vec>, + stat_sql: Vec>, + warnings: Vec, + ) -> Self { + // Compute metadata from data + let (rows, columns) = if let Some(df) = data.get(naming::GLOBAL_DATA_KEY) { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else if let Some(df) = data.values().next() { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else { + (0, Vec::new()) + }; + + let layer_count = plot.layers.len(); + let metadata = Metadata { + rows, + columns, + layer_count, + }; + + Self { + plot, + data, + metadata, + sql, + visual, + layer_sql, + stat_sql, + warnings, + } + } + + /// Render to output format (e.g., Vega-Lite JSON). + pub fn render(&self, writer: &dyn Writer) -> Result { + writer.write(&self.plot, &self.data) + } + + /// Get the resolved plot specification. + pub fn plot(&self) -> &Plot { + &self.plot + } + + /// Get visualization metadata. + pub fn metadata(&self) -> &Metadata { + &self.metadata + } + + /// Number of layers. + pub fn layer_count(&self) -> usize { + self.plot.layers.len() + } + + /// Get global data (main query result). + pub fn data(&self) -> Option<&DataFrame> { + self.data.get(naming::GLOBAL_DATA_KEY) + } + + /// Get layer-specific data (from FILTER or FROM clause). 
+ pub fn layer_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.data.get(&naming::layer_key(layer_index)) + } + + /// Get stat transform data (e.g., histogram bins, density estimates). + pub fn stat_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.layer_data(layer_index) + } + + /// Get internal data map (all DataFrames by key). + pub fn data_map(&self) -> &HashMap { + &self.data + } + + /// The main SQL query that was executed. + pub fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + pub fn visual(&self) -> &str { + &self.visual + } + + /// Layer filter/source query, or `None` if using global data. + pub fn layer_sql(&self, layer_index: usize) -> Option<&str> { + self.layer_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Stat transform query, or `None` if no stat transform. + pub fn stat_sql(&self, layer_index: usize) -> Option<&str> { + self.stat_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Validation warnings from preparation. 
+ pub fn warnings(&self) -> &[ValidationWarning] { + &self.warnings + } +} From 7eaab8e11c62159f92ade35322fb93de97d55717 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 14:06:56 +0000 Subject: [PATCH 09/12] Move validation to validate.rs --- ggsql-python/src/lib.rs | 2 +- src/lib.rs | 7 +- src/reader/mod.rs | 146 +++++++++++++++++++++++++++++++- src/reader/spec.rs | 2 +- src/{api.rs => validate.rs} | 163 +----------------------------------- 5 files changed, 155 insertions(+), 165 deletions(-) rename src/{api.rs => validate.rs} (60%) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 1a4a8922..45587266 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -6,7 +6,7 @@ use pyo3::prelude::*; use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::api::{validate as rust_validate, ValidationWarning}; +use ggsql::validate::{validate as rust_validate, ValidationWarning}; use ggsql::reader::Spec; use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; diff --git a/src/lib.rs b/src/lib.rs index a0dd8cb7..9bb21554 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,15 +46,15 @@ pub mod writer; #[cfg(feature = "duckdb")] pub mod execute; -pub mod api; +pub mod validate; // Re-export key types for convenience pub use plot::{ AestheticValue, DataSource, Facet, Geom, Layer, Mappings, Plot, Scale, SqlExpression, }; -// Re-export API types and functions -pub use api::{validate, Location, Validated, ValidationError, ValidationWarning}; +// Re-export validation types and functions +pub use validate::{validate, Location, Validated, ValidationError, ValidationWarning}; // Re-export reader types pub use reader::{Metadata, Spec}; @@ -779,4 +779,5 @@ mod integration_tests { assert_eq!(data["__ggsql_const_stroke_0__"], "value"); assert_eq!(data["__ggsql_const_stroke_1__"], "value"); } + } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 
7da9130b..34afb559 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -33,7 +33,7 @@ use std::collections::HashMap; -use crate::api::{validate, ValidationWarning}; +use crate::validate::{validate, ValidationWarning}; use crate::execute::prepare_data_with_executor; use crate::plot::Plot; use crate::{DataFrame, GgsqlError, Result}; @@ -208,3 +208,147 @@ pub trait Reader { )) } } + +#[cfg(test)] +#[cfg(all(feature = "duckdb", feature = "vegalite"))] +mod tests { + use super::*; + use crate::writer::VegaLiteWriter; + + #[test] + fn test_execute_and_render() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let spec = reader + .execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point") + .unwrap(); + + assert_eq!(spec.plot().layers.len(), 1); + assert_eq!(spec.metadata().layer_count, 1); + assert!(spec.data().is_some()); + + let writer = VegaLiteWriter::new(); + let result = spec.render(&writer).unwrap(); + assert!(result.contains("point")); + } + + #[test] + fn test_execute_metadata() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let spec = reader + .execute( + "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) VISUALISE x, y DRAW point", + ) + .unwrap(); + + let metadata = spec.metadata(); + assert_eq!(metadata.rows, 3); + assert_eq!(metadata.columns.len(), 2); + assert!(metadata.columns.contains(&"x".to_string())); + assert!(metadata.columns.contains(&"y".to_string())); + assert_eq!(metadata.layer_count, 1); + } + + #[test] + fn test_execute_with_cte() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + WITH data AS ( + SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) + ) + SELECT * FROM data + VISUALISE x, y DRAW point + "#; + + let spec = reader.execute(query).unwrap(); + + assert_eq!(spec.plot().layers.len(), 1); + assert!(spec.data().is_some()); + let df = spec.data().unwrap(); + assert_eq!(df.height(), 2); + } + + 
#[test] + fn test_render_multi_layer() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) + VISUALISE + DRAW point MAPPING x AS x, y AS y + DRAW line MAPPING x AS x, y AS y + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = spec.render(&writer).unwrap(); + + assert!(result.contains("layer")); + } + + #[test] + fn test_register_and_query() { + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let df = df! { + "x" => [1i32, 2, 3], + "y" => [10i32, 20, 30], + } + .unwrap(); + + reader.register("my_data", df).unwrap(); + + let query = "SELECT * FROM my_data VISUALISE x, y DRAW point"; + let spec = reader.execute(query).unwrap(); + + assert_eq!(spec.metadata().rows, 3); + assert!(spec.metadata().columns.contains(&"x".to_string())); + + let writer = VegaLiteWriter::new(); + let result = spec.render(&writer).unwrap(); + assert!(result.contains("point")); + } + + #[test] + fn test_register_and_join() { + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let sales = df! { + "id" => [1i32, 2, 3], + "amount" => [100i32, 200, 300], + "product_id" => [1i32, 1, 2], + } + .unwrap(); + + let products = df! 
{ + "id" => [1i32, 2], + "name" => ["Widget", "Gadget"], + } + .unwrap(); + + reader.register("sales", sales).unwrap(); + reader.register("products", products).unwrap(); + + let query = r#" + SELECT s.id, s.amount, p.name + FROM sales s + JOIN products p ON s.product_id = p.id + VISUALISE id AS x, amount AS y + DRAW bar + "#; + + let spec = reader.execute(query).unwrap(); + assert_eq!(spec.metadata().rows, 3); + } + + #[test] + fn test_execute_no_viz_fails() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = "SELECT 1 as x, 2 as y"; + + let result = reader.execute(query); + assert!(result.is_err()); + } +} diff --git a/src/reader/spec.rs b/src/reader/spec.rs index a8c08914..92c45781 100644 --- a/src/reader/spec.rs +++ b/src/reader/spec.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; -use crate::api::ValidationWarning; +use crate::validate::ValidationWarning; use crate::naming; use crate::plot::Plot; use crate::writer::Writer; diff --git a/src/api.rs b/src/validate.rs similarity index 60% rename from src/api.rs rename to src/validate.rs index ab613f2c..79bf4ed1 100644 --- a/src/api.rs +++ b/src/validate.rs @@ -1,6 +1,7 @@ -//! High-level ggsql API. +//! Query validation without SQL execution. //! -//! Validation and query inspection without SQL execution. +//! This module provides query syntax and semantic validation without executing +//! any SQL. Use this for IDE integration, syntax checking, and query inspection. use crate::parser; use crate::Result; @@ -79,7 +80,7 @@ pub struct Location { } // ============================================================================ -// High-Level API Functions +// Validation Function // ============================================================================ /// Validate query syntax and semantics without executing SQL. 
@@ -249,162 +250,6 @@ mod tests { assert!(!validated.errors().is_empty()); } - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_execute_and_render() { - use crate::reader::{DuckDBReader, Reader}; - use crate::writer::VegaLiteWriter; - - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point").unwrap(); - - assert_eq!(spec.plot().layers.len(), 1); - assert_eq!(spec.metadata().layer_count, 1); - assert!(spec.data().is_some()); - - let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); - assert!(result.contains("point")); - } - - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_execute_metadata() { - use crate::reader::{DuckDBReader, Reader}; - - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let spec = reader.execute( - "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) VISUALISE x, y DRAW point", - ) - .unwrap(); - - let metadata = spec.metadata(); - assert_eq!(metadata.rows, 3); - assert_eq!(metadata.columns.len(), 2); - assert!(metadata.columns.contains(&"x".to_string())); - assert!(metadata.columns.contains(&"y".to_string())); - assert_eq!(metadata.layer_count, 1); - } - - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_execute_with_cte() { - use crate::reader::{DuckDBReader, Reader}; - - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let query = r#" - WITH data AS ( - SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) - ) - SELECT * FROM data - VISUALISE x, y DRAW point - "#; - - let spec = reader.execute(query).unwrap(); - - assert_eq!(spec.plot().layers.len(), 1); - assert!(spec.data().is_some()); - let df = spec.data().unwrap(); - assert_eq!(df.height(), 2); - } - - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_render_multi_layer() { - use 
crate::reader::{DuckDBReader, Reader}; - use crate::writer::VegaLiteWriter; - - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let query = r#" - SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) - VISUALISE - DRAW point MAPPING x AS x, y AS y - DRAW line MAPPING x AS x, y AS y - "#; - - let spec = reader.execute(query).unwrap(); - let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); - - assert!(result.contains("layer")); - } - - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_register_and_query() { - use crate::reader::{DuckDBReader, Reader}; - use crate::writer::VegaLiteWriter; - use polars::prelude::*; - - let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - - let df = df! { - "x" => [1i32, 2, 3], - "y" => [10i32, 20, 30], - } - .unwrap(); - - reader.register("my_data", df).unwrap(); - - let query = "SELECT * FROM my_data VISUALISE x, y DRAW point"; - let spec = reader.execute(query).unwrap(); - - assert_eq!(spec.metadata().rows, 3); - assert!(spec.metadata().columns.contains(&"x".to_string())); - - let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); - assert!(result.contains("point")); - } - - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_register_and_join() { - use crate::reader::{DuckDBReader, Reader}; - use polars::prelude::*; - - let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - - let sales = df! { - "id" => [1i32, 2, 3], - "amount" => [100i32, 200, 300], - "product_id" => [1i32, 1, 2], - } - .unwrap(); - - let products = df! 
{ - "id" => [1i32, 2], - "name" => ["Widget", "Gadget"], - } - .unwrap(); - - reader.register("sales", sales).unwrap(); - reader.register("products", products).unwrap(); - - let query = r#" - SELECT s.id, s.amount, p.name - FROM sales s - JOIN products p ON s.product_id = p.id - VISUALISE id AS x, amount AS y - DRAW bar - "#; - - let spec = reader.execute(query).unwrap(); - assert_eq!(spec.metadata().rows, 3); - } - - #[cfg(feature = "duckdb")] - #[test] - fn test_execute_no_viz_fails() { - use crate::reader::{DuckDBReader, Reader}; - - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let query = "SELECT 1 as x, 2 as y"; - - let result = reader.execute(query); - assert!(result.is_err()); - } - #[test] fn test_validate_sql_and_visual_content() { let query = "SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y DRAW line MAPPING x AS x, y AS y"; From b588a4ee26bfddd471dd1bb4eda4bd1e349e57a1 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 15:55:51 +0000 Subject: [PATCH 10/12] Switch rendering to writer.render() --- CLAUDE.md | 36 +++++++------- README.md | 7 +-- ggsql-jupyter/src/executor.rs | 6 +-- ggsql-python/README.md | 6 +-- ggsql-python/python/ggsql/__init__.py | 2 +- ggsql-python/src/lib.rs | 70 +++++++++++++++------------ ggsql-python/tests/test_ggsql.py | 46 ++++++------------ src/cli.rs | 4 +- src/doc/API.md | 19 ++++---- src/lib.rs | 1 - src/reader/duckdb.rs | 4 +- src/reader/mod.rs | 18 +++---- src/reader/spec.rs | 10 +--- src/rest.rs | 4 +- src/writer/mod.rs | 47 ++++++++++++++++-- src/writer/vegalite.rs | 2 + 16 files changed, 155 insertions(+), 127 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 8c0cba03..ca8c0e09 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -165,16 +165,16 @@ let spec = reader.execute( // Render to Vega-Lite JSON let writer = VegaLiteWriter::new(); -let json = spec.render(&writer)?; +let json = writer.render(&spec)?; ``` ### Core Functions -| Function | Purpose | -| 
------------------------ | ------------------------------------------------------ | -| `reader.execute(query)` | Main entry point: parse, execute SQL, resolve mappings | -| `spec.render(writer)` | Generate output (Vega-Lite JSON) from Spec | -| `validate(query)` | Validate syntax + semantics, inspect query structure | +| Function | Purpose | +| ----------------------- | ------------------------------------------------------ | +| `reader.execute(query)` | Main entry point: parse, execute SQL, resolve mappings | +| `writer.render(spec)` | Generate output from a Spec | +| `validate(query)` | Validate syntax + semantics, inspect query structure | ### Key Types @@ -909,7 +909,7 @@ print(f"SQL: {spec.sql()}") # Render to Vega-Lite JSON writer = ggsql.VegaLiteWriter() -json_output = spec.render(writer) +json_output = writer.render(spec) ``` **Convenience Function** (`render_altair`): @@ -942,21 +942,21 @@ print(f"Errors: {validated.errors()}") **Classes**: -| Class | Description | -| -------------------------- | ------------------------------------------------ | -| `DuckDBReader(connection)` | Database reader with DataFrame registration | -| `VegaLiteWriter()` | Vega-Lite JSON output writer | -| `Validated` | Result of `validate()` with query inspection | +| Class | Description | +| -------------------------- | ------------------------------------------------- | +| `DuckDBReader(connection)` | Database reader with DataFrame registration | +| `VegaLiteWriter()` | Vega-Lite JSON output writer | +| `Validated` | Result of `validate()` with query inspection | | `Spec` | Result of `reader.execute()`, ready for rendering | **Functions**: -| Function | Description | -| -------------------------- | ------------------------------------------------- | -| `validate(query)` | Syntax/semantic validation with query inspection | -| `reader.execute(query)` | Execute ggsql query, return Spec | -| `execute(query, reader)` | Execute with custom reader (bridge path) | -| `render_altair(df, 
viz)` | Convenience: render DataFrame to Altair chart | +| Function | Description | +| ------------------------ | ------------------------------------------------ | +| `validate(query)` | Syntax/semantic validation with query inspection | +| `reader.execute(query)` | Execute ggsql query, return Spec | +| `execute(query, reader)` | Execute with custom reader (bridge path) | +| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | **Spec Methods**: diff --git a/README.md b/README.md index 46ff2b50..8af476f9 100644 --- a/README.md +++ b/README.md @@ -327,13 +327,10 @@ chart.display() reader = ggsql.DuckDBReader("duckdb://memory") reader.register("data", df) -prepared = ggsql.prepare( - "SELECT * FROM data VISUALISE x, y DRAW point", - reader -) +spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +json_output = writer.render(spec) ``` See the [ggsql-python README](ggsql-python/README.md) for complete API documentation. diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index 42b541d8..d91b223a 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -7,7 +7,7 @@ use anyhow::Result; use ggsql::{ reader::{DuckDBReader, Reader}, validate, - writer::VegaLiteWriter, + writer::{VegaLiteWriter, Writer}, }; use polars::frame::DataFrame; @@ -77,8 +77,8 @@ impl QueryExecutor { spec.metadata().layer_count ); - // 4. Render to Vega-Lite - let vega_json = spec.render(&self.writer)?; + // 4. Render to output format + let vega_json = self.writer.render(&spec)?; tracing::debug!("Generated Vega-Lite spec: {} chars", vega_json.len()); diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 08e9b848..f69dd073 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -106,7 +106,7 @@ print(spec.data()) # Returns polars DataFrame # 6. 
Render to Vega-Lite JSON writer = ggsql.VegaLiteWriter() -vegalite_json = spec.render(writer) +vegalite_json = writer.render(spec) print(vegalite_json) ``` @@ -135,7 +135,7 @@ Writer that generates Vega-Lite v6 JSON specifications. ```python writer = ggsql.VegaLiteWriter() -json_output = spec.render(writer) +json_output = writer.render(spec) ``` #### `Validated` @@ -259,7 +259,7 @@ spec = ggsql.execute( reader ) writer = ggsql.VegaLiteWriter() -json_output = spec.render(writer) +json_output = writer.render(spec) ``` **Optional methods** for custom readers: diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py index de159d2a..d69c84ef 100644 --- a/ggsql-python/python/ggsql/__init__.py +++ b/ggsql-python/python/ggsql/__init__.py @@ -84,7 +84,7 @@ def render_altair( # Execute and render spec = reader.execute(query) writer = VegaLiteWriter() - vegalite_json = spec.render(writer) + vegalite_json = writer.render(spec) # Parse to determine the correct Altair class spec = json.loads(vegalite_json) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 45587266..1a9d0efc 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -6,10 +6,10 @@ use pyo3::prelude::*; use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::validate::{validate as rust_validate, ValidationWarning}; use ggsql::reader::Spec; use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; -use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; +use ggsql::validate::{validate as rust_validate, ValidationWarning}; +use ggsql::writer::{VegaLiteWriter as RustVegaLiteWriter, Writer as RustWriter}; use ggsql::GgsqlError; use polars::prelude::{DataFrame, IpcReader, IpcWriter, SerReader, SerWriter}; @@ -308,7 +308,8 @@ impl PyDuckDBReader { /// -------- /// >>> reader = DuckDBReader("duckdb://memory") /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") - /// >>> json_output = 
spec.render(VegaLiteWriter()) + /// >>> writer = VegaLiteWriter() + /// >>> json_output = writer.render(spec) fn execute(&self, query: &str) -> PyResult { self.inner .execute(query) @@ -329,7 +330,7 @@ impl PyDuckDBReader { /// -------- /// >>> writer = VegaLiteWriter() /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") -/// >>> json_output = spec.render(writer) +/// >>> json_output = writer.render(spec) #[pyclass(name = "VegaLiteWriter")] struct PyVegaLiteWriter { inner: RustVegaLiteWriter, @@ -349,6 +350,35 @@ impl PyVegaLiteWriter { inner: RustVegaLiteWriter::new(), } } + + /// Render a Spec to Vega-Lite JSON output + /// + /// Parameters + /// ---------- + /// spec : Spec + /// The visualization specification from reader.execute(). + /// + /// Returns + /// ------- + /// str + /// The output (i.e., Vega-Lite JSON string). + /// + /// Raises + /// ------ + /// ValueError + /// If rendering fails. + /// + /// Examples + /// -------- + /// >>> reader = DuckDBReader("duckdb://memory") + /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + /// >>> writer = VegaLiteWriter() + /// >>> json_output = writer.render(spec) + fn render(&self, spec: &PySpec) -> PyResult { + self.inner + .render(&spec.inner) + .map_err(|e| PyErr::new::(e.to_string())) + } } // ============================================================================ @@ -439,13 +469,14 @@ impl PyValidated { /// Result of reader.execute(), ready for rendering. /// /// Contains the resolved plot specification, data, and metadata. -/// Use render() to generate Vega-Lite JSON output. +/// Use writer.render(spec) to generate output. 
/// /// Examples /// -------- /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") /// >>> print(f"Rows: {spec.metadata()['rows']}") -/// >>> json_output = spec.render(VegaLiteWriter()) +/// >>> writer = VegaLiteWriter() +/// >>> json_output = writer.render(spec) #[pyclass(name = "Spec")] struct PySpec { inner: Spec, @@ -453,28 +484,6 @@ struct PySpec { #[pymethods] impl PySpec { - /// Render to output format (Vega-Lite JSON). - /// - /// Parameters - /// ---------- - /// writer : VegaLiteWriter - /// The writer to use for rendering. - /// - /// Returns - /// ------- - /// str - /// The Vega-Lite JSON specification as a string. - /// - /// Raises - /// ------ - /// ValueError - /// If rendering fails. - fn render(&self, writer: &PyVegaLiteWriter) -> PyResult { - self.inner - .render(&writer.inner) - .map_err(|e| PyErr::new::(e.to_string())) - } - /// Get visualization metadata. /// /// Returns @@ -688,8 +697,9 @@ fn validate(query: &str) -> PyResult { /// -------- /// >>> # Using native reader (prefer reader.execute() instead) /// >>> reader = DuckDBReader("duckdb://memory") -/// >>> spec = execute("SELECT 1 AS x, 2 AS Y VISUALISE x, y DRAW point", reader) -/// >>> json_output = spec.render(VegaLiteWriter()) +/// >>> spec = execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> writer = VegaLiteWriter() +/// >>> json_output = writer.render(spec) /// /// >>> # Using custom Python reader /// >>> class MyReader: diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index e54df2e8..8c7c1f19 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -98,9 +98,7 @@ class TestExecute: def test_execute_simple_query(self): reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert spec is not None assert spec.layer_count() 
== 1 @@ -127,23 +125,17 @@ def test_execute_metadata(self): def test_execute_sql_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert "SELECT" in spec.sql() def test_execute_visual_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert "VISUALISE" in spec.visual() def test_execute_data_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") data = spec.data() assert isinstance(data, pl.DataFrame) assert data.shape == (1, 2) @@ -154,17 +146,15 @@ def test_execute_without_visualise_fails(self): reader.execute("SELECT 1 AS x, 2 AS y") -class TestSpecRender: - """Tests for Spec.render() method.""" +class TestWriterRender: + """Tests for VegaLiteWriter.render() method.""" def test_render_to_vegalite(self): reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") writer = ggsql.VegaLiteWriter() - result = spec.render(writer) + result = writer.render(spec) assert isinstance(result, str) spec_dict = json.loads(result) @@ -179,7 +169,7 @@ def test_render_contains_data(self): spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") writer = ggsql.VegaLiteWriter() - result = spec.render(writer) + result = writer.render(spec) spec_dict = json.loads(result) # Data should be in the spec (either inline or in datasets) assert "data" in spec_dict or "datasets" in spec_dict @@ -194,7 +184,7 @@ def test_render_multi_layer(self): ) 
writer = ggsql.VegaLiteWriter() - result = spec.render(writer) + result = writer.render(spec) spec_dict = json.loads(result) assert "layer" in spec_dict @@ -367,7 +357,7 @@ def test_end_to_end_workflow(self): # Render to Vega-Lite writer = ggsql.VegaLiteWriter() - result = spec.render(writer) + result = writer.render(spec) # Verify output spec_dict = json.loads(result) @@ -377,9 +367,7 @@ def test_end_to_end_workflow(self): def test_can_introspect_spec(self): """Test all introspection methods on Spec.""" reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") # All these should work without error assert spec.sql() is not None @@ -430,9 +418,7 @@ def register(self, name: str, df: pl.DataFrame) -> None: self.tables[name] = df reader = RegisterReader() - spec = ggsql.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + spec = ggsql.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) assert spec is not None def test_custom_reader_error_handling(self): @@ -460,9 +446,7 @@ def execute_sql(self, sql: str): def test_native_reader_fast_path(self): """Native DuckDBReader still works (fast path).""" reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert spec.metadata()["rows"] == 1 def test_custom_reader_can_render(self): @@ -485,7 +469,7 @@ def execute_sql(self, sql: str) -> pl.DataFrame: ) writer = ggsql.VegaLiteWriter() - result = spec.render(writer) + result = writer.render(spec) spec_dict = json.loads(result) assert "$schema" in spec_dict diff --git a/src/cli.rs b/src/cli.rs index ee97eca1..bb6d4df9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -15,7 +15,7 @@ use ggsql::reader::{DuckDBReader, Reader}; use ggsql::validate; #[cfg(feature = 
"vegalite")] -use ggsql::writer::VegaLiteWriter; +use ggsql::writer::{VegaLiteWriter, Writer}; #[derive(Parser)] #[command(name = "ggsql")] @@ -222,7 +222,7 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option r, Err(e) => { eprintln!("Failed to generate Vega-Lite output: {}", e); diff --git a/src/doc/API.md b/src/doc/API.md index 3cbf9c71..89753bc5 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -5,14 +5,14 @@ This document provides a comprehensive reference for the ggsql public API. ## Overview - **Stage 1: `reader.execute()`** - Parse query, execute SQL, resolve mappings, create Spec -- **Stage 2: `spec.render()`** - Generate output (Vega-Lite JSON, etc.) +- **Stage 2: `writer.render()`** - Generate output (Vega-Lite JSON, etc.) ### API Functions | Function | Use Case | | ------------------ | ---------------------------------------------------- | | `reader.execute()` | Main entry point - full visualization pipeline | -| `spec.render()` | Generate output from Spec | +| `writer.render()` | Generate output from Spec | | `validate()` | Validate syntax + semantics, inspect query structure | --- @@ -50,7 +50,7 @@ Execute a ggsql query for visualization. This is the main entry point - a defaul ```rust use ggsql::reader::{DuckDBReader, Reader}; -use ggsql::writer::VegaLiteWriter; +use ggsql::writer::{VegaLiteWriter, Writer}; let reader = DuckDBReader::from_connection_string("duckdb://memory")?; let spec = reader.execute( @@ -63,7 +63,7 @@ println!("Columns: {:?}", spec.metadata().columns); // Render to Vega-Lite let writer = VegaLiteWriter::new(); -let result = spec.render(&writer)?; +let result = writer.render(&spec)?; ``` **Error Conditions:** @@ -187,17 +187,15 @@ if let Some(tree) = validated.tree() { Result of executing a ggsql query, ready for rendering. 
-#### Rendering Methods +#### Rendering -| Method | Signature | Description | -| -------- | --------------------------------------------------------- | ----------------------- | -| `render` | `fn render(&self, writer: &dyn Writer) -> Result` | Render to output format | +Use `writer.render(&spec)` to generate output. **Example:** ```rust let writer = VegaLiteWriter::new(); -let json = spec.render(&writer)?; +let json = writer.render(&spec)?; println!("{}", json); ``` @@ -308,7 +306,8 @@ if !spec.warnings().is_empty() { } // Continue with rendering -let json = spec.render(&writer)?; +let writer = VegaLiteWriter::new(); +let json = writer.render(&spec)?; ``` --- diff --git a/src/lib.rs b/src/lib.rs index 9bb21554..61273bd6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -779,5 +779,4 @@ mod integration_tests { assert_eq!(data["__ggsql_const_stroke_0__"], "value"); assert_eq!(data["__ggsql_const_stroke_1__"], "value"); } - } diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index f67c39f2..de1cf2eb 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -620,7 +620,9 @@ mod tests { reader.register("my_table", df).unwrap(); // Query the registered table - let result = reader.execute_sql("SELECT * FROM my_table ORDER BY x").unwrap(); + let result = reader + .execute_sql("SELECT * FROM my_table ORDER BY x") + .unwrap(); assert_eq!(result.shape(), (3, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 34afb559..5ac40828 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -15,7 +15,7 @@ //! //! ```rust,ignore //! use ggsql::reader::{Reader, DuckDBReader}; -//! use ggsql::writer::VegaLiteWriter; +//! use ggsql::writer::{Writer, VegaLiteWriter}; //! //! // Execute a ggsql query //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; @@ -23,7 +23,7 @@ //! //! // Render to Vega-Lite JSON //! let writer = VegaLiteWriter::new(); -//! let json = spec.render(&writer)?; +//! 
let json = writer.render(&spec)?; //! //! // With DataFrame registration //! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; @@ -33,9 +33,9 @@ use std::collections::HashMap; -use crate::validate::{validate, ValidationWarning}; use crate::execute::prepare_data_with_executor; use crate::plot::Plot; +use crate::validate::{validate, ValidationWarning}; use crate::{DataFrame, GgsqlError, Result}; #[cfg(feature = "duckdb")] @@ -180,13 +180,13 @@ pub trait Reader { /// /// ```rust,ignore /// use ggsql::reader::{Reader, DuckDBReader}; - /// use ggsql::writer::VegaLiteWriter; + /// use ggsql::writer::{Writer, VegaLiteWriter}; /// /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; /// let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; /// /// let writer = VegaLiteWriter::new(); - /// let json = spec.render(&writer)?; + /// let json = writer.render(&spec)?; /// ``` #[cfg(feature = "duckdb")] fn execute(&self, query: &str) -> Result { @@ -213,7 +213,7 @@ pub trait Reader { #[cfg(all(feature = "duckdb", feature = "vegalite"))] mod tests { use super::*; - use crate::writer::VegaLiteWriter; + use crate::writer::{VegaLiteWriter, Writer}; #[test] fn test_execute_and_render() { @@ -227,7 +227,7 @@ mod tests { assert!(spec.data().is_some()); let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); + let result = writer.render(&spec).unwrap(); assert!(result.contains("point")); } @@ -279,7 +279,7 @@ mod tests { let spec = reader.execute(query).unwrap(); let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); + let result = writer.render(&spec).unwrap(); assert!(result.contains("layer")); } @@ -305,7 +305,7 @@ mod tests { assert!(spec.metadata().columns.contains(&"x".to_string())); let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); + let result = writer.render(&spec).unwrap(); assert!(result.contains("point")); } diff --git 
a/src/reader/spec.rs b/src/reader/spec.rs index 92c45781..4b1fc5bd 100644 --- a/src/reader/spec.rs +++ b/src/reader/spec.rs @@ -2,11 +2,10 @@ use std::collections::HashMap; -use crate::validate::ValidationWarning; use crate::naming; use crate::plot::Plot; -use crate::writer::Writer; -use crate::{DataFrame, Result}; +use crate::validate::ValidationWarning; +use crate::DataFrame; use super::{Metadata, Spec}; @@ -59,11 +58,6 @@ impl Spec { } } - /// Render to output format (e.g., Vega-Lite JSON). - pub fn render(&self, writer: &dyn Writer) -> Result { - writer.write(&self.plot, &self.data) - } - /// Get the resolved plot specification. pub fn plot(&self) -> &Plot { &self.plot diff --git a/src/rest.rs b/src/rest.rs index 68d59a54..8f2338c4 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -37,7 +37,7 @@ use ggsql::{parser, validate, GgsqlError, VERSION}; use ggsql::reader::{DuckDBReader, Reader}; #[cfg(feature = "vegalite")] -use ggsql::writer::VegaLiteWriter; +use ggsql::writer::{VegaLiteWriter, Writer}; /// CLI arguments for the REST API server #[derive(Parser)] @@ -459,7 +459,7 @@ async fn query_handler( #[cfg(feature = "vegalite")] if request.writer == "vegalite" { let writer = VegaLiteWriter::new(); - let json_output = spec.render(&writer)?; + let json_output = writer.render(&spec)?; let spec_value: serde_json::Value = serde_json::from_str(&json_output) .map_err(|e| GgsqlError::WriterError(format!("Failed to parse JSON: {}", e)))?; diff --git a/src/writer/mod.rs b/src/writer/mod.rs index 7f026e6b..b06bf332 100644 --- a/src/writer/mod.rs +++ b/src/writer/mod.rs @@ -14,12 +14,17 @@ //! //! ```rust,ignore //! use ggsql::writer::{Writer, VegaLiteWriter}; +//! use ggsql::reader::{Reader, DuckDBReader}; +//! +//! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; +//! let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; //! //! let writer = VegaLiteWriter::new(); -//! let json = writer.write(&spec, &dataframe)?; +//! 
let json = writer.render(&spec)?; //! println!("{}", json); //! ``` +use crate::reader::Spec; use crate::{DataFrame, Plot, Result}; use std::collections::HashMap; @@ -33,7 +38,15 @@ pub use vegalite::VegaLiteWriter; /// /// Writers take a Plot and data sources and produce formatted output /// (JSON, R code, PNG bytes, etc.). +/// +/// # Associated Types +/// +/// * `Output` - The type returned by `write()` and `render()`. Use `Option` +/// for text output, `Option>` for binary, `()` for void writers, etc. pub trait Writer { + /// The output type produced by this writer. + type Output; + /// Generate output from a visualization specification and data sources /// /// # Arguments @@ -44,7 +57,7 @@ pub trait Writer { /// /// # Returns /// - /// A string containing the formatted output (JSON, code, etc.) + /// The writer's output, depends on writer implementation. /// /// # Errors /// @@ -52,7 +65,7 @@ pub trait Writer { /// - The spec is incompatible with this writer /// - The data doesn't match the spec's requirements /// - Output generation fails - fn write(&self, spec: &Plot, data: &HashMap) -> Result; + fn write(&self, spec: &Plot, data: &HashMap) -> Result; /// Validate that a spec is compatible with this writer /// @@ -67,4 +80,32 @@ pub trait Writer { /// /// Ok(()) if the spec is compatible, otherwise an error fn validate(&self, spec: &Plot) -> Result<()>; + + /// Render a Spec to output format + /// + /// This is the main entry point for generating visualization output. 
+ /// + /// # Arguments + /// + /// * `spec` - The prepared visualization specification from `reader.execute()` + /// + /// # Returns + /// + /// The writer's output (type depends on writer implementation) + /// + /// # Example + /// + /// ```rust,ignore + /// use ggsql::reader::{Reader, DuckDBReader}; + /// use ggsql::writer::{Writer, VegaLiteWriter}; + /// + /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; + /// let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; + /// + /// let writer = VegaLiteWriter::new(); + /// let json = writer.render(&spec)?; + /// ``` + fn render(&self, spec: &Spec) -> Result { + self.write(spec.plot(), spec.data_map()) + } } diff --git a/src/writer/vegalite.rs b/src/writer/vegalite.rs index 01bf884b..ec86589a 100644 --- a/src/writer/vegalite.rs +++ b/src/writer/vegalite.rs @@ -999,6 +999,8 @@ impl VegaLiteWriter { } impl Writer for VegaLiteWriter { + type Output = String; + fn write(&self, spec: &Plot, data: &HashMap) -> Result { // Validate spec before processing self.validate(spec)?; From d0b585b5b83c336585340de44ced547c6c164484 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 16:09:30 +0000 Subject: [PATCH 11/12] Add unregister method --- CLAUDE.md | 2 + ggsql-python/README.md | 5 +++ ggsql-python/src/lib.rs | 29 +++++++++++++++ src/doc/API.md | 10 +++++ src/reader/duckdb.rs | 81 ++++++++++++++++++++++++++++++++++++++++- src/reader/mod.rs | 20 ++++++++++ 6 files changed, 146 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index ca8c0e09..29149b76 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -209,6 +209,7 @@ let json = writer.render(&spec)?; - `execute_sql(sql)` - Run SQL, return DataFrame - `register(name, df)` - Register DataFrame as table +- `unregister(name)` - Unregister a previously registered table - Implementation: `DuckDBReader` **Writer trait** (output format abstraction): @@ -1000,6 +1001,7 @@ Optional methods for custom readers: - 
`supports_register() -> bool` - Return `True` if registration is supported - `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a table +- `unregister(name: str) -> None` - Unregister a previously registered table Native readers (e.g., `DuckDBReader`) use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. diff --git a/ggsql-python/README.md b/ggsql-python/README.md index f69dd073..f08afe34 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -126,6 +126,7 @@ reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database **Methods:** - `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table +- `unregister(name: str)` - Unregister a previously registered table - `execute_sql(sql: str) -> polars.DataFrame` - Execute SQL and return results - `supports_register() -> bool` - Check if registration is supported @@ -266,6 +267,7 @@ json_output = writer.render(spec) - `supports_register() -> bool` - Return `True` if your reader supports DataFrame registration - `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a queryable table +- `unregister(name: str) -> None` - Unregister a previously registered table ```python class AdvancedReader: @@ -283,6 +285,9 @@ class AdvancedReader: def register(self, name: str, df: pl.DataFrame) -> None: self.tables[name] = df + + def unregister(self, name: str) -> None: + del self.tables[name] ``` Native readers like `DuckDBReader` use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. 
diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 1a9d0efc..d2eb0ec0 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -159,6 +159,18 @@ impl Reader for PyReaderBridge { Ok(()) }) } + + fn unregister(&mut self, name: &str) -> ggsql::Result<()> { + Python::attach(|py| { + self.obj + .bind(py) + .call_method1("unregister", (name,)) + .map_err(|e| { + GgsqlError::ReaderError(format!("Reader.unregister() failed: {}", e)) + })?; + Ok(()) + }) + } } // ============================================================================ @@ -249,6 +261,23 @@ impl PyDuckDBReader { .map_err(|e| PyErr::new::(e.to_string())) } + /// Unregister a previously registered table. + /// + /// Parameters + /// ---------- + /// name : str + /// The table name to unregister. + /// + /// Raises + /// ------ + /// ValueError + /// If the table wasn't registered via this reader or unregistration fails. + fn unregister(&mut self, name: &str) -> PyResult<()> { + self.inner + .unregister(name) + .map_err(|e| PyErr::new::(e.to_string())) + } + /// Execute a SQL query and return the result as a DataFrame. /// /// Parameters diff --git a/src/doc/API.md b/src/doc/API.md index 89753bc5..0676ac5a 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -377,6 +377,9 @@ pub trait Reader { /// Register a DataFrame as a queryable table fn register(&mut self, name: &str, df: DataFrame) -> Result<()>; + /// Unregister a previously registered table + fn unregister(&mut self, name: &str) -> Result<()>; + /// Check if this reader supports DataFrame registration fn supports_register(&self) -> bool; } @@ -423,6 +426,13 @@ class DuckDBReader: df: Polars DataFrame or narwhals-compatible DataFrame """ + def unregister(self, name: str) -> None: + """Unregister a previously registered table. 
+ + Args: + name: Table name to unregister + """ + def execute_sql(self, sql: str) -> polars.DataFrame: """Execute SQL and return a Polars DataFrame.""" diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index de1cf2eb..1824d6dc 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -10,6 +10,7 @@ use duckdb::vtab::arrow::{arrow_recordbatch_to_query_params, ArrowVTab}; use duckdb::{params, Connection}; use polars::io::SerWriter; use polars::prelude::*; +use std::collections::HashSet; use std::io::Cursor; /// DuckDB database reader @@ -32,6 +33,7 @@ use std::io::Cursor; /// ``` pub struct DuckDBReader { conn: Connection, + registered_tables: HashSet, } impl DuckDBReader { @@ -75,7 +77,10 @@ impl DuckDBReader { GgsqlError::ReaderError(format!("Failed to register arrow function: {}", e)) })?; - Ok(Self { conn }) + Ok(Self { + conn, + registered_tables: HashSet::new(), + }) } /// Get a reference to the underlying DuckDB connection @@ -523,6 +528,30 @@ impl Reader for DuckDBReader { GgsqlError::ReaderError(format!("Failed to register table '{}': {}", name, e)) })?; + // Track the table so we can unregister it later + self.registered_tables.insert(name.to_string()); + + Ok(()) + } + + fn unregister(&mut self, name: &str) -> Result<()> { + // Only allow unregistering tables we created via register() + if !self.registered_tables.contains(name) { + return Err(GgsqlError::ReaderError(format!( + "Table '{}' was not registered via this reader", + name + ))); + } + + // Drop the temp table + let sql = format!("DROP TABLE IF EXISTS \"{}\"", name); + self.conn.execute(&sql, []).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to unregister table '{}': {}", name, e)) + })?; + + // Remove from tracking + self.registered_tables.remove(name); + Ok(()) } @@ -704,4 +733,54 @@ mod tests { assert_eq!(result.shape(), (0, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } + + #[test] + fn test_unregister() { + let mut reader = 
DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = DataFrame::new(vec![Column::new("x".into(), vec![1i32, 2, 3])]).unwrap(); + + reader.register("test_data", df).unwrap(); + + // Should be queryable + let result = reader.execute_sql("SELECT * FROM test_data").unwrap(); + assert_eq!(result.height(), 3); + + // Unregister + reader.unregister("test_data").unwrap(); + + // Should no longer exist + let result = reader.execute_sql("SELECT * FROM test_data"); + assert!(result.is_err()); + } + + #[test] + fn test_unregister_not_registered() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create a table directly (not via register) + reader + .connection() + .execute("CREATE TABLE user_table (x INT)", params![]) + .unwrap(); + + // Should fail - we didn't register this via register() + let result = reader.unregister("user_table"); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("was not registered via this reader")); + } + + #[test] + fn test_reregister_after_unregister() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = DataFrame::new(vec![Column::new("x".into(), vec![1i32, 2, 3])]).unwrap(); + + reader.register("data", df.clone()).unwrap(); + reader.unregister("data").unwrap(); + + // Should be able to register again + reader.register("data", df).unwrap(); + let result = reader.execute_sql("SELECT * FROM data").unwrap(); + assert_eq!(result.height(), 3); + } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 5ac40828..cfbd271a 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -147,6 +147,26 @@ pub trait Reader { ))) } + /// Unregister a previously registered table + /// + /// # Arguments + /// + /// * `name` - The table name to unregister + /// + /// # Returns + /// + /// `Ok(())` on success. + /// + /// # Default Implementation + /// + /// Returns an error by default. 
Override for readers that support registration. + fn unregister(&mut self, name: &str) -> Result<()> { + Err(GgsqlError::ReaderError(format!( + "This reader does not support unregistering table '{}'", + name + ))) + } + /// Check if this reader supports DataFrame registration /// /// # Returns From ba2d56689dcfd4e6c20bce4c84bbf2e51f15622d Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 16:43:34 +0000 Subject: [PATCH 12/12] Add ibis SQL example --- CLAUDE.md | 3 +- ggsql-python/README.md | 48 +++++++++++++++++++++++++++++++- ggsql-python/tests/test_ggsql.py | 42 ++++++++++++++++++++++++++++ src/doc/API.md | 3 -- 4 files changed, 90 insertions(+), 6 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 29149b76..45d32fb7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -147,7 +147,7 @@ DRAW line MAPPING month AS x, total AS y --- -## Public API (`src/api.rs`) +## Public API ### Quick Start @@ -190,7 +190,6 @@ let json = writer.render(&spec)?; **`Spec`** - Result of `reader.execute()`, ready for rendering: -- `render(writer)` - Generate output (Vega-Lite JSON) - `plot()` - Resolved plot specification - `metadata()` - Rows, columns, layer count - `warnings()` - Validation warnings from execution diff --git a/ggsql-python/README.md b/ggsql-python/README.md index f08afe34..7a2148f1 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -158,7 +158,6 @@ Result of `reader.execute()`, containing resolved visualization ready for render **Methods:** -- `render(writer: VegaLiteWriter) -> str` - Generate Vega-Lite JSON - `metadata() -> dict` - Get `{"rows": int, "columns": list[str], "layer_count": int}` - `sql() -> str` - The executed SQL query - `visual() -> str` - The VISUALISE clause @@ -292,6 +291,53 @@ class AdvancedReader: Native readers like `DuckDBReader` use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. 
+### Ibis Reader Example + +[Ibis](https://ibis-project.org/) provides a unified Python API for SQL operations across multiple backends. Here's how to create an ibis-based custom reader: + +```python +import ggsql +import polars as pl +import ibis + +class IbisReader: + """Custom reader using ibis as the SQL backend.""" + + def __init__(self, backend="duckdb"): + if backend == "duckdb": + self.con = ibis.duckdb.connect() + elif backend == "sqlite": + self.con = ibis.sqlite.connect() + # Add other backends as needed + + def execute_sql(self, sql: str) -> pl.DataFrame: + return self.con.con.execute(sql).pl() + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.con.create_table(name, df.to_arrow(), overwrite=True) + + def unregister(self, name: str) -> None: + self.con.drop_table(name) + +# Usage +reader = IbisReader() +df = pl.DataFrame({ + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "revenue": [100, 150, 120], +}) +reader.register("sales", df) + +spec = ggsql.execute( + "SELECT * FROM sales VISUALISE date AS x, revenue AS y DRAW line", + reader +) +writer = ggsql.VegaLiteWriter() +print(writer.render(spec)) +``` + ## Development ### Keeping in sync with the monorepo diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 8c7c1f19..f5c666fc 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -16,6 +16,14 @@ import ggsql +# Optional dependency for ibis test +try: + import ibis + + HAS_IBIS = hasattr(ibis, "duckdb") +except ImportError: + HAS_IBIS = False + class TestValidate: """Tests for validate() function.""" @@ -496,3 +504,37 @@ def execute_sql(self, sql: str) -> pl.DataFrame: assert len(reader.execute_calls) > 0 # All calls should be valid SQL strings assert all(isinstance(sql, str) for sql in reader.execute_calls) + + @pytest.mark.skipif(not HAS_IBIS, reason="ibis not installed") + def test_custom_reader_ibis(self): + 
"""Test custom reader using ibis as backend.""" + + class IbisReader: + def __init__(self): + self.con = ibis.duckdb.connect() + + def execute_sql(self, sql: str) -> pl.DataFrame: + return self.con.con.execute(sql).pl() + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.con.create_table(name, df.to_arrow(), overwrite=True) + + def unregister(self, name: str) -> None: + self.con.drop_table(name) + + reader = IbisReader() + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("mydata", df) + + spec = ggsql.execute( + "SELECT * FROM mydata VISUALISE x, y DRAW point", + reader, + ) + + assert spec.metadata()["rows"] == 3 + writer = ggsql.VegaLiteWriter() + json_output = writer.render(spec) + assert "point" in json_output diff --git a/src/doc/API.md b/src/doc/API.md index 0676ac5a..1327960e 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -477,9 +477,6 @@ class Validated: ```python class Spec: - def render(self, writer: VegaLiteWriter) -> str: - """Render to output format.""" - def metadata(self) -> dict: """Get metadata as dict with keys: rows, columns, layer_count."""