diff --git a/CLAUDE.md b/CLAUDE.md index bdc93293..45d32fb7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -147,6 +147,79 @@ DRAW line MAPPING month AS x, total AS y --- +## Public API + +### Quick Start + +```rust +use ggsql::reader::{DuckDBReader, Reader}; +use ggsql::writer::VegaLiteWriter; + +// Create a reader +let reader = DuckDBReader::from_connection_string("duckdb://memory")?; + +// Execute the ggsql query +let spec = reader.execute( + "SELECT x, y FROM data VISUALISE x, y DRAW point" +)?; + +// Render to Vega-Lite JSON +let writer = VegaLiteWriter::new(); +let json = writer.render(&spec)?; +``` + +### Core Functions + +| Function | Purpose | +| ----------------------- | ------------------------------------------------------ | +| `reader.execute(query)` | Main entry point: parse, execute SQL, resolve mappings | +| `writer.render(spec)` | Generate output from a Spec | +| `validate(query)` | Validate syntax + semantics, inspect query structure | + +### Key Types + +**`Validated`** - Result of `validate()`: + +- `has_visual()` - Whether query has VISUALISE clause +- `sql()` - The SQL portion (before VISUALISE) +- `visual()` - The VISUALISE portion (raw text) +- `tree()` - CST for advanced inspection +- `valid()` - Whether query is valid +- `errors()` - Validation errors +- `warnings()` - Validation warnings + +**`Spec`** - Result of `reader.execute()`, ready for rendering: + +- `plot()` - Resolved plot specification +- `metadata()` - Rows, columns, layer count +- `warnings()` - Validation warnings from execution +- `data()` / `layer_data(i)` / `stat_data(i)` - Access DataFrames +- `sql()` / `visual()` / `layer_sql(i)` / `stat_sql(i)` - Query introspection + +**`Metadata`**: + +- `rows` - Number of rows in primary data +- `columns` - Column names +- `layer_count` - Number of layers + +### Reader & Writer + +**Reader trait** (data source abstraction): + +- `execute_sql(sql)` - Run SQL, return DataFrame +- `register(name, df)` - Register DataFrame as table +- 
`unregister(name)` - Unregister a previously registered table +- Implementation: `DuckDBReader` + +**Writer trait** (output format abstraction): + +- `write(spec, data)` - Generate output string +- Implementation: `VegaLiteWriter` (Vega-Lite v6 JSON) + +For detailed API documentation, see [`src/doc/API.md`](src/doc/API.md). + +--- + ## Component Breakdown ### 1. Parser Module (`src/parser/`) @@ -432,7 +505,7 @@ pub type Result = std::result::Result; ```rust pub trait Reader { - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; fn supports_query(&self, sql: &str) -> bool; } ``` @@ -462,7 +535,6 @@ pub fn parse_connection_string(uri: &str) -> Result { The codebase includes connection string parsing and feature flags for additional readers, but they are not yet implemented: - **PostgreSQL Reader** (`postgres://...`) - - Feature flag: `postgres` - Connection string parsing exists in `connection.rs` - Reader implementation: Not yet available @@ -792,15 +864,18 @@ When running in Positron IDE, the extension provides enhanced functionality: ### 8. Python Bindings (`ggsql-python/`) -**Responsibility**: Python bindings for ggsql, enabling Python users to render Altair charts using ggsql's VISUALISE syntax. +**Responsibility**: Python bindings for ggsql, enabling Python users to create visualizations using ggsql's VISUALISE syntax. **Features**: - PyO3-based Rust bindings compiled to a native Python extension +- Two-stage API mirroring the Rust API: `reader.execute()` → `render()` +- DuckDB reader with DataFrame registration +- Custom Python reader support: any object with `execute_sql(sql) -> DataFrame` method - Works with any narwhals-compatible DataFrame (polars, pandas, etc.) 
- LazyFrames are collected automatically -- Returns native `altair.Chart` objects for easy display and customization -- Query splitting to separate SQL from VISUALISE portions +- Returns native `altair.Chart` objects via `render_altair()` convenience function +- Query validation and introspection (SQL, layer queries, stat queries) **Installation**: @@ -817,26 +892,117 @@ maturin develop import ggsql import polars as pl -# Split a ggSQL query into SQL and VISUALISE portions -sql, viz = ggsql.split_query(""" - SELECT date, revenue FROM sales - VISUALISE date AS x, revenue AS y - DRAW line -""") +# Create reader and register data +reader = ggsql.DuckDBReader("duckdb://memory") +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +reader.register("data", df) + +# Execute visualization +spec = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point" +) + +# Inspect metadata +print(f"Rows: {spec.metadata()['rows']}") +print(f"Columns: {spec.metadata()['columns']}") +print(f"SQL: {spec.sql()}") + +# Render to Vega-Lite JSON +writer = ggsql.VegaLiteWriter() +json_output = writer.render(spec) +``` + +**Convenience Function** (`render_altair`): + +For quick visualizations without explicit reader setup: + +```python +import ggsql +import polars as pl -# Execute SQL and render to Altair chart df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") -# Display or save +# Render DataFrame to Altair chart in one call +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") chart.display() # In Jupyter -chart.save("chart.html") ``` +**Query Validation**: + +```python +# Validate syntax without execution +validated = ggsql.validate( + "SELECT x, y FROM data VISUALISE x, y DRAW point" +) +print(f"Valid: {validated.valid()}") +print(f"Has VISUALISE: {validated.has_visual()}") +print(f"SQL portion: {validated.sql()}") +print(f"Errors: {validated.errors()}") +``` + +**Classes**: + +| Class | Description | +| 
-------------------------- | ------------------------------------------------- | +| `DuckDBReader(connection)` | Database reader with DataFrame registration | +| `VegaLiteWriter()` | Vega-Lite JSON output writer | +| `Validated` | Result of `validate()` with query inspection | +| `Spec` | Result of `reader.execute()`, ready for rendering | + **Functions**: -- `split_query(query: str) -> tuple[str, str]` - Split ggSQL query into SQL and VISUALISE portions -- `render_altair(df, viz, **kwargs) -> altair.Chart` - Render DataFrame with VISUALISE spec to Altair chart +| Function | Description | +| ------------------------ | ------------------------------------------------ | +| `validate(query)` | Syntax/semantic validation with query inspection | +| `reader.execute(query)` | Execute ggsql query, return Spec | +| `execute(query, reader)` | Execute with custom reader (bridge path) | +| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | + +**Spec Methods**: + +| Method | Description | +| ---------------- | -------------------------------------------- | +| `render(writer)` | Generate Vega-Lite JSON | +| `metadata()` | Get rows, columns, layer_count | +| `sql()` | Get the SQL portion | +| `visual()` | Get the VISUALISE portion | +| `layer_count()` | Number of DRAW layers | +| `data()` | Get the main DataFrame | +| `layer_data(i)` | Get layer-specific DataFrame (if filtered) | +| `stat_data(i)` | Get stat transform DataFrame (if applicable) | +| `layer_sql(i)` | Get layer filter SQL (if applicable) | +| `stat_sql(i)` | Get stat transform SQL (if applicable) | +| `warnings()` | Get validation warnings | + +**Custom Python Readers**: + +Any Python object with an `execute_sql(sql: str) -> polars.DataFrame` method can be used as a reader: + +```python +import ggsql +import polars as pl + +class MyReader: + """Custom reader that returns static data.""" + + def execute_sql(self, sql: str) -> pl.DataFrame: + return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 
30]}) + +# Use custom reader with ggsql.execute() +reader = MyReader() +spec = ggsql.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) +``` + +Optional methods for custom readers: + +- `supports_register() -> bool` - Return `True` if registration is supported +- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a table +- `unregister(name: str) -> None` - Unregister a previously registered table + +Native readers (e.g., `DuckDBReader`) use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. **Dependencies**: @@ -920,22 +1086,23 @@ cargo build --all-features ``` Where `` can be: + - Empty: `VISUALISE` (layers must define all mappings) - Mappings: `VISUALISE x, y, date AS x` (mixed implicit/explicit) - Wildcard: `VISUALISE *` (map all columns) ### Clause Types -| Clause | Repeatable | Purpose | Example | -| -------------- | ---------- | ------------------ | ------------------------------------ | -| `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | -| `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | -| `SCALE` | ✅ Yes | Configure scales | `SCALE x SETTING type => 'date'` | -| `FACET` | ❌ No | Small multiples | `FACET WRAP region` | -| `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` | -| `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` | -| `GUIDE` | ✅ Yes | Legend/axis config | `GUIDE color SETTING position => 'right'` | -| `THEME` | ❌ No | Visual styling | `THEME minimal` | +| Clause | Repeatable | Purpose | Example | +| ----------- | ---------- | ------------------ | ----------------------------------------- | +| `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | +| `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | +| `SCALE` | ✅ Yes | Configure scales | `SCALE x SETTING type => 'date'` | +| `FACET` | ❌ No | Small 
multiples | `FACET WRAP region` | +| `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` | +| `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` | +| `GUIDE` | ✅ Yes | Legend/axis config | `GUIDE color SETTING position => 'right'` | +| `THEME` | ❌ No | Visual styling | `THEME minimal` | ### DRAW Clause (Layers) @@ -1201,7 +1368,6 @@ COORD cartesian SETTING xlim => [0, 100], ylim => [0, 200] LABEL x => 'Category', y => 'Count' ``` - ### LABEL Clause **Syntax**: diff --git a/Cargo.toml b/Cargo.toml index cd5b672c..5e98f8aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,8 @@ csscolorparser = "0.8.1" polars = { version = "0.52", features = ["lazy", "sql", "ipc"] } # Readers -duckdb = { version = "1.1", features = ["bundled"] } +duckdb = { version = "1.4", features = ["bundled", "vtab-arrow"] } +arrow = { version = "56", default-features = false, features = ["ipc"] } postgres = "0.19" sqlx = { version = "0.8", features = ["postgres", "runtime-tokio-rustls"] } rusqlite = "0.32" diff --git a/README.md b/README.md index 43d70847..8af476f9 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ THEME minimal - ✅ REST API server (`ggsql-rest`) with CORS support - ✅ Jupyter kernel (`ggsql-jupyter`) with inline Vega-Lite visualizations - ✅ VS Code extension (`ggsql-vscode`) with syntax highlighting and Positron IDE integration +- ✅ Python bindings (`ggsql-python`) with Altair chart output **Planned:** @@ -93,7 +94,9 @@ ggsql/ │ ├── ggsql-jupyter/ # Jupyter kernel │ -└── ggsql-vscode/ # VS Code extension +├── ggsql-vscode/ # VS Code extension +│ +└── ggsql-python/ # Python bindings ``` ## Development Workflow @@ -297,6 +300,41 @@ When running in Positron IDE, the extension provides additional features: - **Language runtime registration** for executing ggsql queries directly within Positron - **Plot pane integration** - visualizations are automatically routed to Positron's Plots pane +## Python Bindings + +The 
`ggsql-python` package provides Python bindings for using ggsql with DataFrames. + +### Installation + +```bash +cd ggsql-python +pip install maturin +maturin develop +``` + +### Usage + +```python +import ggsql +import polars as pl + +# Simple usage with render_altair +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +chart.display() + +# Two-stage API for full control +reader = ggsql.DuckDBReader("duckdb://memory") +reader.register("data", df) + +spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") + +writer = ggsql.VegaLiteWriter() +json_output = writer.render(spec) +``` + +See the [ggsql-python README](ggsql-python/README.md) for complete API documentation. + ## CLI ### Installation diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index 1c38e3ae..d91b223a 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -5,9 +5,8 @@ use anyhow::Result; use ggsql::{ - execute::prepare_data, - parser, reader::{DuckDBReader, Reader}, + validate, writer::{VegaLiteWriter, Writer}, }; use polars::frame::DataFrame; @@ -54,13 +53,13 @@ impl QueryExecutor { pub fn execute(&self, code: &str) -> Result { tracing::debug!("Executing query: {} chars", code.len()); - // 1. Split query to check if there's a visualization - let (_sql_part, viz_part) = parser::split_query(code)?; + // 1. Validate to check if there's a visualization + let validated = validate(code)?; // 2. Check if there's a visualization - if viz_part.is_empty() { + if !validated.has_visual() { // Pure SQL query - execute directly and return DataFrame - let df = self.reader.execute(code)?; + let df = self.reader.execute_sql(code)?; tracing::info!( "Pure SQL executed: {} rows, {} cols", df.height(), @@ -69,17 +68,21 @@ impl QueryExecutor { return Ok(ExecutionResult::DataFrame(df)); } - // 3. 
Prepare data using shared execution logic (handles layer sources) - let prepared = prepare_data(code, &self.reader)?; + // 3. Execute ggsql query using reader + let spec = self.reader.execute(code)?; - tracing::info!("Data sources prepared: {} sources", prepared.data.len()); + tracing::info!( + "Query executed: {} rows, {} layers", + spec.metadata().rows, + spec.metadata().layer_count + ); - // 4. Generate Vega-Lite spec (use first spec if multiple) - let vega_json = self.writer.write(&prepared.specs[0], &prepared.data)?; + // 4. Render to output format + let vega_json = self.writer.render(&spec)?; tracing::debug!("Generated Vega-Lite spec: {} chars", vega_json.len()); - // 6. Return result + // 5. Return result Ok(ExecutionResult::Visualization { spec: vega_json }) } } diff --git a/ggsql-python/Cargo.toml b/ggsql-python/Cargo.toml index 62229afd..8f73e6f8 100644 --- a/ggsql-python/Cargo.toml +++ b/ggsql-python/Cargo.toml @@ -12,7 +12,7 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.26", features = ["extension-module"] } polars = { workspace = true, features = ["ipc"] } -ggsql = { path = "../src", default-features = false, features = ["vegalite"] } +ggsql = { path = "../src", default-features = false, features = ["duckdb", "vegalite"] } [features] default = [] diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 0d97bbee..7a2148f1 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -2,7 +2,7 @@ Python bindings for [ggsql](https://github.com/georgestagg/ggsql), a SQL extension for declarative data visualization. -This package provides a thin wrapper around the Rust `ggsql` crate, enabling Python users to render Altair charts from DataFrames using ggsql's VISUALISE syntax. +This package provides Python bindings to the Rust `ggsql` crate, enabling Python users to create visualizations using ggsql's VISUALISE syntax with native Altair chart output. 
## Installation @@ -15,6 +15,7 @@ pip install ggsql ### From source Building from source requires: + - Rust toolchain (install via [rustup](https://rustup.rs/)) - Python 3.10+ - [maturin](https://github.com/PyO3/maturin) @@ -39,35 +40,182 @@ maturin build --release pip install target/wheels/ggsql-*.whl ``` -## Usage +## Quick Start + +### Simple Usage with `render_altair` + +For quick visualizations, use the `render_altair` convenience function: ```python import ggsql -import duckdb +import polars as pl + +# Create a DataFrame +df = pl.DataFrame({ + "x": [1, 2, 3, 4, 5], + "y": [10, 20, 15, 30, 25], + "category": ["A", "B", "A", "B", "A"] +}) + +# Render to Altair chart +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + +# Display or save +chart.display() # In Jupyter +chart.save("chart.html") # Save to file +``` -# Split a ggSQL query into SQL and VISUALISE portions -sql, viz = ggsql.split_query(""" - SELECT date, revenue, region FROM sales - WHERE year = 2024 +### Two-Stage API + +For more control, use the two-stage API with explicit reader and writer: + +```python +import ggsql +import polars as pl + +# 1. Create a DuckDB reader +reader = ggsql.DuckDBReader("duckdb://memory") + +# 2. Register your DataFrame as a table +df = pl.DataFrame({ + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "revenue": [100, 150, 120], + "region": ["North", "South", "North"] +}) +reader.register("sales", df) + +# 3. Execute the ggsql query +spec = reader.execute( + """ + SELECT * FROM sales VISUALISE date AS x, revenue AS y, region AS color DRAW line - LABEL title => 'Sales Trends' -""") + LABEL title => 'Sales by Region' + """ +) + +# 4. Inspect metadata +print(f"Rows: {spec.metadata()['rows']}") +print(f"Columns: {spec.metadata()['columns']}") +print(f"Layers: {spec.layer_count()}") + +# 5. Inspect SQL/VISUALISE portions and data +print(f"SQL: {spec.sql()}") +print(f"Visual: {spec.visual()}") +print(spec.data()) # Returns polars DataFrame + +# 6. 
Render to Vega-Lite JSON +writer = ggsql.VegaLiteWriter() +vegalite_json = writer.render(spec) +print(vegalite_json) +``` -# Execute SQL with DuckDB -df = duckdb.sql(sql).pl() +## API Reference -# Render DataFrame + VISUALISE spec to Altair chart -chart = ggsql.render_altair(df, viz) +### Classes -# Display or save the chart -chart.display() # In Jupyter -chart.save("chart.html") # Save to file +#### `DuckDBReader(connection: str)` + +Database reader that executes SQL and manages DataFrames. + +```python +reader = ggsql.DuckDBReader("duckdb://memory") # In-memory database +reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database +``` + +**Methods:** + +- `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table +- `unregister(name: str)` - Unregister a previously registered table +- `execute_sql(sql: str) -> polars.DataFrame` - Execute SQL and return results +- `supports_register() -> bool` - Check if registration is supported + +#### `VegaLiteWriter()` + +Writer that generates Vega-Lite v6 JSON specifications. + +```python +writer = ggsql.VegaLiteWriter() +json_output = writer.render(spec) +``` + +#### `Validated` + +Result of `validate()` containing query analysis without SQL execution. + +**Methods:** + +- `valid() -> bool` - Whether the query is syntactically and semantically valid +- `has_visual() -> bool` - Whether the query contains a VISUALISE clause +- `sql() -> str` - The SQL portion (before VISUALISE) +- `visual() -> str` - The VISUALISE portion +- `errors() -> list[dict]` - Validation errors with messages and locations +- `warnings() -> list[dict]` - Validation warnings + +#### `Spec` + +Result of `reader.execute()`, containing resolved visualization ready for rendering. 
+ +**Methods:** + +- `metadata() -> dict` - Get `{"rows": int, "columns": list[str], "layer_count": int}` +- `sql() -> str` - The executed SQL query +- `visual() -> str` - The VISUALISE clause +- `layer_count() -> int` - Number of DRAW layers +- `data() -> polars.DataFrame | None` - Main query result DataFrame +- `layer_data(index: int) -> polars.DataFrame | None` - Layer-specific data (if filtered) +- `stat_data(index: int) -> polars.DataFrame | None` - Statistical transform data +- `layer_sql(index: int) -> str | None` - Layer filter SQL +- `stat_sql(index: int) -> str | None` - Stat transform SQL +- `warnings() -> list[dict]` - Validation warnings from execution + +### Functions + +#### `validate(query: str) -> Validated` + +Validate query syntax and semantics without executing SQL. + +```python +validated = ggsql.validate("SELECT x, y FROM data VISUALISE x, y DRAW point") +if validated.valid(): + print("Query is valid!") +else: + for error in validated.errors(): + print(f"Error: {error['message']}") +``` + +#### `reader.execute(query: str) -> Spec` + +Execute a ggsql query and return the visualization specification. + +```python +reader = ggsql.DuckDBReader("duckdb://memory") +spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") ``` -### Mapping styles +#### `render_altair(df, viz: str, **kwargs) -> altair.Chart` + +Convenience function to render a DataFrame with a VISUALISE spec to an Altair chart. + +**Parameters:** + +- `df` - Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. +- `viz` - The VISUALISE specification string +- `**kwargs` - Additional arguments passed to `altair.Chart.from_json()` (e.g., `validate=False`) -The `render_altair()` function supports various mapping styles: +**Returns:** An Altair chart object (Chart, LayerChart, FacetChart, etc.) 
+ +```python +import polars as pl +import ggsql + +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +``` + +## Examples + +### Mapping Styles ```python df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30], "category": ["A", "B", "A"]}) @@ -85,41 +233,116 @@ ggsql.render_altair(df, "VISUALISE * DRAW point") ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") ``` -## API +### Custom Readers -### `split_query(query: str) -> tuple[str, str]` +You can use any Python object with an `execute_sql(sql: str) -> polars.DataFrame` method as a reader. This enables integration with any data source. -Split a ggSQL query into SQL and VISUALISE portions. +```python +import ggsql +import polars as pl + +class CSVReader: + """Custom reader that loads data from CSV files.""" + + def __init__(self, data_dir: str): + self.data_dir = data_dir + + def execute_sql(self, sql: str) -> pl.DataFrame: + # Simple implementation: ignore SQL and return fixed data + # A real implementation would parse SQL to determine which file to load + return pl.read_csv(f"{self.data_dir}/data.csv") + +# Use custom reader with ggsql.execute() +reader = CSVReader("/path/to/data") +spec = ggsql.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) +writer = ggsql.VegaLiteWriter() +json_output = writer.render(spec) +``` -**Parameters:** -- `query`: The full ggSQL query string +**Optional methods** for custom readers: -**Returns:** -- Tuple of `(sql_portion, visualise_portion)` +- `supports_register() -> bool` - Return `True` if your reader supports DataFrame registration +- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a queryable table +- `unregister(name: str) -> None` - Unregister a previously registered table -**Raises:** -- `ValueError`: If the query cannot be parsed +```python +class AdvancedReader: + """Custom reader with registration support.""" -### `render_altair(df, 
viz, **kwargs) -> altair.Chart` + def __init__(self): + self.tables = {} -Render a DataFrame with a VISUALISE specification to an Altair chart. + def execute_sql(self, sql: str) -> pl.DataFrame: + # Your SQL execution logic here + ... -**Parameters:** -- `df`: Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. -- `viz`: The VISUALISE specification string -- `**kwargs`: Additional keyword arguments passed to `altair.Chart.from_json()`. Common options include `validate=False` to skip schema validation. + def supports_register(self) -> bool: + return True -**Returns:** -- An `altair.Chart` object that can be displayed, saved, or further customized + def register(self, name: str, df: pl.DataFrame) -> None: + self.tables[name] = df -**Raises:** -- `ValueError`: If the spec cannot be parsed or rendered + def unregister(self, name: str) -> None: + del self.tables[name] +``` + +Native readers like `DuckDBReader` use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. + +### Ibis Reader Example + +[Ibis](https://ibis-project.org/) provides a unified Python API for SQL operations across multiple backends. 
Here's how to create an ibis-based custom reader: + +```python +import ggsql +import polars as pl +import ibis + +class IbisReader: + """Custom reader using ibis as the SQL backend.""" + + def __init__(self, backend="duckdb"): + if backend == "duckdb": + self.con = ibis.duckdb.connect() + elif backend == "sqlite": + self.con = ibis.sqlite.connect() + # Add other backends as needed + + def execute_sql(self, sql: str) -> pl.DataFrame: + return self.con.con.execute(sql).pl() + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.con.create_table(name, df.to_arrow(), overwrite=True) + + def unregister(self, name: str) -> None: + self.con.drop_table(name) + +# Usage +reader = IbisReader() +df = pl.DataFrame({ + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "revenue": [100, 150, 120], +}) +reader.register("sales", df) + +spec = ggsql.execute( + "SELECT * FROM sales VISUALISE date AS x, revenue AS y DRAW line", + reader +) +writer = ggsql.VegaLiteWriter() +print(writer.render(spec)) +``` ## Development ### Keeping in sync with the monorepo -The `ggsql-python` package is part of the [ggsql monorepo](https://github.com/georgestagg/ggsql) and depends on the Rust `ggsql` crate via a path dependency. When the Rust crate is updated, you may need to rebuild: +The `ggsql-python` package is part of the [ggsql monorepo](https://github.com/posit-dev/ggsql) and depends on the Rust `ggsql` crate via a path dependency. 
When the Rust crate is updated, you may need to rebuild: ```bash cd ggsql-python diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py index dbbb5afb..d69c84ef 100644 --- a/ggsql-python/python/ggsql/__init__.py +++ b/ggsql-python/python/ggsql/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations -import io import json from typing import Any, Union @@ -8,9 +7,26 @@ import narwhals as nw from narwhals.typing import IntoFrame -from ggsql._ggsql import split_query, render as _render +from ggsql._ggsql import ( + DuckDBReader, + VegaLiteWriter, + Validated, + Spec, + validate, + execute, +) -__all__ = ["split_query", "render_altair"] +__all__ = [ + # Classes + "DuckDBReader", + "VegaLiteWriter", + "Validated", + "Spec", + # Functions + "validate", + "execute", + "render_altair", +] __version__ = "0.1.0" # Type alias for any Altair chart type @@ -56,13 +72,19 @@ def render_altair( if not isinstance(df, nw.DataFrame): raise TypeError("df must be a narwhals DataFrame or compatible type") - # Convert to polars and serialize to IPC bytes pl_df = df.to_polars() - buffer = io.BytesIO() - pl_df.write_ipc(buffer) - ipc_bytes = buffer.getvalue() - vegalite_json = _render(ipc_bytes, viz, writer="vegalite") + # Create temporary reader and register data + reader = DuckDBReader("duckdb://memory") + reader.register("__data__", pl_df) + + # Build full query: SELECT * FROM __data__ + VISUALISE clause + query = f"SELECT * FROM __data__ {viz}" + + # Execute and render + spec = reader.execute(query) + writer = VegaLiteWriter() + vegalite_json = writer.render(spec) # Parse to determine the correct Altair class spec = json.loads(vegalite_json) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 7c472c35..d2eb0ec0 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -3,100 +3,770 @@ #![allow(clippy::useless_conversion)] use pyo3::prelude::*; -use pyo3::types::PyBytes; -use std::collections::{HashMap, HashSet}; 
+use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::naming::GLOBAL_DATA_KEY; -use ggsql::parser::parse_query; -use ggsql::writer::{VegaLiteWriter, Writer}; -use ggsql::AestheticValue; +use ggsql::reader::Spec; +use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; +use ggsql::validate::{validate as rust_validate, ValidationWarning}; +use ggsql::writer::{VegaLiteWriter as RustVegaLiteWriter, Writer as RustWriter}; +use ggsql::GgsqlError; -use polars::prelude::{DataFrame, IpcReader, SerReader}; +use polars::prelude::{DataFrame, IpcReader, IpcWriter, SerReader, SerWriter}; -#[pyfunction] -fn split_query(query: &str) -> PyResult<(String, String)> { - ggsql::parser::split_query(query) - .map_err(|e| PyErr::new::(e.to_string())) +// ============================================================================ +// Helper Functions for DataFrame Conversion +// ============================================================================ + +/// Convert a Polars DataFrame to a Python polars DataFrame via IPC serialization +fn polars_to_py(py: Python<'_>, df: &DataFrame) -> PyResult> { + let mut buffer = Vec::new(); + IpcWriter::new(&mut buffer) + .finish(&mut df.clone()) + .map_err(|e| { + PyErr::new::(format!( + "Failed to serialize DataFrame: {}", + e + )) + })?; + + let io = py.import("io")?; + let bytes_io = io.call_method1("BytesIO", (PyBytes::new(py, &buffer),))?; + + let polars = py.import("polars")?; + polars + .call_method1("read_ipc", (bytes_io,)) + .map(|obj| obj.into()) } -#[pyfunction] -#[pyo3(signature = (ipc_bytes, viz, *, writer = "vegalite"))] -fn render(ipc_bytes: &Bound<'_, PyBytes>, viz: &str, writer: &str) -> PyResult { - // Read DataFrame from IPC bytes - let bytes = ipc_bytes.as_bytes(); - let cursor = Cursor::new(bytes); - let df: DataFrame = IpcReader::new(cursor).finish().map_err(|e| { - PyErr::new::(format!("Failed to read IPC data: {}", e)) - })?; +/// Convert a Python polars DataFrame to a Rust Polars DataFrame 
via IPC serialization +fn py_to_polars(py: Python<'_>, df: &Bound<'_, PyAny>) -> PyResult { + let io = py.import("io")?; + let bytes_io = io.call_method0("BytesIO")?; + df.call_method1("write_ipc", (&bytes_io,))?; + bytes_io.call_method1("seek", (0i64,))?; - // Parse the visualization spec - // The viz string should be a complete VISUALISE statement - let specs = parse_query(viz) - .map_err(|e| PyErr::new::(e.to_string()))?; + let ipc_bytes: Vec = bytes_io.call_method0("read")?.extract()?; + let cursor = Cursor::new(ipc_bytes); + + IpcReader::new(cursor).finish().map_err(|e| { + PyErr::new::(format!("Failed to read DataFrame: {}", e)) + }) +} + +/// Convert a Python polars DataFrame to Rust DataFrame - for use inside Python::attach +/// This variant is used by PyReaderBridge where we already hold the GIL. +fn py_to_polars_inner(df: &Bound<'_, PyAny>) -> PyResult { + let py = df.py(); + let io = py.import("io")?; + let bytes_io = io.call_method0("BytesIO")?; - let mut spec = specs.into_iter().next().ok_or_else(|| { - PyErr::new::("No visualization spec found") + df.call_method1("write_ipc", (&bytes_io,)).map_err(|_| { + PyErr::new::( + "Reader.execute_sql() must return a polars.DataFrame", + ) })?; - // Get column names for resolving global mappings - let column_names: HashSet<&str> = df.get_column_names().iter().map(|s| s.as_str()).collect(); - - // Merge global mappings into layers and handle wildcards - for layer in &mut spec.layers { - let supported_aesthetics = layer.geom.aesthetics().supported; - - // 1. 
Merge explicit global aesthetics into layer (layer takes precedence) - for (aesthetic, value) in &spec.global_mappings.aesthetics { - if supported_aesthetics.contains(&aesthetic.as_str()) { - layer - .mappings - .aesthetics - .entry(aesthetic.clone()) - .or_insert_with(|| value.clone()); - } + bytes_io.call_method1("seek", (0i64,))?; + let ipc_bytes: Vec = bytes_io.call_method0("read")?.extract()?; + let cursor = Cursor::new(ipc_bytes); + + IpcReader::new(cursor).finish().map_err(|e| { + PyErr::new::(format!( + "Failed to deserialize DataFrame: {}", + e + )) + }) +} + +/// Convert validation errors/warnings to a Python list of dicts +fn errors_to_pylist( + py: Python<'_>, + items: &[(String, Option<(usize, usize)>)], +) -> PyResult> { + let list = PyList::empty(py); + for (message, location) in items { + let dict = PyDict::new(py); + dict.set_item("message", message)?; + if let Some((line, column)) = location { + let loc_dict = PyDict::new(py); + loc_dict.set_item("line", line)?; + loc_dict.set_item("column", column)?; + dict.set_item("location", loc_dict)?; + } else { + dict.set_item("location", py.None())?; } + list.append(dict)?; + } + Ok(list.into()) +} - // 2. 
Handle wildcard expansion: map columns to aesthetics with matching names - let has_wildcard = layer.mappings.wildcard || spec.global_mappings.wildcard; - if has_wildcard { - for &aes in supported_aesthetics { - // Only create mapping if column exists in the dataframe - if column_names.contains(aes) { - layer - .mappings - .aesthetics - .entry(aes.to_string()) - .or_insert_with(|| AestheticValue::standard_column(aes)); - } +/// Convert ValidationWarning slice to Python list format +fn warnings_to_pylist(py: Python<'_>, warnings: &[ValidationWarning]) -> PyResult> { + let items: Vec<_> = warnings + .iter() + .map(|w| { + ( + w.message.clone(), + w.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(); + errors_to_pylist(py, &items) +} + +// ============================================================================ +// PyReaderBridge - Bridges Python reader objects to Rust Reader trait +// ============================================================================ + +/// Bridges a Python reader object to the Rust Reader trait. +/// +/// This allows any Python object with an `execute_sql(sql: str) -> polars.DataFrame` +/// method to be used as a ggsql reader. 
+struct PyReaderBridge { + obj: Py, +} + +impl Reader for PyReaderBridge { + fn execute_sql(&self, sql: &str) -> ggsql::Result { + Python::attach(|py| { + let bound = self.obj.bind(py); + let result = bound.call_method1("execute_sql", (sql,)).map_err(|e| { + GgsqlError::ReaderError(format!("Reader.execute_sql() failed: {}", e)) + })?; + py_to_polars_inner(&result).map_err(|e| GgsqlError::ReaderError(e.to_string())) + }) + } + + fn supports_register(&self) -> bool { + Python::attach(|py| { + self.obj + .bind(py) + .call_method0("supports_register") + .and_then(|r| r.extract::()) + .unwrap_or(false) + }) + } + + fn register(&mut self, name: &str, df: DataFrame) -> ggsql::Result<()> { + Python::attach(|py| { + let py_df = + polars_to_py(py, &df).map_err(|e| GgsqlError::ReaderError(e.to_string()))?; + self.obj + .bind(py) + .call_method1("register", (name, py_df)) + .map_err(|e| GgsqlError::ReaderError(format!("Reader.register() failed: {}", e)))?; + Ok(()) + }) + } + + fn unregister(&mut self, name: &str) -> ggsql::Result<()> { + Python::attach(|py| { + self.obj + .bind(py) + .call_method1("unregister", (name,)) + .map_err(|e| { + GgsqlError::ReaderError(format!("Reader.unregister() failed: {}", e)) + })?; + Ok(()) + }) + } +} + +// ============================================================================ +// Native Reader Detection Macro +// ============================================================================ + +/// Macro to try native readers and fall back to bridge. +/// Adding new native readers = add to the macro invocation list. +macro_rules! 
try_native_readers { + ($query:expr, $reader:expr, $($native_type:ty),*) => {{ + $( + if let Ok(native) = $reader.downcast::<$native_type>() { + return native.borrow().inner.execute($query) + .map(|s| PySpec { inner: s }) + .map_err(|e| PyErr::new::(e.to_string())); } - } + )* + }}; +} + +// ============================================================================ +// PyDuckDBReader +// ============================================================================ + +/// DuckDB database reader for executing SQL queries. +/// +/// Creates an in-memory or file-based DuckDB connection that can execute +/// SQL queries and register DataFrames as queryable tables. +/// +/// Examples +/// -------- +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> df = reader.execute_sql("SELECT 1 as x, 2 as y") +/// +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> reader.register("data", pl.DataFrame({"x": [1, 2, 3]})) +/// >>> df = reader.execute_sql("SELECT * FROM data WHERE x > 1") +#[pyclass(name = "DuckDBReader", unsendable)] +struct PyDuckDBReader { + inner: RustDuckDBReader, +} + +#[pymethods] +impl PyDuckDBReader { + /// Create a new DuckDB reader from a connection string. + /// + /// Parameters + /// ---------- + /// connection : str + /// Connection string. Use "duckdb://memory" for in-memory database + /// or "duckdb://path/to/file.db" for file-based database. + /// + /// Returns + /// ------- + /// DuckDBReader + /// A configured DuckDB reader instance. + /// + /// Raises + /// ------ + /// ValueError + /// If the connection string is invalid or the database cannot be opened. + #[new] + fn new(connection: &str) -> PyResult { + let inner = RustDuckDBReader::from_connection_string(connection) + .map_err(|e| PyErr::new::(e.to_string()))?; + Ok(Self { inner }) + } + + /// Register a DataFrame as a queryable table. + /// + /// After registration, the DataFrame can be queried by name in SQL. 
+ /// + /// Parameters + /// ---------- + /// name : str + /// The table name to register under. + /// df : polars.DataFrame + /// The DataFrame to register. Must be a polars DataFrame. + /// + /// Raises + /// ------ + /// ValueError + /// If registration fails or the table name is invalid. + fn register(&mut self, py: Python<'_>, name: &str, df: &Bound<'_, PyAny>) -> PyResult<()> { + let rust_df = py_to_polars(py, df)?; + self.inner + .register(name, rust_df) + .map_err(|e| PyErr::new::(e.to_string())) } - // Compute aesthetic labels from column names - spec.compute_aesthetic_labels(); + /// Unregister a previously registered table. + /// + /// Parameters + /// ---------- + /// name : str + /// The table name to unregister. + /// + /// Raises + /// ------ + /// ValueError + /// If the table wasn't registered via this reader or unregistration fails. + fn unregister(&mut self, name: &str) -> PyResult<()> { + self.inner + .unregister(name) + .map_err(|e| PyErr::new::(e.to_string())) + } + + /// Execute a SQL query and return the result as a DataFrame. + /// + /// Parameters + /// ---------- + /// sql : str + /// The SQL query to execute. + /// + /// Returns + /// ------- + /// polars.DataFrame + /// The query result as a polars DataFrame. + /// + /// Raises + /// ------ + /// ValueError + /// If the SQL is invalid or execution fails. + fn execute_sql(&self, py: Python<'_>, sql: &str) -> PyResult> { + let df = self + .inner + .execute_sql(sql) + .map_err(|e| PyErr::new::(e.to_string()))?; + polars_to_py(py, &df) + } - // Create data map with the DataFrame as global data - let mut data_map: HashMap = HashMap::new(); - data_map.insert(GLOBAL_DATA_KEY.to_string(), df); + /// Check if this reader supports DataFrame registration. + /// + /// Returns + /// ------- + /// bool + /// True if register() is supported, False otherwise. 
+ fn supports_register(&self) -> bool { + self.inner.supports_register() + } + + /// Execute a ggsql query and return the visualization specification. + /// + /// This is the main entry point for creating visualizations. It parses + /// the query, executes the SQL portion, and returns a PySpec ready + /// for rendering. + /// + /// Parameters + /// ---------- + /// query : str + /// The ggsql query (SQL + VISUALISE clause). + /// + /// Returns + /// ------- + /// Spec + /// The resolved visualization specification ready for rendering. + /// + /// Raises + /// ------ + /// ValueError + /// If the query syntax is invalid, has no VISUALISE clause, or SQL execution fails. + /// + /// Examples + /// -------- + /// >>> reader = DuckDBReader("duckdb://memory") + /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + /// >>> writer = VegaLiteWriter() + /// >>> json_output = writer.render(spec) + fn execute(&self, query: &str) -> PyResult { + self.inner + .execute(query) + .map(|s| PySpec { inner: s }) + .map_err(|e| PyErr::new::(e.to_string())) + } +} - // Write using the specified writer - match writer { - "vegalite" => { - let w = VegaLiteWriter::new(); - w.write(&spec, &data_map) - .map_err(|e| PyErr::new::(e.to_string())) +// ============================================================================ +// PyVegaLiteWriter +// ============================================================================ + +/// Vega-Lite JSON output writer. +/// +/// Converts visualization specifications to Vega-Lite v6 JSON. +/// +/// Examples +/// -------- +/// >>> writer = VegaLiteWriter() +/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") +/// >>> json_output = writer.render(spec) +#[pyclass(name = "VegaLiteWriter")] +struct PyVegaLiteWriter { + inner: RustVegaLiteWriter, +} + +#[pymethods] +impl PyVegaLiteWriter { + /// Create a new Vega-Lite writer. 
+ /// + /// Returns + /// ------- + /// VegaLiteWriter + /// A configured Vega-Lite writer instance. + #[new] + fn new() -> Self { + Self { + inner: RustVegaLiteWriter::new(), } - _ => Err(PyErr::new::(format!( - "Unknown writer: {}", - writer - ))), } + + /// Render a Spec to Vega-Lite JSON output + /// + /// Parameters + /// ---------- + /// spec : Spec + /// The visualization specification from reader.execute(). + /// + /// Returns + /// ------- + /// str + /// The output (i.e., Vega-Lite JSON string). + /// + /// Raises + /// ------ + /// ValueError + /// If rendering fails. + /// + /// Examples + /// -------- + /// >>> reader = DuckDBReader("duckdb://memory") + /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + /// >>> writer = VegaLiteWriter() + /// >>> json_output = writer.render(spec) + fn render(&self, spec: &PySpec) -> PyResult { + self.inner + .render(&spec.inner) + .map_err(|e| PyErr::new::(e.to_string())) + } +} + +// ============================================================================ +// PyValidated +// ============================================================================ + +/// Result of validate() - query inspection and validation without SQL execution. +/// +/// Contains information about query structure and any validation errors/warnings. +/// The tree() method from Rust is not exposed as it's not useful in Python. +#[pyclass(name = "Validated")] +struct PyValidated { + sql: String, + visual: String, + has_visual: bool, + valid: bool, + errors: Vec<(String, Option<(usize, usize)>)>, + warnings: Vec<(String, Option<(usize, usize)>)>, +} + +#[pymethods] +impl PyValidated { + /// Whether the query contains a VISUALISE clause. + /// + /// Returns + /// ------- + /// bool + /// True if the query has a VISUALISE clause. + fn has_visual(&self) -> bool { + self.has_visual + } + + /// The SQL portion (before VISUALISE). + /// + /// Returns + /// ------- + /// str + /// The SQL part of the query. 
+ fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + /// + /// Returns + /// ------- + /// str + /// The VISUALISE part of the query. + fn visual(&self) -> &str { + &self.visual + } + + /// Whether the query is valid (no errors). + /// + /// Returns + /// ------- + /// bool + /// True if the query is syntactically and semantically valid. + fn valid(&self) -> bool { + self.valid + } + + /// Validation errors (fatal issues). + /// + /// Returns + /// ------- + /// list[dict] + /// List of error dictionaries with 'message' and optional 'location' keys. + fn errors(&self, py: Python<'_>) -> PyResult> { + errors_to_pylist(py, &self.errors) + } + + /// Validation warnings (non-fatal issues). + /// + /// Returns + /// ------- + /// list[dict] + /// List of warning dictionaries with 'message' and optional 'location' keys. + fn warnings(&self, py: Python<'_>) -> PyResult> { + errors_to_pylist(py, &self.warnings) + } +} + +// ============================================================================ +// PySpec +// ============================================================================ + +/// Result of reader.execute(), ready for rendering. +/// +/// Contains the resolved plot specification, data, and metadata. +/// Use writer.render(spec) to generate output. +/// +/// Examples +/// -------- +/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") +/// >>> print(f"Rows: {spec.metadata()['rows']}") +/// >>> writer = VegaLiteWriter() +/// >>> json_output = writer.render(spec) +#[pyclass(name = "Spec")] +struct PySpec { + inner: Spec, } +#[pymethods] +impl PySpec { + /// Get visualization metadata. + /// + /// Returns + /// ------- + /// dict + /// Dictionary with 'rows', 'columns', and 'layer_count' keys. 
+ fn metadata(&self, py: Python<'_>) -> PyResult> { + let m = self.inner.metadata(); + let dict = PyDict::new(py); + dict.set_item("rows", m.rows)?; + dict.set_item("columns", m.columns.clone())?; + dict.set_item("layer_count", m.layer_count)?; + Ok(dict.into()) + } + + /// The main SQL query that was executed. + /// + /// Returns + /// ------- + /// str + /// The SQL query string. + fn sql(&self) -> &str { + self.inner.sql() + } + + /// The VISUALISE portion (raw text). + /// + /// Returns + /// ------- + /// str + /// The VISUALISE clause text. + fn visual(&self) -> &str { + self.inner.visual() + } + + /// Number of layers. + /// + /// Returns + /// ------- + /// int + /// The number of DRAW clauses in the visualization. + fn layer_count(&self) -> usize { + self.inner.layer_count() + } + + /// Get global data (main query result). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The main query result DataFrame, or None if not available. + fn data(&self, py: Python<'_>) -> PyResult>> { + self.inner.data().map(|df| polars_to_py(py, df)).transpose() + } + + /// Get layer-specific data (from FILTER or FROM clause). + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The layer-specific DataFrame, or None if the layer uses global data. + fn layer_data(&self, py: Python<'_>, index: usize) -> PyResult>> { + self.inner + .layer_data(index) + .map(|df| polars_to_py(py, df)) + .transpose() + } + + /// Get stat transform data (e.g., histogram bins, density estimates). + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The stat transform DataFrame, or None if no stat transform. 
+ fn stat_data(&self, py: Python<'_>, index: usize) -> PyResult>> { + self.inner + .stat_data(index) + .map(|df| polars_to_py(py, df)) + .transpose() + } + + /// Layer filter/source query, or None if using global data. + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// str | None + /// The filter SQL query, or None if the layer uses global data directly. + fn layer_sql(&self, index: usize) -> Option { + self.inner.layer_sql(index).map(|s| s.to_string()) + } + + /// Stat transform query, or None if no stat transform. + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// str | None + /// The stat transform SQL query, or None if no stat transform. + fn stat_sql(&self, index: usize) -> Option { + self.inner.stat_sql(index).map(|s| s.to_string()) + } + + /// Validation warnings from preparation. + /// + /// Returns + /// ------- + /// list[dict] + /// List of warning dictionaries with 'message' and optional 'location' keys. + fn warnings(&self, py: Python<'_>) -> PyResult> { + warnings_to_pylist(py, self.inner.warnings()) + } +} + +// ============================================================================ +// Module Functions +// ============================================================================ + +/// Validate query syntax and semantics without executing SQL. +/// +/// Parameters +/// ---------- +/// query : str +/// The ggsql query to validate. +/// +/// Returns +/// ------- +/// Validated +/// Validation result with query inspection methods. +/// +/// Raises +/// ------ +/// ValueError +/// If validation fails unexpectedly (not for syntax errors, which are captured). 
+#[pyfunction] +fn validate(query: &str) -> PyResult { + let v = rust_validate(query) + .map_err(|e| PyErr::new::(e.to_string()))?; + + Ok(PyValidated { + sql: v.sql().to_string(), + visual: v.visual().to_string(), + has_visual: v.has_visual(), + valid: v.valid(), + errors: v + .errors() + .iter() + .map(|e| { + ( + e.message.clone(), + e.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(), + warnings: v + .warnings() + .iter() + .map(|w| { + ( + w.message.clone(), + w.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(), + }) +} + +/// Execute a ggsql query using a custom Python reader. +/// +/// This is a convenience function for custom readers. For native readers, +/// prefer using `reader.execute()` directly. +/// +/// Parameters +/// ---------- +/// query : str +/// The ggsql query to execute. +/// reader : Reader | object +/// The database reader to execute SQL against. Can be a native Reader +/// for optimal performance, or any Python object with an +/// `execute_sql(sql: str) -> polars.DataFrame` method. +/// +/// Returns +/// ------- +/// Spec +/// The resolved visualization specification ready for rendering. +/// +/// Raises +/// ------ +/// ValueError +/// If parsing, validation, or SQL execution fails. +/// +/// Examples +/// -------- +/// >>> # Using native reader (prefer reader.execute() instead) +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> spec = execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> writer = VegaLiteWriter() +/// >>> json_output = writer.render(spec) +/// +/// >>> # Using custom Python reader +/// >>> class MyReader: +/// ... def execute_sql(self, sql: str) -> pl.DataFrame: +/// ... 
return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +/// >>> reader = MyReader() +/// >>> spec = execute("SELECT * FROM data VISUALISE x, y DRAW point", reader) +#[pyfunction] +fn execute(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { + // Fast path: try all known native reader types + // Add new native readers to this list as they're implemented + try_native_readers!(query, reader, PyDuckDBReader); + + // Bridge path: wrap Python object as Reader + let bridge = PyReaderBridge { + obj: reader.clone().unbind(), + }; + bridge + .execute(query) + .map(|s| PySpec { inner: s }) + .map_err(|e| PyErr::new::(e.to_string())) +} + +// ============================================================================ +// Module Registration +// ============================================================================ + #[pymodule] fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_function(wrap_pyfunction!(split_query, m)?)?; - m.add_function(wrap_pyfunction!(render, m)?)?; + // Classes + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + // Functions + m.add_function(wrap_pyfunction!(validate, m)?)?; + m.add_function(wrap_pyfunction!(execute, m)?)?; + Ok(()) } diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 9df03779..f5c666fc 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -3,31 +3,198 @@ These tests focus on Python-specific logic: - DataFrame conversion via narwhals - Return type handling +- Two-stage API (reader.execute() -> render) Rust logic (parsing, Vega-Lite generation) is tested in the Rust test suite. 
""" +import json + import pytest import polars as pl import altair import ggsql +# Optional dependency for ibis test +try: + import ibis + + HAS_IBIS = hasattr(ibis, "duckdb") +except ImportError: + HAS_IBIS = False + + +class TestValidate: + """Tests for validate() function.""" + + def test_valid_query_with_visualise(self): + validated = ggsql.validate( + "SELECT 1 AS x, 2 AS y VISUALISE DRAW point MAPPING x AS x, y AS y" + ) + assert validated.has_visual() + assert validated.valid() + assert "SELECT" in validated.sql() + assert "VISUALISE" in validated.visual() + assert len(validated.errors()) == 0 + + def test_valid_query_without_visualise(self): + validated = ggsql.validate("SELECT 1 AS x, 2 AS y") + assert not validated.has_visual() + assert validated.valid() + assert validated.sql() == "SELECT 1 AS x, 2 AS y" + assert validated.visual() == "" + + def test_invalid_query_has_errors(self): + validated = ggsql.validate("SELECT 1 VISUALISE DRAW invalid_geom") + assert not validated.valid() + assert len(validated.errors()) > 0 + + def test_missing_required_aesthetic(self): + # Point requires x and y, only providing x + validated = ggsql.validate( + "SELECT 1 AS x, 2 AS y VISUALISE DRAW point MAPPING x AS x" + ) + assert not validated.valid() + errors = validated.errors() + assert len(errors) > 0 + assert any("y" in e["message"] for e in errors) + + +class TestDuckDBReader: + """Tests for DuckDBReader class.""" + + def test_create_in_memory(self): + reader = ggsql.DuckDBReader("duckdb://memory") + assert reader is not None + + def test_execute_simple_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = reader.execute_sql("SELECT 1 AS x, 2 AS y") + assert isinstance(df, pl.DataFrame) + assert df.shape == (1, 2) + assert list(df.columns) == ["x", "y"] + + def test_register_and_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("my_data", df) + + result = 
reader.execute_sql("SELECT * FROM my_data WHERE x > 1") + assert isinstance(result, pl.DataFrame) + assert result.shape == (2, 2) + + def test_supports_register(self): + reader = ggsql.DuckDBReader("duckdb://memory") + assert reader.supports_register() is True + + def test_invalid_connection_string(self): + with pytest.raises(ValueError): + ggsql.DuckDBReader("invalid://connection") + -class TestSplitQuery: - """Tests for split_query() function.""" +class TestVegaLiteWriter: + """Tests for VegaLiteWriter class.""" - def test_splits_sql_and_visualise(self): - sql, viz = ggsql.split_query( - "SELECT x, y FROM data VISUALISE x, y DRAW point" + def test_create_writer(self): + writer = ggsql.VegaLiteWriter() + assert writer is not None + + +class TestExecute: + """Tests for reader.execute() method.""" + + def test_execute_simple_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + assert spec is not None + assert spec.layer_count() == 1 + + def test_execute_with_registered_data(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("data", df) + + spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") + assert spec.metadata()["rows"] == 3 + + def test_execute_metadata(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute( + "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) " + "VISUALISE x, y DRAW point", + ) + + metadata = spec.metadata() + assert metadata["rows"] == 3 + assert "x" in metadata["columns"] + assert "y" in metadata["columns"] + assert metadata["layer_count"] == 1 + + def test_execute_sql_accessor(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + assert "SELECT" in spec.sql() + + def test_execute_visual_accessor(self): + reader = 
ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + assert "VISUALISE" in spec.visual() + + def test_execute_data_accessor(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + data = spec.data() + assert isinstance(data, pl.DataFrame) + assert data.shape == (1, 2) + + def test_execute_without_visualise_fails(self): + reader = ggsql.DuckDBReader("duckdb://memory") + with pytest.raises(ValueError): + reader.execute("SELECT 1 AS x, 2 AS y") + + +class TestWriterRender: + """Tests for VegaLiteWriter.render() method.""" + + def test_render_to_vegalite(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + writer = ggsql.VegaLiteWriter() + + result = writer.render(spec) + assert isinstance(result, str) + + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "vega-lite" in spec_dict["$schema"] + + def test_render_contains_data(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("data", df) + + spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") + writer = ggsql.VegaLiteWriter() + + result = writer.render(spec) + spec_dict = json.loads(result) + # Data should be in the spec (either inline or in datasets) + assert "data" in spec_dict or "datasets" in spec_dict + + def test_render_multi_layer(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute( + "SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) " + "VISUALISE " + "DRAW point MAPPING x AS x, y AS y " + "DRAW line MAPPING x AS x, y AS y", ) - assert "SELECT" in sql - assert "VISUALISE" in viz + writer = ggsql.VegaLiteWriter() - def test_no_visualise_returns_empty_viz(self): - sql, viz = ggsql.split_query("SELECT * FROM data") - assert sql == "SELECT * FROM data" - 
assert viz == "" + result = writer.render(spec) + spec_dict = json.loads(result) + assert "layer" in spec_dict class TestRenderAltairDataFrameConversion: @@ -112,23 +279,31 @@ def test_layered_chart_can_round_trip(self): def test_faceted_chart_returns_facet_chart(self): """FACET WRAP specs produce FacetChart.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4, 5, 6], - "y": [10, 20, 30, 40, 50, 60], - "group": ["A", "A", "A", "B", "B", "B"], - }) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4, 5, 6], + "y": [10, 20, 30, 40, 50, 60], + "group": ["A", "A", "A", "B", "B", "B"], + } + ) # Need validate=False because ggsql produces v6 specs - chart = ggsql.render_altair(df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False) + chart = ggsql.render_altair( + df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + ) assert isinstance(chart, altair.FacetChart) def test_faceted_chart_can_round_trip(self): """FacetChart can be converted to dict and back.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4, 5, 6], - "y": [10, 20, 30, 40, 50, 60], - "group": ["A", "A", "A", "B", "B", "B"], - }) - chart = ggsql.render_altair(df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4, 5, 6], + "y": [10, 20, 30, 40, 50, 60], + "group": ["A", "A", "A", "B", "B", "B"], + } + ) + chart = ggsql.render_altair( + df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + ) # Convert to dict (skip validation for ggsql specs) spec = chart.to_dict(validate=False) @@ -140,11 +315,13 @@ def test_faceted_chart_can_round_trip(self): def test_chart_with_color_encoding(self): """Charts with color encoding still return correct type.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4], - "y": [10, 20, 30, 40], - "category": ["A", "B", "A", "B"], - }) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4], + "y": [10, 20, 30, 40], + "category": ["A", "B", "A", "B"], + } + ) chart = ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") 
# Should still be a LayerChart (ggsql wraps in layer) assert isinstance(chart, altair.LayerChart) @@ -157,3 +334,207 @@ def test_invalid_viz_raises(self): df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) with pytest.raises(ValueError): ggsql.render_altair(df, "NOT VALID SYNTAX") + + +class TestTwoStageAPIIntegration: + """Integration tests for the two-stage reader.execute() -> render API.""" + + def test_end_to_end_workflow(self): + """Complete workflow: create reader, register data, execute, render.""" + # Create reader + reader = ggsql.DuckDBReader("duckdb://memory") + + # Register data + df = pl.DataFrame( + { + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "value": [10, 20, 30], + "region": ["North", "South", "North"], + } + ) + reader.register("sales", df) + + # Execute visualization + spec = reader.execute( + "SELECT * FROM sales VISUALISE date AS x, value AS y, region AS color DRAW line", + ) + + # Verify metadata + assert spec.metadata()["rows"] == 3 + assert spec.layer_count() == 1 + + # Render to Vega-Lite + writer = ggsql.VegaLiteWriter() + result = writer.render(spec) + + # Verify output + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "line" in json.dumps(spec_dict) + + def test_can_introspect_spec(self): + """Test all introspection methods on Spec.""" + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + + # All these should work without error + assert spec.sql() is not None + assert spec.visual() is not None + assert spec.layer_count() >= 1 + assert spec.metadata() is not None + assert spec.data() is not None + assert spec.warnings() is not None + + # Layer-specific accessors (may return None) + _ = spec.layer_data(0) + _ = spec.stat_data(0) + _ = spec.layer_sql(0) + _ = spec.stat_sql(0) + + +class TestCustomReader: + """Tests for custom Python reader support.""" + + def test_simple_custom_reader(self): + """Custom reader with execute_sql() 
method works.""" + + class SimpleReader: + def execute_sql(self, sql: str) -> pl.DataFrame: + return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + + reader = SimpleReader() + spec = ggsql.execute("SELECT * FROM data VISUALISE x, y DRAW point", reader) + assert spec.metadata()["rows"] == 3 + + def test_custom_reader_with_register(self): + """Custom reader with register() support.""" + + class RegisterReader: + def __init__(self): + self.tables = {} + + def execute_sql(self, sql: str) -> pl.DataFrame: + # Simple: just return the first registered table + if self.tables: + return next(iter(self.tables.values())) + return pl.DataFrame({"x": [1], "y": [2]}) + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.tables[name] = df + + reader = RegisterReader() + spec = ggsql.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) + assert spec is not None + + def test_custom_reader_error_handling(self): + """Custom reader errors are propagated.""" + + class ErrorReader: + def execute_sql(self, sql: str) -> pl.DataFrame: + raise ValueError("Custom reader error") + + reader = ErrorReader() + with pytest.raises(ValueError, match="Custom reader error"): + ggsql.execute("SELECT 1 VISUALISE x, y DRAW point", reader) + + def test_custom_reader_wrong_return_type(self): + """Custom reader returning wrong type raises TypeError.""" + + class WrongTypeReader: + def execute_sql(self, sql: str): + return {"x": [1, 2, 3]} # dict, not DataFrame + + reader = WrongTypeReader() + with pytest.raises((ValueError, TypeError)): + ggsql.execute("SELECT 1 VISUALISE x, y DRAW point", reader) + + def test_native_reader_fast_path(self): + """Native DuckDBReader still works (fast path).""" + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + assert spec.metadata()["rows"] == 1 + + def test_custom_reader_can_render(self): + """Custom reader result 
can be rendered to Vega-Lite.""" + + class StaticReader: + def execute_sql(self, sql: str) -> pl.DataFrame: + return pl.DataFrame( + { + "x": [1, 2, 3, 4, 5], + "y": [10, 40, 20, 50, 30], + "category": ["A", "B", "A", "B", "A"], + } + ) + + reader = StaticReader() + spec = ggsql.execute( + "SELECT * FROM data VISUALISE x, y, category AS color DRAW point", + reader, + ) + + writer = ggsql.VegaLiteWriter() + result = writer.render(spec) + + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "vega-lite" in spec_dict["$schema"] + + def test_custom_reader_execute_sql_called(self): + """Verify execute_sql() is called on the custom reader.""" + + class RecordingReader: + def __init__(self): + self.execute_calls = [] + + def execute_sql(self, sql: str) -> pl.DataFrame: + self.execute_calls.append(sql) + return pl.DataFrame({"x": [1], "y": [2]}) + + reader = RecordingReader() + ggsql.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader, + ) + + # execute_sql() should have been called at least once + assert len(reader.execute_calls) > 0 + # All calls should be valid SQL strings + assert all(isinstance(sql, str) for sql in reader.execute_calls) + + @pytest.mark.skipif(not HAS_IBIS, reason="ibis not installed") + def test_custom_reader_ibis(self): + """Test custom reader using ibis as backend.""" + + class IbisReader: + def __init__(self): + self.con = ibis.duckdb.connect() + + def execute_sql(self, sql: str) -> pl.DataFrame: + return self.con.con.execute(sql).pl() + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.con.create_table(name, df.to_arrow(), overwrite=True) + + def unregister(self, name: str) -> None: + self.con.drop_table(name) + + reader = IbisReader() + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("mydata", df) + + spec = ggsql.execute( + "SELECT * FROM mydata VISUALISE x, y DRAW point", + reader, + ) + + assert 
spec.metadata()["rows"] == 3 + writer = ggsql.VegaLiteWriter() + json_output = writer.render(spec) + assert "point" in json_output diff --git a/src/Cargo.toml b/src/Cargo.toml index 75cbd1f6..dd60aac1 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -32,6 +32,7 @@ polars.workspace = true # Readers duckdb = { workspace = true, optional = true } +arrow = { workspace = true, optional = true } postgres = { workspace = true, optional = true } sqlx = { workspace = true, optional = true } rusqlite = { workspace = true, optional = true } @@ -69,7 +70,7 @@ proptest.workspace = true [features] default = ["duckdb", "sqlite", "vegalite"] -duckdb = ["dep:duckdb"] +duckdb = ["dep:duckdb", "dep:arrow"] postgres = ["dep:postgres"] sqlite = ["dep:rusqlite"] vegalite = [] diff --git a/src/cli.rs b/src/cli.rs index 80dec31e..bb6d4df9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -9,10 +9,10 @@ use ggsql::parser::extract_sql; use ggsql::{parser, VERSION}; use std::path::PathBuf; -#[cfg(feature = "duckdb")] -use ggsql::execute::prepare_data; #[cfg(feature = "duckdb")] use ggsql::reader::{DuckDBReader, Reader}; +#[cfg(feature = "duckdb")] +use ggsql::validate; #[cfg(feature = "vegalite")] use ggsql::writer::{VegaLiteWriter, Writer}; @@ -169,15 +169,16 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option v, + Err(e) => { + eprintln!("Failed to validate query: {}", e); + std::process::exit(1); + } + }; - if viz_part.is_empty() { + if !validated.has_visual() { if verbose { eprintln!("Visualisation is empty. 
Printing table instead."); } @@ -185,28 +186,27 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option s, + Err(e) => { + eprintln!("Failed to execute query: {}", e); + std::process::exit(1); + } + }; if verbose { - eprintln!("\nData sources loaded:"); - for (key, df) in &prepared.data { - eprintln!(" {}: {:?}", key, df.shape()); - } - eprintln!("\nParsed {} visualisation spec(s)", prepared.specs.len()); + let metadata = spec.metadata(); + eprintln!("\nQuery executed:"); + eprintln!(" Rows: {}", metadata.rows); + eprintln!(" Columns: {}", metadata.columns.join(", ")); + eprintln!(" Layers: {}", metadata.layer_count); } - let first_spec = prepared.specs.first(); - if first_spec.is_none() { + if spec.plot().layers.is_empty() { eprintln!("No visualization specifications found"); std::process::exit(1); } - let first_spec = first_spec.unwrap(); // Check writer if writer != "vegalite" { @@ -220,14 +220,15 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option r, + Err(e) => { + eprintln!("Failed to generate Vega-Lite output: {}", e); + std::process::exit(1); + } + }; if output.is_none() { // Empty output location, write to stdout @@ -237,7 +238,7 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option { if verbose { eprintln!("\nVega-Lite JSON written to: {}", output.display()); @@ -291,13 +292,38 @@ fn cmd_parse(query: String, format: String) { } } -fn cmd_validate(query: String, reader: Option) { - println!("Validating query: {}", query); - if let Some(reader) = reader { - println!("Reader: {}", reader); +fn cmd_validate(query: String, _reader: Option) { + #[cfg(feature = "duckdb")] + { + match validate(&query) { + Ok(validated) if validated.valid() => { + println!("✓ Query syntax is valid"); + } + Ok(validated) => { + println!("✗ Validation errors:"); + for err in validated.errors() { + println!(" - {}", err.message); + } + if !validated.warnings().is_empty() { + println!("\nWarnings:"); + for warning in 
validated.warnings() { + println!(" - {}", warning.message); + } + } + std::process::exit(1); + } + Err(e) => { + eprintln!("Error during validation: {}", e); + std::process::exit(1); + } + } + } + + #[cfg(not(feature = "duckdb"))] + { + eprintln!("Validation requires the duckdb feature"); + std::process::exit(1); } - // TODO: Implement validation logic - println!("Validation not yet implemented"); } // Prints a CSV-like output to stdout with aligned columns @@ -309,7 +335,7 @@ fn print_table_fallback(query: &str, reader: &DuckDBReader, max_rows: usize) { } let parsed = parsed.unwrap(); - let data = reader.execute(&parsed); + let data = reader.execute_sql(&parsed); if let Err(e) = data { eprintln!("Failed to execute SQL query: {}", e); std::process::exit(1) diff --git a/src/doc/API.md b/src/doc/API.md new file mode 100644 index 00000000..1327960e --- /dev/null +++ b/src/doc/API.md @@ -0,0 +1,525 @@ +# ggsql API Reference + +This document provides a comprehensive reference for the ggsql public API. + +## Overview + +- **Stage 1: `reader.execute()`** - Parse query, execute SQL, resolve mappings, create Spec +- **Stage 2: `writer.render()`** - Generate output (Vega-Lite JSON, etc.) + +### API Functions + +| Function | Use Case | +| ------------------ | ---------------------------------------------------- | +| `reader.execute()` | Main entry point - full visualization pipeline | +| `writer.render()` | Generate output from Spec | +| `validate()` | Validate syntax + semantics, inspect query structure | + +--- + +## Core Functions + +### `Reader::execute` + +```rust +fn execute(&self, query: &str) -> Result +``` + +Execute a ggsql query for visualization. This is the main entry point - a default method on the Reader trait. + +**What happens during execution:** + +1. Parses the query (SQL + VISUALISE portions) +2. Executes the main SQL query using the reader +3. Resolves wildcards (`VISUALISE *`) against actual columns +4. Merges global mappings into each layer +5. 
Executes layer-specific queries (filters, stats) +6. Injects constant values as synthetic columns +7. Computes aesthetic labels from column names + +**Arguments:** + +- `query` - The full ggsql query string + +**Returns:** + +- `Ok(Spec)` - Ready for rendering +- `Err(GgsqlError)` - Parse, validation, or execution error + +**Example:** + +```rust +use ggsql::reader::{DuckDBReader, Reader}; +use ggsql::writer::{VegaLiteWriter, Writer}; + +let reader = DuckDBReader::from_connection_string("duckdb://memory")?; +let spec = reader.execute( + "SELECT x, y FROM data VISUALISE x, y DRAW point" +)?; + +// Access metadata +println!("Rows: {}", spec.metadata().rows); +println!("Columns: {:?}", spec.metadata().columns); + +// Render to Vega-Lite +let writer = VegaLiteWriter::new(); +let result = writer.render(&spec)?; +``` + +**Error Conditions:** + +- Parse error in SQL or VISUALISE portion +- SQL execution failure +- Missing required aesthetics +- Invalid geom type +- Multiple VISUALISE statements (not yet supported) + +--- + +### `validate` + +```rust +pub fn validate(query: &str) -> Result +``` + +Validate query syntax and semantics without executing SQL. This function combines query parsing and validation into a single operation. + +**What is validated:** + +- Syntax (parsing) +- Required aesthetics for each geom type +- Valid scale types (linear, log10, date, etc.) 
+- Valid coord types and properties +- Valid geom types +- Valid aesthetic names +- Valid SETTING parameters + +**Arguments:** + +- `query` - The full ggsql query string (SQL + VISUALISE) + +**Returns:** + +- `Ok(Validated)` - Validation results with query inspection methods +- `Err(GgsqlError)` - Internal error + +**Example:** + +```rust +use ggsql::validate; + +let validated = validate("SELECT x, y FROM data VISUALISE x, y DRAW point")?; + +// Check validity +if !validated.valid() { + for error in validated.errors() { + eprintln!("Error: {}", error.message); + } +} + +// Inspect query structure +if validated.has_visual() { + println!("SQL: {}", validated.sql()); + println!("Visual: {}", validated.visual()); +} +``` + +**Notes:** + +- Does not execute SQL +- Does not resolve wildcards or global mappings +- Cannot validate column existence (requires data) +- Returns all errors, not just the first one +- CST available via `tree()` for advanced inspection + +--- + +## Type Reference + +### `Validated` + +Result of validating a query (syntax + semantics, no SQL execution). 
+ +```rust +pub struct Validated { + // All fields private +} +``` + +**Methods:** + +| Method | Signature | Description | +| ------------ | -------------------------------------------- | ---------------------------------- | +| `has_visual` | `fn has_visual(&self) -> bool` | Whether query contains VISUALISE | +| `sql` | `fn sql(&self) -> &str` | The SQL portion (before VISUALISE) | +| `visual` | `fn visual(&self) -> &str` | The VISUALISE portion (raw text) | +| `tree` | `fn tree(&self) -> Option<&Tree>` | CST for advanced inspection | +| `valid` | `fn valid(&self) -> bool` | Whether query is valid | +| `errors` | `fn errors(&self) -> &[ValidationError]` | Validation errors | +| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings | + +**Example:** + +```rust +let validated = ggsql::validate("SELECT 1 as x VISUALISE DRAW point MAPPING x AS x, y AS y")?; + +// Check validity +if !validated.valid() { + for error in validated.errors() { + eprintln!("Error: {}", error.message); + } +} + +// Inspect query structure +assert!(validated.has_visual()); +assert_eq!(validated.sql(), "SELECT 1 as x"); +assert!(validated.visual().starts_with("VISUALISE")); + +// CST access for advanced use cases +if let Some(tree) = validated.tree() { + println!("Root node: {}", tree.root_node().kind()); +} +``` + +--- + +### `Spec` + +Result of executing a ggsql query, ready for rendering. + +#### Rendering + +Use `writer.render(&spec)` to generate output. 
+ +**Example:** + +```rust +let writer = VegaLiteWriter::new(); +let json = writer.render(&spec)?; +println!("{}", json); +``` + +#### Plot Access Methods + +| Method | Signature | Description | +| ------------- | -------------------------------- | ------------------------------- | +| `plot` | `fn plot(&self) -> &Plot` | Get resolved plot specification | +| `layer_count` | `fn layer_count(&self) -> usize` | Number of layers | + +**Example:** + +```rust +println!("Layers: {}", spec.layer_count()); + +let plot = spec.plot(); +for (i, layer) in plot.layers.iter().enumerate() { + println!("Layer {}: {:?}", i, layer.geom); +} +``` + +#### Metadata Methods + +| Method | Signature | Description | +| ---------- | --------------------------------- | -------------------------- | +| `metadata` | `fn metadata(&self) -> &Metadata` | Get visualization metadata | + +**Example:** + +```rust +let meta = spec.metadata(); +println!("Rows: {}", meta.rows); +println!("Columns: {:?}", meta.columns); +println!("Layer count: {}", meta.layer_count); +``` + +#### Data Access Methods + +| Method | Signature | Description | +| ------------ | ------------------------------------------------------ | ------------------------------- | +| `data` | `fn data(&self) -> Option<&DataFrame>` | Global data (main query result) | +| `layer_data` | `fn layer_data(&self, i: usize) -> Option<&DataFrame>` | Layer-specific data | +| `stat_data` | `fn stat_data(&self, i: usize) -> Option<&DataFrame>` | Stat transform results | +| `data_map` | `fn data_map(&self) -> &HashMap` | Raw data map access | + +**Example:** + +```rust +// Global data +if let Some(df) = spec.data() { + println!("Global data: {} rows", df.height()); +} + +// Layer-specific data (from FILTER or FROM clause) +if let Some(df) = spec.layer_data(0) { + println!("Layer 0 has filtered data: {} rows", df.height()); +} + +// Stat data (histogram bins, density estimates, etc.) 
+if let Some(df) = spec.stat_data(1) { + println!("Layer 1 stat data: {} rows", df.height()); +} +``` + +#### Query Introspection Methods + +| Method | Signature | Description | +| ----------- | ----------------------------------------------- | -------------------------------- | +| `sql` | `fn sql(&self) -> &str` | Main SQL query that was executed | +| `visual` | `fn visual(&self) -> &str` | Raw VISUALISE text | +| `layer_sql` | `fn layer_sql(&self, i: usize) -> Option<&str>` | Layer filter/source query | +| `stat_sql` | `fn stat_sql(&self, i: usize) -> Option<&str>` | Stat transform query | + +**Example:** + +```rust +// Main query +println!("SQL: {}", spec.sql()); +println!("Visual: {}", spec.visual()); + +// Per-layer queries +for i in 0..spec.layer_count() { + if let Some(sql) = spec.layer_sql(i) { + println!("Layer {} filter: {}", i, sql); + } + if let Some(sql) = spec.stat_sql(i) { + println!("Layer {} stat: {}", i, sql); + } +} +``` + +#### Warnings Method + +| Method | Signature | Description | +| ---------- | -------------------------------------------- | ---------------------------------- | +| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings from execution | + +**Example:** + +```rust +let spec = reader.execute(query)?; + +// Check for warnings +if !spec.warnings().is_empty() { + for warning in spec.warnings() { + eprintln!("Warning: {}", warning.message); + } +} + +// Continue with rendering +let writer = VegaLiteWriter::new(); +let json = writer.render(&spec)?; +``` + +--- + +### `Metadata` + +Information about the prepared visualization. + +```rust +pub struct Metadata { + pub rows: usize, // Rows in primary data source + pub columns: Vec, // Column names + pub layer_count: usize, // Number of layers in the plot +} +``` + +--- + +### `ValidationError` + +A validation error (fatal issue). 
+ +```rust +pub struct ValidationError { + pub message: String, + pub location: Option, +} +``` + +--- + +### `ValidationWarning` + +A validation warning (non-fatal issue). + +```rust +pub struct ValidationWarning { + pub message: String, + pub location: Option, +} +``` + +--- + +### `Location` + +Location within a query string. + +```rust +pub struct Location { + pub line: usize, // 0-based line number + pub column: usize, // 0-based column number +} +``` + +--- + +## Reader Trait & Implementations + +### `Reader` Trait + +```rust +pub trait Reader { + /// Execute a SQL query and return a DataFrame + fn execute_sql(&self, sql: &str) -> Result; + + /// Register a DataFrame as a queryable table + fn register(&mut self, name: &str, df: DataFrame) -> Result<()>; + + /// Unregister a previously registered table + fn unregister(&mut self, name: &str) -> Result<()>; + + /// Check if this reader supports DataFrame registration + fn supports_register(&self) -> bool; +} +``` + +--- + +## Writer Trait & Implementations + +### `Writer` Trait + +```rust +pub trait Writer { + /// Render a plot specification to output format + fn write(&self, spec: &Plot, data: &HashMap) -> Result; + + /// Get the file extension for this writer's output + fn file_extension(&self) -> &str; +} +``` + +## Python Bindings + +The Python bindings provide the same two-stage API with Pythonic conventions. + +### Classes + +#### `DuckDBReader` + +```python +class DuckDBReader: + def __init__(self, connection: str) -> None: + """Create a DuckDB reader. + + Args: + connection: Connection string (e.g., "duckdb://memory") + """ + + def register(self, name: str, df: Any) -> None: + """Register a DataFrame as a queryable table. + + Args: + name: Table name + df: Polars DataFrame or narwhals-compatible DataFrame + """ + + def unregister(self, name: str) -> None: + """Unregister a previously registered table. 
+ + Args: + name: Table name to unregister + """ + + def execute_sql(self, sql: str) -> polars.DataFrame: + """Execute SQL and return a Polars DataFrame.""" + + def supports_register(self) -> bool: + """Check if registration is supported.""" +``` + +#### `VegaLiteWriter` + +```python +class VegaLiteWriter: + def __init__(self) -> None: + """Create a Vega-Lite writer.""" +``` + +#### `Validated` + +```python +class Validated: + def has_visual(self) -> bool: + """Check if query has VISUALISE clause.""" + + def sql(self) -> str: + """Get the SQL portion.""" + + def visual(self) -> str: + """Get the VISUALISE portion.""" + + def valid(self) -> bool: + """Check if query is valid.""" + + def errors(self) -> list[dict]: + """Get validation errors as list of dicts with 'message', 'location'.""" + + def warnings(self) -> list[dict]: + """Get validation warnings as list of dicts with 'message', 'location'.""" + + # Note: tree() not exposed (tree-sitter nodes are Rust-only) +``` + +#### `Spec` + +```python +class Spec: + def metadata(self) -> dict: + """Get metadata as dict with keys: rows, columns, layer_count.""" + + def sql(self) -> str: + """Get the main SQL query.""" + + def visual(self) -> str: + """Get the VISUALISE text.""" + + def layer_count(self) -> int: + """Get number of layers.""" + + def warnings(self) -> list[dict]: + """Get validation warnings as list of dicts with 'message', 'location'.""" + + def data(self) -> polars.DataFrame | None: + """Get global data.""" + + def layer_data(self, index: int) -> polars.DataFrame | None: + """Get layer-specific data.""" + + def stat_data(self, index: int) -> polars.DataFrame | None: + """Get stat transform data.""" + + def layer_sql(self, index: int) -> str | None: + """Get layer filter query.""" + + def stat_sql(self, index: int) -> str | None: + """Get stat transform query.""" +``` + +### Functions + +```python +def validate(query: str) -> Validated: + """Validate query syntax and semantics. 
+ + Returns Validated object with query inspection and validation methods. + """ + +def execute(query: str, reader: Any) -> Spec: + """Execute a ggsql query with a custom Python reader. + + For native readers, use reader.execute() method instead. + """ +``` diff --git a/src/execute.rs b/src/execute.rs index 33116ceb..3bf2be33 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -531,6 +531,23 @@ fn transform_global_sql(sql: &str, materialized_ctes: &HashSet) -> Optio } } +/// Result of building a layer query +/// +/// Contains information about the queries executed for a layer, +/// distinguishing between base filter queries and stat transform queries. +#[derive(Debug, Default)] +pub struct LayerQueryResult { + /// The final query to execute (if any) + /// None means layer uses global data directly + pub query: Option, + /// The base query before stat transform (filter/source only) + /// None if layer uses global data directly without filter + pub layer_sql: Option, + /// The stat transform query (if a stat transform was applied) + /// None if no stat transform was needed + pub stat_sql: Option, +} + /// Build a layer query handling all source types /// /// Handles: @@ -544,12 +561,12 @@ fn transform_global_sql(sql: &str, materialized_ctes: &HashSet) -> Optio /// (e.g., histogram binning, bar counting). /// /// Returns: -/// - `Ok(Some(query))` - execute this query and store result -/// - `Ok(None)` - layer uses `__global__` directly (no source, no filter, no constants, no stat transform) +/// - `Ok(LayerQueryResult)` with information about queries executed /// - `Err(...)` - validation error (e.g., filter without global data) /// /// Note: This function takes `&mut Layer` because stat transforms may add new aesthetic mappings /// (e.g., mapping y to `__ggsql_stat__count` for histogram or bar count). 
+#[allow(clippy::too_many_arguments)] fn build_layer_query( layer: &mut Layer, schema: &Schema, @@ -559,7 +576,7 @@ fn build_layer_query( facet: Option<&Facet>, constants: &[(String, LiteralValue)], execute_query: &F, -) -> Result> +) -> Result where F: Fn(&str) -> Result, { @@ -603,7 +620,7 @@ where naming::global_table() } else { // No source, no filter, no constants, no stat transform - use __global__ data directly - return Ok(None); + return Ok(LayerQueryResult::default()); } } }; @@ -635,6 +652,9 @@ where query = format!("{} WHERE {}", query, f); } + // Save the base query (with filter) before stat transform + let base_query = query.clone(); + // Apply statistical transformation (after filter, uses combined group_by) // Returns StatResult::Identity for no transformation, StatResult::Transformed for transformed query let stat_result = layer.geom.apply_stat_transform( @@ -692,11 +712,15 @@ where } // Use the transformed query - let mut final_query = transformed_query; + let mut final_query = transformed_query.clone(); if let Some(o) = order_by { final_query = format!("{} ORDER BY {}", final_query, o); } - Ok(Some(final_query)) + Ok(LayerQueryResult { + query: Some(final_query), + layer_sql: Some(base_query), + stat_sql: Some(transformed_query), + }) } StatResult::Identity => { // Identity - no stat transformation @@ -707,14 +731,18 @@ where && order_by.is_none() && constants.is_empty() { - Ok(None) + Ok(LayerQueryResult::default()) } else { // Layer has filter, order_by, or constants - still need the query let mut final_query = query; if let Some(o) = order_by { final_query = format!("{} ORDER BY {}", final_query, o); } - Ok(Some(final_query)) + Ok(LayerQueryResult { + query: Some(final_query.clone()), + layer_sql: Some(final_query), + stat_sql: None, + }) } } } @@ -860,8 +888,16 @@ fn split_color_aesthetic(layers: &mut Vec) { pub struct PreparedData { /// Data map with global and layer-specific DataFrames pub data: HashMap, - /// Parsed and resolved 
visualization specifications - pub specs: Vec, + /// Parsed and resolved visualization specification + pub spec: Plot, + /// The main SQL query that was executed + pub sql: String, + /// The raw VISUALISE portion text + pub visual: String, + /// Per-layer filter/source queries (None = uses global data directly) + pub layer_sql: Vec>, + /// Per-layer stat transform queries (None = no stat transform) + pub stat_sql: Vec>, } /// Build data map from a query using a custom query executor function @@ -888,6 +924,13 @@ where )); } + // TODO: Support multiple VISUALISE statements in future + if specs.len() > 1 { + return Err(GgsqlError::ValidationError( + "Multiple VISUALISE statements are not yet supported. Please use a single VISUALISE statement.".to_string(), + )); + } + // Check if we have any visualization content if viz_part.trim().is_empty() { return Err(GgsqlError::ValidationError( @@ -1054,6 +1097,10 @@ where // - Layer with no source, no filter, no order_by → returns None (use global directly, constants already injected) let facet = specs[0].facet.clone(); + // Track layer and stat queries for introspection + let mut layer_sql_vec: Vec> = Vec::new(); + let mut stat_sql_vec: Vec> = Vec::new(); + for (idx, layer) in specs[0].layers.iter_mut().enumerate() { // For layers using global data without filter, constants are already in global data // (injected with layer-indexed names). For other layers, extract constants for injection. @@ -1064,7 +1111,7 @@ where }; // Get mutable reference to layer for stat transform to update aesthetics - if let Some(layer_query) = build_layer_query( + let query_result = build_layer_query( layer, &layer_schemas[idx], &materialized_ctes, @@ -1073,7 +1120,14 @@ where facet.as_ref(), &constants, &execute_query, - )? 
{ + )?; + + // Store query information for introspection + layer_sql_vec.push(query_result.layer_sql); + stat_sql_vec.push(query_result.stat_sql); + + // Execute the query if one was generated + if let Some(layer_query) = query_result.query { let df = execute_query(&layer_query).map_err(|e| { GgsqlError::ReaderError(format!( "Failed to fetch data for layer {}: {}", idx, e )) })?; @@ -1105,20 +1159,24 @@ where )); } - // Post-process specs: replace literals with column references and compute labels - for spec in &mut specs { - // Replace literal aesthetic values with column references to synthetic constant columns - replace_literals_with_columns(spec); - // Compute aesthetic labels (uses first non-constant column, respects user-specified labels) - spec.compute_aesthetic_labels(); - // Divide 'color' over 'stroke' and 'fill'. This needs to happens after - // literals have associated columns. - split_color_aesthetic(&mut spec.layers); - } + let mut spec = specs.into_iter().next().unwrap(); + + // Post-process spec: replace literals with column references and compute labels + // Replace literal aesthetic values with column references to synthetic constant columns + replace_literals_with_columns(&mut spec); + // Compute aesthetic labels (uses first non-constant column, respects user-specified labels) + spec.compute_aesthetic_labels(); + // Divide 'color' over 'stroke' and 'fill'. This needs to happen after + // literals have associated columns. + split_color_aesthetic(&mut spec.layers); Ok(PreparedData { data: data_map, - specs, + spec, + sql: sql_part, + visual: viz_part, + layer_sql: layer_sql_vec, + stat_sql: stat_sql_vec, }) } @@ -1127,7 +1185,7 @@ where /// Convenience wrapper around `prepare_data_with_executor` for direct DuckDB reader usage. 
#[cfg(feature = "duckdb")] pub fn prepare_data(query: &str, reader: &DuckDBReader) -> Result { - prepare_data_with_executor(query, |sql| reader.execute(sql)) + prepare_data_with_executor(query, |sql| reader.execute_sql(sql)) } #[cfg(test)] @@ -1146,7 +1204,7 @@ mod tests { let result = prepare_data(query, &reader).unwrap(); assert!(result.data.contains_key(naming::GLOBAL_DATA_KEY)); - assert_eq!(result.specs.len(), 1); + assert_eq!(result.spec.layers.len(), 1); } #[cfg(feature = "duckdb")] @@ -1373,7 +1431,8 @@ mod tests { ); // Should use temp table name with session UUID - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_cte_sales_")); assert!(query.ends_with("__")); assert!(query.contains(naming::session_id())); @@ -1401,7 +1460,8 @@ mod tests { ); // Should use temp table name with session UUID and filter - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("__ggsql_cte_sales_")); assert!(query.ends_with(" WHERE year = 2024")); assert!(query.contains(naming::session_id())); @@ -1427,8 +1487,9 @@ mod tests { ); // Should use table name directly + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table".to_string()) ); } @@ -1453,8 +1514,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table WHERE value > 100".to_string()) ); } @@ -1479,8 +1541,9 @@ mod tests { ); // File paths should be wrapped in single quotes + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM 'data/sales.csv'".to_string()) ); } @@ -1505,8 +1568,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + 
query_result.query, Some("SELECT * FROM 'data.parquet' WHERE x > 10".to_string()) ); } @@ -1531,7 +1595,8 @@ mod tests { ); // Should query global table with session UUID and filter - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_global_")); assert!(query.ends_with("__ WHERE category = 'A'")); assert!(query.contains(naming::session_id())); @@ -1555,8 +1620,11 @@ mod tests { &mock_execute, ); - // Should return None - layer uses __global__ directly - assert_eq!(result.unwrap(), None); + // Should return empty result - layer uses __global__ directly + let query_result = result.unwrap(); + assert!(query_result.query.is_none()); + assert!(query_result.layer_sql.is_none()); + assert!(query_result.stat_sql.is_none()); } #[test] @@ -1605,8 +1673,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table ORDER BY date ASC".to_string()) ); } @@ -1632,8 +1701,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some( "SELECT * FROM some_table WHERE year = 2024 ORDER BY date DESC, value ASC" .to_string() @@ -1661,7 +1731,8 @@ mod tests { ); // Should query global table with session UUID and order_by - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_global_")); assert!(query.ends_with("__ ORDER BY x ASC")); assert!(query.contains(naming::session_id())); @@ -1697,7 +1768,8 @@ mod tests { ); // Should inject constants as columns - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("SELECT *")); assert!(query.contains("'value' AS __ggsql_const_color__")); 
assert!(query.contains("'value2' AS __ggsql_const_size__")); @@ -1727,7 +1799,8 @@ mod tests { &mock_execute, ); - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("FROM __ggsql_global_")); assert!(query.contains(naming::session_id())); assert!(query.contains("'value' AS __ggsql_const_fill__")); @@ -2259,8 +2332,8 @@ mod tests { assert_eq!(global_df.height(), 3); // Verify spec has x and y aesthetics merged into layer - assert_eq!(result.specs.len(), 1); - let layer = &result.specs[0].layers[0]; + assert_eq!(result.spec.layers.len(), 1); + let layer = &result.spec.layers[0]; assert!( layer.mappings.contains_key("x"), "Layer should have x from global mapping" @@ -2721,7 +2794,7 @@ mod tests { let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; assert!(aes.contains_key("stroke")); assert!(aes.contains_key("fill")); @@ -2739,7 +2812,7 @@ mod tests { "#; let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; let stroke = aes.get("stroke").unwrap(); assert_eq!(stroke.column_name().unwrap(), "island"); @@ -2754,7 +2827,7 @@ mod tests { "#; let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; let stroke = aes.get("stroke").unwrap(); assert_eq!(stroke.column_name().unwrap(), "__ggsql_const_color_0__"); diff --git a/src/lib.rs b/src/lib.rs index 9eec2d49..61273bd6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,10 +27,10 @@ ggsql splits queries at the `VISUALISE` boundary: ## Core Components +- [`api`] - Validation API (validate, Validated) - [`parser`] - Query parsing and AST generation -- [`engine`] - Core execution engine -- [`readers`] - 
Data source abstraction layer -- [`writers`] - Output format abstraction layer +- [`reader`] - Data source abstraction layer +- [`writer`] - Output format abstraction layer */ pub mod naming; @@ -46,14 +46,18 @@ pub mod writer; #[cfg(feature = "duckdb")] pub mod execute; +pub mod validate; + // Re-export key types for convenience pub use plot::{ AestheticValue, DataSource, Facet, Geom, Layer, Mappings, Plot, Scale, SqlExpression, }; -// Future modules - not yet implemented -// #[cfg(feature = "engine")] -// pub mod engine; +// Re-export validation types and functions +pub use validate::{validate, Location, Validated, ValidationError, ValidationWarning}; + +// Re-export reader types +pub use reader::{Metadata, Spec}; // DataFrame abstraction (wraps Polars) pub use polars::prelude::DataFrame; @@ -113,7 +117,7 @@ mod integration_tests { FROM generate_series(0, 4) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has temporal type (DuckDB returns Datetime for DATE + INTERVAL) assert_eq!(df.get_column_names(), vec!["date", "revenue"]); @@ -173,7 +177,7 @@ mod integration_tests { FROM generate_series(0, 3) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has Datetime type let timestamp_col = df.column("timestamp").unwrap(); @@ -221,7 +225,7 @@ mod integration_tests { // Real SQL that users would write let sql = "SELECT 1 as int_col, 2.5 as float_col, true as bool_col"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types are preserved // DuckDB treats numeric literals as DECIMAL, which we convert to Float64 @@ -276,7 +280,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES (1, 2.5, 'a'), (2, NULL, 'b'), (NULL, 3.5, NULL)) AS t(int_col, float_col, str_col)"; - let df = reader.execute(sql).unwrap(); + 
let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -326,7 +330,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES ('A', 10), ('B', 20), ('A', 15), ('C', 30)) AS t(category, value)"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); let mut spec = Plot::new(); let layer = Layer::new(Geom::bar()) @@ -372,7 +376,7 @@ mod integration_tests { GROUP BY day "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify temporal type is preserved through aggregation // DATE_TRUNC returns Date type (not Datetime) @@ -410,7 +414,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT 0.1 as small, 123.456 as medium, 999999.999999 as large"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // All should be Float64 assert!(matches!( @@ -462,7 +466,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT CAST(1 AS TINYINT) as tiny, CAST(1000 AS SMALLINT) as small, CAST(1000000 AS INTEGER) as int, CAST(1000000000000 AS BIGINT) as big"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -530,7 +534,7 @@ mod integration_tests { // Prepare data - this parses, injects constants into global data, and replaces literals with columns let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // Verify constants were injected into global data (not layer-specific data) // Both layers share __global__ data for faceting compatibility @@ -547,7 +551,7 @@ mod integration_tests { !prepared.data.contains_key(&naming::layer_key(1)), "Layer 1 
should use global data, not layer-specific data" ); - assert_eq!(prepared.specs.len(), 1); + assert_eq!(prepared.spec.layers.len(), 2); // Verify global data contains layer-indexed constant columns let global_df = prepared.data.get(naming::GLOBAL_DATA_KEY).unwrap(); @@ -565,7 +569,7 @@ mod integration_tests { // Generate Vega-Lite let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Verify we have two layers @@ -638,7 +642,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // All layers should use global data for faceting to work assert!( @@ -685,7 +689,7 @@ mod integration_tests { // Generate Vega-Lite and verify faceting structure let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Should have facet structure (row and column) @@ -726,7 +730,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // Should have global data with the constant injected assert!( @@ -750,7 +754,7 @@ mod integration_tests { // Generate Vega-Lite and verify it works let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Both layers should have color 
field-mapped to their indexed constant columns diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 8ee13ebb..1824d6dc 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -5,7 +5,13 @@ use crate::reader::data::init_builtin_data; use crate::reader::{connection::ConnectionInfo, Reader}; use crate::{DataFrame, GgsqlError, Result}; +use arrow::ipc::reader::FileReader; +use duckdb::vtab::arrow::{arrow_recordbatch_to_query_params, ArrowVTab}; use duckdb::{params, Connection}; +use polars::io::SerWriter; +use polars::prelude::*; +use std::collections::HashSet; +use std::io::Cursor; /// DuckDB database reader /// @@ -19,14 +25,15 @@ use duckdb::{params, Connection}; /// /// // In-memory database /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -/// let df = reader.execute("SELECT 1 as x, 2 as y")?; +/// let df = reader.execute_sql("SELECT 1 as x, 2 as y")?; /// /// // File-based database /// let reader = DuckDBReader::from_connection_string("duckdb://data.db")?; -/// let df = reader.execute("SELECT * FROM sales")?; +/// let df = reader.execute_sql("SELECT * FROM sales")?; /// ``` pub struct DuckDBReader { conn: Connection, + registered_tables: HashSet, } impl DuckDBReader { @@ -64,7 +71,16 @@ impl DuckDBReader { } }; - Ok(Self { conn }) + // Register Arrow virtual table function for DataFrame registration + conn.register_table_function::("arrow") + .map_err(|e| { + GgsqlError::ReaderError(format!("Failed to register arrow function: {}", e)) + })?; + + Ok(Self { + conn, + registered_tables: HashSet::new(), + }) } /// Get a reference to the underlying DuckDB connection @@ -73,6 +89,81 @@ impl DuckDBReader { pub fn connection(&self) -> &Connection { &self.conn } + + /// Check if a table exists in the database + fn table_exists(&self, name: &str) -> Result { + let sql = "SELECT COUNT(*) FROM information_schema.tables WHERE table_name = ?"; + let count: i64 = self + .conn + .query_row(sql, [name], |row| row.get(0)) + 
.unwrap_or(0); + Ok(count > 0) + } +} + +/// Validate a table name +fn validate_table_name(name: &str) -> Result<()> { + if name.is_empty() { + return Err(GgsqlError::ReaderError("Table name cannot be empty".into())); + } + + // Reject characters that could break double-quoted identifiers or cause issues + let forbidden = ['"', '\0', '\n', '\r']; + for ch in forbidden { + if name.contains(ch) { + return Err(GgsqlError::ReaderError(format!( + "Table name '{}' contains invalid character '{}'", + name, + ch.escape_default() + ))); + } + } + + // Reasonable length limit + if name.len() > 128 { + return Err(GgsqlError::ReaderError(format!( + "Table name '{}' exceeds maximum length of 128 characters", + name + ))); + } + + Ok(()) +} + +/// Convert a Polars DataFrame to DuckDB Arrow query parameters via IPC serialization +fn dataframe_to_arrow_params(df: DataFrame) -> Result<[usize; 2]> { + // Serialize DataFrame to IPC format + let mut buffer = Vec::new(); + { + let mut writer = IpcWriter::new(&mut buffer); + writer.finish(&mut df.clone()).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to serialize DataFrame: {}", e)) + })?; + } + + // Read IPC into arrow crate's RecordBatch + let cursor = Cursor::new(buffer); + let reader = FileReader::try_new(cursor, None) + .map_err(|e| GgsqlError::ReaderError(format!("Failed to read IPC: {}", e)))?; + + // Collect all batches and concatenate if needed + let batches: Vec<_> = reader.filter_map(|r| r.ok()).collect(); + + if batches.is_empty() { + return Err(GgsqlError::ReaderError( + "DataFrame produced no Arrow batches".into(), + )); + } + + // For single batch, use directly; for multiple, concatenate + let rb = if batches.len() == 1 { + batches.into_iter().next().unwrap() + } else { + arrow::compute::concat_batches(&batches[0].schema(), &batches) + .map_err(|e| GgsqlError::ReaderError(format!("Failed to concat batches: {}", e)))? 
+ }; + + Ok(arrow_recordbatch_to_query_params(rb)) } /// Helper struct for building typed columns from rows @@ -294,7 +385,7 @@ impl ColumnBuilder { } impl Reader for DuckDBReader { - fn execute(&self, sql: &str) -> Result { + fn execute_sql(&self, sql: &str) -> Result { use polars::prelude::*; // Check if this is a DDL statement (CREATE, DROP, INSERT, UPDATE, DELETE, ALTER) @@ -413,30 +504,60 @@ impl Reader for DuckDBReader { Ok(df) } - fn validate_columns(&self, sql: &str, columns: &[String]) -> Result<()> { - // Execute the query to get the schema - let df = self.execute(sql)?; + fn register(&mut self, name: &str, df: DataFrame) -> Result<()> { + // Validate table name + validate_table_name(name)?; - // Get column names from the DataFrame - let schema_columns: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); + // Check for duplicates + if self.table_exists(name)? { + return Err(GgsqlError::ReaderError(format!( + "Table '{}' already exists", + name + ))); + } - // Check if all required columns exist - for col in columns { - if !schema_columns.contains(col) { - return Err(GgsqlError::ValidationError(format!( - "Column '{}' not found in query result. 
Available columns: {}", - col, - schema_columns.join(", ") - ))); - } + // Convert DataFrame to Arrow query params + let params = dataframe_to_arrow_params(df)?; + + // Create temp table from Arrow data + let sql = format!( + "CREATE TEMP TABLE \"{}\" AS SELECT * FROM arrow(?, ?)", + name + ); + self.conn.execute(&sql, params).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to register table '{}': {}", name, e)) + })?; + + // Track the table so we can unregister it later + self.registered_tables.insert(name.to_string()); + + Ok(()) + } + + fn unregister(&mut self, name: &str) -> Result<()> { + // Only allow unregistering tables we created via register() + if !self.registered_tables.contains(name) { + return Err(GgsqlError::ReaderError(format!( + "Table '{}' was not registered via this reader", + name + ))); } + // Drop the temp table + let sql = format!("DROP TABLE IF EXISTS \"{}\"", name); + self.conn.execute(&sql, []).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to unregister table '{}': {}", name, e)) + })?; + + // Remove from tracking + self.registered_tables.remove(name); + Ok(()) } + + fn supports_register(&self) -> bool { + true + } } #[cfg(test)] @@ -452,7 +573,7 @@ mod tests { #[test] fn test_simple_query() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let df = reader.execute("SELECT 1 as x, 2 as y").unwrap(); + let df = reader.execute_sql("SELECT 1 as x, 2 as y").unwrap(); assert_eq!(df.shape(), (1, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); @@ -475,38 +596,16 @@ mod tests { .unwrap(); // Query data - let df = reader.execute("SELECT * FROM test").unwrap(); + let df = reader.execute_sql("SELECT * FROM test").unwrap(); assert_eq!(df.shape(), (2, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); } - #[test] - fn test_validate_columns_success() { - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let sql = "SELECT 1 as x, 2 as y"; - - let result = 
reader.validate_columns(sql, &["x".to_string(), "y".to_string()]); - assert!(result.is_ok()); - } - - #[test] - fn test_validate_columns_missing() { - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let sql = "SELECT 1 as x, 2 as y"; - - let result = reader.validate_columns(sql, &["z".to_string()]); - assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("Column 'z' not found")); - } - #[test] fn test_invalid_sql() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let result = reader.execute("INVALID SQL SYNTAX"); + let result = reader.execute_sql("INVALID SQL SYNTAX"); assert!(result.is_err()); } @@ -528,10 +627,160 @@ mod tests { .unwrap(); let df = reader - .execute("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") + .execute_sql("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") .unwrap(); assert_eq!(df.shape(), (2, 2)); assert_eq!(df.get_column_names(), vec!["region", "total"]); } + + #[test] + fn test_register_and_query() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create a DataFrame + let df = DataFrame::new(vec![ + Column::new("x".into(), vec![1i32, 2, 3]), + Column::new("y".into(), vec![10i32, 20, 30]), + ]) + .unwrap(); + + // Register the DataFrame + reader.register("my_table", df).unwrap(); + + // Query the registered table + let result = reader + .execute_sql("SELECT * FROM my_table ORDER BY x") + .unwrap(); + assert_eq!(result.shape(), (3, 2)); + assert_eq!(result.get_column_names(), vec!["x", "y"]); + } + + #[test] + fn test_register_duplicate_name_errors() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let df1 = DataFrame::new(vec![Column::new("a".into(), vec![1i32])]).unwrap(); + let df2 = DataFrame::new(vec![Column::new("b".into(), vec![2i32])]).unwrap(); + + // First registration should succeed + reader.register("dup_table", 
df1).unwrap(); + + // Second registration with same name should fail + let result = reader.register("dup_table", df2); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("already exists")); + } + + #[test] + fn test_register_invalid_table_names() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = DataFrame::new(vec![Column::new("a".into(), vec![1i32])]).unwrap(); + + // Empty name + let result = reader.register("", df.clone()); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("cannot be empty")); + + // Name with double quote + let result = reader.register("bad\"name", df.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid character")); + + // Name with null byte + let result = reader.register("bad\0name", df.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid character")); + + // Name too long + let long_name = "a".repeat(200); + let result = reader.register(&long_name, df); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("exceeds maximum length")); + } + + #[test] + fn test_supports_register() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + assert!(reader.supports_register()); + } + + #[test] + fn test_register_empty_dataframe() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create an empty DataFrame with schema + let df = DataFrame::new(vec![ + Column::new("x".into(), Vec::::new()), + Column::new("y".into(), Vec::::new()), + ]) + .unwrap(); + + reader.register("empty_table", df).unwrap(); + + // Query should return empty result with correct schema + let result = reader.execute_sql("SELECT * FROM empty_table").unwrap(); + assert_eq!(result.shape(), (0, 2)); + assert_eq!(result.get_column_names(), vec!["x", "y"]); 
+ } + + #[test] + fn test_unregister() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = DataFrame::new(vec![Column::new("x".into(), vec![1i32, 2, 3])]).unwrap(); + + reader.register("test_data", df).unwrap(); + + // Should be queryable + let result = reader.execute_sql("SELECT * FROM test_data").unwrap(); + assert_eq!(result.height(), 3); + + // Unregister + reader.unregister("test_data").unwrap(); + + // Should no longer exist + let result = reader.execute_sql("SELECT * FROM test_data"); + assert!(result.is_err()); + } + + #[test] + fn test_unregister_not_registered() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create a table directly (not via register) + reader + .connection() + .execute("CREATE TABLE user_table (x INT)", params![]) + .unwrap(); + + // Should fail - we didn't register this via register() + let result = reader.unregister("user_table"); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("was not registered via this reader")); + } + + #[test] + fn test_reregister_after_unregister() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = DataFrame::new(vec![Column::new("x".into(), vec![1i32, 2, 3])]).unwrap(); + + reader.register("data", df.clone()).unwrap(); + reader.unregister("data").unwrap(); + + // Should be able to register again + reader.register("data", df).unwrap(); + let result = reader.execute_sql("SELECT * FROM data").unwrap(); + assert_eq!(result.height(), 3); + } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 7f3f403a..cfbd271a 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -7,34 +7,101 @@ //! //! All readers implement the `Reader` trait, which provides: //! - SQL query execution → DataFrame conversion -//! - Column validation for query introspection +//! - Visualization query execution → Spec +//! 
- Optional DataFrame registration for queryable tables //! - Connection management and error handling //! //! # Example //! //! ```rust,ignore //! use ggsql::reader::{Reader, DuckDBReader}; +//! use ggsql::writer::{Writer, VegaLiteWriter}; //! +//! // Execute a ggsql query //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -//! let df = reader.execute("SELECT * FROM table")?; +//! let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; +//! +//! // Render to Vega-Lite JSON +//! let writer = VegaLiteWriter::new(); +//! let json = writer.render(&spec)?; +//! +//! // With DataFrame registration +//! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; +//! reader.register("my_table", some_dataframe)?; +//! let spec = reader.execute("SELECT * FROM my_table VISUALISE x, y DRAW point")?; //! ``` -use crate::{DataFrame, Result}; +use std::collections::HashMap; + +use crate::execute::prepare_data_with_executor; +use crate::plot::Plot; +use crate::validate::{validate, ValidationWarning}; +use crate::{DataFrame, GgsqlError, Result}; #[cfg(feature = "duckdb")] pub mod duckdb; pub mod connection; - pub mod data; +mod spec; #[cfg(feature = "duckdb")] pub use duckdb::DuckDBReader; +// ============================================================================ +// Spec - Result of reader.execute() +// ============================================================================ + +/// Result of executing a ggsql query, ready for rendering. 
+pub struct Spec { + /// Single resolved plot specification + pub(crate) plot: Plot, + /// Internal data map (global + layer-specific DataFrames) + pub(crate) data: HashMap, + /// Cached metadata about the prepared visualization + pub(crate) metadata: Metadata, + /// The main SQL query that was executed + pub(crate) sql: String, + /// The raw VISUALISE portion text + pub(crate) visual: String, + /// Per-layer filter/source queries (None = uses global data directly) + pub(crate) layer_sql: Vec>, + /// Per-layer stat transform queries (None = no stat transform) + pub(crate) stat_sql: Vec>, + /// Validation warnings from preparation + pub(crate) warnings: Vec, +} + +/// Metadata about the prepared visualization. +#[derive(Debug, Clone)] +pub struct Metadata { + pub rows: usize, + pub columns: Vec, + pub layer_count: usize, +} + +// ============================================================================ +// Reader Trait +// ============================================================================ + /// Trait for data source readers /// /// Readers execute SQL queries and return Polars DataFrames. /// They provide a uniform interface for different database backends. +/// +/// # DataFrame Registration +/// +/// Some readers support registering DataFrames as queryable tables using +/// the [`register`](Reader::register) method. This allows you to query +/// in-memory DataFrames with SQL, join them with other tables, etc. 
+/// +/// ```rust,ignore +/// // Register a DataFrame (takes ownership) +/// reader.register("sales", sales_df)?; +/// +/// // Now you can query it +/// let result = reader.execute_sql("SELECT * FROM sales WHERE amount > 100")?; +/// ``` pub trait Reader { /// Execute a SQL query and return the result as a DataFrame /// @@ -52,20 +119,256 @@ pub trait Reader { /// - The SQL is invalid /// - The connection fails /// - The table or columns don't exist - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; + + /// Register a DataFrame as a queryable table (takes ownership) + /// + /// After registration, the DataFrame can be queried by name in SQL: + /// ```sql + /// SELECT * FROM WHERE ... + /// ``` + /// + /// # Arguments + /// + /// * `name` - The table name to register under + /// * `df` - The DataFrame to register (ownership is transferred) + /// + /// # Returns + /// + /// `Ok(())` on success, error if registration fails or isn't supported. + /// + /// # Default Implementation + /// + /// Returns an error by default. Override for readers that support registration. + fn register(&mut self, name: &str, _df: DataFrame) -> Result<()> { + Err(GgsqlError::ReaderError(format!( + "This reader does not support DataFrame registration for table '{}'", + name + ))) + } - /// Validate that specified columns exist in a query result + /// Unregister a previously registered table + /// + /// # Arguments + /// + /// * `name` - The table name to unregister + /// + /// # Returns /// - /// This is useful for checking column names before visualization - /// to provide better error messages. + /// `Ok(())` on success. + /// + /// # Default Implementation + /// + /// Returns an error by default. Override for readers that support registration. 
+ fn unregister(&mut self, name: &str) -> Result<()> { + Err(GgsqlError::ReaderError(format!( + "This reader does not support unregistering table '{}'", + name + ))) + } + + /// Check if this reader supports DataFrame registration + /// + /// # Returns + /// + /// `true` if [`register`](Reader::register) is implemented, `false` otherwise. + fn supports_register(&self) -> bool { + false + } + + /// Execute a ggsql query and return the visualization specification. + /// + /// This is the main entry point for creating visualizations. It parses the query, + /// executes the SQL portion, and returns a `Spec` ready for rendering. /// /// # Arguments /// - /// * `sql` - The SQL query to introspect - /// * `columns` - Column names to validate + /// * `query` - The ggsql query (SQL + VISUALISE clause) /// /// # Returns /// - /// Ok(()) if all columns exist, otherwise an error - fn validate_columns(&self, sql: &str, columns: &[String]) -> Result<()>; + /// A `Spec` containing the resolved visualization specification and data. 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The query syntax is invalid + /// - The query has no VISUALISE clause + /// - The SQL execution fails + /// + /// # Example + /// + /// ```rust,ignore + /// use ggsql::reader::{Reader, DuckDBReader}; + /// use ggsql::writer::{Writer, VegaLiteWriter}; + /// + /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; + /// let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; + /// + /// let writer = VegaLiteWriter::new(); + /// let json = writer.render(&spec)?; + /// ``` + #[cfg(feature = "duckdb")] + fn execute(&self, query: &str) -> Result { + // Run validation first to capture warnings + let validated = validate(query)?; + let warnings: Vec = validated.warnings().to_vec(); + + // Prepare data (this also validates, but we want the warnings from above) + let prepared_data = prepare_data_with_executor(query, |sql| self.execute_sql(sql))?; + + Ok(Spec::new( + prepared_data.spec, + prepared_data.data, + prepared_data.sql, + prepared_data.visual, + prepared_data.layer_sql, + prepared_data.stat_sql, + warnings, + )) + } +} + +#[cfg(test)] +#[cfg(all(feature = "duckdb", feature = "vegalite"))] +mod tests { + use super::*; + use crate::writer::{VegaLiteWriter, Writer}; + + #[test] + fn test_execute_and_render() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let spec = reader + .execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point") + .unwrap(); + + assert_eq!(spec.plot().layers.len(), 1); + assert_eq!(spec.metadata().layer_count, 1); + assert!(spec.data().is_some()); + + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + assert!(result.contains("point")); + } + + #[test] + fn test_execute_metadata() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let spec = reader + .execute( + "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) VISUALISE x, y DRAW 
point", + ) + .unwrap(); + + let metadata = spec.metadata(); + assert_eq!(metadata.rows, 3); + assert_eq!(metadata.columns.len(), 2); + assert!(metadata.columns.contains(&"x".to_string())); + assert!(metadata.columns.contains(&"y".to_string())); + assert_eq!(metadata.layer_count, 1); + } + + #[test] + fn test_execute_with_cte() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + WITH data AS ( + SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) + ) + SELECT * FROM data + VISUALISE x, y DRAW point + "#; + + let spec = reader.execute(query).unwrap(); + + assert_eq!(spec.plot().layers.len(), 1); + assert!(spec.data().is_some()); + let df = spec.data().unwrap(); + assert_eq!(df.height(), 2); + } + + #[test] + fn test_render_multi_layer() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) + VISUALISE + DRAW point MAPPING x AS x, y AS y + DRAW line MAPPING x AS x, y AS y + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + assert!(result.contains("layer")); + } + + #[test] + fn test_register_and_query() { + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let df = df! 
{ + "x" => [1i32, 2, 3], + "y" => [10i32, 20, 30], + } + .unwrap(); + + reader.register("my_data", df).unwrap(); + + let query = "SELECT * FROM my_data VISUALISE x, y DRAW point"; + let spec = reader.execute(query).unwrap(); + + assert_eq!(spec.metadata().rows, 3); + assert!(spec.metadata().columns.contains(&"x".to_string())); + + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + assert!(result.contains("point")); + } + + #[test] + fn test_register_and_join() { + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let sales = df! { + "id" => [1i32, 2, 3], + "amount" => [100i32, 200, 300], + "product_id" => [1i32, 1, 2], + } + .unwrap(); + + let products = df! { + "id" => [1i32, 2], + "name" => ["Widget", "Gadget"], + } + .unwrap(); + + reader.register("sales", sales).unwrap(); + reader.register("products", products).unwrap(); + + let query = r#" + SELECT s.id, s.amount, p.name + FROM sales s + JOIN products p ON s.product_id = p.id + VISUALISE id AS x, amount AS y + DRAW bar + "#; + + let spec = reader.execute(query).unwrap(); + assert_eq!(spec.metadata().rows, 3); + } + + #[test] + fn test_execute_no_viz_fails() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = "SELECT 1 as x, 2 as y"; + + let result = reader.execute(query); + assert!(result.is_err()); + } } diff --git a/src/reader/spec.rs b/src/reader/spec.rs new file mode 100644 index 00000000..4b1fc5bd --- /dev/null +++ b/src/reader/spec.rs @@ -0,0 +1,120 @@ +//! Implementation of Spec methods. 
+ +use std::collections::HashMap; + +use crate::naming; +use crate::plot::Plot; +use crate::validate::ValidationWarning; +use crate::DataFrame; + +use super::{Metadata, Spec}; + +impl Spec { + /// Create a new Spec from PreparedData + pub(crate) fn new( + plot: Plot, + data: HashMap, + sql: String, + visual: String, + layer_sql: Vec>, + stat_sql: Vec>, + warnings: Vec, + ) -> Self { + // Compute metadata from data + let (rows, columns) = if let Some(df) = data.get(naming::GLOBAL_DATA_KEY) { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else if let Some(df) = data.values().next() { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else { + (0, Vec::new()) + }; + + let layer_count = plot.layers.len(); + let metadata = Metadata { + rows, + columns, + layer_count, + }; + + Self { + plot, + data, + metadata, + sql, + visual, + layer_sql, + stat_sql, + warnings, + } + } + + /// Get the resolved plot specification. + pub fn plot(&self) -> &Plot { + &self.plot + } + + /// Get visualization metadata. + pub fn metadata(&self) -> &Metadata { + &self.metadata + } + + /// Number of layers. + pub fn layer_count(&self) -> usize { + self.plot.layers.len() + } + + /// Get global data (main query result). + pub fn data(&self) -> Option<&DataFrame> { + self.data.get(naming::GLOBAL_DATA_KEY) + } + + /// Get layer-specific data (from FILTER or FROM clause). + pub fn layer_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.data.get(&naming::layer_key(layer_index)) + } + + /// Get stat transform data (e.g., histogram bins, density estimates). + pub fn stat_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.layer_data(layer_index) + } + + /// Get internal data map (all DataFrames by key). + pub fn data_map(&self) -> &HashMap { + &self.data + } + + /// The main SQL query that was executed. 
+ pub fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + pub fn visual(&self) -> &str { + &self.visual + } + + /// Layer filter/source query, or `None` if using global data. + pub fn layer_sql(&self, layer_index: usize) -> Option<&str> { + self.layer_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Stat transform query, or `None` if no stat transform. + pub fn stat_sql(&self, layer_index: usize) -> Option<&str> { + self.stat_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Validation warnings from preparation. + pub fn warnings(&self) -> &[ValidationWarning] { + &self.warnings + } +} diff --git a/src/rest.rs b/src/rest.rs index 88fb61a6..8f2338c4 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -31,10 +31,8 @@ use tower_http::cors::{Any, CorsLayer}; use tracing::info; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; -use ggsql::{parser, GgsqlError, VERSION}; +use ggsql::{parser, validate, GgsqlError, VERSION}; -#[cfg(feature = "duckdb")] -use ggsql::execute::prepare_data_with_executor; #[cfg(feature = "duckdb")] use ggsql::reader::{DuckDBReader, Reader}; @@ -442,61 +440,38 @@ async fn query_handler( #[cfg(feature = "duckdb")] if request.reader.starts_with("duckdb://") { - // Create query executor that handles shared state vs new reader - let execute_query = |sql: &str| -> Result { - if request.reader == "duckdb://memory" && state.reader.is_some() { - let reader_mutex = state.reader.as_ref().unwrap(); - let reader = reader_mutex.lock().map_err(|e| { - GgsqlError::InternalError(format!("Failed to lock reader: {}", e)) - })?; - reader.execute(sql) - } else { - let reader = DuckDBReader::from_connection_string(&request.reader)?; - reader.execute(sql) - } - }; - - // Prepare data using shared execution logic - let prepared = prepare_data_with_executor(&request.query, execute_query)?; - - // Get metadata from available data - let (rows, columns) = if let Some(df) = prepared.data.get("__global__") 
{ - let (r, _) = df.shape(); - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (r, cols) + // Use shared reader or create new one + let spec = if request.reader == "duckdb://memory" && state.reader.is_some() { + let reader_mutex = state.reader.as_ref().unwrap(); + let reader = reader_mutex + .lock() + .map_err(|e| GgsqlError::InternalError(format!("Failed to lock reader: {}", e)))?; + reader.execute(&request.query)? } else { - // Use first available data for metadata - let df = prepared.data.values().next().unwrap(); - let (r, _) = df.shape(); - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (r, cols) + let reader = DuckDBReader::from_connection_string(&request.reader)?; + reader.execute(&request.query)? }; - let first_spec = &prepared.specs[0]; + // Get metadata + let metadata = spec.metadata(); // Generate visualization output using writer #[cfg(feature = "vegalite")] if request.writer == "vegalite" { let writer = VegaLiteWriter::new(); - let json_output = writer.write(first_spec, &prepared.data)?; + let json_output = writer.render(&spec)?; let spec_value: serde_json::Value = serde_json::from_str(&json_output) .map_err(|e| GgsqlError::WriterError(format!("Failed to parse JSON: {}", e)))?; + let plot = spec.plot(); + let result = QueryResult { spec: spec_value, metadata: QueryMetadata { - rows, - columns, - global_mappings: format!("{:?}", first_spec.global_mappings), - layers: first_spec.layers.len(), + rows: metadata.rows, + columns: metadata.columns.clone(), + global_mappings: format!("{:?}", plot.global_mappings), + layers: plot.layers.len(), }, }; @@ -525,13 +500,45 @@ async fn query_handler( } /// POST /api/v1/parse - Parse a ggsql query +#[cfg(feature = "duckdb")] +async fn parse_handler( + Json(request): Json, +) -> Result>, ApiErrorResponse> { + info!("Parsing query: {} chars", request.query.len()); + + // Validate query to get sql/viz portions + let validated = 
validate(&request.query)?; + + // Parse ggsql portion + let specs = parser::parse_query(&request.query)?; + + // Convert specs to JSON + let specs_json: Vec = specs + .iter() + .map(|spec| serde_json::to_value(spec).unwrap_or(serde_json::Value::Null)) + .collect(); + + let result = ParseResult { + sql_portion: validated.sql().to_string(), + viz_portion: validated.visual().to_string(), + specs: specs_json, + }; + + Ok(Json(ApiSuccess { + status: "success".to_string(), + data: result, + })) +} + +/// POST /api/v1/parse - Parse a ggsql query +#[cfg(not(feature = "duckdb"))] async fn parse_handler( Json(request): Json, ) -> Result>, ApiErrorResponse> { info!("Parsing query: {} chars", request.query.len()); - // Split query - let (sql_part, viz_part) = parser::split_query(&request.query)?; + // Validate query to get sql/viz portions + let validated = validate(&request.query)?; // Parse ggsql portion let specs = parser::parse_query(&request.query)?; @@ -543,8 +550,8 @@ async fn parse_handler( .collect(); let result = ParseResult { - sql_portion: sql_part, - viz_portion: viz_part, + sql_portion: validated.sql().to_string(), + viz_portion: validated.visual().to_string(), specs: specs_json, }; diff --git a/src/validate.rs b/src/validate.rs new file mode 100644 index 00000000..79bf4ed1 --- /dev/null +++ b/src/validate.rs @@ -0,0 +1,274 @@ +//! Query validation without SQL execution. +//! +//! This module provides query syntax and semantic validation without executing +//! any SQL. Use this for IDE integration, syntax checking, and query inspection. + +use crate::parser; +use crate::Result; + +// ============================================================================ +// Core Types +// ============================================================================ + +/// Result of `validate()` - query inspection and validation without SQL execution. 
+pub struct Validated {
+    sql: String,
+    visual: String,
+    has_visual: bool,
+    tree: Option<tree_sitter::Tree>,
+    valid: bool,
+    errors: Vec<ValidationError>,
+    warnings: Vec<ValidationWarning>,
+}
+
+impl Validated {
+    /// Whether the query contains a VISUALISE clause.
+    pub fn has_visual(&self) -> bool {
+        self.has_visual
+    }
+
+    /// The SQL portion (before VISUALISE).
+    pub fn sql(&self) -> &str {
+        &self.sql
+    }
+
+    /// The VISUALISE portion (raw text).
+    pub fn visual(&self) -> &str {
+        &self.visual
+    }
+
+    /// CST for advanced inspection.
+    pub fn tree(&self) -> Option<&tree_sitter::Tree> {
+        self.tree.as_ref()
+    }
+
+    /// Whether the query is valid (no errors).
+    pub fn valid(&self) -> bool {
+        self.valid
+    }
+
+    /// Validation errors.
+    pub fn errors(&self) -> &[ValidationError] {
+        &self.errors
+    }
+
+    /// Validation warnings.
+    pub fn warnings(&self) -> &[ValidationWarning] {
+        &self.warnings
+    }
+}
+
+/// A validation error (fatal).
+#[derive(Debug, Clone)]
+pub struct ValidationError {
+    pub message: String,
+    pub location: Option<Location>,
+}
+
+/// A validation warning (non-fatal).
+#[derive(Debug, Clone)]
+pub struct ValidationWarning {
+    pub message: String,
+    pub location: Option<Location>,
+}
+
+/// Location within a query string (0-based).
+#[derive(Debug, Clone)]
+pub struct Location {
+    pub line: usize,
+    pub column: usize,
+}
+
+// ============================================================================
+// Validation Function
+// ============================================================================
+
+/// Validate query syntax and semantics without executing SQL.
+pub fn validate(query: &str) -> Result<Validated> {
+    let mut errors = Vec::new();
+    let warnings = Vec::new();
+
+    // Split to determine if there's a viz portion
+    let (sql_part, viz_part) = match parser::split_query(query) {
+        Ok((sql, viz)) => (sql, viz),
+        Err(e) => {
+            // Split error - return as validation error
+            errors.push(ValidationError {
+                message: e.to_string(),
+                location: None,
+            });
+            return Ok(Validated {
+                sql: String::new(),
+                visual: String::new(),
+                has_visual: false,
+                tree: None,
+                valid: false,
+                errors,
+                warnings,
+            });
+        }
+    };
+
+    let has_visual = !viz_part.trim().is_empty();
+
+    // Parse the full query to get the CST
+    let tree = if has_visual {
+        let mut ts_parser = tree_sitter::Parser::new();
+        ts_parser
+            .set_language(&tree_sitter_ggsql::language())
+            .map_err(|e| {
+                crate::GgsqlError::InternalError(format!("Failed to set language: {}", e))
+            })?;
+        ts_parser.parse(query, None)
+    } else {
+        None
+    };
+
+    // If no visualization, just syntax check passed
+    if !has_visual {
+        return Ok(Validated {
+            sql: sql_part,
+            visual: viz_part,
+            has_visual,
+            tree,
+            valid: true,
+            errors,
+            warnings,
+        });
+    }
+
+    // Parse to get plot specifications for validation
+    let plots = match parser::parse_query(query) {
+        Ok(p) => p,
+        Err(e) => {
+            errors.push(ValidationError {
+                message: e.to_string(),
+                location: None,
+            });
+            return Ok(Validated {
+                sql: sql_part,
+                visual: viz_part,
+                has_visual,
+                tree,
+                valid: false,
+                errors,
+                warnings,
+            });
+        }
+    };
+
+    // Validate the single plot (we only support one VISUALISE statement)
+    if let Some(plot) = plots.first() {
+        // Validate each layer
+        for (layer_idx, layer) in plot.layers.iter().enumerate() {
+            let context = format!("Layer {}", layer_idx + 1);
+
+            // Check required aesthetics
+            // Note: Without schema data, we can only check if mappings exist,
+            // not if the columns are valid. We skip this check for wildcards.
+ if !layer.mappings.wildcard { + if let Err(e) = layer.validate_required_aesthetics() { + errors.push(ValidationError { + message: format!("{}: {}", context, e), + location: None, + }); + } + } + + // Validate SETTING parameters + if let Err(e) = layer.validate_settings() { + errors.push(ValidationError { + message: format!("{}: {}", context, e), + location: None, + }); + } + } + } + + Ok(Validated { + sql: sql_part, + visual: viz_part, + has_visual, + tree, + valid: errors.is_empty(), + errors, + warnings, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validate_with_visual() { + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y").unwrap(); + assert!(validated.has_visual()); + assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y"); + assert!(validated.visual().starts_with("VISUALISE")); + assert!(validated.tree().is_some()); + assert!(validated.valid()); + } + + #[test] + fn test_validate_without_visual() { + let validated = validate("SELECT 1 as x, 2 as y").unwrap(); + assert!(!validated.has_visual()); + assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y"); + assert!(validated.visual().is_empty()); + assert!(validated.tree().is_none()); + assert!(validated.valid()); + } + + #[test] + fn test_validate_valid_query() { + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y").unwrap(); + assert!( + validated.valid(), + "Expected valid query: {:?}", + validated.errors() + ); + assert!(validated.errors().is_empty()); + } + + #[test] + fn test_validate_missing_required_aesthetic() { + // Point requires x and y, but we only provide x + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x").unwrap(); + assert!(!validated.valid()); + assert!(!validated.errors().is_empty()); + assert!(validated.errors()[0].message.contains("y")); + } + + #[test] + fn test_validate_syntax_error() { + let validated = validate("SELECT 1 VISUALISE DRAW 
invalidgeom").unwrap();
+        assert!(!validated.valid());
+        assert!(!validated.errors().is_empty());
+    }
+
+    #[test]
+    fn test_validate_sql_and_visual_content() {
+        let query = "SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y DRAW line MAPPING x AS x, y AS y";
+        let validated = validate(query).unwrap();
+
+        assert!(validated.has_visual());
+        assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y");
+        assert!(validated.visual().contains("DRAW point"));
+        assert!(validated.visual().contains("DRAW line"));
+        assert!(validated.valid());
+    }
+
+    #[test]
+    fn test_validate_sql_only() {
+        let query = "SELECT 1 as x, 2 as y";
+        let validated = validate(query).unwrap();
+
+        // SQL-only queries should be valid (just syntax check)
+        assert!(validated.valid());
+        assert!(validated.errors().is_empty());
+    }
+}
diff --git a/src/writer/mod.rs b/src/writer/mod.rs
index 7f026e6b..b06bf332 100644
--- a/src/writer/mod.rs
+++ b/src/writer/mod.rs
@@ -14,12 +14,17 @@
 //!
 //! ```rust,ignore
 //! use ggsql::writer::{Writer, VegaLiteWriter};
+//! use ggsql::reader::{Reader, DuckDBReader};
+//!
+//! let reader = DuckDBReader::from_connection_string("duckdb://memory")?;
+//! let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?;
 //!
 //! let writer = VegaLiteWriter::new();
-//! let json = writer.write(&spec, &dataframe)?;
+//! let json = writer.render(&spec)?;
 //! println!("{}", json);
 //! ```
 
+use crate::reader::Spec;
 use crate::{DataFrame, Plot, Result};
 use std::collections::HashMap;
 
@@ -33,7 +38,15 @@ pub use vegalite::VegaLiteWriter;
 ///
 /// Writers take a Plot and data sources and produce formatted output
 /// (JSON, R code, PNG bytes, etc.).
+///
+/// # Associated Types
+///
+/// * `Output` - The type returned by `write()` and `render()`. Use `Option<String>`
+///   for text output, `Option<Vec<u8>>` for binary, `()` for void writers, etc.
 pub trait Writer {
+    /// The output type produced by this writer.
+    type Output;
+
     /// Generate output from a visualization specification and data sources
     ///
     /// # Arguments
@@ -44,7 +57,7 @@ pub trait Writer {
     ///
     /// # Returns
     ///
-    /// A string containing the formatted output (JSON, code, etc.)
+    /// The writer's output, depends on writer implementation.
     ///
     /// # Errors
     ///
@@ -52,7 +65,7 @@ pub trait Writer {
     /// - The spec is incompatible with this writer
     /// - The data doesn't match the spec's requirements
     /// - Output generation fails
-    fn write(&self, spec: &Plot, data: &HashMap<String, DataFrame>) -> Result<String>;
+    fn write(&self, spec: &Plot, data: &HashMap<String, DataFrame>) -> Result<Self::Output>;
 
     /// Validate that a spec is compatible with this writer
     ///
@@ -67,4 +80,32 @@ pub trait Writer {
     ///
     /// Ok(()) if the spec is compatible, otherwise an error
     fn validate(&self, spec: &Plot) -> Result<()>;
+
+    /// Render a Spec to output format
+    ///
+    /// This is the main entry point for generating visualization output.
+    ///
+    /// # Arguments
+    ///
+    /// * `spec` - The prepared visualization specification from `reader.execute()`
+    ///
+    /// # Returns
+    ///
+    /// The writer's output (type depends on writer implementation)
+    ///
+    /// # Example
+    ///
+    /// ```rust,ignore
+    /// use ggsql::reader::{Reader, DuckDBReader};
+    /// use ggsql::writer::{Writer, VegaLiteWriter};
+    ///
+    /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?;
+    /// let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?;
+    ///
+    /// let writer = VegaLiteWriter::new();
+    /// let json = writer.render(&spec)?;
+    /// ```
+    fn render(&self, spec: &Spec) -> Result<Self::Output> {
+        self.write(spec.plot(), spec.data_map())
+    }
 }
diff --git a/src/writer/vegalite.rs b/src/writer/vegalite.rs
index 01bf884b..ec86589a 100644
--- a/src/writer/vegalite.rs
+++ b/src/writer/vegalite.rs
@@ -999,6 +999,8 @@ impl VegaLiteWriter {
 }
 
 impl Writer for VegaLiteWriter {
+    type Output = String;
+
     fn write(&self, spec: &Plot, data: &HashMap<String, DataFrame>) -> Result<String> {
         // Validate spec before processing
self.validate(spec)?;