diff --git a/CLAUDE.md b/CLAUDE.md index bdc93293..45d32fb7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -147,6 +147,79 @@ DRAW line MAPPING month AS x, total AS y --- +## Public API + +### Quick Start + +```rust +use ggsql::reader::{DuckDBReader, Reader}; +use ggsql::writer::VegaLiteWriter; + +// Create a reader +let reader = DuckDBReader::from_connection_string("duckdb://memory")?; + +// Execute the ggsql query +let spec = reader.execute( + "SELECT x, y FROM data VISUALISE x, y DRAW point" +)?; + +// Render to Vega-Lite JSON +let writer = VegaLiteWriter::new(); +let json = writer.render(&spec)?; +``` + +### Core Functions + +| Function | Purpose | +| ----------------------- | ------------------------------------------------------ | +| `reader.execute(query)` | Main entry point: parse, execute SQL, resolve mappings | +| `writer.render(spec)` | Generate output from a Spec | +| `validate(query)` | Validate syntax + semantics, inspect query structure | + +### Key Types + +**`Validated`** - Result of `validate()`: + +- `has_visual()` - Whether query has VISUALISE clause +- `sql()` - The SQL portion (before VISUALISE) +- `visual()` - The VISUALISE portion (raw text) +- `tree()` - CST for advanced inspection +- `valid()` - Whether query is valid +- `errors()` - Validation errors +- `warnings()` - Validation warnings + +**`Spec`** - Result of `reader.execute()`, ready for rendering: + +- `plot()` - Resolved plot specification +- `metadata()` - Rows, columns, layer count +- `warnings()` - Validation warnings from execution +- `data()` / `layer_data(i)` / `stat_data(i)` - Access DataFrames +- `sql()` / `visual()` / `layer_sql(i)` / `stat_sql(i)` - Query introspection + +**`Metadata`**: + +- `rows` - Number of rows in primary data +- `columns` - Column names +- `layer_count` - Number of layers + +### Reader & Writer + +**Reader trait** (data source abstraction): + +- `execute_sql(sql)` - Run SQL, return DataFrame +- `register(name, df)` - Register DataFrame as table +- 
`unregister(name)` - Unregister a previously registered table +- Implementation: `DuckDBReader` + +**Writer trait** (output format abstraction): + +- `write(spec, data)` - Generate output string +- Implementation: `VegaLiteWriter` (Vega-Lite v6 JSON) + +For detailed API documentation, see [`src/doc/API.md`](src/doc/API.md). + +--- + ## Component Breakdown ### 1. Parser Module (`src/parser/`) @@ -432,7 +505,7 @@ pub type Result = std::result::Result; ```rust pub trait Reader { - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; fn supports_query(&self, sql: &str) -> bool; } ``` @@ -462,7 +535,6 @@ pub fn parse_connection_string(uri: &str) -> Result { The codebase includes connection string parsing and feature flags for additional readers, but they are not yet implemented: - **PostgreSQL Reader** (`postgres://...`) - - Feature flag: `postgres` - Connection string parsing exists in `connection.rs` - Reader implementation: Not yet available @@ -792,15 +864,18 @@ When running in Positron IDE, the extension provides enhanced functionality: ### 8. Python Bindings (`ggsql-python/`) -**Responsibility**: Python bindings for ggsql, enabling Python users to render Altair charts using ggsql's VISUALISE syntax. +**Responsibility**: Python bindings for ggsql, enabling Python users to create visualizations using ggsql's VISUALISE syntax. **Features**: - PyO3-based Rust bindings compiled to a native Python extension +- Two-stage API mirroring the Rust API: `reader.execute()` → `render()` +- DuckDB reader with DataFrame registration +- Custom Python reader support: any object with `execute_sql(sql) -> DataFrame` method - Works with any narwhals-compatible DataFrame (polars, pandas, etc.) 
- LazyFrames are collected automatically -- Returns native `altair.Chart` objects for easy display and customization -- Query splitting to separate SQL from VISUALISE portions +- Returns native `altair.Chart` objects via `render_altair()` convenience function +- Query validation and introspection (SQL, layer queries, stat queries) **Installation**: @@ -817,26 +892,117 @@ maturin develop import ggsql import polars as pl -# Split a ggSQL query into SQL and VISUALISE portions -sql, viz = ggsql.split_query(""" - SELECT date, revenue FROM sales - VISUALISE date AS x, revenue AS y - DRAW line -""") +# Create reader and register data +reader = ggsql.DuckDBReader("duckdb://memory") +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +reader.register("data", df) + +# Execute visualization +spec = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point" +) + +# Inspect metadata +print(f"Rows: {spec.metadata()['rows']}") +print(f"Columns: {spec.metadata()['columns']}") +print(f"SQL: {spec.sql()}") + +# Render to Vega-Lite JSON +writer = ggsql.VegaLiteWriter() +json_output = writer.render(spec) +``` + +**Convenience Function** (`render_altair`): + +For quick visualizations without explicit reader setup: + +```python +import ggsql +import polars as pl -# Execute SQL and render to Altair chart df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") -# Display or save +# Render DataFrame to Altair chart in one call +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") chart.display() # In Jupyter -chart.save("chart.html") ``` +**Query Validation**: + +```python +# Validate syntax without execution +validated = ggsql.validate( + "SELECT x, y FROM data VISUALISE x, y DRAW point" +) +print(f"Valid: {validated.valid()}") +print(f"Has VISUALISE: {validated.has_visual()}") +print(f"SQL portion: {validated.sql()}") +print(f"Errors: {validated.errors()}") +``` + +**Classes**: + +| Class | Description | +| 
-------------------------- | ------------------------------------------------- | +| `DuckDBReader(connection)` | Database reader with DataFrame registration | +| `VegaLiteWriter()` | Vega-Lite JSON output writer | +| `Validated` | Result of `validate()` with query inspection | +| `Spec` | Result of `reader.execute()`, ready for rendering | + **Functions**: -- `split_query(query: str) -> tuple[str, str]` - Split ggSQL query into SQL and VISUALISE portions -- `render_altair(df, viz, **kwargs) -> altair.Chart` - Render DataFrame with VISUALISE spec to Altair chart +| Function | Description | +| ------------------------ | ------------------------------------------------ | +| `validate(query)` | Syntax/semantic validation with query inspection | +| `reader.execute(query)` | Execute ggsql query, return Spec | +| `execute(query, reader)` | Execute with custom reader (bridge path) | +| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | + +**Spec Methods**: + +| Method | Description | +| ---------------- | -------------------------------------------- | +| `render(writer)` | Generate Vega-Lite JSON | +| `metadata()` | Get rows, columns, layer_count | +| `sql()` | Get the SQL portion | +| `visual()` | Get the VISUALISE portion | +| `layer_count()` | Number of DRAW layers | +| `data()` | Get the main DataFrame | +| `layer_data(i)` | Get layer-specific DataFrame (if filtered) | +| `stat_data(i)` | Get stat transform DataFrame (if applicable) | +| `layer_sql(i)` | Get layer filter SQL (if applicable) | +| `stat_sql(i)` | Get stat transform SQL (if applicable) | +| `warnings()` | Get validation warnings | + +**Custom Python Readers**: + +Any Python object with an `execute_sql(sql: str) -> polars.DataFrame` method can be used as a reader: + +```python +import ggsql +import polars as pl + +class MyReader: + """Custom reader that returns static data.""" + + def execute_sql(self, sql: str) -> pl.DataFrame: + return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 
30]}) + +# Use custom reader with ggsql.execute() +reader = MyReader() +spec = ggsql.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) +``` + +Optional methods for custom readers: + +- `supports_register() -> bool` - Return `True` if registration is supported +- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a table +- `unregister(name: str) -> None` - Unregister a previously registered table + +Native readers (e.g., `DuckDBReader`) use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. **Dependencies**: @@ -920,22 +1086,23 @@ cargo build --all-features ``` Where `` can be: + - Empty: `VISUALISE` (layers must define all mappings) - Mappings: `VISUALISE x, y, date AS x` (mixed implicit/explicit) - Wildcard: `VISUALISE *` (map all columns) ### Clause Types -| Clause | Repeatable | Purpose | Example | -| -------------- | ---------- | ------------------ | ------------------------------------ | -| `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | -| `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | -| `SCALE` | ✅ Yes | Configure scales | `SCALE x SETTING type => 'date'` | -| `FACET` | ❌ No | Small multiples | `FACET WRAP region` | -| `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` | -| `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` | -| `GUIDE` | ✅ Yes | Legend/axis config | `GUIDE color SETTING position => 'right'` | -| `THEME` | ❌ No | Visual styling | `THEME minimal` | +| Clause | Repeatable | Purpose | Example | +| ----------- | ---------- | ------------------ | ----------------------------------------- | +| `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | +| `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | +| `SCALE` | ✅ Yes | Configure scales | `SCALE x SETTING type => 'date'` | +| `FACET` | ❌ No | Small 
multiples | `FACET WRAP region` | +| `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` | +| `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` | +| `GUIDE` | ✅ Yes | Legend/axis config | `GUIDE color SETTING position => 'right'` | +| `THEME` | ❌ No | Visual styling | `THEME minimal` | ### DRAW Clause (Layers) @@ -1201,7 +1368,6 @@ COORD cartesian SETTING xlim => [0, 100], ylim => [0, 200] LABEL x => 'Category', y => 'Count' ``` - ### LABEL Clause **Syntax**: diff --git a/Cargo.toml b/Cargo.toml index cd5b672c..5e98f8aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,8 @@ csscolorparser = "0.8.1" polars = { version = "0.52", features = ["lazy", "sql", "ipc"] } # Readers -duckdb = { version = "1.1", features = ["bundled"] } +duckdb = { version = "1.4", features = ["bundled", "vtab-arrow"] } +arrow = { version = "56", default-features = false, features = ["ipc"] } postgres = "0.19" sqlx = { version = "0.8", features = ["postgres", "runtime-tokio-rustls"] } rusqlite = "0.32" diff --git a/README.md b/README.md index 43d70847..8af476f9 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ THEME minimal - ✅ REST API server (`ggsql-rest`) with CORS support - ✅ Jupyter kernel (`ggsql-jupyter`) with inline Vega-Lite visualizations - ✅ VS Code extension (`ggsql-vscode`) with syntax highlighting and Positron IDE integration +- ✅ Python bindings (`ggsql-python`) with Altair chart output **Planned:** @@ -93,7 +94,9 @@ ggsql/ │ ├── ggsql-jupyter/ # Jupyter kernel │ -└── ggsql-vscode/ # VS Code extension +├── ggsql-vscode/ # VS Code extension +│ +└── ggsql-python/ # Python bindings ``` ## Development Workflow @@ -297,6 +300,41 @@ When running in Positron IDE, the extension provides additional features: - **Language runtime registration** for executing ggsql queries directly within Positron - **Plot pane integration** - visualizations are automatically routed to Positron's Plots pane +## Python Bindings + +The 
`ggsql-python` package provides Python bindings for using ggsql with DataFrames. + +### Installation + +```bash +cd ggsql-python +pip install maturin +maturin develop +``` + +### Usage + +```python +import ggsql +import polars as pl + +# Simple usage with render_altair +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +chart.display() + +# Two-stage API for full control +reader = ggsql.DuckDBReader("duckdb://memory") +reader.register("data", df) + +spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") + +writer = ggsql.VegaLiteWriter() +json_output = writer.render(spec) +``` + +See the [ggsql-python README](ggsql-python/README.md) for complete API documentation. + ## CLI ### Installation diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index 1c38e3ae..d91b223a 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -5,9 +5,8 @@ use anyhow::Result; use ggsql::{ - execute::prepare_data, - parser, reader::{DuckDBReader, Reader}, + validate, writer::{VegaLiteWriter, Writer}, }; use polars::frame::DataFrame; @@ -54,13 +53,13 @@ impl QueryExecutor { pub fn execute(&self, code: &str) -> Result { tracing::debug!("Executing query: {} chars", code.len()); - // 1. Split query to check if there's a visualization - let (_sql_part, viz_part) = parser::split_query(code)?; + // 1. Validate to check if there's a visualization + let validated = validate(code)?; // 2. Check if there's a visualization - if viz_part.is_empty() { + if !validated.has_visual() { // Pure SQL query - execute directly and return DataFrame - let df = self.reader.execute(code)?; + let df = self.reader.execute_sql(code)?; tracing::info!( "Pure SQL executed: {} rows, {} cols", df.height(), @@ -69,17 +68,21 @@ impl QueryExecutor { return Ok(ExecutionResult::DataFrame(df)); } - // 3. 
Prepare data using shared execution logic (handles layer sources) - let prepared = prepare_data(code, &self.reader)?; + // 3. Execute ggsql query using reader + let spec = self.reader.execute(code)?; - tracing::info!("Data sources prepared: {} sources", prepared.data.len()); + tracing::info!( + "Query executed: {} rows, {} layers", + spec.metadata().rows, + spec.metadata().layer_count + ); - // 4. Generate Vega-Lite spec (use first spec if multiple) - let vega_json = self.writer.write(&prepared.specs[0], &prepared.data)?; + // 4. Render to output format + let vega_json = self.writer.render(&spec)?; tracing::debug!("Generated Vega-Lite spec: {} chars", vega_json.len()); - // 6. Return result + // 5. Return result Ok(ExecutionResult::Visualization { spec: vega_json }) } } diff --git a/ggsql-python/Cargo.toml b/ggsql-python/Cargo.toml index 62229afd..8f73e6f8 100644 --- a/ggsql-python/Cargo.toml +++ b/ggsql-python/Cargo.toml @@ -12,7 +12,7 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.26", features = ["extension-module"] } polars = { workspace = true, features = ["ipc"] } -ggsql = { path = "../src", default-features = false, features = ["vegalite"] } +ggsql = { path = "../src", default-features = false, features = ["duckdb", "vegalite"] } [features] default = [] diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 0d97bbee..7a2148f1 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -2,7 +2,7 @@ Python bindings for [ggsql](https://github.com/georgestagg/ggsql), a SQL extension for declarative data visualization. -This package provides a thin wrapper around the Rust `ggsql` crate, enabling Python users to render Altair charts from DataFrames using ggsql's VISUALISE syntax. +This package provides Python bindings to the Rust `ggsql` crate, enabling Python users to create visualizations using ggsql's VISUALISE syntax with native Altair chart output. 
## Installation @@ -15,6 +15,7 @@ pip install ggsql ### From source Building from source requires: + - Rust toolchain (install via [rustup](https://rustup.rs/)) - Python 3.10+ - [maturin](https://github.com/PyO3/maturin) @@ -39,35 +40,182 @@ maturin build --release pip install target/wheels/ggsql-*.whl ``` -## Usage +## Quick Start + +### Simple Usage with `render_altair` + +For quick visualizations, use the `render_altair` convenience function: ```python import ggsql -import duckdb +import polars as pl + +# Create a DataFrame +df = pl.DataFrame({ + "x": [1, 2, 3, 4, 5], + "y": [10, 20, 15, 30, 25], + "category": ["A", "B", "A", "B", "A"] +}) + +# Render to Altair chart +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + +# Display or save +chart.display() # In Jupyter +chart.save("chart.html") # Save to file +``` -# Split a ggSQL query into SQL and VISUALISE portions -sql, viz = ggsql.split_query(""" - SELECT date, revenue, region FROM sales - WHERE year = 2024 +### Two-Stage API + +For more control, use the two-stage API with explicit reader and writer: + +```python +import ggsql +import polars as pl + +# 1. Create a DuckDB reader +reader = ggsql.DuckDBReader("duckdb://memory") + +# 2. Register your DataFrame as a table +df = pl.DataFrame({ + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "revenue": [100, 150, 120], + "region": ["North", "South", "North"] +}) +reader.register("sales", df) + +# 3. Execute the ggsql query +spec = reader.execute( + """ + SELECT * FROM sales VISUALISE date AS x, revenue AS y, region AS color DRAW line - LABEL title => 'Sales Trends' -""") + LABEL title => 'Sales by Region' + """ +) + +# 4. Inspect metadata +print(f"Rows: {spec.metadata()['rows']}") +print(f"Columns: {spec.metadata()['columns']}") +print(f"Layers: {spec.layer_count()}") + +# 5. Inspect SQL/VISUALISE portions and data +print(f"SQL: {spec.sql()}") +print(f"Visual: {spec.visual()}") +print(spec.data()) # Returns polars DataFrame + +# 6. 
Render to Vega-Lite JSON +writer = ggsql.VegaLiteWriter() +vegalite_json = writer.render(spec) +print(vegalite_json) +``` -# Execute SQL with DuckDB -df = duckdb.sql(sql).pl() +## API Reference -# Render DataFrame + VISUALISE spec to Altair chart -chart = ggsql.render_altair(df, viz) +### Classes -# Display or save the chart -chart.display() # In Jupyter -chart.save("chart.html") # Save to file +#### `DuckDBReader(connection: str)` + +Database reader that executes SQL and manages DataFrames. + +```python +reader = ggsql.DuckDBReader("duckdb://memory") # In-memory database +reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database +``` + +**Methods:** + +- `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table +- `unregister(name: str)` - Unregister a previously registered table +- `execute_sql(sql: str) -> polars.DataFrame` - Execute SQL and return results +- `supports_register() -> bool` - Check if registration is supported + +#### `VegaLiteWriter()` + +Writer that generates Vega-Lite v6 JSON specifications. + +```python +writer = ggsql.VegaLiteWriter() +json_output = writer.render(spec) +``` + +#### `Validated` + +Result of `validate()` containing query analysis without SQL execution. + +**Methods:** + +- `valid() -> bool` - Whether the query is syntactically and semantically valid +- `has_visual() -> bool` - Whether the query contains a VISUALISE clause +- `sql() -> str` - The SQL portion (before VISUALISE) +- `visual() -> str` - The VISUALISE portion +- `errors() -> list[dict]` - Validation errors with messages and locations +- `warnings() -> list[dict]` - Validation warnings + +#### `Spec` + +Result of `reader.execute()`, containing resolved visualization ready for rendering. 
+ +**Methods:** + +- `metadata() -> dict` - Get `{"rows": int, "columns": list[str], "layer_count": int}` +- `sql() -> str` - The executed SQL query +- `visual() -> str` - The VISUALISE clause +- `layer_count() -> int` - Number of DRAW layers +- `data() -> polars.DataFrame | None` - Main query result DataFrame +- `layer_data(index: int) -> polars.DataFrame | None` - Layer-specific data (if filtered) +- `stat_data(index: int) -> polars.DataFrame | None` - Statistical transform data +- `layer_sql(index: int) -> str | None` - Layer filter SQL +- `stat_sql(index: int) -> str | None` - Stat transform SQL +- `warnings() -> list[dict]` - Validation warnings from execution + +### Functions + +#### `validate(query: str) -> Validated` + +Validate query syntax and semantics without executing SQL. + +```python +validated = ggsql.validate("SELECT x, y FROM data VISUALISE x, y DRAW point") +if validated.valid(): + print("Query is valid!") +else: + for error in validated.errors(): + print(f"Error: {error['message']}") +``` + +#### `reader.execute(query: str) -> Spec` + +Execute a ggsql query and return the visualization specification. + +```python +reader = ggsql.DuckDBReader("duckdb://memory") +spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") ``` -### Mapping styles +#### `render_altair(df, viz: str, **kwargs) -> altair.Chart` + +Convenience function to render a DataFrame with a VISUALISE spec to an Altair chart. + +**Parameters:** + +- `df` - Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. +- `viz` - The VISUALISE specification string +- `**kwargs` - Additional arguments passed to `altair.Chart.from_json()` (e.g., `validate=False`) -The `render_altair()` function supports various mapping styles: +**Returns:** An Altair chart object (Chart, LayerChart, FacetChart, etc.) 
+ +```python +import polars as pl +import ggsql + +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +``` + +## Examples + +### Mapping Styles ```python df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30], "category": ["A", "B", "A"]}) @@ -85,41 +233,116 @@ ggsql.render_altair(df, "VISUALISE * DRAW point") ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") ``` -## API +### Custom Readers -### `split_query(query: str) -> tuple[str, str]` +You can use any Python object with an `execute_sql(sql: str) -> polars.DataFrame` method as a reader. This enables integration with any data source. -Split a ggSQL query into SQL and VISUALISE portions. +```python +import ggsql +import polars as pl + +class CSVReader: + """Custom reader that loads data from CSV files.""" + + def __init__(self, data_dir: str): + self.data_dir = data_dir + + def execute_sql(self, sql: str) -> pl.DataFrame: + # Simple implementation: ignore SQL and return fixed data + # A real implementation would parse SQL to determine which file to load + return pl.read_csv(f"{self.data_dir}/data.csv") + +# Use custom reader with ggsql.execute() +reader = CSVReader("/path/to/data") +spec = ggsql.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) +writer = ggsql.VegaLiteWriter() +json_output = writer.render(spec) +``` -**Parameters:** -- `query`: The full ggSQL query string +**Optional methods** for custom readers: -**Returns:** -- Tuple of `(sql_portion, visualise_portion)` +- `supports_register() -> bool` - Return `True` if your reader supports DataFrame registration +- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a queryable table +- `unregister(name: str) -> None` - Unregister a previously registered table -**Raises:** -- `ValueError`: If the query cannot be parsed +```python +class AdvancedReader: + """Custom reader with registration support.""" -### `render_altair(df, 
viz, **kwargs) -> altair.Chart` + def __init__(self): + self.tables = {} -Render a DataFrame with a VISUALISE specification to an Altair chart. + def execute_sql(self, sql: str) -> pl.DataFrame: + # Your SQL execution logic here + ... -**Parameters:** -- `df`: Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. -- `viz`: The VISUALISE specification string -- `**kwargs`: Additional keyword arguments passed to `altair.Chart.from_json()`. Common options include `validate=False` to skip schema validation. + def supports_register(self) -> bool: + return True -**Returns:** -- An `altair.Chart` object that can be displayed, saved, or further customized + def register(self, name: str, df: pl.DataFrame) -> None: + self.tables[name] = df -**Raises:** -- `ValueError`: If the spec cannot be parsed or rendered + def unregister(self, name: str) -> None: + del self.tables[name] +``` + +Native readers like `DuckDBReader` use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. + +### Ibis Reader Example + +[Ibis](https://ibis-project.org/) provides a unified Python API for SQL operations across multiple backends. 
Here's how to create an ibis-based custom reader: + +```python +import ggsql +import polars as pl +import ibis + +class IbisReader: + """Custom reader using ibis as the SQL backend.""" + + def __init__(self, backend="duckdb"): + if backend == "duckdb": + self.con = ibis.duckdb.connect() + elif backend == "sqlite": + self.con = ibis.sqlite.connect() + # Add other backends as needed + + def execute_sql(self, sql: str) -> pl.DataFrame: + return self.con.con.execute(sql).pl() + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.con.create_table(name, df.to_arrow(), overwrite=True) + + def unregister(self, name: str) -> None: + self.con.drop_table(name) + +# Usage +reader = IbisReader() +df = pl.DataFrame({ + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "revenue": [100, 150, 120], +}) +reader.register("sales", df) + +spec = ggsql.execute( + "SELECT * FROM sales VISUALISE date AS x, revenue AS y DRAW line", + reader +) +writer = ggsql.VegaLiteWriter() +print(writer.render(spec)) +``` ## Development ### Keeping in sync with the monorepo -The `ggsql-python` package is part of the [ggsql monorepo](https://github.com/georgestagg/ggsql) and depends on the Rust `ggsql` crate via a path dependency. When the Rust crate is updated, you may need to rebuild: +The `ggsql-python` package is part of the [ggsql monorepo](https://github.com/posit-dev/ggsql) and depends on the Rust `ggsql` crate via a path dependency. 
When the Rust crate is updated, you may need to rebuild: ```bash cd ggsql-python diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py index dbbb5afb..d69c84ef 100644 --- a/ggsql-python/python/ggsql/__init__.py +++ b/ggsql-python/python/ggsql/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations -import io import json from typing import Any, Union @@ -8,9 +7,26 @@ import narwhals as nw from narwhals.typing import IntoFrame -from ggsql._ggsql import split_query, render as _render +from ggsql._ggsql import ( + DuckDBReader, + VegaLiteWriter, + Validated, + Spec, + validate, + execute, +) -__all__ = ["split_query", "render_altair"] +__all__ = [ + # Classes + "DuckDBReader", + "VegaLiteWriter", + "Validated", + "Spec", + # Functions + "validate", + "execute", + "render_altair", +] __version__ = "0.1.0" # Type alias for any Altair chart type @@ -56,13 +72,19 @@ def render_altair( if not isinstance(df, nw.DataFrame): raise TypeError("df must be a narwhals DataFrame or compatible type") - # Convert to polars and serialize to IPC bytes pl_df = df.to_polars() - buffer = io.BytesIO() - pl_df.write_ipc(buffer) - ipc_bytes = buffer.getvalue() - vegalite_json = _render(ipc_bytes, viz, writer="vegalite") + # Create temporary reader and register data + reader = DuckDBReader("duckdb://memory") + reader.register("__data__", pl_df) + + # Build full query: SELECT * FROM __data__ + VISUALISE clause + query = f"SELECT * FROM __data__ {viz}" + + # Execute and render + spec = reader.execute(query) + writer = VegaLiteWriter() + vegalite_json = writer.render(spec) # Parse to determine the correct Altair class spec = json.loads(vegalite_json) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 7c472c35..d2eb0ec0 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -3,100 +3,770 @@ #![allow(clippy::useless_conversion)] use pyo3::prelude::*; -use pyo3::types::PyBytes; -use std::collections::{HashMap, HashSet}; 
+use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::naming::GLOBAL_DATA_KEY; -use ggsql::parser::parse_query; -use ggsql::writer::{VegaLiteWriter, Writer}; -use ggsql::AestheticValue; +use ggsql::reader::Spec; +use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; +use ggsql::validate::{validate as rust_validate, ValidationWarning}; +use ggsql::writer::{VegaLiteWriter as RustVegaLiteWriter, Writer as RustWriter}; +use ggsql::GgsqlError; -use polars::prelude::{DataFrame, IpcReader, SerReader}; +use polars::prelude::{DataFrame, IpcReader, IpcWriter, SerReader, SerWriter}; -#[pyfunction] -fn split_query(query: &str) -> PyResult<(String, String)> { - ggsql::parser::split_query(query) - .map_err(|e| PyErr::new::(e.to_string())) +// ============================================================================ +// Helper Functions for DataFrame Conversion +// ============================================================================ + +/// Convert a Polars DataFrame to a Python polars DataFrame via IPC serialization +fn polars_to_py(py: Python<'_>, df: &DataFrame) -> PyResult> { + let mut buffer = Vec::new(); + IpcWriter::new(&mut buffer) + .finish(&mut df.clone()) + .map_err(|e| { + PyErr::new::(format!( + "Failed to serialize DataFrame: {}", + e + )) + })?; + + let io = py.import("io")?; + let bytes_io = io.call_method1("BytesIO", (PyBytes::new(py, &buffer),))?; + + let polars = py.import("polars")?; + polars + .call_method1("read_ipc", (bytes_io,)) + .map(|obj| obj.into()) } -#[pyfunction] -#[pyo3(signature = (ipc_bytes, viz, *, writer = "vegalite"))] -fn render(ipc_bytes: &Bound<'_, PyBytes>, viz: &str, writer: &str) -> PyResult { - // Read DataFrame from IPC bytes - let bytes = ipc_bytes.as_bytes(); - let cursor = Cursor::new(bytes); - let df: DataFrame = IpcReader::new(cursor).finish().map_err(|e| { - PyErr::new::(format!("Failed to read IPC data: {}", e)) - })?; +/// Convert a Python polars DataFrame to a Rust Polars DataFrame 
via IPC serialization +fn py_to_polars(py: Python<'_>, df: &Bound<'_, PyAny>) -> PyResult { + let io = py.import("io")?; + let bytes_io = io.call_method0("BytesIO")?; + df.call_method1("write_ipc", (&bytes_io,))?; + bytes_io.call_method1("seek", (0i64,))?; - // Parse the visualization spec - // The viz string should be a complete VISUALISE statement - let specs = parse_query(viz) - .map_err(|e| PyErr::new::(e.to_string()))?; + let ipc_bytes: Vec = bytes_io.call_method0("read")?.extract()?; + let cursor = Cursor::new(ipc_bytes); + + IpcReader::new(cursor).finish().map_err(|e| { + PyErr::new::(format!("Failed to read DataFrame: {}", e)) + }) +} + +/// Convert a Python polars DataFrame to Rust DataFrame - for use inside Python::attach +/// This variant is used by PyReaderBridge where we already hold the GIL. +fn py_to_polars_inner(df: &Bound<'_, PyAny>) -> PyResult { + let py = df.py(); + let io = py.import("io")?; + let bytes_io = io.call_method0("BytesIO")?; - let mut spec = specs.into_iter().next().ok_or_else(|| { - PyErr::new::("No visualization spec found") + df.call_method1("write_ipc", (&bytes_io,)).map_err(|_| { + PyErr::new::( + "Reader.execute_sql() must return a polars.DataFrame", + ) })?; - // Get column names for resolving global mappings - let column_names: HashSet<&str> = df.get_column_names().iter().map(|s| s.as_str()).collect(); - - // Merge global mappings into layers and handle wildcards - for layer in &mut spec.layers { - let supported_aesthetics = layer.geom.aesthetics().supported; - - // 1. 
Merge explicit global aesthetics into layer (layer takes precedence) - for (aesthetic, value) in &spec.global_mappings.aesthetics { - if supported_aesthetics.contains(&aesthetic.as_str()) { - layer - .mappings - .aesthetics - .entry(aesthetic.clone()) - .or_insert_with(|| value.clone()); - } + bytes_io.call_method1("seek", (0i64,))?; + let ipc_bytes: Vec = bytes_io.call_method0("read")?.extract()?; + let cursor = Cursor::new(ipc_bytes); + + IpcReader::new(cursor).finish().map_err(|e| { + PyErr::new::(format!( + "Failed to deserialize DataFrame: {}", + e + )) + }) +} + +/// Convert validation errors/warnings to a Python list of dicts +fn errors_to_pylist( + py: Python<'_>, + items: &[(String, Option<(usize, usize)>)], +) -> PyResult> { + let list = PyList::empty(py); + for (message, location) in items { + let dict = PyDict::new(py); + dict.set_item("message", message)?; + if let Some((line, column)) = location { + let loc_dict = PyDict::new(py); + loc_dict.set_item("line", line)?; + loc_dict.set_item("column", column)?; + dict.set_item("location", loc_dict)?; + } else { + dict.set_item("location", py.None())?; } + list.append(dict)?; + } + Ok(list.into()) +} - // 2. 
Handle wildcard expansion: map columns to aesthetics with matching names - let has_wildcard = layer.mappings.wildcard || spec.global_mappings.wildcard; - if has_wildcard { - for &aes in supported_aesthetics { - // Only create mapping if column exists in the dataframe - if column_names.contains(aes) { - layer - .mappings - .aesthetics - .entry(aes.to_string()) - .or_insert_with(|| AestheticValue::standard_column(aes)); - } +/// Convert ValidationWarning slice to Python list format +fn warnings_to_pylist(py: Python<'_>, warnings: &[ValidationWarning]) -> PyResult> { + let items: Vec<_> = warnings + .iter() + .map(|w| { + ( + w.message.clone(), + w.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(); + errors_to_pylist(py, &items) +} + +// ============================================================================ +// PyReaderBridge - Bridges Python reader objects to Rust Reader trait +// ============================================================================ + +/// Bridges a Python reader object to the Rust Reader trait. +/// +/// This allows any Python object with an `execute_sql(sql: str) -> polars.DataFrame` +/// method to be used as a ggsql reader. 
+struct PyReaderBridge { + obj: Py, +} + +impl Reader for PyReaderBridge { + fn execute_sql(&self, sql: &str) -> ggsql::Result { + Python::attach(|py| { + let bound = self.obj.bind(py); + let result = bound.call_method1("execute_sql", (sql,)).map_err(|e| { + GgsqlError::ReaderError(format!("Reader.execute_sql() failed: {}", e)) + })?; + py_to_polars_inner(&result).map_err(|e| GgsqlError::ReaderError(e.to_string())) + }) + } + + fn supports_register(&self) -> bool { + Python::attach(|py| { + self.obj + .bind(py) + .call_method0("supports_register") + .and_then(|r| r.extract::()) + .unwrap_or(false) + }) + } + + fn register(&mut self, name: &str, df: DataFrame) -> ggsql::Result<()> { + Python::attach(|py| { + let py_df = + polars_to_py(py, &df).map_err(|e| GgsqlError::ReaderError(e.to_string()))?; + self.obj + .bind(py) + .call_method1("register", (name, py_df)) + .map_err(|e| GgsqlError::ReaderError(format!("Reader.register() failed: {}", e)))?; + Ok(()) + }) + } + + fn unregister(&mut self, name: &str) -> ggsql::Result<()> { + Python::attach(|py| { + self.obj + .bind(py) + .call_method1("unregister", (name,)) + .map_err(|e| { + GgsqlError::ReaderError(format!("Reader.unregister() failed: {}", e)) + })?; + Ok(()) + }) + } +} + +// ============================================================================ +// Native Reader Detection Macro +// ============================================================================ + +/// Macro to try native readers and fall back to bridge. +/// Adding new native readers = add to the macro invocation list. +macro_rules! 
try_native_readers { + ($query:expr, $reader:expr, $($native_type:ty),*) => {{ + $( + if let Ok(native) = $reader.downcast::<$native_type>() { + return native.borrow().inner.execute($query) + .map(|s| PySpec { inner: s }) + .map_err(|e| PyErr::new::(e.to_string())); } - } + )* + }}; +} + +// ============================================================================ +// PyDuckDBReader +// ============================================================================ + +/// DuckDB database reader for executing SQL queries. +/// +/// Creates an in-memory or file-based DuckDB connection that can execute +/// SQL queries and register DataFrames as queryable tables. +/// +/// Examples +/// -------- +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> df = reader.execute_sql("SELECT 1 as x, 2 as y") +/// +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> reader.register("data", pl.DataFrame({"x": [1, 2, 3]})) +/// >>> df = reader.execute_sql("SELECT * FROM data WHERE x > 1") +#[pyclass(name = "DuckDBReader", unsendable)] +struct PyDuckDBReader { + inner: RustDuckDBReader, +} + +#[pymethods] +impl PyDuckDBReader { + /// Create a new DuckDB reader from a connection string. + /// + /// Parameters + /// ---------- + /// connection : str + /// Connection string. Use "duckdb://memory" for in-memory database + /// or "duckdb://path/to/file.db" for file-based database. + /// + /// Returns + /// ------- + /// DuckDBReader + /// A configured DuckDB reader instance. + /// + /// Raises + /// ------ + /// ValueError + /// If the connection string is invalid or the database cannot be opened. + #[new] + fn new(connection: &str) -> PyResult { + let inner = RustDuckDBReader::from_connection_string(connection) + .map_err(|e| PyErr::new::(e.to_string()))?; + Ok(Self { inner }) + } + + /// Register a DataFrame as a queryable table. + /// + /// After registration, the DataFrame can be queried by name in SQL. 
+ /// + /// Parameters + /// ---------- + /// name : str + /// The table name to register under. + /// df : polars.DataFrame + /// The DataFrame to register. Must be a polars DataFrame. + /// + /// Raises + /// ------ + /// ValueError + /// If registration fails or the table name is invalid. + fn register(&mut self, py: Python<'_>, name: &str, df: &Bound<'_, PyAny>) -> PyResult<()> { + let rust_df = py_to_polars(py, df)?; + self.inner + .register(name, rust_df) + .map_err(|e| PyErr::new::(e.to_string())) } - // Compute aesthetic labels from column names - spec.compute_aesthetic_labels(); + /// Unregister a previously registered table. + /// + /// Parameters + /// ---------- + /// name : str + /// The table name to unregister. + /// + /// Raises + /// ------ + /// ValueError + /// If the table wasn't registered via this reader or unregistration fails. + fn unregister(&mut self, name: &str) -> PyResult<()> { + self.inner + .unregister(name) + .map_err(|e| PyErr::new::(e.to_string())) + } + + /// Execute a SQL query and return the result as a DataFrame. + /// + /// Parameters + /// ---------- + /// sql : str + /// The SQL query to execute. + /// + /// Returns + /// ------- + /// polars.DataFrame + /// The query result as a polars DataFrame. + /// + /// Raises + /// ------ + /// ValueError + /// If the SQL is invalid or execution fails. + fn execute_sql(&self, py: Python<'_>, sql: &str) -> PyResult> { + let df = self + .inner + .execute_sql(sql) + .map_err(|e| PyErr::new::(e.to_string()))?; + polars_to_py(py, &df) + } - // Create data map with the DataFrame as global data - let mut data_map: HashMap = HashMap::new(); - data_map.insert(GLOBAL_DATA_KEY.to_string(), df); + /// Check if this reader supports DataFrame registration. + /// + /// Returns + /// ------- + /// bool + /// True if register() is supported, False otherwise. 
+ fn supports_register(&self) -> bool { + self.inner.supports_register() + } + + /// Execute a ggsql query and return the visualization specification. + /// + /// This is the main entry point for creating visualizations. It parses + /// the query, executes the SQL portion, and returns a PySpec ready + /// for rendering. + /// + /// Parameters + /// ---------- + /// query : str + /// The ggsql query (SQL + VISUALISE clause). + /// + /// Returns + /// ------- + /// Spec + /// The resolved visualization specification ready for rendering. + /// + /// Raises + /// ------ + /// ValueError + /// If the query syntax is invalid, has no VISUALISE clause, or SQL execution fails. + /// + /// Examples + /// -------- + /// >>> reader = DuckDBReader("duckdb://memory") + /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + /// >>> writer = VegaLiteWriter() + /// >>> json_output = writer.render(spec) + fn execute(&self, query: &str) -> PyResult { + self.inner + .execute(query) + .map(|s| PySpec { inner: s }) + .map_err(|e| PyErr::new::(e.to_string())) + } +} - // Write using the specified writer - match writer { - "vegalite" => { - let w = VegaLiteWriter::new(); - w.write(&spec, &data_map) - .map_err(|e| PyErr::new::(e.to_string())) +// ============================================================================ +// PyVegaLiteWriter +// ============================================================================ + +/// Vega-Lite JSON output writer. +/// +/// Converts visualization specifications to Vega-Lite v6 JSON. +/// +/// Examples +/// -------- +/// >>> writer = VegaLiteWriter() +/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") +/// >>> json_output = writer.render(spec) +#[pyclass(name = "VegaLiteWriter")] +struct PyVegaLiteWriter { + inner: RustVegaLiteWriter, +} + +#[pymethods] +impl PyVegaLiteWriter { + /// Create a new Vega-Lite writer. 
+ /// + /// Returns + /// ------- + /// VegaLiteWriter + /// A configured Vega-Lite writer instance. + #[new] + fn new() -> Self { + Self { + inner: RustVegaLiteWriter::new(), } - _ => Err(PyErr::new::(format!( - "Unknown writer: {}", - writer - ))), } + + /// Render a Spec to Vega-Lite JSON output + /// + /// Parameters + /// ---------- + /// spec : Spec + /// The visualization specification from reader.execute(). + /// + /// Returns + /// ------- + /// str + /// The output (i.e., Vega-Lite JSON string). + /// + /// Raises + /// ------ + /// ValueError + /// If rendering fails. + /// + /// Examples + /// -------- + /// >>> reader = DuckDBReader("duckdb://memory") + /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + /// >>> writer = VegaLiteWriter() + /// >>> json_output = writer.render(spec) + fn render(&self, spec: &PySpec) -> PyResult { + self.inner + .render(&spec.inner) + .map_err(|e| PyErr::new::(e.to_string())) + } +} + +// ============================================================================ +// PyValidated +// ============================================================================ + +/// Result of validate() - query inspection and validation without SQL execution. +/// +/// Contains information about query structure and any validation errors/warnings. +/// The tree() method from Rust is not exposed as it's not useful in Python. +#[pyclass(name = "Validated")] +struct PyValidated { + sql: String, + visual: String, + has_visual: bool, + valid: bool, + errors: Vec<(String, Option<(usize, usize)>)>, + warnings: Vec<(String, Option<(usize, usize)>)>, +} + +#[pymethods] +impl PyValidated { + /// Whether the query contains a VISUALISE clause. + /// + /// Returns + /// ------- + /// bool + /// True if the query has a VISUALISE clause. + fn has_visual(&self) -> bool { + self.has_visual + } + + /// The SQL portion (before VISUALISE). + /// + /// Returns + /// ------- + /// str + /// The SQL part of the query. 
+ fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + /// + /// Returns + /// ------- + /// str + /// The VISUALISE part of the query. + fn visual(&self) -> &str { + &self.visual + } + + /// Whether the query is valid (no errors). + /// + /// Returns + /// ------- + /// bool + /// True if the query is syntactically and semantically valid. + fn valid(&self) -> bool { + self.valid + } + + /// Validation errors (fatal issues). + /// + /// Returns + /// ------- + /// list[dict] + /// List of error dictionaries with 'message' and optional 'location' keys. + fn errors(&self, py: Python<'_>) -> PyResult> { + errors_to_pylist(py, &self.errors) + } + + /// Validation warnings (non-fatal issues). + /// + /// Returns + /// ------- + /// list[dict] + /// List of warning dictionaries with 'message' and optional 'location' keys. + fn warnings(&self, py: Python<'_>) -> PyResult> { + errors_to_pylist(py, &self.warnings) + } +} + +// ============================================================================ +// PySpec +// ============================================================================ + +/// Result of reader.execute(), ready for rendering. +/// +/// Contains the resolved plot specification, data, and metadata. +/// Use writer.render(spec) to generate output. +/// +/// Examples +/// -------- +/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") +/// >>> print(f"Rows: {spec.metadata()['rows']}") +/// >>> writer = VegaLiteWriter() +/// >>> json_output = writer.render(spec) +#[pyclass(name = "Spec")] +struct PySpec { + inner: Spec, } +#[pymethods] +impl PySpec { + /// Get visualization metadata. + /// + /// Returns + /// ------- + /// dict + /// Dictionary with 'rows', 'columns', and 'layer_count' keys. 
+ fn metadata(&self, py: Python<'_>) -> PyResult> { + let m = self.inner.metadata(); + let dict = PyDict::new(py); + dict.set_item("rows", m.rows)?; + dict.set_item("columns", m.columns.clone())?; + dict.set_item("layer_count", m.layer_count)?; + Ok(dict.into()) + } + + /// The main SQL query that was executed. + /// + /// Returns + /// ------- + /// str + /// The SQL query string. + fn sql(&self) -> &str { + self.inner.sql() + } + + /// The VISUALISE portion (raw text). + /// + /// Returns + /// ------- + /// str + /// The VISUALISE clause text. + fn visual(&self) -> &str { + self.inner.visual() + } + + /// Number of layers. + /// + /// Returns + /// ------- + /// int + /// The number of DRAW clauses in the visualization. + fn layer_count(&self) -> usize { + self.inner.layer_count() + } + + /// Get global data (main query result). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The main query result DataFrame, or None if not available. + fn data(&self, py: Python<'_>) -> PyResult>> { + self.inner.data().map(|df| polars_to_py(py, df)).transpose() + } + + /// Get layer-specific data (from FILTER or FROM clause). + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The layer-specific DataFrame, or None if the layer uses global data. + fn layer_data(&self, py: Python<'_>, index: usize) -> PyResult>> { + self.inner + .layer_data(index) + .map(|df| polars_to_py(py, df)) + .transpose() + } + + /// Get stat transform data (e.g., histogram bins, density estimates). + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The stat transform DataFrame, or None if no stat transform. 
+ fn stat_data(&self, py: Python<'_>, index: usize) -> PyResult>> { + self.inner + .stat_data(index) + .map(|df| polars_to_py(py, df)) + .transpose() + } + + /// Layer filter/source query, or None if using global data. + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// str | None + /// The filter SQL query, or None if the layer uses global data directly. + fn layer_sql(&self, index: usize) -> Option { + self.inner.layer_sql(index).map(|s| s.to_string()) + } + + /// Stat transform query, or None if no stat transform. + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// str | None + /// The stat transform SQL query, or None if no stat transform. + fn stat_sql(&self, index: usize) -> Option { + self.inner.stat_sql(index).map(|s| s.to_string()) + } + + /// Validation warnings from preparation. + /// + /// Returns + /// ------- + /// list[dict] + /// List of warning dictionaries with 'message' and optional 'location' keys. + fn warnings(&self, py: Python<'_>) -> PyResult> { + warnings_to_pylist(py, self.inner.warnings()) + } +} + +// ============================================================================ +// Module Functions +// ============================================================================ + +/// Validate query syntax and semantics without executing SQL. +/// +/// Parameters +/// ---------- +/// query : str +/// The ggsql query to validate. +/// +/// Returns +/// ------- +/// Validated +/// Validation result with query inspection methods. +/// +/// Raises +/// ------ +/// ValueError +/// If validation fails unexpectedly (not for syntax errors, which are captured). 
+#[pyfunction] +fn validate(query: &str) -> PyResult { + let v = rust_validate(query) + .map_err(|e| PyErr::new::(e.to_string()))?; + + Ok(PyValidated { + sql: v.sql().to_string(), + visual: v.visual().to_string(), + has_visual: v.has_visual(), + valid: v.valid(), + errors: v + .errors() + .iter() + .map(|e| { + ( + e.message.clone(), + e.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(), + warnings: v + .warnings() + .iter() + .map(|w| { + ( + w.message.clone(), + w.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(), + }) +} + +/// Execute a ggsql query using a custom Python reader. +/// +/// This is a convenience function for custom readers. For native readers, +/// prefer using `reader.execute()` directly. +/// +/// Parameters +/// ---------- +/// query : str +/// The ggsql query to execute. +/// reader : Reader | object +/// The database reader to execute SQL against. Can be a native Reader +/// for optimal performance, or any Python object with an +/// `execute_sql(sql: str) -> polars.DataFrame` method. +/// +/// Returns +/// ------- +/// Spec +/// The resolved visualization specification ready for rendering. +/// +/// Raises +/// ------ +/// ValueError +/// If parsing, validation, or SQL execution fails. +/// +/// Examples +/// -------- +/// >>> # Using native reader (prefer reader.execute() instead) +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> spec = execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> writer = VegaLiteWriter() +/// >>> json_output = writer.render(spec) +/// +/// >>> # Using custom Python reader +/// >>> class MyReader: +/// ... def execute_sql(self, sql: str) -> pl.DataFrame: +/// ... 
return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +/// >>> reader = MyReader() +/// >>> spec = execute("SELECT * FROM data VISUALISE x, y DRAW point", reader) +#[pyfunction] +fn execute(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { + // Fast path: try all known native reader types + // Add new native readers to this list as they're implemented + try_native_readers!(query, reader, PyDuckDBReader); + + // Bridge path: wrap Python object as Reader + let bridge = PyReaderBridge { + obj: reader.clone().unbind(), + }; + bridge + .execute(query) + .map(|s| PySpec { inner: s }) + .map_err(|e| PyErr::new::(e.to_string())) +} + +// ============================================================================ +// Module Registration +// ============================================================================ + #[pymodule] fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_function(wrap_pyfunction!(split_query, m)?)?; - m.add_function(wrap_pyfunction!(render, m)?)?; + // Classes + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + // Functions + m.add_function(wrap_pyfunction!(validate, m)?)?; + m.add_function(wrap_pyfunction!(execute, m)?)?; + Ok(()) } diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 9df03779..f5c666fc 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -3,31 +3,198 @@ These tests focus on Python-specific logic: - DataFrame conversion via narwhals - Return type handling +- Two-stage API (reader.execute() -> render) Rust logic (parsing, Vega-Lite generation) is tested in the Rust test suite. 
""" +import json + import pytest import polars as pl import altair import ggsql +# Optional dependency for ibis test +try: + import ibis + + HAS_IBIS = hasattr(ibis, "duckdb") +except ImportError: + HAS_IBIS = False + + +class TestValidate: + """Tests for validate() function.""" + + def test_valid_query_with_visualise(self): + validated = ggsql.validate( + "SELECT 1 AS x, 2 AS y VISUALISE DRAW point MAPPING x AS x, y AS y" + ) + assert validated.has_visual() + assert validated.valid() + assert "SELECT" in validated.sql() + assert "VISUALISE" in validated.visual() + assert len(validated.errors()) == 0 + + def test_valid_query_without_visualise(self): + validated = ggsql.validate("SELECT 1 AS x, 2 AS y") + assert not validated.has_visual() + assert validated.valid() + assert validated.sql() == "SELECT 1 AS x, 2 AS y" + assert validated.visual() == "" + + def test_invalid_query_has_errors(self): + validated = ggsql.validate("SELECT 1 VISUALISE DRAW invalid_geom") + assert not validated.valid() + assert len(validated.errors()) > 0 + + def test_missing_required_aesthetic(self): + # Point requires x and y, only providing x + validated = ggsql.validate( + "SELECT 1 AS x, 2 AS y VISUALISE DRAW point MAPPING x AS x" + ) + assert not validated.valid() + errors = validated.errors() + assert len(errors) > 0 + assert any("y" in e["message"] for e in errors) + + +class TestDuckDBReader: + """Tests for DuckDBReader class.""" + + def test_create_in_memory(self): + reader = ggsql.DuckDBReader("duckdb://memory") + assert reader is not None + + def test_execute_simple_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = reader.execute_sql("SELECT 1 AS x, 2 AS y") + assert isinstance(df, pl.DataFrame) + assert df.shape == (1, 2) + assert list(df.columns) == ["x", "y"] + + def test_register_and_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("my_data", df) + + result = 
reader.execute_sql("SELECT * FROM my_data WHERE x > 1") + assert isinstance(result, pl.DataFrame) + assert result.shape == (2, 2) + + def test_supports_register(self): + reader = ggsql.DuckDBReader("duckdb://memory") + assert reader.supports_register() is True + + def test_invalid_connection_string(self): + with pytest.raises(ValueError): + ggsql.DuckDBReader("invalid://connection") + -class TestSplitQuery: - """Tests for split_query() function.""" +class TestVegaLiteWriter: + """Tests for VegaLiteWriter class.""" - def test_splits_sql_and_visualise(self): - sql, viz = ggsql.split_query( - "SELECT x, y FROM data VISUALISE x, y DRAW point" + def test_create_writer(self): + writer = ggsql.VegaLiteWriter() + assert writer is not None + + +class TestExecute: + """Tests for reader.execute() method.""" + + def test_execute_simple_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + assert spec is not None + assert spec.layer_count() == 1 + + def test_execute_with_registered_data(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("data", df) + + spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") + assert spec.metadata()["rows"] == 3 + + def test_execute_metadata(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute( + "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) " + "VISUALISE x, y DRAW point", + ) + + metadata = spec.metadata() + assert metadata["rows"] == 3 + assert "x" in metadata["columns"] + assert "y" in metadata["columns"] + assert metadata["layer_count"] == 1 + + def test_execute_sql_accessor(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + assert "SELECT" in spec.sql() + + def test_execute_visual_accessor(self): + reader = 
ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + assert "VISUALISE" in spec.visual() + + def test_execute_data_accessor(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + data = spec.data() + assert isinstance(data, pl.DataFrame) + assert data.shape == (1, 2) + + def test_execute_without_visualise_fails(self): + reader = ggsql.DuckDBReader("duckdb://memory") + with pytest.raises(ValueError): + reader.execute("SELECT 1 AS x, 2 AS y") + + +class TestWriterRender: + """Tests for VegaLiteWriter.render() method.""" + + def test_render_to_vegalite(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + writer = ggsql.VegaLiteWriter() + + result = writer.render(spec) + assert isinstance(result, str) + + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "vega-lite" in spec_dict["$schema"] + + def test_render_contains_data(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("data", df) + + spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") + writer = ggsql.VegaLiteWriter() + + result = writer.render(spec) + spec_dict = json.loads(result) + # Data should be in the spec (either inline or in datasets) + assert "data" in spec_dict or "datasets" in spec_dict + + def test_render_multi_layer(self): + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute( + "SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) " + "VISUALISE " + "DRAW point MAPPING x AS x, y AS y " + "DRAW line MAPPING x AS x, y AS y", ) - assert "SELECT" in sql - assert "VISUALISE" in viz + writer = ggsql.VegaLiteWriter() - def test_no_visualise_returns_empty_viz(self): - sql, viz = ggsql.split_query("SELECT * FROM data") - assert sql == "SELECT * FROM data" - 
assert viz == "" + result = writer.render(spec) + spec_dict = json.loads(result) + assert "layer" in spec_dict class TestRenderAltairDataFrameConversion: @@ -112,23 +279,31 @@ def test_layered_chart_can_round_trip(self): def test_faceted_chart_returns_facet_chart(self): """FACET WRAP specs produce FacetChart.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4, 5, 6], - "y": [10, 20, 30, 40, 50, 60], - "group": ["A", "A", "A", "B", "B", "B"], - }) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4, 5, 6], + "y": [10, 20, 30, 40, 50, 60], + "group": ["A", "A", "A", "B", "B", "B"], + } + ) # Need validate=False because ggsql produces v6 specs - chart = ggsql.render_altair(df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False) + chart = ggsql.render_altair( + df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + ) assert isinstance(chart, altair.FacetChart) def test_faceted_chart_can_round_trip(self): """FacetChart can be converted to dict and back.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4, 5, 6], - "y": [10, 20, 30, 40, 50, 60], - "group": ["A", "A", "A", "B", "B", "B"], - }) - chart = ggsql.render_altair(df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4, 5, 6], + "y": [10, 20, 30, 40, 50, 60], + "group": ["A", "A", "A", "B", "B", "B"], + } + ) + chart = ggsql.render_altair( + df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + ) # Convert to dict (skip validation for ggsql specs) spec = chart.to_dict(validate=False) @@ -140,11 +315,13 @@ def test_faceted_chart_can_round_trip(self): def test_chart_with_color_encoding(self): """Charts with color encoding still return correct type.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4], - "y": [10, 20, 30, 40], - "category": ["A", "B", "A", "B"], - }) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4], + "y": [10, 20, 30, 40], + "category": ["A", "B", "A", "B"], + } + ) chart = ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") 
# Should still be a LayerChart (ggsql wraps in layer) assert isinstance(chart, altair.LayerChart) @@ -157,3 +334,207 @@ def test_invalid_viz_raises(self): df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) with pytest.raises(ValueError): ggsql.render_altair(df, "NOT VALID SYNTAX") + + +class TestTwoStageAPIIntegration: + """Integration tests for the two-stage reader.execute() -> render API.""" + + def test_end_to_end_workflow(self): + """Complete workflow: create reader, register data, execute, render.""" + # Create reader + reader = ggsql.DuckDBReader("duckdb://memory") + + # Register data + df = pl.DataFrame( + { + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "value": [10, 20, 30], + "region": ["North", "South", "North"], + } + ) + reader.register("sales", df) + + # Execute visualization + spec = reader.execute( + "SELECT * FROM sales VISUALISE date AS x, value AS y, region AS color DRAW line", + ) + + # Verify metadata + assert spec.metadata()["rows"] == 3 + assert spec.layer_count() == 1 + + # Render to Vega-Lite + writer = ggsql.VegaLiteWriter() + result = writer.render(spec) + + # Verify output + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "line" in json.dumps(spec_dict) + + def test_can_introspect_spec(self): + """Test all introspection methods on Spec.""" + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + + # All these should work without error + assert spec.sql() is not None + assert spec.visual() is not None + assert spec.layer_count() >= 1 + assert spec.metadata() is not None + assert spec.data() is not None + assert spec.warnings() is not None + + # Layer-specific accessors (may return None) + _ = spec.layer_data(0) + _ = spec.stat_data(0) + _ = spec.layer_sql(0) + _ = spec.stat_sql(0) + + +class TestCustomReader: + """Tests for custom Python reader support.""" + + def test_simple_custom_reader(self): + """Custom reader with execute_sql() 
method works.""" + + class SimpleReader: + def execute_sql(self, sql: str) -> pl.DataFrame: + return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + + reader = SimpleReader() + spec = ggsql.execute("SELECT * FROM data VISUALISE x, y DRAW point", reader) + assert spec.metadata()["rows"] == 3 + + def test_custom_reader_with_register(self): + """Custom reader with register() support.""" + + class RegisterReader: + def __init__(self): + self.tables = {} + + def execute_sql(self, sql: str) -> pl.DataFrame: + # Simple: just return the first registered table + if self.tables: + return next(iter(self.tables.values())) + return pl.DataFrame({"x": [1], "y": [2]}) + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.tables[name] = df + + reader = RegisterReader() + spec = ggsql.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) + assert spec is not None + + def test_custom_reader_error_handling(self): + """Custom reader errors are propagated.""" + + class ErrorReader: + def execute_sql(self, sql: str) -> pl.DataFrame: + raise ValueError("Custom reader error") + + reader = ErrorReader() + with pytest.raises(ValueError, match="Custom reader error"): + ggsql.execute("SELECT 1 VISUALISE x, y DRAW point", reader) + + def test_custom_reader_wrong_return_type(self): + """Custom reader returning wrong type raises TypeError.""" + + class WrongTypeReader: + def execute_sql(self, sql: str): + return {"x": [1, 2, 3]} # dict, not DataFrame + + reader = WrongTypeReader() + with pytest.raises((ValueError, TypeError)): + ggsql.execute("SELECT 1 VISUALISE x, y DRAW point", reader) + + def test_native_reader_fast_path(self): + """Native DuckDBReader still works (fast path).""" + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + assert spec.metadata()["rows"] == 1 + + def test_custom_reader_can_render(self): + """Custom reader result 
can be rendered to Vega-Lite.""" + + class StaticReader: + def execute_sql(self, sql: str) -> pl.DataFrame: + return pl.DataFrame( + { + "x": [1, 2, 3, 4, 5], + "y": [10, 40, 20, 50, 30], + "category": ["A", "B", "A", "B", "A"], + } + ) + + reader = StaticReader() + spec = ggsql.execute( + "SELECT * FROM data VISUALISE x, y, category AS color DRAW point", + reader, + ) + + writer = ggsql.VegaLiteWriter() + result = writer.render(spec) + + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "vega-lite" in spec_dict["$schema"] + + def test_custom_reader_execute_sql_called(self): + """Verify execute_sql() is called on the custom reader.""" + + class RecordingReader: + def __init__(self): + self.execute_calls = [] + + def execute_sql(self, sql: str) -> pl.DataFrame: + self.execute_calls.append(sql) + return pl.DataFrame({"x": [1], "y": [2]}) + + reader = RecordingReader() + ggsql.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader, + ) + + # execute_sql() should have been called at least once + assert len(reader.execute_calls) > 0 + # All calls should be valid SQL strings + assert all(isinstance(sql, str) for sql in reader.execute_calls) + + @pytest.mark.skipif(not HAS_IBIS, reason="ibis not installed") + def test_custom_reader_ibis(self): + """Test custom reader using ibis as backend.""" + + class IbisReader: + def __init__(self): + self.con = ibis.duckdb.connect() + + def execute_sql(self, sql: str) -> pl.DataFrame: + return self.con.con.execute(sql).pl() + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.con.create_table(name, df.to_arrow(), overwrite=True) + + def unregister(self, name: str) -> None: + self.con.drop_table(name) + + reader = IbisReader() + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("mydata", df) + + spec = ggsql.execute( + "SELECT * FROM mydata VISUALISE x, y DRAW point", + reader, + ) + + assert 
spec.metadata()["rows"] == 3 + writer = ggsql.VegaLiteWriter() + json_output = writer.render(spec) + assert "point" in json_output diff --git a/src/Cargo.toml b/src/Cargo.toml index 75cbd1f6..dd60aac1 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -32,6 +32,7 @@ polars.workspace = true # Readers duckdb = { workspace = true, optional = true } +arrow = { workspace = true, optional = true } postgres = { workspace = true, optional = true } sqlx = { workspace = true, optional = true } rusqlite = { workspace = true, optional = true } @@ -69,7 +70,7 @@ proptest.workspace = true [features] default = ["duckdb", "sqlite", "vegalite"] -duckdb = ["dep:duckdb"] +duckdb = ["dep:duckdb", "dep:arrow"] postgres = ["dep:postgres"] sqlite = ["dep:rusqlite"] vegalite = [] diff --git a/src/cli.rs b/src/cli.rs index 80dec31e..bb6d4df9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -9,10 +9,10 @@ use ggsql::parser::extract_sql; use ggsql::{parser, VERSION}; use std::path::PathBuf; -#[cfg(feature = "duckdb")] -use ggsql::execute::prepare_data; #[cfg(feature = "duckdb")] use ggsql::reader::{DuckDBReader, Reader}; +#[cfg(feature = "duckdb")] +use ggsql::validate; #[cfg(feature = "vegalite")] use ggsql::writer::{VegaLiteWriter, Writer}; @@ -169,15 +169,16 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option v, + Err(e) => { + eprintln!("Failed to validate query: {}", e); + std::process::exit(1); + } + }; - if viz_part.is_empty() { + if !validated.has_visual() { if verbose { eprintln!("Visualisation is empty. 
Printing table instead."); } @@ -185,28 +186,27 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option s, + Err(e) => { + eprintln!("Failed to execute query: {}", e); + std::process::exit(1); + } + }; if verbose { - eprintln!("\nData sources loaded:"); - for (key, df) in &prepared.data { - eprintln!(" {}: {:?}", key, df.shape()); - } - eprintln!("\nParsed {} visualisation spec(s)", prepared.specs.len()); + let metadata = spec.metadata(); + eprintln!("\nQuery executed:"); + eprintln!(" Rows: {}", metadata.rows); + eprintln!(" Columns: {}", metadata.columns.join(", ")); + eprintln!(" Layers: {}", metadata.layer_count); } - let first_spec = prepared.specs.first(); - if first_spec.is_none() { + if spec.plot().layers.is_empty() { eprintln!("No visualization specifications found"); std::process::exit(1); } - let first_spec = first_spec.unwrap(); // Check writer if writer != "vegalite" { @@ -220,14 +220,15 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option r, + Err(e) => { + eprintln!("Failed to generate Vega-Lite output: {}", e); + std::process::exit(1); + } + }; if output.is_none() { // Empty output location, write to stdout @@ -237,7 +238,7 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option { if verbose { eprintln!("\nVega-Lite JSON written to: {}", output.display()); @@ -291,13 +292,38 @@ fn cmd_parse(query: String, format: String) { } } -fn cmd_validate(query: String, reader: Option) { - println!("Validating query: {}", query); - if let Some(reader) = reader { - println!("Reader: {}", reader); +fn cmd_validate(query: String, _reader: Option) { + #[cfg(feature = "duckdb")] + { + match validate(&query) { + Ok(validated) if validated.valid() => { + println!("✓ Query syntax is valid"); + } + Ok(validated) => { + println!("✗ Validation errors:"); + for err in validated.errors() { + println!(" - {}", err.message); + } + if !validated.warnings().is_empty() { + println!("\nWarnings:"); + for warning in 
validated.warnings() { + println!(" - {}", warning.message); + } + } + std::process::exit(1); + } + Err(e) => { + eprintln!("Error during validation: {}", e); + std::process::exit(1); + } + } + } + + #[cfg(not(feature = "duckdb"))] + { + eprintln!("Validation requires the duckdb feature"); + std::process::exit(1); } - // TODO: Implement validation logic - println!("Validation not yet implemented"); } // Prints a CSV-like output to stdout with aligned columns @@ -309,7 +335,7 @@ fn print_table_fallback(query: &str, reader: &DuckDBReader, max_rows: usize) { } let parsed = parsed.unwrap(); - let data = reader.execute(&parsed); + let data = reader.execute_sql(&parsed); if let Err(e) = data { eprintln!("Failed to execute SQL query: {}", e); std::process::exit(1) diff --git a/src/doc/API.md b/src/doc/API.md new file mode 100644 index 00000000..1327960e --- /dev/null +++ b/src/doc/API.md @@ -0,0 +1,525 @@ +# ggsql API Reference + +This document provides a comprehensive reference for the ggsql public API. + +## Overview + +- **Stage 1: `reader.execute()`** - Parse query, execute SQL, resolve mappings, create Spec +- **Stage 2: `writer.render()`** - Generate output (Vega-Lite JSON, etc.) + +### API Functions + +| Function | Use Case | +| ------------------ | ---------------------------------------------------- | +| `reader.execute()` | Main entry point - full visualization pipeline | +| `writer.render()` | Generate output from Spec | +| `validate()` | Validate syntax + semantics, inspect query structure | + +--- + +## Core Functions + +### `Reader::execute` + +```rust +fn execute(&self, query: &str) -> Result +``` + +Execute a ggsql query for visualization. This is the main entry point - a default method on the Reader trait. + +**What happens during execution:** + +1. Parses the query (SQL + VISUALISE portions) +2. Executes the main SQL query using the reader +3. Resolves wildcards (`VISUALISE *`) against actual columns +4. Merges global mappings into each layer +5. 
Executes layer-specific queries (filters, stats) +6. Injects constant values as synthetic columns +7. Computes aesthetic labels from column names + +**Arguments:** + +- `query` - The full ggsql query string + +**Returns:** + +- `Ok(Spec)` - Ready for rendering +- `Err(GgsqlError)` - Parse, validation, or execution error + +**Example:** + +```rust +use ggsql::reader::{DuckDBReader, Reader}; +use ggsql::writer::{VegaLiteWriter, Writer}; + +let reader = DuckDBReader::from_connection_string("duckdb://memory")?; +let spec = reader.execute( + "SELECT x, y FROM data VISUALISE x, y DRAW point" +)?; + +// Access metadata +println!("Rows: {}", spec.metadata().rows); +println!("Columns: {:?}", spec.metadata().columns); + +// Render to Vega-Lite +let writer = VegaLiteWriter::new(); +let result = writer.render(&spec)?; +``` + +**Error Conditions:** + +- Parse error in SQL or VISUALISE portion +- SQL execution failure +- Missing required aesthetics +- Invalid geom type +- Multiple VISUALISE statements (not yet supported) + +--- + +### `validate` + +```rust +pub fn validate(query: &str) -> Result +``` + +Validate query syntax and semantics without executing SQL. This function combines query parsing and validation into a single operation. + +**What is validated:** + +- Syntax (parsing) +- Required aesthetics for each geom type +- Valid scale types (linear, log10, date, etc.) 
+- Valid coord types and properties +- Valid geom types +- Valid aesthetic names +- Valid SETTING parameters + +**Arguments:** + +- `query` - The full ggsql query string (SQL + VISUALISE) + +**Returns:** + +- `Ok(Validated)` - Validation results with query inspection methods +- `Err(GgsqlError)` - Internal error + +**Example:** + +```rust +use ggsql::validate; + +let validated = validate("SELECT x, y FROM data VISUALISE x, y DRAW point")?; + +// Check validity +if !validated.valid() { + for error in validated.errors() { + eprintln!("Error: {}", error.message); + } +} + +// Inspect query structure +if validated.has_visual() { + println!("SQL: {}", validated.sql()); + println!("Visual: {}", validated.visual()); +} +``` + +**Notes:** + +- Does not execute SQL +- Does not resolve wildcards or global mappings +- Cannot validate column existence (requires data) +- Returns all errors, not just the first one +- CST available via `tree()` for advanced inspection + +--- + +## Type Reference + +### `Validated` + +Result of validating a query (syntax + semantics, no SQL execution). 
+ +```rust +pub struct Validated { + // All fields private +} +``` + +**Methods:** + +| Method | Signature | Description | +| ------------ | -------------------------------------------- | ---------------------------------- | +| `has_visual` | `fn has_visual(&self) -> bool` | Whether query contains VISUALISE | +| `sql` | `fn sql(&self) -> &str` | The SQL portion (before VISUALISE) | +| `visual` | `fn visual(&self) -> &str` | The VISUALISE portion (raw text) | +| `tree` | `fn tree(&self) -> Option<&Tree>` | CST for advanced inspection | +| `valid` | `fn valid(&self) -> bool` | Whether query is valid | +| `errors` | `fn errors(&self) -> &[ValidationError]` | Validation errors | +| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings | + +**Example:** + +```rust +let validated = ggsql::validate("SELECT 1 as x VISUALISE DRAW point MAPPING x AS x, y AS y")?; + +// Check validity +if !validated.valid() { + for error in validated.errors() { + eprintln!("Error: {}", error.message); + } +} + +// Inspect query structure +assert!(validated.has_visual()); +assert_eq!(validated.sql(), "SELECT 1 as x"); +assert!(validated.visual().starts_with("VISUALISE")); + +// CST access for advanced use cases +if let Some(tree) = validated.tree() { + println!("Root node: {}", tree.root_node().kind()); +} +``` + +--- + +### `Spec` + +Result of executing a ggsql query, ready for rendering. + +#### Rendering + +Use `writer.render(&spec)` to generate output. 
+ +**Example:** + +```rust +let writer = VegaLiteWriter::new(); +let json = writer.render(&spec)?; +println!("{}", json); +``` + +#### Plot Access Methods + +| Method | Signature | Description | +| ------------- | -------------------------------- | ------------------------------- | +| `plot` | `fn plot(&self) -> &Plot` | Get resolved plot specification | +| `layer_count` | `fn layer_count(&self) -> usize` | Number of layers | + +**Example:** + +```rust +println!("Layers: {}", spec.layer_count()); + +let plot = spec.plot(); +for (i, layer) in plot.layers.iter().enumerate() { + println!("Layer {}: {:?}", i, layer.geom); +} +``` + +#### Metadata Methods + +| Method | Signature | Description | +| ---------- | --------------------------------- | -------------------------- | +| `metadata` | `fn metadata(&self) -> &Metadata` | Get visualization metadata | + +**Example:** + +```rust +let meta = spec.metadata(); +println!("Rows: {}", meta.rows); +println!("Columns: {:?}", meta.columns); +println!("Layer count: {}", meta.layer_count); +``` + +#### Data Access Methods + +| Method | Signature | Description | +| ------------ | ------------------------------------------------------ | ------------------------------- | +| `data` | `fn data(&self) -> Option<&DataFrame>` | Global data (main query result) | +| `layer_data` | `fn layer_data(&self, i: usize) -> Option<&DataFrame>` | Layer-specific data | +| `stat_data` | `fn stat_data(&self, i: usize) -> Option<&DataFrame>` | Stat transform results | +| `data_map` | `fn data_map(&self) -> &HashMap` | Raw data map access | + +**Example:** + +```rust +// Global data +if let Some(df) = spec.data() { + println!("Global data: {} rows", df.height()); +} + +// Layer-specific data (from FILTER or FROM clause) +if let Some(df) = spec.layer_data(0) { + println!("Layer 0 has filtered data: {} rows", df.height()); +} + +// Stat data (histogram bins, density estimates, etc.) 
+if let Some(df) = spec.stat_data(1) { + println!("Layer 1 stat data: {} rows", df.height()); +} +``` + +#### Query Introspection Methods + +| Method | Signature | Description | +| ----------- | ----------------------------------------------- | -------------------------------- | +| `sql` | `fn sql(&self) -> &str` | Main SQL query that was executed | +| `visual` | `fn visual(&self) -> &str` | Raw VISUALISE text | +| `layer_sql` | `fn layer_sql(&self, i: usize) -> Option<&str>` | Layer filter/source query | +| `stat_sql` | `fn stat_sql(&self, i: usize) -> Option<&str>` | Stat transform query | + +**Example:** + +```rust +// Main query +println!("SQL: {}", spec.sql()); +println!("Visual: {}", spec.visual()); + +// Per-layer queries +for i in 0..spec.layer_count() { + if let Some(sql) = spec.layer_sql(i) { + println!("Layer {} filter: {}", i, sql); + } + if let Some(sql) = spec.stat_sql(i) { + println!("Layer {} stat: {}", i, sql); + } +} +``` + +#### Warnings Method + +| Method | Signature | Description | +| ---------- | -------------------------------------------- | ---------------------------------- | +| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings from execution | + +**Example:** + +```rust +let spec = reader.execute(query)?; + +// Check for warnings +if !spec.warnings().is_empty() { + for warning in spec.warnings() { + eprintln!("Warning: {}", warning.message); + } +} + +// Continue with rendering +let writer = VegaLiteWriter::new(); +let json = writer.render(&spec)?; +``` + +--- + +### `Metadata` + +Information about the prepared visualization. + +```rust +pub struct Metadata { + pub rows: usize, // Rows in primary data source + pub columns: Vec, // Column names + pub layer_count: usize, // Number of layers in the plot +} +``` + +--- + +### `ValidationError` + +A validation error (fatal issue). 
+ +```rust +pub struct ValidationError { + pub message: String, + pub location: Option, +} +``` + +--- + +### `ValidationWarning` + +A validation warning (non-fatal issue). + +```rust +pub struct ValidationWarning { + pub message: String, + pub location: Option, +} +``` + +--- + +### `Location` + +Location within a query string. + +```rust +pub struct Location { + pub line: usize, // 0-based line number + pub column: usize, // 0-based column number +} +``` + +--- + +## Reader Trait & Implementations + +### `Reader` Trait + +```rust +pub trait Reader { + /// Execute a SQL query and return a DataFrame + fn execute_sql(&self, sql: &str) -> Result; + + /// Register a DataFrame as a queryable table + fn register(&mut self, name: &str, df: DataFrame) -> Result<()>; + + /// Unregister a previously registered table + fn unregister(&mut self, name: &str) -> Result<()>; + + /// Check if this reader supports DataFrame registration + fn supports_register(&self) -> bool; +} +``` + +--- + +## Writer Trait & Implementations + +### `Writer` Trait + +```rust +pub trait Writer { + /// Render a plot specification to output format + fn write(&self, spec: &Plot, data: &HashMap) -> Result; + + /// Get the file extension for this writer's output + fn file_extension(&self) -> &str; +} +``` + +## Python Bindings + +The Python bindings provide the same two-stage API with Pythonic conventions. + +### Classes + +#### `DuckDBReader` + +```python +class DuckDBReader: + def __init__(self, connection: str) -> None: + """Create a DuckDB reader. + + Args: + connection: Connection string (e.g., "duckdb://memory") + """ + + def register(self, name: str, df: Any) -> None: + """Register a DataFrame as a queryable table. + + Args: + name: Table name + df: Polars DataFrame or narwhals-compatible DataFrame + """ + + def unregister(self, name: str) -> None: + """Unregister a previously registered table. 
+ + Args: + name: Table name to unregister + """ + + def execute_sql(self, sql: str) -> polars.DataFrame: + """Execute SQL and return a Polars DataFrame.""" + + def supports_register(self) -> bool: + """Check if registration is supported.""" +``` + +#### `VegaLiteWriter` + +```python +class VegaLiteWriter: + def __init__(self) -> None: + """Create a Vega-Lite writer.""" +``` + +#### `Validated` + +```python +class Validated: + def has_visual(self) -> bool: + """Check if query has VISUALISE clause.""" + + def sql(self) -> str: + """Get the SQL portion.""" + + def visual(self) -> str: + """Get the VISUALISE portion.""" + + def valid(self) -> bool: + """Check if query is valid.""" + + def errors(self) -> list[dict]: + """Get validation errors as list of dicts with 'message', 'location'.""" + + def warnings(self) -> list[dict]: + """Get validation warnings as list of dicts with 'message', 'location'.""" + + # Note: tree() not exposed (tree-sitter nodes are Rust-only) +``` + +#### `Spec` + +```python +class Spec: + def metadata(self) -> dict: + """Get metadata as dict with keys: rows, columns, layer_count.""" + + def sql(self) -> str: + """Get the main SQL query.""" + + def visual(self) -> str: + """Get the VISUALISE text.""" + + def layer_count(self) -> int: + """Get number of layers.""" + + def warnings(self) -> list[dict]: + """Get validation warnings as list of dicts with 'message', 'location'.""" + + def data(self) -> polars.DataFrame | None: + """Get global data.""" + + def layer_data(self, index: int) -> polars.DataFrame | None: + """Get layer-specific data.""" + + def stat_data(self, index: int) -> polars.DataFrame | None: + """Get stat transform data.""" + + def layer_sql(self, index: int) -> str | None: + """Get layer filter query.""" + + def stat_sql(self, index: int) -> str | None: + """Get stat transform query.""" +``` + +### Functions + +```python +def validate(query: str) -> Validated: + """Validate query syntax and semantics. 
+ + Returns Validated object with query inspection and validation methods. + """ + +def execute(query: str, reader: Any) -> Spec: + """Execute a ggsql query with a custom Python reader. + + For native readers, use reader.execute() method instead. + """ +``` diff --git a/src/execute.rs b/src/execute.rs index 33116ceb..3bf2be33 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -531,6 +531,23 @@ fn transform_global_sql(sql: &str, materialized_ctes: &HashSet) -> Optio } } +/// Result of building a layer query +/// +/// Contains information about the queries executed for a layer, +/// distinguishing between base filter queries and stat transform queries. +#[derive(Debug, Default)] +pub struct LayerQueryResult { + /// The final query to execute (if any) + /// None means layer uses global data directly + pub query: Option, + /// The base query before stat transform (filter/source only) + /// None if layer uses global data directly without filter + pub layer_sql: Option, + /// The stat transform query (if a stat transform was applied) + /// None if no stat transform was needed + pub stat_sql: Option, +} + /// Build a layer query handling all source types /// /// Handles: @@ -544,12 +561,12 @@ fn transform_global_sql(sql: &str, materialized_ctes: &HashSet) -> Optio /// (e.g., histogram binning, bar counting). /// /// Returns: -/// - `Ok(Some(query))` - execute this query and store result -/// - `Ok(None)` - layer uses `__global__` directly (no source, no filter, no constants, no stat transform) +/// - `Ok(LayerQueryResult)` with information about queries executed /// - `Err(...)` - validation error (e.g., filter without global data) /// /// Note: This function takes `&mut Layer` because stat transforms may add new aesthetic mappings /// (e.g., mapping y to `__ggsql_stat__count` for histogram or bar count). 
+#[allow(clippy::too_many_arguments)] fn build_layer_query( layer: &mut Layer, schema: &Schema, @@ -559,7 +576,7 @@ fn build_layer_query( facet: Option<&Facet>, constants: &[(String, LiteralValue)], execute_query: &F, -) -> Result> +) -> Result where F: Fn(&str) -> Result, { @@ -603,7 +620,7 @@ where naming::global_table() } else { // No source, no filter, no constants, no stat transform - use __global__ data directly - return Ok(None); + return Ok(LayerQueryResult::default()); } } }; @@ -635,6 +652,9 @@ where query = format!("{} WHERE {}", query, f); } + // Save the base query (with filter) before stat transform + let base_query = query.clone(); + // Apply statistical transformation (after filter, uses combined group_by) // Returns StatResult::Identity for no transformation, StatResult::Transformed for transformed query let stat_result = layer.geom.apply_stat_transform( @@ -692,11 +712,15 @@ where } // Use the transformed query - let mut final_query = transformed_query; + let mut final_query = transformed_query.clone(); if let Some(o) = order_by { final_query = format!("{} ORDER BY {}", final_query, o); } - Ok(Some(final_query)) + Ok(LayerQueryResult { + query: Some(final_query), + layer_sql: Some(base_query), + stat_sql: Some(transformed_query), + }) } StatResult::Identity => { // Identity - no stat transformation @@ -707,14 +731,18 @@ where && order_by.is_none() && constants.is_empty() { - Ok(None) + Ok(LayerQueryResult::default()) } else { // Layer has filter, order_by, or constants - still need the query let mut final_query = query; if let Some(o) = order_by { final_query = format!("{} ORDER BY {}", final_query, o); } - Ok(Some(final_query)) + Ok(LayerQueryResult { + query: Some(final_query.clone()), + layer_sql: Some(final_query), + stat_sql: None, + }) } } } @@ -860,8 +888,16 @@ fn split_color_aesthetic(layers: &mut Vec) { pub struct PreparedData { /// Data map with global and layer-specific DataFrames pub data: HashMap, - /// Parsed and resolved 
visualization specifications - pub specs: Vec, + /// Parsed and resolved visualization specification + pub spec: Plot, + /// The main SQL query that was executed + pub sql: String, + /// The raw VISUALISE portion text + pub visual: String, + /// Per-layer filter/source queries (None = uses global data directly) + pub layer_sql: Vec>, + /// Per-layer stat transform queries (None = no stat transform) + pub stat_sql: Vec>, } /// Build data map from a query using a custom query executor function @@ -888,6 +924,13 @@ where )); } + // TODO: Support multiple VISUALISE statements in future + if specs.len() > 1 { + return Err(GgsqlError::ValidationError( + "Multiple VISUALISE statements are not yet supported. Please use a single VISUALISE statement.".to_string(), + )); + } + // Check if we have any visualization content if viz_part.trim().is_empty() { return Err(GgsqlError::ValidationError( @@ -1054,6 +1097,10 @@ where // - Layer with no source, no filter, no order_by → returns None (use global directly, constants already injected) let facet = specs[0].facet.clone(); + // Track layer and stat queries for introspection + let mut layer_sql_vec: Vec> = Vec::new(); + let mut stat_sql_vec: Vec> = Vec::new(); + for (idx, layer) in specs[0].layers.iter_mut().enumerate() { // For layers using global data without filter, constants are already in global data // (injected with layer-indexed names). For other layers, extract constants for injection. @@ -1064,7 +1111,7 @@ where }; // Get mutable reference to layer for stat transform to update aesthetics - if let Some(layer_query) = build_layer_query( + let query_result = build_layer_query( layer, &layer_schemas[idx], &materialized_ctes, @@ -1073,7 +1120,14 @@ where facet.as_ref(), &constants, &execute_query, - )? 
{ + )?; + + // Store query information for introspection + layer_sql_vec.push(query_result.layer_sql); + stat_sql_vec.push(query_result.stat_sql); + + // Execute the query if one was generated + if let Some(layer_query) = query_result.query { let df = execute_query(&layer_query).map_err(|e| { GgsqlError::ReaderError(format!( "Failed to fetch data for layer {}: {}", idx, e )) })?; @@ -1105,20 +1159,24 @@ where )); } - // Post-process specs: replace literals with column references and compute labels - for spec in &mut specs { - // Replace literal aesthetic values with column references to synthetic constant columns - replace_literals_with_columns(spec); - // Compute aesthetic labels (uses first non-constant column, respects user-specified labels) - spec.compute_aesthetic_labels(); - // Divide 'color' over 'stroke' and 'fill'. This needs to happens after - // literals have associated columns. - split_color_aesthetic(&mut spec.layers); - } + let mut spec = specs.into_iter().next().unwrap(); + + // Post-process spec: replace literals with column references and compute labels + // Replace literal aesthetic values with column references to synthetic constant columns + replace_literals_with_columns(&mut spec); + // Compute aesthetic labels (uses first non-constant column, respects user-specified labels) + spec.compute_aesthetic_labels(); + // Divide 'color' over 'stroke' and 'fill'. This needs to happen after + // literals have associated columns. + split_color_aesthetic(&mut spec.layers); Ok(PreparedData { data: data_map, - specs, + spec, + sql: sql_part, + visual: viz_part, + layer_sql: layer_sql_vec, + stat_sql: stat_sql_vec, }) } @@ -1127,7 +1185,7 @@ where /// Convenience wrapper around `prepare_data_with_executor` for direct DuckDB reader usage. 
#[cfg(feature = "duckdb")] pub fn prepare_data(query: &str, reader: &DuckDBReader) -> Result { - prepare_data_with_executor(query, |sql| reader.execute(sql)) + prepare_data_with_executor(query, |sql| reader.execute_sql(sql)) } #[cfg(test)] @@ -1146,7 +1204,7 @@ mod tests { let result = prepare_data(query, &reader).unwrap(); assert!(result.data.contains_key(naming::GLOBAL_DATA_KEY)); - assert_eq!(result.specs.len(), 1); + assert_eq!(result.spec.layers.len(), 1); } #[cfg(feature = "duckdb")] @@ -1373,7 +1431,8 @@ mod tests { ); // Should use temp table name with session UUID - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_cte_sales_")); assert!(query.ends_with("__")); assert!(query.contains(naming::session_id())); @@ -1401,7 +1460,8 @@ mod tests { ); // Should use temp table name with session UUID and filter - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("__ggsql_cte_sales_")); assert!(query.ends_with(" WHERE year = 2024")); assert!(query.contains(naming::session_id())); @@ -1427,8 +1487,9 @@ mod tests { ); // Should use table name directly + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table".to_string()) ); } @@ -1453,8 +1514,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table WHERE value > 100".to_string()) ); } @@ -1479,8 +1541,9 @@ mod tests { ); // File paths should be wrapped in single quotes + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM 'data/sales.csv'".to_string()) ); } @@ -1505,8 +1568,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + 
query_result.query, Some("SELECT * FROM 'data.parquet' WHERE x > 10".to_string()) ); } @@ -1531,7 +1595,8 @@ mod tests { ); // Should query global table with session UUID and filter - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_global_")); assert!(query.ends_with("__ WHERE category = 'A'")); assert!(query.contains(naming::session_id())); @@ -1555,8 +1620,11 @@ mod tests { &mock_execute, ); - // Should return None - layer uses __global__ directly - assert_eq!(result.unwrap(), None); + // Should return empty result - layer uses __global__ directly + let query_result = result.unwrap(); + assert!(query_result.query.is_none()); + assert!(query_result.layer_sql.is_none()); + assert!(query_result.stat_sql.is_none()); } #[test] @@ -1605,8 +1673,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table ORDER BY date ASC".to_string()) ); } @@ -1632,8 +1701,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some( "SELECT * FROM some_table WHERE year = 2024 ORDER BY date DESC, value ASC" .to_string() @@ -1661,7 +1731,8 @@ mod tests { ); // Should query global table with session UUID and order_by - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_global_")); assert!(query.ends_with("__ ORDER BY x ASC")); assert!(query.contains(naming::session_id())); @@ -1697,7 +1768,8 @@ mod tests { ); // Should inject constants as columns - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("SELECT *")); assert!(query.contains("'value' AS __ggsql_const_color__")); 
assert!(query.contains("'value2' AS __ggsql_const_size__")); @@ -1727,7 +1799,8 @@ mod tests { &mock_execute, ); - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("FROM __ggsql_global_")); assert!(query.contains(naming::session_id())); assert!(query.contains("'value' AS __ggsql_const_fill__")); @@ -2259,8 +2332,8 @@ mod tests { assert_eq!(global_df.height(), 3); // Verify spec has x and y aesthetics merged into layer - assert_eq!(result.specs.len(), 1); - let layer = &result.specs[0].layers[0]; + assert_eq!(result.spec.layers.len(), 1); + let layer = &result.spec.layers[0]; assert!( layer.mappings.contains_key("x"), "Layer should have x from global mapping" @@ -2721,7 +2794,7 @@ mod tests { let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; assert!(aes.contains_key("stroke")); assert!(aes.contains_key("fill")); @@ -2739,7 +2812,7 @@ mod tests { "#; let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; let stroke = aes.get("stroke").unwrap(); assert_eq!(stroke.column_name().unwrap(), "island"); @@ -2754,7 +2827,7 @@ mod tests { "#; let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; let stroke = aes.get("stroke").unwrap(); assert_eq!(stroke.column_name().unwrap(), "__ggsql_const_color_0__"); diff --git a/src/lib.rs b/src/lib.rs index 9eec2d49..61273bd6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,10 +27,10 @@ ggsql splits queries at the `VISUALISE` boundary: ## Core Components +- [`api`] - Validation API (validate, Validated) - [`parser`] - Query parsing and AST generation -- [`engine`] - Core execution engine -- [`readers`] - 
Data source abstraction layer -- [`writers`] - Output format abstraction layer +- [`reader`] - Data source abstraction layer +- [`writer`] - Output format abstraction layer */ pub mod naming; @@ -46,14 +46,18 @@ pub mod writer; #[cfg(feature = "duckdb")] pub mod execute; +pub mod validate; + // Re-export key types for convenience pub use plot::{ AestheticValue, DataSource, Facet, Geom, Layer, Mappings, Plot, Scale, SqlExpression, }; -// Future modules - not yet implemented -// #[cfg(feature = "engine")] -// pub mod engine; +// Re-export validation types and functions +pub use validate::{validate, Location, Validated, ValidationError, ValidationWarning}; + +// Re-export reader types +pub use reader::{Metadata, Spec}; // DataFrame abstraction (wraps Polars) pub use polars::prelude::DataFrame; @@ -113,7 +117,7 @@ mod integration_tests { FROM generate_series(0, 4) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has temporal type (DuckDB returns Datetime for DATE + INTERVAL) assert_eq!(df.get_column_names(), vec!["date", "revenue"]); @@ -173,7 +177,7 @@ mod integration_tests { FROM generate_series(0, 3) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has Datetime type let timestamp_col = df.column("timestamp").unwrap(); @@ -221,7 +225,7 @@ mod integration_tests { // Real SQL that users would write let sql = "SELECT 1 as int_col, 2.5 as float_col, true as bool_col"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types are preserved // DuckDB treats numeric literals as DECIMAL, which we convert to Float64 @@ -276,7 +280,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES (1, 2.5, 'a'), (2, NULL, 'b'), (NULL, 3.5, NULL)) AS t(int_col, float_col, str_col)"; - let df = reader.execute(sql).unwrap(); + 
let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -326,7 +330,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES ('A', 10), ('B', 20), ('A', 15), ('C', 30)) AS t(category, value)"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); let mut spec = Plot::new(); let layer = Layer::new(Geom::bar()) @@ -372,7 +376,7 @@ mod integration_tests { GROUP BY day "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify temporal type is preserved through aggregation // DATE_TRUNC returns Date type (not Datetime) @@ -410,7 +414,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT 0.1 as small, 123.456 as medium, 999999.999999 as large"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // All should be Float64 assert!(matches!( @@ -462,7 +466,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT CAST(1 AS TINYINT) as tiny, CAST(1000 AS SMALLINT) as small, CAST(1000000 AS INTEGER) as int, CAST(1000000000000 AS BIGINT) as big"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -530,7 +534,7 @@ mod integration_tests { // Prepare data - this parses, injects constants into global data, and replaces literals with columns let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // Verify constants were injected into global data (not layer-specific data) // Both layers share __global__ data for faceting compatibility @@ -547,7 +551,7 @@ mod integration_tests { !prepared.data.contains_key(&naming::layer_key(1)), "Layer 1 
should use global data, not layer-specific data" ); - assert_eq!(prepared.specs.len(), 1); + assert_eq!(prepared.spec.layers.len(), 2); // Verify global data contains layer-indexed constant columns let global_df = prepared.data.get(naming::GLOBAL_DATA_KEY).unwrap(); @@ -565,7 +569,7 @@ mod integration_tests { // Generate Vega-Lite let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Verify we have two layers @@ -638,7 +642,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // All layers should use global data for faceting to work assert!( @@ -685,7 +689,7 @@ mod integration_tests { // Generate Vega-Lite and verify faceting structure let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Should have facet structure (row and column) @@ -726,7 +730,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // Should have global data with the constant injected assert!( @@ -750,7 +754,7 @@ mod integration_tests { // Generate Vega-Lite and verify it works let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Both layers should have color 
field-mapped to their indexed constant columns diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 8ee13ebb..1824d6dc 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -5,7 +5,13 @@ use crate::reader::data::init_builtin_data; use crate::reader::{connection::ConnectionInfo, Reader}; use crate::{DataFrame, GgsqlError, Result}; +use arrow::ipc::reader::FileReader; +use duckdb::vtab::arrow::{arrow_recordbatch_to_query_params, ArrowVTab}; use duckdb::{params, Connection}; +use polars::io::SerWriter; +use polars::prelude::*; +use std::collections::HashSet; +use std::io::Cursor; /// DuckDB database reader /// @@ -19,14 +25,15 @@ use duckdb::{params, Connection}; /// /// // In-memory database /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -/// let df = reader.execute("SELECT 1 as x, 2 as y")?; +/// let df = reader.execute_sql("SELECT 1 as x, 2 as y")?; /// /// // File-based database /// let reader = DuckDBReader::from_connection_string("duckdb://data.db")?; -/// let df = reader.execute("SELECT * FROM sales")?; +/// let df = reader.execute_sql("SELECT * FROM sales")?; /// ``` pub struct DuckDBReader { conn: Connection, + registered_tables: HashSet, } impl DuckDBReader { @@ -64,7 +71,16 @@ impl DuckDBReader { } }; - Ok(Self { conn }) + // Register Arrow virtual table function for DataFrame registration + conn.register_table_function::("arrow") + .map_err(|e| { + GgsqlError::ReaderError(format!("Failed to register arrow function: {}", e)) + })?; + + Ok(Self { + conn, + registered_tables: HashSet::new(), + }) } /// Get a reference to the underlying DuckDB connection @@ -73,6 +89,81 @@ impl DuckDBReader { pub fn connection(&self) -> &Connection { &self.conn } + + /// Check if a table exists in the database + fn table_exists(&self, name: &str) -> Result { + let sql = "SELECT COUNT(*) FROM information_schema.tables WHERE table_name = ?"; + let count: i64 = self + .conn + .query_row(sql, [name], |row| row.get(0)) + 
.unwrap_or(0); + Ok(count > 0) + } +} + +/// Validate a table name +fn validate_table_name(name: &str) -> Result<()> { + if name.is_empty() { + return Err(GgsqlError::ReaderError("Table name cannot be empty".into())); + } + + // Reject characters that could break double-quoted identifiers or cause issues + let forbidden = ['"', '\0', '\n', '\r']; + for ch in forbidden { + if name.contains(ch) { + return Err(GgsqlError::ReaderError(format!( + "Table name '{}' contains invalid character '{}'", + name, + ch.escape_default() + ))); + } + } + + // Reasonable length limit + if name.len() > 128 { + return Err(GgsqlError::ReaderError(format!( + "Table name '{}' exceeds maximum length of 128 characters", + name + ))); + } + + Ok(()) +} + +/// Convert a Polars DataFrame to DuckDB Arrow query parameters via IPC serialization +fn dataframe_to_arrow_params(df: DataFrame) -> Result<[usize; 2]> { + // Serialize DataFrame to IPC format + let mut buffer = Vec::new(); + { + let mut writer = IpcWriter::new(&mut buffer); + writer.finish(&mut df.clone()).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to serialize DataFrame: {}", e)) + })?; + } + + // Read IPC into arrow crate's RecordBatch + let cursor = Cursor::new(buffer); + let reader = FileReader::try_new(cursor, None) + .map_err(|e| GgsqlError::ReaderError(format!("Failed to read IPC: {}", e)))?; + + // Collect all batches and concatenate if needed + let batches: Vec<_> = reader.filter_map(|r| r.ok()).collect(); + + if batches.is_empty() { + return Err(GgsqlError::ReaderError( + "DataFrame produced no Arrow batches".into(), + )); + } + + // For single batch, use directly; for multiple, concatenate + let rb = if batches.len() == 1 { + batches.into_iter().next().unwrap() + } else { + arrow::compute::concat_batches(&batches[0].schema(), &batches) + .map_err(|e| GgsqlError::ReaderError(format!("Failed to concat batches: {}", e)))? 
+ }; + + Ok(arrow_recordbatch_to_query_params(rb)) } /// Helper struct for building typed columns from rows @@ -294,7 +385,7 @@ impl ColumnBuilder { } impl Reader for DuckDBReader { - fn execute(&self, sql: &str) -> Result { + fn execute_sql(&self, sql: &str) -> Result { use polars::prelude::*; // Check if this is a DDL statement (CREATE, DROP, INSERT, UPDATE, DELETE, ALTER) @@ -413,30 +504,60 @@ impl Reader for DuckDBReader { Ok(df) } - fn validate_columns(&self, sql: &str, columns: &[String]) -> Result<()> { - // Execute the query to get the schema - let df = self.execute(sql)?; + fn register(&mut self, name: &str, df: DataFrame) -> Result<()> { + // Validate table name + validate_table_name(name)?; - // Get column names from the DataFrame - let schema_columns: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); + // Check for duplicates + if self.table_exists(name)? { + return Err(GgsqlError::ReaderError(format!( + "Table '{}' already exists", + name + ))); + } - // Check if all required columns exist - for col in columns { - if !schema_columns.contains(col) { - return Err(GgsqlError::ValidationError(format!( - "Column '{}' not found in query result. 
Available columns: {}", - col, - schema_columns.join(", ") - ))); - } + // Convert DataFrame to Arrow query params + let params = dataframe_to_arrow_params(df)?; + + // Create temp table from Arrow data + let sql = format!( + "CREATE TEMP TABLE \"{}\" AS SELECT * FROM arrow(?, ?)", + name + ); + self.conn.execute(&sql, params).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to register table '{}': {}", name, e)) + })?; + + // Track the table so we can unregister it later + self.registered_tables.insert(name.to_string()); + + Ok(()) + } + + fn unregister(&mut self, name: &str) -> Result<()> { + // Only allow unregistering tables we created via register() + if !self.registered_tables.contains(name) { + return Err(GgsqlError::ReaderError(format!( + "Table '{}' was not registered via this reader", + name + ))); } + // Drop the temp table + let sql = format!("DROP TABLE IF EXISTS \"{}\"", name); + self.conn.execute(&sql, []).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to unregister table '{}': {}", name, e)) + })?; + + // Remove from tracking + self.registered_tables.remove(name); + Ok(()) } + + fn supports_register(&self) -> bool { + true + } } #[cfg(test)] @@ -452,7 +573,7 @@ mod tests { #[test] fn test_simple_query() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let df = reader.execute("SELECT 1 as x, 2 as y").unwrap(); + let df = reader.execute_sql("SELECT 1 as x, 2 as y").unwrap(); assert_eq!(df.shape(), (1, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); @@ -475,38 +596,16 @@ mod tests { .unwrap(); // Query data - let df = reader.execute("SELECT * FROM test").unwrap(); + let df = reader.execute_sql("SELECT * FROM test").unwrap(); assert_eq!(df.shape(), (2, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); } - #[test] - fn test_validate_columns_success() { - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let sql = "SELECT 1 as x, 2 as y"; - - let result = 
reader.validate_columns(sql, &["x".to_string(), "y".to_string()]); - assert!(result.is_ok()); - } - - #[test] - fn test_validate_columns_missing() { - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let sql = "SELECT 1 as x, 2 as y"; - - let result = reader.validate_columns(sql, &["z".to_string()]); - assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("Column 'z' not found")); - } - #[test] fn test_invalid_sql() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let result = reader.execute("INVALID SQL SYNTAX"); + let result = reader.execute_sql("INVALID SQL SYNTAX"); assert!(result.is_err()); } @@ -528,10 +627,160 @@ mod tests { .unwrap(); let df = reader - .execute("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") + .execute_sql("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") .unwrap(); assert_eq!(df.shape(), (2, 2)); assert_eq!(df.get_column_names(), vec!["region", "total"]); } + + #[test] + fn test_register_and_query() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create a DataFrame + let df = DataFrame::new(vec![ + Column::new("x".into(), vec![1i32, 2, 3]), + Column::new("y".into(), vec![10i32, 20, 30]), + ]) + .unwrap(); + + // Register the DataFrame + reader.register("my_table", df).unwrap(); + + // Query the registered table + let result = reader + .execute_sql("SELECT * FROM my_table ORDER BY x") + .unwrap(); + assert_eq!(result.shape(), (3, 2)); + assert_eq!(result.get_column_names(), vec!["x", "y"]); + } + + #[test] + fn test_register_duplicate_name_errors() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let df1 = DataFrame::new(vec![Column::new("a".into(), vec![1i32])]).unwrap(); + let df2 = DataFrame::new(vec![Column::new("b".into(), vec![2i32])]).unwrap(); + + // First registration should succeed + reader.register("dup_table", 
df1).unwrap(); + + // Second registration with same name should fail + let result = reader.register("dup_table", df2); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("already exists")); + } + + #[test] + fn test_register_invalid_table_names() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = DataFrame::new(vec![Column::new("a".into(), vec![1i32])]).unwrap(); + + // Empty name + let result = reader.register("", df.clone()); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("cannot be empty")); + + // Name with double quote + let result = reader.register("bad\"name", df.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid character")); + + // Name with null byte + let result = reader.register("bad\0name", df.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid character")); + + // Name too long + let long_name = "a".repeat(200); + let result = reader.register(&long_name, df); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("exceeds maximum length")); + } + + #[test] + fn test_supports_register() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + assert!(reader.supports_register()); + } + + #[test] + fn test_register_empty_dataframe() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create an empty DataFrame with schema + let df = DataFrame::new(vec![ + Column::new("x".into(), Vec::::new()), + Column::new("y".into(), Vec::::new()), + ]) + .unwrap(); + + reader.register("empty_table", df).unwrap(); + + // Query should return empty result with correct schema + let result = reader.execute_sql("SELECT * FROM empty_table").unwrap(); + assert_eq!(result.shape(), (0, 2)); + assert_eq!(result.get_column_names(), vec!["x", "y"]); 
+ } + + #[test] + fn test_unregister() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = DataFrame::new(vec![Column::new("x".into(), vec![1i32, 2, 3])]).unwrap(); + + reader.register("test_data", df).unwrap(); + + // Should be queryable + let result = reader.execute_sql("SELECT * FROM test_data").unwrap(); + assert_eq!(result.height(), 3); + + // Unregister + reader.unregister("test_data").unwrap(); + + // Should no longer exist + let result = reader.execute_sql("SELECT * FROM test_data"); + assert!(result.is_err()); + } + + #[test] + fn test_unregister_not_registered() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create a table directly (not via register) + reader + .connection() + .execute("CREATE TABLE user_table (x INT)", params![]) + .unwrap(); + + // Should fail - we didn't register this via register() + let result = reader.unregister("user_table"); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("was not registered via this reader")); + } + + #[test] + fn test_reregister_after_unregister() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = DataFrame::new(vec![Column::new("x".into(), vec![1i32, 2, 3])]).unwrap(); + + reader.register("data", df.clone()).unwrap(); + reader.unregister("data").unwrap(); + + // Should be able to register again + reader.register("data", df).unwrap(); + let result = reader.execute_sql("SELECT * FROM data").unwrap(); + assert_eq!(result.height(), 3); + } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 7f3f403a..cfbd271a 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -7,34 +7,101 @@ //! //! All readers implement the `Reader` trait, which provides: //! - SQL query execution → DataFrame conversion -//! - Column validation for query introspection +//! - Visualization query execution → Spec +//! 
- Optional DataFrame registration for queryable tables //! - Connection management and error handling //! //! # Example //! //! ```rust,ignore //! use ggsql::reader::{Reader, DuckDBReader}; +//! use ggsql::writer::{Writer, VegaLiteWriter}; //! +//! // Execute a ggsql query //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -//! let df = reader.execute("SELECT * FROM table")?; +//! let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; +//! +//! // Render to Vega-Lite JSON +//! let writer = VegaLiteWriter::new(); +//! let json = writer.render(&spec)?; +//! +//! // With DataFrame registration +//! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; +//! reader.register("my_table", some_dataframe)?; +//! let spec = reader.execute("SELECT * FROM my_table VISUALISE x, y DRAW point")?; //! ``` -use crate::{DataFrame, Result}; +use std::collections::HashMap; + +use crate::execute::prepare_data_with_executor; +use crate::plot::Plot; +use crate::validate::{validate, ValidationWarning}; +use crate::{DataFrame, GgsqlError, Result}; #[cfg(feature = "duckdb")] pub mod duckdb; pub mod connection; - pub mod data; +mod spec; #[cfg(feature = "duckdb")] pub use duckdb::DuckDBReader; +// ============================================================================ +// Spec - Result of reader.execute() +// ============================================================================ + +/// Result of executing a ggsql query, ready for rendering. 
+pub struct Spec { + /// Single resolved plot specification + pub(crate) plot: Plot, + /// Internal data map (global + layer-specific DataFrames) + pub(crate) data: HashMap, + /// Cached metadata about the prepared visualization + pub(crate) metadata: Metadata, + /// The main SQL query that was executed + pub(crate) sql: String, + /// The raw VISUALISE portion text + pub(crate) visual: String, + /// Per-layer filter/source queries (None = uses global data directly) + pub(crate) layer_sql: Vec>, + /// Per-layer stat transform queries (None = no stat transform) + pub(crate) stat_sql: Vec>, + /// Validation warnings from preparation + pub(crate) warnings: Vec, +} + +/// Metadata about the prepared visualization. +#[derive(Debug, Clone)] +pub struct Metadata { + pub rows: usize, + pub columns: Vec, + pub layer_count: usize, +} + +// ============================================================================ +// Reader Trait +// ============================================================================ + /// Trait for data source readers /// /// Readers execute SQL queries and return Polars DataFrames. /// They provide a uniform interface for different database backends. +/// +/// # DataFrame Registration +/// +/// Some readers support registering DataFrames as queryable tables using +/// the [`register`](Reader::register) method. This allows you to query +/// in-memory DataFrames with SQL, join them with other tables, etc. 
+/// +/// ```rust,ignore +/// // Register a DataFrame (takes ownership) +/// reader.register("sales", sales_df)?; +/// +/// // Now you can query it +/// let result = reader.execute_sql("SELECT * FROM sales WHERE amount > 100")?; +/// ``` pub trait Reader { /// Execute a SQL query and return the result as a DataFrame /// @@ -52,20 +119,256 @@ pub trait Reader { /// - The SQL is invalid /// - The connection fails /// - The table or columns don't exist - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; + + /// Register a DataFrame as a queryable table (takes ownership) + /// + /// After registration, the DataFrame can be queried by name in SQL: + /// ```sql + /// SELECT * FROM WHERE ... + /// ``` + /// + /// # Arguments + /// + /// * `name` - The table name to register under + /// * `df` - The DataFrame to register (ownership is transferred) + /// + /// # Returns + /// + /// `Ok(())` on success, error if registration fails or isn't supported. + /// + /// # Default Implementation + /// + /// Returns an error by default. Override for readers that support registration. + fn register(&mut self, name: &str, _df: DataFrame) -> Result<()> { + Err(GgsqlError::ReaderError(format!( + "This reader does not support DataFrame registration for table '{}'", + name + ))) + } - /// Validate that specified columns exist in a query result + /// Unregister a previously registered table + /// + /// # Arguments + /// + /// * `name` - The table name to unregister + /// + /// # Returns /// - /// This is useful for checking column names before visualization - /// to provide better error messages. + /// `Ok(())` on success. + /// + /// # Default Implementation + /// + /// Returns an error by default. Override for readers that support registration. 
+ fn unregister(&mut self, name: &str) -> Result<()> { + Err(GgsqlError::ReaderError(format!( + "This reader does not support unregistering table '{}'", + name + ))) + } + + /// Check if this reader supports DataFrame registration + /// + /// # Returns + /// + /// `true` if [`register`](Reader::register) is implemented, `false` otherwise. + fn supports_register(&self) -> bool { + false + } + + /// Execute a ggsql query and return the visualization specification. + /// + /// This is the main entry point for creating visualizations. It parses the query, + /// executes the SQL portion, and returns a `Spec` ready for rendering. /// /// # Arguments /// - /// * `sql` - The SQL query to introspect - /// * `columns` - Column names to validate + /// * `query` - The ggsql query (SQL + VISUALISE clause) /// /// # Returns /// - /// Ok(()) if all columns exist, otherwise an error - fn validate_columns(&self, sql: &str, columns: &[String]) -> Result<()>; + /// A `Spec` containing the resolved visualization specification and data. 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The query syntax is invalid + /// - The query has no VISUALISE clause + /// - The SQL execution fails + /// + /// # Example + /// + /// ```rust,ignore + /// use ggsql::reader::{Reader, DuckDBReader}; + /// use ggsql::writer::{Writer, VegaLiteWriter}; + /// + /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; + /// let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; + /// + /// let writer = VegaLiteWriter::new(); + /// let json = writer.render(&spec)?; + /// ``` + #[cfg(feature = "duckdb")] + fn execute(&self, query: &str) -> Result { + // Run validation first to capture warnings + let validated = validate(query)?; + let warnings: Vec = validated.warnings().to_vec(); + + // Prepare data (this also validates, but we want the warnings from above) + let prepared_data = prepare_data_with_executor(query, |sql| self.execute_sql(sql))?; + + Ok(Spec::new( + prepared_data.spec, + prepared_data.data, + prepared_data.sql, + prepared_data.visual, + prepared_data.layer_sql, + prepared_data.stat_sql, + warnings, + )) + } +} + +#[cfg(test)] +#[cfg(all(feature = "duckdb", feature = "vegalite"))] +mod tests { + use super::*; + use crate::writer::{VegaLiteWriter, Writer}; + + #[test] + fn test_execute_and_render() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let spec = reader + .execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point") + .unwrap(); + + assert_eq!(spec.plot().layers.len(), 1); + assert_eq!(spec.metadata().layer_count, 1); + assert!(spec.data().is_some()); + + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + assert!(result.contains("point")); + } + + #[test] + fn test_execute_metadata() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let spec = reader + .execute( + "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) VISUALISE x, y DRAW 
point", + ) + .unwrap(); + + let metadata = spec.metadata(); + assert_eq!(metadata.rows, 3); + assert_eq!(metadata.columns.len(), 2); + assert!(metadata.columns.contains(&"x".to_string())); + assert!(metadata.columns.contains(&"y".to_string())); + assert_eq!(metadata.layer_count, 1); + } + + #[test] + fn test_execute_with_cte() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + WITH data AS ( + SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) + ) + SELECT * FROM data + VISUALISE x, y DRAW point + "#; + + let spec = reader.execute(query).unwrap(); + + assert_eq!(spec.plot().layers.len(), 1); + assert!(spec.data().is_some()); + let df = spec.data().unwrap(); + assert_eq!(df.height(), 2); + } + + #[test] + fn test_render_multi_layer() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) + VISUALISE + DRAW point MAPPING x AS x, y AS y + DRAW line MAPPING x AS x, y AS y + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + + assert!(result.contains("layer")); + } + + #[test] + fn test_register_and_query() { + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let df = df! 
{ + "x" => [1i32, 2, 3], + "y" => [10i32, 20, 30], + } + .unwrap(); + + reader.register("my_data", df).unwrap(); + + let query = "SELECT * FROM my_data VISUALISE x, y DRAW point"; + let spec = reader.execute(query).unwrap(); + + assert_eq!(spec.metadata().rows, 3); + assert!(spec.metadata().columns.contains(&"x".to_string())); + + let writer = VegaLiteWriter::new(); + let result = writer.render(&spec).unwrap(); + assert!(result.contains("point")); + } + + #[test] + fn test_register_and_join() { + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let sales = df! { + "id" => [1i32, 2, 3], + "amount" => [100i32, 200, 300], + "product_id" => [1i32, 1, 2], + } + .unwrap(); + + let products = df! { + "id" => [1i32, 2], + "name" => ["Widget", "Gadget"], + } + .unwrap(); + + reader.register("sales", sales).unwrap(); + reader.register("products", products).unwrap(); + + let query = r#" + SELECT s.id, s.amount, p.name + FROM sales s + JOIN products p ON s.product_id = p.id + VISUALISE id AS x, amount AS y + DRAW bar + "#; + + let spec = reader.execute(query).unwrap(); + assert_eq!(spec.metadata().rows, 3); + } + + #[test] + fn test_execute_no_viz_fails() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = "SELECT 1 as x, 2 as y"; + + let result = reader.execute(query); + assert!(result.is_err()); + } } diff --git a/src/reader/spec.rs b/src/reader/spec.rs new file mode 100644 index 00000000..4b1fc5bd --- /dev/null +++ b/src/reader/spec.rs @@ -0,0 +1,120 @@ +//! Implementation of Spec methods. 
+ +use std::collections::HashMap; + +use crate::naming; +use crate::plot::Plot; +use crate::validate::ValidationWarning; +use crate::DataFrame; + +use super::{Metadata, Spec}; + +impl Spec { + /// Create a new Spec from PreparedData + pub(crate) fn new( + plot: Plot, + data: HashMap, + sql: String, + visual: String, + layer_sql: Vec>, + stat_sql: Vec>, + warnings: Vec, + ) -> Self { + // Compute metadata from data + let (rows, columns) = if let Some(df) = data.get(naming::GLOBAL_DATA_KEY) { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else if let Some(df) = data.values().next() { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else { + (0, Vec::new()) + }; + + let layer_count = plot.layers.len(); + let metadata = Metadata { + rows, + columns, + layer_count, + }; + + Self { + plot, + data, + metadata, + sql, + visual, + layer_sql, + stat_sql, + warnings, + } + } + + /// Get the resolved plot specification. + pub fn plot(&self) -> &Plot { + &self.plot + } + + /// Get visualization metadata. + pub fn metadata(&self) -> &Metadata { + &self.metadata + } + + /// Number of layers. + pub fn layer_count(&self) -> usize { + self.plot.layers.len() + } + + /// Get global data (main query result). + pub fn data(&self) -> Option<&DataFrame> { + self.data.get(naming::GLOBAL_DATA_KEY) + } + + /// Get layer-specific data (from FILTER or FROM clause). + pub fn layer_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.data.get(&naming::layer_key(layer_index)) + } + + /// Get stat transform data (e.g., histogram bins, density estimates). + pub fn stat_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.layer_data(layer_index) + } + + /// Get internal data map (all DataFrames by key). + pub fn data_map(&self) -> &HashMap { + &self.data + } + + /// The main SQL query that was executed. 
+ pub fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + pub fn visual(&self) -> &str { + &self.visual + } + + /// Layer filter/source query, or `None` if using global data. + pub fn layer_sql(&self, layer_index: usize) -> Option<&str> { + self.layer_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Stat transform query, or `None` if no stat transform. + pub fn stat_sql(&self, layer_index: usize) -> Option<&str> { + self.stat_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Validation warnings from preparation. + pub fn warnings(&self) -> &[ValidationWarning] { + &self.warnings + } +} diff --git a/src/rest.rs b/src/rest.rs index 88fb61a6..8f2338c4 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -31,10 +31,8 @@ use tower_http::cors::{Any, CorsLayer}; use tracing::info; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; -use ggsql::{parser, GgsqlError, VERSION}; +use ggsql::{parser, validate, GgsqlError, VERSION}; -#[cfg(feature = "duckdb")] -use ggsql::execute::prepare_data_with_executor; #[cfg(feature = "duckdb")] use ggsql::reader::{DuckDBReader, Reader}; @@ -442,61 +440,38 @@ async fn query_handler( #[cfg(feature = "duckdb")] if request.reader.starts_with("duckdb://") { - // Create query executor that handles shared state vs new reader - let execute_query = |sql: &str| -> Result { - if request.reader == "duckdb://memory" && state.reader.is_some() { - let reader_mutex = state.reader.as_ref().unwrap(); - let reader = reader_mutex.lock().map_err(|e| { - GgsqlError::InternalError(format!("Failed to lock reader: {}", e)) - })?; - reader.execute(sql) - } else { - let reader = DuckDBReader::from_connection_string(&request.reader)?; - reader.execute(sql) - } - }; - - // Prepare data using shared execution logic - let prepared = prepare_data_with_executor(&request.query, execute_query)?; - - // Get metadata from available data - let (rows, columns) = if let Some(df) = prepared.data.get("__global__") 
{ - let (r, _) = df.shape(); - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (r, cols) + // Use shared reader or create new one + let spec = if request.reader == "duckdb://memory" && state.reader.is_some() { + let reader_mutex = state.reader.as_ref().unwrap(); + let reader = reader_mutex + .lock() + .map_err(|e| GgsqlError::InternalError(format!("Failed to lock reader: {}", e)))?; + reader.execute(&request.query)? } else { - // Use first available data for metadata - let df = prepared.data.values().next().unwrap(); - let (r, _) = df.shape(); - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (r, cols) + let reader = DuckDBReader::from_connection_string(&request.reader)?; + reader.execute(&request.query)? }; - let first_spec = &prepared.specs[0]; + // Get metadata + let metadata = spec.metadata(); // Generate visualization output using writer #[cfg(feature = "vegalite")] if request.writer == "vegalite" { let writer = VegaLiteWriter::new(); - let json_output = writer.write(first_spec, &prepared.data)?; + let json_output = writer.render(&spec)?; let spec_value: serde_json::Value = serde_json::from_str(&json_output) .map_err(|e| GgsqlError::WriterError(format!("Failed to parse JSON: {}", e)))?; + let plot = spec.plot(); + let result = QueryResult { spec: spec_value, metadata: QueryMetadata { - rows, - columns, - global_mappings: format!("{:?}", first_spec.global_mappings), - layers: first_spec.layers.len(), + rows: metadata.rows, + columns: metadata.columns.clone(), + global_mappings: format!("{:?}", plot.global_mappings), + layers: plot.layers.len(), }, }; @@ -525,13 +500,45 @@ async fn query_handler( } /// POST /api/v1/parse - Parse a ggsql query +#[cfg(feature = "duckdb")] +async fn parse_handler( + Json(request): Json, +) -> Result>, ApiErrorResponse> { + info!("Parsing query: {} chars", request.query.len()); + + // Validate query to get sql/viz portions + let validated = 
validate(&request.query)?; + + // Parse ggsql portion + let specs = parser::parse_query(&request.query)?; + + // Convert specs to JSON + let specs_json: Vec = specs + .iter() + .map(|spec| serde_json::to_value(spec).unwrap_or(serde_json::Value::Null)) + .collect(); + + let result = ParseResult { + sql_portion: validated.sql().to_string(), + viz_portion: validated.visual().to_string(), + specs: specs_json, + }; + + Ok(Json(ApiSuccess { + status: "success".to_string(), + data: result, + })) +} + +/// POST /api/v1/parse - Parse a ggsql query +#[cfg(not(feature = "duckdb"))] async fn parse_handler( Json(request): Json, ) -> Result>, ApiErrorResponse> { info!("Parsing query: {} chars", request.query.len()); - // Split query - let (sql_part, viz_part) = parser::split_query(&request.query)?; + // Validate query to get sql/viz portions + let validated = validate(&request.query)?; // Parse ggsql portion let specs = parser::parse_query(&request.query)?; @@ -543,8 +550,8 @@ async fn parse_handler( .collect(); let result = ParseResult { - sql_portion: sql_part, - viz_portion: viz_part, + sql_portion: validated.sql().to_string(), + viz_portion: validated.visual().to_string(), specs: specs_json, }; diff --git a/src/validate.rs b/src/validate.rs new file mode 100644 index 00000000..79bf4ed1 --- /dev/null +++ b/src/validate.rs @@ -0,0 +1,274 @@ +//! Query validation without SQL execution. +//! +//! This module provides query syntax and semantic validation without executing +//! any SQL. Use this for IDE integration, syntax checking, and query inspection. + +use crate::parser; +use crate::Result; + +// ============================================================================ +// Core Types +// ============================================================================ + +/// Result of `validate()` - query inspection and validation without SQL execution. 
+pub struct Validated {
+    sql: String,
+    visual: String,
+    has_visual: bool,
+    tree: Option<tree_sitter::Tree>,
+    valid: bool,
+    errors: Vec<ValidationError>,
+    warnings: Vec<ValidationWarning>,
+}
+
+impl Validated {
+    /// Whether the query contains a VISUALISE clause.
+    pub fn has_visual(&self) -> bool {
+        self.has_visual
+    }
+
+    /// The SQL portion (before VISUALISE).
+    pub fn sql(&self) -> &str {
+        &self.sql
+    }
+
+    /// The VISUALISE portion (raw text).
+    pub fn visual(&self) -> &str {
+        &self.visual
+    }
+
+    /// CST for advanced inspection.
+    pub fn tree(&self) -> Option<&tree_sitter::Tree> {
+        self.tree.as_ref()
+    }
+
+    /// Whether the query is valid (no errors).
+    pub fn valid(&self) -> bool {
+        self.valid
+    }
+
+    /// Validation errors.
+    pub fn errors(&self) -> &[ValidationError] {
+        &self.errors
+    }
+
+    /// Validation warnings.
+    pub fn warnings(&self) -> &[ValidationWarning] {
+        &self.warnings
+    }
+}
+
+/// A validation error (fatal).
+#[derive(Debug, Clone)]
+pub struct ValidationError {
+    pub message: String,
+    pub location: Option<Location>,
+}
+
+/// A validation warning (non-fatal).
+#[derive(Debug, Clone)]
+pub struct ValidationWarning {
+    pub message: String,
+    pub location: Option<Location>,
+}
+
+/// Location within a query string (0-based).
+#[derive(Debug, Clone)]
+pub struct Location {
+    pub line: usize,
+    pub column: usize,
+}
+
+// ============================================================================
+// Validation Function
+// ============================================================================
+
+/// Validate query syntax and semantics without executing SQL.
+pub fn validate(query: &str) -> Result<Validated> {
+    let mut errors = Vec::new();
+    let warnings = Vec::new();
+
+    // Split to determine if there's a viz portion
+    let (sql_part, viz_part) = match parser::split_query(query) {
+        Ok((sql, viz)) => (sql, viz),
+        Err(e) => {
+            // Split error - return as validation error
+            errors.push(ValidationError {
+                message: e.to_string(),
+                location: None,
+            });
+            return Ok(Validated {
+                sql: String::new(),
+                visual: String::new(),
+                has_visual: false,
+                tree: None,
+                valid: false,
+                errors,
+                warnings,
+            });
+        }
+    };
+
+    let has_visual = !viz_part.trim().is_empty();
+
+    // Parse the full query to get the CST
+    let tree = if has_visual {
+        let mut ts_parser = tree_sitter::Parser::new();
+        ts_parser
+            .set_language(&tree_sitter_ggsql::language())
+            .map_err(|e| {
+                crate::GgsqlError::InternalError(format!("Failed to set language: {}", e))
+            })?;
+        ts_parser.parse(query, None)
+    } else {
+        None
+    };
+
+    // If no visualization, just syntax check passed
+    if !has_visual {
+        return Ok(Validated {
+            sql: sql_part,
+            visual: viz_part,
+            has_visual,
+            tree,
+            valid: true,
+            errors,
+            warnings,
+        });
+    }
+
+    // Parse to get plot specifications for validation
+    let plots = match parser::parse_query(query) {
+        Ok(p) => p,
+        Err(e) => {
+            errors.push(ValidationError {
+                message: e.to_string(),
+                location: None,
+            });
+            return Ok(Validated {
+                sql: sql_part,
+                visual: viz_part,
+                has_visual,
+                tree,
+                valid: false,
+                errors,
+                warnings,
+            });
+        }
+    };
+
+    // Validate the single plot (we only support one VISUALISE statement)
+    if let Some(plot) = plots.first() {
+        // Validate each layer
+        for (layer_idx, layer) in plot.layers.iter().enumerate() {
+            let context = format!("Layer {}", layer_idx + 1);
+
+            // Check required aesthetics
+            // Note: Without schema data, we can only check if mappings exist,
+            // not if the columns are valid. We skip this check for wildcards.
+ if !layer.mappings.wildcard { + if let Err(e) = layer.validate_required_aesthetics() { + errors.push(ValidationError { + message: format!("{}: {}", context, e), + location: None, + }); + } + } + + // Validate SETTING parameters + if let Err(e) = layer.validate_settings() { + errors.push(ValidationError { + message: format!("{}: {}", context, e), + location: None, + }); + } + } + } + + Ok(Validated { + sql: sql_part, + visual: viz_part, + has_visual, + tree, + valid: errors.is_empty(), + errors, + warnings, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validate_with_visual() { + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y").unwrap(); + assert!(validated.has_visual()); + assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y"); + assert!(validated.visual().starts_with("VISUALISE")); + assert!(validated.tree().is_some()); + assert!(validated.valid()); + } + + #[test] + fn test_validate_without_visual() { + let validated = validate("SELECT 1 as x, 2 as y").unwrap(); + assert!(!validated.has_visual()); + assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y"); + assert!(validated.visual().is_empty()); + assert!(validated.tree().is_none()); + assert!(validated.valid()); + } + + #[test] + fn test_validate_valid_query() { + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y").unwrap(); + assert!( + validated.valid(), + "Expected valid query: {:?}", + validated.errors() + ); + assert!(validated.errors().is_empty()); + } + + #[test] + fn test_validate_missing_required_aesthetic() { + // Point requires x and y, but we only provide x + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x").unwrap(); + assert!(!validated.valid()); + assert!(!validated.errors().is_empty()); + assert!(validated.errors()[0].message.contains("y")); + } + + #[test] + fn test_validate_syntax_error() { + let validated = validate("SELECT 1 VISUALISE DRAW 
invalidgeom").unwrap();
+        assert!(!validated.valid());
+        assert!(!validated.errors().is_empty());
+    }
+
+    #[test]
+    fn test_validate_sql_and_visual_content() {
+        let query = "SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y DRAW line MAPPING x AS x, y AS y";
+        let validated = validate(query).unwrap();
+
+        assert!(validated.has_visual());
+        assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y");
+        assert!(validated.visual().contains("DRAW point"));
+        assert!(validated.visual().contains("DRAW line"));
+        assert!(validated.valid());
+    }
+
+    #[test]
+    fn test_validate_sql_only() {
+        let query = "SELECT 1 as x, 2 as y";
+        let validated = validate(query).unwrap();
+
+        // SQL-only queries should be valid (just syntax check)
+        assert!(validated.valid());
+        assert!(validated.errors().is_empty());
+    }
+}
diff --git a/src/writer/mod.rs b/src/writer/mod.rs
index 7f026e6b..b06bf332 100644
--- a/src/writer/mod.rs
+++ b/src/writer/mod.rs
@@ -14,12 +14,17 @@
 //!
 //! ```rust,ignore
 //! use ggsql::writer::{Writer, VegaLiteWriter};
+//! use ggsql::reader::{Reader, DuckDBReader};
+//!
+//! let reader = DuckDBReader::from_connection_string("duckdb://memory")?;
+//! let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?;
 //!
 //! let writer = VegaLiteWriter::new();
-//! let json = writer.write(&spec, &dataframe)?;
+//! let json = writer.render(&spec)?;
 //! println!("{}", json);
 //! ```
 
+use crate::reader::Spec;
 use crate::{DataFrame, Plot, Result};
 use std::collections::HashMap;
 
@@ -33,7 +38,15 @@ pub use vegalite::VegaLiteWriter;
 ///
 /// Writers take a Plot and data sources and produce formatted output
 /// (JSON, R code, PNG bytes, etc.).
+///
+/// # Associated Types
+///
+/// * `Output` - The type returned by `write()` and `render()`. Use `Option<String>`
+///   for text output, `Option<Vec<u8>>` for binary, `()` for void writers, etc.
 pub trait Writer {
+    /// The output type produced by this writer.
+    type Output;
+
     /// Generate output from a visualization specification and data sources
     ///
     /// # Arguments
@@ -44,7 +57,7 @@ pub trait Writer {
     ///
     /// # Returns
     ///
-    /// A string containing the formatted output (JSON, code, etc.)
+    /// The writer's output, depends on writer implementation.
     ///
     /// # Errors
     ///
@@ -52,7 +65,7 @@ pub trait Writer {
     /// - The spec is incompatible with this writer
     /// - The data doesn't match the spec's requirements
     /// - Output generation fails
-    fn write(&self, spec: &Plot, data: &HashMap<String, DataFrame>) -> Result<String>;
+    fn write(&self, spec: &Plot, data: &HashMap<String, DataFrame>) -> Result<Self::Output>;
 
     /// Validate that a spec is compatible with this writer
     ///
@@ -67,4 +80,32 @@ pub trait Writer {
     ///
     /// Ok(()) if the spec is compatible, otherwise an error
     fn validate(&self, spec: &Plot) -> Result<()>;
+
+    /// Render a Spec to output format
+    ///
+    /// This is the main entry point for generating visualization output.
+    ///
+    /// # Arguments
+    ///
+    /// * `spec` - The prepared visualization specification from `reader.execute()`
+    ///
+    /// # Returns
+    ///
+    /// The writer's output (type depends on writer implementation)
+    ///
+    /// # Example
+    ///
+    /// ```rust,ignore
+    /// use ggsql::reader::{Reader, DuckDBReader};
+    /// use ggsql::writer::{Writer, VegaLiteWriter};
+    ///
+    /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?;
+    /// let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?;
+    ///
+    /// let writer = VegaLiteWriter::new();
+    /// let json = writer.render(&spec)?;
+    /// ```
+    fn render(&self, spec: &Spec) -> Result<Self::Output> {
+        self.write(spec.plot(), spec.data_map())
+    }
 }
diff --git a/src/writer/vegalite.rs b/src/writer/vegalite.rs
index 01bf884b..ec86589a 100644
--- a/src/writer/vegalite.rs
+++ b/src/writer/vegalite.rs
@@ -999,6 +999,8 @@ impl VegaLiteWriter {
 }
 
 impl Writer for VegaLiteWriter {
+    type Output = String;
+
     fn write(&self, spec: &Plot, data: &HashMap<String, DataFrame>) -> Result<String> {
         // Validate spec before processing
self.validate(spec)?;