From 65177405d016d9c81b46fd67baef78a4ec73d560 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 28 Jan 2026 15:13:12 +0000 Subject: [PATCH 01/12] Implement high level Rust API --- CLAUDE.md | 143 ++++++-- ggsql-jupyter/src/executor.rs | 27 +- src/api.rs | 606 ++++++++++++++++++++++++++++++++++ src/cli.rs | 106 +++--- src/doc/API.md | 520 +++++++++++++++++++++++++++++ src/execute.rs | 158 ++++++--- src/lib.rs | 23 +- src/reader/duckdb.rs | 50 +-- src/reader/mod.rs | 58 +++- src/rest.rs | 98 +++--- 10 files changed, 1560 insertions(+), 229 deletions(-) create mode 100644 src/api.rs create mode 100644 src/doc/API.md diff --git a/CLAUDE.md b/CLAUDE.md index bdc93293..b5bafd99 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -147,6 +147,79 @@ DRAW line MAPPING month AS x, total AS y --- +## Public API (`src/api.rs`) + +### Quick Start + +```rust +use ggsql::{prepare, reader::DuckDBReader, writer::VegaLiteWriter}; + +// Create a reader +let reader = DuckDBReader::from_connection_string("duckdb://memory")?; + +// Prepare the visualization +let prepared = ggsql::prepare( + "SELECT x, y FROM data VISUALISE x, y DRAW point", + &reader +)?; + +// Render to Vega-Lite JSON +let writer = VegaLiteWriter::new(); +let json = prepared.render(&writer)?; +``` + +### Core Functions + +| Function | Purpose | +| ------------------------ | ------------------------------------------------------ | +| `prepare(query, reader)` | Main entry point: parse, execute SQL, resolve mappings | +| `render(writer)` | Generate output (Vega-Lite JSON) from prepared data | +| `validate(query)` | Validate syntax + semantics, inspect query structure | + +### Key Types + +**`Validated`** - Result of `validate()`: + +- `has_visual()` - Whether query has VISUALISE clause +- `sql()` - The SQL portion (before VISUALISE) +- `visual()` - The VISUALISE portion (raw text) +- `tree()` - CST for advanced inspection +- `valid()` - Whether query is valid +- `errors()` - Validation errors +- `warnings()` - Validation 
warnings + +**`Prepared`** - Result of `prepare()`, ready for rendering: + +- `render(writer)` - Generate output (Vega-Lite JSON) +- `plot()` - Resolved plot specification +- `metadata()` - Rows, columns, layer count +- `warnings()` - Validation warnings from preparation +- `data()` / `layer_data(i)` / `stat_data(i)` - Access DataFrames +- `sql()` / `visual()` / `layer_sql(i)` / `stat_sql(i)` - Query introspection + +**`Metadata`**: + +- `rows` - Number of rows in primary data +- `columns` - Column names +- `layer_count` - Number of layers + +### Reader & Writer + +**Reader trait** (data source abstraction): + +- `execute(sql)` - Run SQL, return DataFrame +- `register(name, df)` - Register DataFrame as table +- Implementation: `DuckDBReader` + +**Writer trait** (output format abstraction): + +- `write(spec, data)` - Generate output string +- Implementation: `VegaLiteWriter` (Vega-Lite v6 JSON) + +For detailed API documentation, see [`src/doc/API.md`](src/doc/API.md). + +--- + ## Component Breakdown ### 1. Parser Module (`src/parser/`) @@ -462,7 +535,6 @@ pub fn parse_connection_string(uri: &str) -> Result { The codebase includes connection string parsing and feature flags for additional readers, but they are not yet implemented: - **PostgreSQL Reader** (`postgres://...`) - - Feature flag: `postgres` - Connection string parsing exists in `connection.rs` - Reader implementation: Not yet available @@ -800,7 +872,9 @@ When running in Positron IDE, the extension provides enhanced functionality: - Works with any narwhals-compatible DataFrame (polars, pandas, etc.) 
- LazyFrames are collected automatically - Returns native `altair.Chart` objects for easy display and customization -- Query splitting to separate SQL from VISUALISE portions +- Two-stage API: `prepare()` → `render()` +- DuckDB reader with DataFrame registration +- Query introspection (SQL, layer queries, stat queries) **Installation**: @@ -817,26 +891,41 @@ maturin develop import ggsql import polars as pl -# Split a ggSQL query into SQL and VISUALISE portions -sql, viz = ggsql.split_query(""" - SELECT date, revenue FROM sales - VISUALISE date AS x, revenue AS y - DRAW line -""") - -# Execute SQL and render to Altair chart +# Create reader and register data +reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +reader.register("data", df) + +# Prepare visualization +prepared = ggsql.prepare( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) + +# Inspect +print(f"Rows: {prepared.metadata()['rows']}") +print(f"SQL: {prepared.sql()}") -# Display or save -chart.display() # In Jupyter -chart.save("chart.html") +# Render to Vega-Lite JSON +writer = ggsql.VegaLiteWriter() +json_output = prepared.render(writer) ``` +**Classes**: + +| Class | Description | +| -------------------------- | ---------------------------- | +| `DuckDBReader(connection)` | Database reader | +| `VegaLiteWriter()` | Vega-Lite JSON output writer | +| `Validated` | Result of `validate()` | + **Functions**: -- `split_query(query: str) -> tuple[str, str]` - Split ggSQL query into SQL and VISUALISE portions -- `render_altair(df, viz, **kwargs) -> altair.Chart` - Render DataFrame with VISUALISE spec to Altair chart +| Function | Description | +| ------------------------ | ------------------------------------------------ | +| `validate(query)` | Syntax/semantic validation with query inspection | +| `prepare(query, reader)` | Full preparation pipeline | +| `render_altair(df, viz)` | 
Render DataFrame to Altair chart | **Dependencies**: @@ -920,22 +1009,23 @@ cargo build --all-features ``` Where `` can be: + - Empty: `VISUALISE` (layers must define all mappings) - Mappings: `VISUALISE x, y, date AS x` (mixed implicit/explicit) - Wildcard: `VISUALISE *` (map all columns) ### Clause Types -| Clause | Repeatable | Purpose | Example | -| -------------- | ---------- | ------------------ | ------------------------------------ | -| `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | -| `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | -| `SCALE` | ✅ Yes | Configure scales | `SCALE x SETTING type => 'date'` | -| `FACET` | ❌ No | Small multiples | `FACET WRAP region` | -| `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` | -| `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` | -| `GUIDE` | ✅ Yes | Legend/axis config | `GUIDE color SETTING position => 'right'` | -| `THEME` | ❌ No | Visual styling | `THEME minimal` | +| Clause | Repeatable | Purpose | Example | +| ----------- | ---------- | ------------------ | ----------------------------------------- | +| `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | +| `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | +| `SCALE` | ✅ Yes | Configure scales | `SCALE x SETTING type => 'date'` | +| `FACET` | ❌ No | Small multiples | `FACET WRAP region` | +| `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` | +| `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` | +| `GUIDE` | ✅ Yes | Legend/axis config | `GUIDE color SETTING position => 'right'` | +| `THEME` | ❌ No | Visual styling | `THEME minimal` | ### DRAW Clause (Layers) @@ -1201,7 +1291,6 @@ COORD cartesian SETTING xlim => [0, 100], ylim => [0, 200] LABEL x => 'Category', y => 'Count' ``` - ### LABEL Clause **Syntax**: diff --git a/ggsql-jupyter/src/executor.rs 
b/ggsql-jupyter/src/executor.rs index 1c38e3ae..0f523ebb 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -5,10 +5,9 @@ use anyhow::Result; use ggsql::{ - execute::prepare_data, - parser, + prepare, validate, reader::{DuckDBReader, Reader}, - writer::{VegaLiteWriter, Writer}, + writer::VegaLiteWriter, }; use polars::frame::DataFrame; @@ -54,11 +53,11 @@ impl QueryExecutor { pub fn execute(&self, code: &str) -> Result { tracing::debug!("Executing query: {} chars", code.len()); - // 1. Split query to check if there's a visualization - let (_sql_part, viz_part) = parser::split_query(code)?; + // 1. Validate to check if there's a visualization + let validated = validate(code)?; // 2. Check if there's a visualization - if viz_part.is_empty() { + if !validated.has_visual() { // Pure SQL query - execute directly and return DataFrame let df = self.reader.execute(code)?; tracing::info!( @@ -69,17 +68,21 @@ impl QueryExecutor { return Ok(ExecutionResult::DataFrame(df)); } - // 3. Prepare data using shared execution logic (handles layer sources) - let prepared = prepare_data(code, &self.reader)?; + // 3. Prepare data using the new API + let prepared = prepare(code, &self.reader)?; - tracing::info!("Data sources prepared: {} sources", prepared.data.len()); + tracing::info!( + "Data prepared: {} rows, {} layers", + prepared.metadata().rows, + prepared.metadata().layer_count + ); - // 4. Generate Vega-Lite spec (use first spec if multiple) - let vega_json = self.writer.write(&prepared.specs[0], &prepared.data)?; + // 4. Render to Vega-Lite + let vega_json = prepared.render(&self.writer)?; tracing::debug!("Generated Vega-Lite spec: {} chars", vega_json.len()); - // 6. Return result + // 5. Return result Ok(ExecutionResult::Visualization { spec: vega_json }) } } diff --git a/src/api.rs b/src/api.rs new file mode 100644 index 00000000..ecfbdeaf --- /dev/null +++ b/src/api.rs @@ -0,0 +1,606 @@ +//! High-level ggsql API. +//! +//! 
Two-stage API: `prepare()` → `render()`. + +use crate::naming; +use crate::parser; +use crate::plot::Plot; +use crate::{DataFrame, Result}; +use std::collections::HashMap; + +#[cfg(feature = "duckdb")] +use crate::execute::prepare_data_with_executor; +#[cfg(feature = "duckdb")] +use crate::reader::Reader; + +#[cfg(feature = "vegalite")] +use crate::writer::Writer; + +// ============================================================================ +// Core Types +// ============================================================================ + +/// Result of `prepare()`, ready for rendering. +pub struct Prepared { + /// Single resolved plot specification + plot: Plot, + /// Internal data map (global + layer-specific DataFrames) + data: HashMap, + /// Cached metadata about the prepared visualization + metadata: Metadata, + /// The main SQL query that was executed + sql: String, + /// The raw VISUALISE portion text + visual: String, + /// Per-layer filter/source queries (None = uses global data directly) + layer_sql: Vec>, + /// Per-layer stat transform queries (None = no stat transform) + stat_sql: Vec>, + /// Validation warnings from preparation + warnings: Vec, +} + +impl Prepared { + /// Create a new Prepared from PreparedData + pub(crate) fn new( + plot: Plot, + data: HashMap, + sql: String, + visual: String, + layer_sql: Vec>, + stat_sql: Vec>, + warnings: Vec, + ) -> Self { + // Compute metadata from data + let (rows, columns) = if let Some(df) = data.get(naming::GLOBAL_DATA_KEY) { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else if let Some(df) = data.values().next() { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else { + (0, Vec::new()) + }; + + let layer_count = plot.layers.len(); + let metadata = Metadata { + rows, + columns, + layer_count, + }; + + Self { + plot, + data, + metadata, + sql, + visual, + 
layer_sql, + stat_sql, + warnings, + } + } + + /// Render to output format (e.g., Vega-Lite JSON). + #[cfg(feature = "vegalite")] + pub fn render(&self, writer: &dyn Writer) -> Result { + writer.write(&self.plot, &self.data) + } + + /// Get the resolved plot specification. + pub fn plot(&self) -> &Plot { + &self.plot + } + + /// Get visualization metadata. + pub fn metadata(&self) -> &Metadata { + &self.metadata + } + + /// Number of layers. + pub fn layer_count(&self) -> usize { + self.plot.layers.len() + } + + /// Get global data (main query result). + pub fn data(&self) -> Option<&DataFrame> { + self.data.get(naming::GLOBAL_DATA_KEY) + } + + /// Get layer-specific data (from FILTER or FROM clause). + pub fn layer_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.data.get(&naming::layer_key(layer_index)) + } + + /// Get stat transform data (e.g., histogram bins, density estimates). + pub fn stat_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.layer_data(layer_index) + } + + /// Get internal data map (all DataFrames by key). + pub fn data_map(&self) -> &HashMap { + &self.data + } + + /// The main SQL query that was executed. + pub fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + pub fn visual(&self) -> &str { + &self.visual + } + + /// Layer filter/source query, or `None` if using global data. + pub fn layer_sql(&self, layer_index: usize) -> Option<&str> { + self.layer_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Stat transform query, or `None` if no stat transform. + pub fn stat_sql(&self, layer_index: usize) -> Option<&str> { + self.stat_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Validation warnings from preparation. + pub fn warnings(&self) -> &[ValidationWarning] { + &self.warnings + } +} + +/// Metadata about the prepared visualization. 
+#[derive(Debug, Clone)] +pub struct Metadata { + pub rows: usize, + pub columns: Vec, + pub layer_count: usize, +} + +/// Result of `validate()` - query inspection and validation without SQL execution. +pub struct Validated { + sql: String, + visual: String, + has_visual: bool, + tree: Option, + valid: bool, + errors: Vec, + warnings: Vec, +} + +impl Validated { + /// Whether the query contains a VISUALISE clause. + pub fn has_visual(&self) -> bool { + self.has_visual + } + + /// The SQL portion (before VISUALISE). + pub fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + pub fn visual(&self) -> &str { + &self.visual + } + + /// CST for advanced inspection. + pub fn tree(&self) -> Option<&tree_sitter::Tree> { + self.tree.as_ref() + } + + /// Whether the query is valid (no errors). + pub fn valid(&self) -> bool { + self.valid + } + + /// Validation errors. + pub fn errors(&self) -> &[ValidationError] { + &self.errors + } + + /// Validation warnings. + pub fn warnings(&self) -> &[ValidationWarning] { + &self.warnings + } +} + +/// A validation error (fatal). +#[derive(Debug, Clone)] +pub struct ValidationError { + pub message: String, + pub location: Option, +} + +/// A validation warning (non-fatal). +#[derive(Debug, Clone)] +pub struct ValidationWarning { + pub message: String, + pub location: Option, +} + +/// Location within a query string (0-based). +#[derive(Debug, Clone)] +pub struct Location { + pub line: usize, + pub column: usize, +} + +// ============================================================================ +// High-Level API Functions +// ============================================================================ + +/// Prepare a query for visualization. Main entry point for the two-stage API. 
+#[cfg(feature = "duckdb")] +pub fn prepare(query: &str, reader: &dyn Reader) -> Result { + // Run validation first to capture warnings + let validated = validate(query)?; + let warnings: Vec = validated.warnings().to_vec(); + + // Prepare data (this also validates, but we want the warnings from above) + let prepared_data = prepare_data_with_executor(query, |sql| reader.execute(sql))?; + + Ok(Prepared::new( + prepared_data.spec, + prepared_data.data, + prepared_data.sql, + prepared_data.visual, + prepared_data.layer_sql, + prepared_data.stat_sql, + warnings, + )) +} + +/// Validate query syntax and semantics without executing SQL. +pub fn validate(query: &str) -> Result { + let mut errors = Vec::new(); + let warnings = Vec::new(); + + // Split to determine if there's a viz portion + let (sql_part, viz_part) = match parser::split_query(query) { + Ok((sql, viz)) => (sql, viz), + Err(e) => { + // Split error - return as validation error + errors.push(ValidationError { + message: e.to_string(), + location: None, + }); + return Ok(Validated { + sql: String::new(), + visual: String::new(), + has_visual: false, + tree: None, + valid: false, + errors, + warnings, + }); + } + }; + + let has_visual = !viz_part.trim().is_empty(); + + // Parse the full query to get the CST + let tree = if has_visual { + let mut ts_parser = tree_sitter::Parser::new(); + ts_parser + .set_language(&tree_sitter_ggsql::language()) + .map_err(|e| { + crate::GgsqlError::InternalError(format!("Failed to set language: {}", e)) + })?; + ts_parser.parse(query, None) + } else { + None + }; + + // If no visualization, just syntax check passed + if !has_visual { + return Ok(Validated { + sql: sql_part, + visual: viz_part, + has_visual, + tree, + valid: true, + errors, + warnings, + }); + } + + // Parse to get plot specifications for validation + let plots = match parser::parse_query(query) { + Ok(p) => p, + Err(e) => { + errors.push(ValidationError { + message: e.to_string(), + location: None, + }); + 
return Ok(Validated { + sql: sql_part, + visual: viz_part, + has_visual, + tree, + valid: false, + errors, + warnings, + }); + } + }; + + // Validate the single plot (we only support one VISUALISE statement) + if let Some(plot) = plots.first() { + // Validate each layer + for (layer_idx, layer) in plot.layers.iter().enumerate() { + let context = format!("Layer {}", layer_idx + 1); + + // Check required aesthetics + // Note: Without schema data, we can only check if mappings exist, + // not if the columns are valid. We skip this check for wildcards. + if !layer.mappings.wildcard { + if let Err(e) = layer.validate_required_aesthetics() { + errors.push(ValidationError { + message: format!("{}: {}", context, e), + location: None, + }); + } + } + + // Validate SETTING parameters + if let Err(e) = layer.validate_settings() { + errors.push(ValidationError { + message: format!("{}: {}", context, e), + location: None, + }); + } + } + } + + Ok(Validated { + sql: sql_part, + visual: viz_part, + has_visual, + tree, + valid: errors.is_empty(), + errors, + warnings, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validate_with_visual() { + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y").unwrap(); + assert!(validated.has_visual()); + assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y"); + assert!(validated.visual().starts_with("VISUALISE")); + assert!(validated.tree().is_some()); + assert!(validated.valid()); + } + + #[test] + fn test_validate_without_visual() { + let validated = validate("SELECT 1 as x, 2 as y").unwrap(); + assert!(!validated.has_visual()); + assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y"); + assert!(validated.visual().is_empty()); + assert!(validated.tree().is_none()); + assert!(validated.valid()); + } + + #[test] + fn test_validate_valid_query() { + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y").unwrap(); + assert!( + 
validated.valid(), + "Expected valid query: {:?}", + validated.errors() + ); + assert!(validated.errors().is_empty()); + } + + #[test] + fn test_validate_missing_required_aesthetic() { + // Point requires x and y, but we only provide x + let validated = + validate("SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x").unwrap(); + assert!(!validated.valid()); + assert!(!validated.errors().is_empty()); + assert!(validated.errors()[0].message.contains("y")); + } + + #[test] + fn test_validate_syntax_error() { + let validated = validate("SELECT 1 VISUALISE DRAW invalidgeom").unwrap(); + assert!(!validated.valid()); + assert!(!validated.errors().is_empty()); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn test_prepare_and_render() { + use crate::reader::DuckDBReader; + use crate::writer::VegaLiteWriter; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let prepared = prepare("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point", &reader).unwrap(); + + assert_eq!(prepared.plot().layers.len(), 1); + assert_eq!(prepared.metadata().layer_count, 1); + assert!(prepared.data().is_some()); + + let writer = VegaLiteWriter::new(); + let result = prepared.render(&writer).unwrap(); + assert!(result.contains("point")); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn test_prepare_metadata() { + use crate::reader::DuckDBReader; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let prepared = prepare( + "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) VISUALISE x, y DRAW point", + &reader, + ) + .unwrap(); + + let metadata = prepared.metadata(); + assert_eq!(metadata.rows, 3); + assert_eq!(metadata.columns.len(), 2); + assert!(metadata.columns.contains(&"x".to_string())); + assert!(metadata.columns.contains(&"y".to_string())); + assert_eq!(metadata.layer_count, 1); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn 
test_prepare_with_cte() { + use crate::reader::DuckDBReader; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + WITH data AS ( + SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) + ) + SELECT * FROM data + VISUALISE x, y DRAW point + "#; + + let prepared = prepare(query, &reader).unwrap(); + + assert_eq!(prepared.plot().layers.len(), 1); + assert!(prepared.data().is_some()); + let df = prepared.data().unwrap(); + assert_eq!(df.height(), 2); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn test_render_multi_layer() { + use crate::reader::DuckDBReader; + use crate::writer::VegaLiteWriter; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) + VISUALISE + DRAW point MAPPING x AS x, y AS y + DRAW line MAPPING x AS x, y AS y + "#; + + let prepared = prepare(query, &reader).unwrap(); + let writer = VegaLiteWriter::new(); + let result = prepared.render(&writer).unwrap(); + + assert!(result.contains("layer")); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn test_register_and_query() { + use crate::reader::{DuckDBReader, Reader}; + use crate::writer::VegaLiteWriter; + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let df = df! 
{ + "x" => [1i32, 2, 3], + "y" => [10i32, 20, 30], + } + .unwrap(); + + reader.register("my_data", df).unwrap(); + + let query = "SELECT * FROM my_data VISUALISE x, y DRAW point"; + let prepared = prepare(query, &reader).unwrap(); + + assert_eq!(prepared.metadata().rows, 3); + assert!(prepared.metadata().columns.contains(&"x".to_string())); + + let writer = VegaLiteWriter::new(); + let result = prepared.render(&writer).unwrap(); + assert!(result.contains("point")); + } + + #[cfg(all(feature = "duckdb", feature = "vegalite"))] + #[test] + fn test_register_and_join() { + use crate::reader::{DuckDBReader, Reader}; + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let sales = df! { + "id" => [1i32, 2, 3], + "amount" => [100i32, 200, 300], + "product_id" => [1i32, 1, 2], + } + .unwrap(); + + let products = df! { + "id" => [1i32, 2], + "name" => ["Widget", "Gadget"], + } + .unwrap(); + + reader.register("sales", sales).unwrap(); + reader.register("products", products).unwrap(); + + let query = r#" + SELECT s.id, s.amount, p.name + FROM sales s + JOIN products p ON s.product_id = p.id + VISUALISE id AS x, amount AS y + DRAW bar + "#; + + let prepared = prepare(query, &reader).unwrap(); + assert_eq!(prepared.metadata().rows, 3); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_prepare_no_viz_fails() { + use crate::reader::DuckDBReader; + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = "SELECT 1 as x, 2 as y"; + + let result = prepare(query, &reader); + assert!(result.is_err()); + } + + #[test] + fn test_validate_sql_and_visual_content() { + let query = "SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y DRAW line MAPPING x AS x, y AS y"; + let validated = validate(query).unwrap(); + + assert!(validated.has_visual()); + assert_eq!(validated.sql(), "SELECT 1 as x, 2 as y"); + assert!(validated.visual().contains("DRAW point")); + 
assert!(validated.visual().contains("DRAW line")); + assert!(validated.valid()); + } + + #[test] + fn test_validate_sql_only() { + let query = "SELECT 1 as x, 2 as y"; + let validated = validate(query).unwrap(); + + // SQL-only queries should be valid (just syntax check) + assert!(validated.valid()); + assert!(validated.errors().is_empty()); + } +} diff --git a/src/cli.rs b/src/cli.rs index 80dec31e..1844ff01 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -9,13 +9,13 @@ use ggsql::parser::extract_sql; use ggsql::{parser, VERSION}; use std::path::PathBuf; -#[cfg(feature = "duckdb")] -use ggsql::execute::prepare_data; #[cfg(feature = "duckdb")] use ggsql::reader::{DuckDBReader, Reader}; +#[cfg(feature = "duckdb")] +use ggsql::{prepare, validate}; #[cfg(feature = "vegalite")] -use ggsql::writer::{VegaLiteWriter, Writer}; +use ggsql::writer::VegaLiteWriter; #[derive(Parser)] #[command(name = "ggsql")] @@ -169,15 +169,16 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option v, + Err(e) => { + eprintln!("Failed to validate query: {}", e); + std::process::exit(1); + } + }; - if viz_part.is_empty() { + if !validated.has_visual() { if verbose { eprintln!("Visualisation is empty. 
Printing table instead."); } @@ -185,28 +186,27 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option p, + Err(e) => { + eprintln!("Failed to prepare data: {}", e); + std::process::exit(1); + } + }; if verbose { - eprintln!("\nData sources loaded:"); - for (key, df) in &prepared.data { - eprintln!(" {}: {:?}", key, df.shape()); - } - eprintln!("\nParsed {} visualisation spec(s)", prepared.specs.len()); + let metadata = prepared.metadata(); + eprintln!("\nData prepared:"); + eprintln!(" Rows: {}", metadata.rows); + eprintln!(" Columns: {}", metadata.columns.join(", ")); + eprintln!(" Layers: {}", metadata.layer_count); } - let first_spec = prepared.specs.first(); - if first_spec.is_none() { + if prepared.plot().layers.is_empty() { eprintln!("No visualization specifications found"); std::process::exit(1); } - let first_spec = first_spec.unwrap(); // Check writer if writer != "vegalite" { @@ -220,14 +220,15 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option r, + Err(e) => { + eprintln!("Failed to generate Vega-Lite output: {}", e); + std::process::exit(1); + } + }; if output.is_none() { // Empty output location, write to stdout @@ -237,7 +238,7 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option { if verbose { eprintln!("\nVega-Lite JSON written to: {}", output.display()); @@ -291,13 +292,38 @@ fn cmd_parse(query: String, format: String) { } } -fn cmd_validate(query: String, reader: Option) { - println!("Validating query: {}", query); - if let Some(reader) = reader { - println!("Reader: {}", reader); +fn cmd_validate(query: String, _reader: Option) { + #[cfg(feature = "duckdb")] + { + match validate(&query) { + Ok(validated) if validated.valid() => { + println!("✓ Query syntax is valid"); + } + Ok(validated) => { + println!("✗ Validation errors:"); + for err in validated.errors() { + println!(" - {}", err.message); + } + if !validated.warnings().is_empty() { + println!("\nWarnings:"); + for 
warning in validated.warnings() { + println!(" - {}", warning.message); + } + } + std::process::exit(1); + } + Err(e) => { + eprintln!("Error during validation: {}", e); + std::process::exit(1); + } + } + } + + #[cfg(not(feature = "duckdb"))] + { + eprintln!("Validation requires the duckdb feature"); + std::process::exit(1); } - // TODO: Implement validation logic - println!("Validation not yet implemented"); } // Prints a CSV-like output to stdout with aligned columns diff --git a/src/doc/API.md b/src/doc/API.md new file mode 100644 index 00000000..5ccd70e6 --- /dev/null +++ b/src/doc/API.md @@ -0,0 +1,520 @@ +# ggsql API Reference + +This document provides a comprehensive reference for the ggsql public API. + +## Overview + +- **Stage 1: `prepare()`** - Parse query, execute SQL, resolve mappings, prepare data +- **Stage 2: `render()`** - Generate output (Vega-Lite JSON, etc.) + +### API Functions + +| Function | Use Case | +| ------------ | ---------------------------------------------------- | +| `prepare()` | Main entry point - full visualization pipeline | +| `render()` | Generate output from prepared data | +| `validate()` | Validate syntax + semantics, inspect query structure | + +--- + +## Core Functions + +### `prepare` + +```rust +pub fn prepare(query: &str, reader: &dyn Reader) -> Result +``` + +Prepare a ggsql query for visualization. This is the main entry point for the two-stage API. + +**What happens during preparation:** + +1. Parses the query (SQL + VISUALISE portions) +2. Executes the main SQL query using the provided reader +3. Resolves wildcards (`VISUALISE *`) against actual columns +4. Merges global mappings into each layer +5. Executes layer-specific queries (filters, stats) +6. Injects constant values as synthetic columns +7. 
Computes aesthetic labels from column names + +**Arguments:** + +- `query` - The full ggsql query string +- `reader` - A reader implementing the `Reader` trait + +**Returns:** + +- `Ok(Prepared)` - Ready for rendering +- `Err(GgsqlError)` - Parse, validation, or execution error + +**Example:** + +```rust +use ggsql::{prepare, reader::DuckDBReader, writer::VegaLiteWriter}; + +let reader = DuckDBReader::from_connection_string("duckdb://memory")?; +let prepared = prepare( + "SELECT x, y FROM data VISUALISE x, y DRAW point", + &reader +)?; + +// Access metadata +println!("Rows: {}", prepared.metadata().rows); +println!("Columns: {:?}", prepared.metadata().columns); + +// Render to Vega-Lite +let writer = VegaLiteWriter::new(); +let result = prepared.render(&writer)?; +``` + +**Error Conditions:** + +- Parse error in SQL or VISUALISE portion +- SQL execution failure +- Missing required aesthetics +- Invalid geom type +- Multiple VISUALISE statements (not yet supported) + +--- + +### `validate` + +```rust +pub fn validate(query: &str) -> Result +``` + +Validate query syntax and semantics without executing SQL. This function combines query parsing and validation into a single operation. + +**What is validated:** + +- Syntax (parsing) +- Required aesthetics for each geom type +- Valid scale types (linear, log10, date, etc.) 
+- Valid coord types and properties +- Valid geom types +- Valid aesthetic names +- Valid SETTING parameters + +**Arguments:** + +- `query` - The full ggsql query string (SQL + VISUALISE) + +**Returns:** + +- `Ok(Validated)` - Validation results with query inspection methods +- `Err(GgsqlError)` - Internal error + +**Example:** + +```rust +use ggsql::validate; + +let validated = validate("SELECT x, y FROM data VISUALISE x, y DRAW point")?; + +// Check validity +if !validated.valid() { + for error in validated.errors() { + eprintln!("Error: {}", error.message); + } +} + +// Inspect query structure +if validated.has_visual() { + println!("SQL: {}", validated.sql()); + println!("Visual: {}", validated.visual()); +} +``` + +**Notes:** + +- Does not execute SQL +- Does not resolve wildcards or global mappings +- Cannot validate column existence (requires data) +- Returns all errors, not just the first one +- CST available via `tree()` for advanced inspection + +--- + +## Type Reference + +### `Validated` + +Result of validating a query (syntax + semantics, no SQL execution). 
+ +```rust +pub struct Validated { + // All fields private +} +``` + +**Methods:** + +| Method | Signature | Description | +| ------------ | -------------------------------------------- | ---------------------------------- | +| `has_visual` | `fn has_visual(&self) -> bool` | Whether query contains VISUALISE | +| `sql` | `fn sql(&self) -> &str` | The SQL portion (before VISUALISE) | +| `visual` | `fn visual(&self) -> &str` | The VISUALISE portion (raw text) | +| `tree` | `fn tree(&self) -> Option<&Tree>` | CST for advanced inspection | +| `valid` | `fn valid(&self) -> bool` | Whether query is valid | +| `errors` | `fn errors(&self) -> &[ValidationError]` | Validation errors | +| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings | + +**Example:** + +```rust +let validated = ggsql::validate("SELECT 1 as x VISUALISE DRAW point MAPPING x AS x, y AS y")?; + +// Check validity +if !validated.valid() { + for error in validated.errors() { + eprintln!("Error: {}", error.message); + } +} + +// Inspect query structure +assert!(validated.has_visual()); +assert_eq!(validated.sql(), "SELECT 1 as x"); +assert!(validated.visual().starts_with("VISUALISE")); + +// CST access for advanced use cases +if let Some(tree) = validated.tree() { + println!("Root node: {}", tree.root_node().kind()); +} +``` + +--- + +### `Prepared` + +Result of preparing a visualization, ready for rendering. 
+ +#### Rendering Methods + +| Method | Signature | Description | +| -------- | --------------------------------------------------------- | ----------------------- | +| `render` | `fn render(&self, writer: &dyn Writer) -> Result` | Render to output format | + +**Example:** + +```rust +let writer = VegaLiteWriter::new(); +let json = prepared.render(&writer)?; +println!("{}", json); +``` + +#### Plot Access Methods + +| Method | Signature | Description | +| ------------- | -------------------------------- | ------------------------------- | +| `plot` | `fn plot(&self) -> &Plot` | Get resolved plot specification | +| `layer_count` | `fn layer_count(&self) -> usize` | Number of layers | + +**Example:** + +```rust +println!("Layers: {}", prepared.layer_count()); + +let plot = prepared.plot(); +for (i, layer) in plot.layers.iter().enumerate() { + println!("Layer {}: {:?}", i, layer.geom); +} +``` + +#### Metadata Methods + +| Method | Signature | Description | +| ---------- | --------------------------------- | -------------------------- | +| `metadata` | `fn metadata(&self) -> &Metadata` | Get visualization metadata | + +**Example:** + +```rust +let meta = prepared.metadata(); +println!("Rows: {}", meta.rows); +println!("Columns: {:?}", meta.columns); +println!("Layer count: {}", meta.layer_count); +``` + +#### Data Access Methods + +| Method | Signature | Description | +| ------------ | ------------------------------------------------------ | ------------------------------- | +| `data` | `fn data(&self) -> Option<&DataFrame>` | Global data (main query result) | +| `layer_data` | `fn layer_data(&self, i: usize) -> Option<&DataFrame>` | Layer-specific data | +| `stat_data` | `fn stat_data(&self, i: usize) -> Option<&DataFrame>` | Stat transform results | +| `data_map` | `fn data_map(&self) -> &HashMap` | Raw data map access | + +**Example:** + +```rust +// Global data +if let Some(df) = prepared.data() { + println!("Global data: {} rows", df.height()); +} + +// 
Layer-specific data (from FILTER or FROM clause) +if let Some(df) = prepared.layer_data(0) { + println!("Layer 0 has filtered data: {} rows", df.height()); +} + +// Stat data (histogram bins, density estimates, etc.) +if let Some(df) = prepared.stat_data(1) { + println!("Layer 1 stat data: {} rows", df.height()); +} +``` + +#### Query Introspection Methods + +| Method | Signature | Description | +| ----------- | ----------------------------------------------- | -------------------------------- | +| `sql` | `fn sql(&self) -> &str` | Main SQL query that was executed | +| `visual` | `fn visual(&self) -> &str` | Raw VISUALISE text | +| `layer_sql` | `fn layer_sql(&self, i: usize) -> Option<&str>` | Layer filter/source query | +| `stat_sql` | `fn stat_sql(&self, i: usize) -> Option<&str>` | Stat transform query | + +**Example:** + +```rust +// Main query +println!("SQL: {}", prepared.sql()); +println!("Visual: {}", prepared.visual()); + +// Per-layer queries +for i in 0..prepared.layer_count() { + if let Some(sql) = prepared.layer_sql(i) { + println!("Layer {} filter: {}", i, sql); + } + if let Some(sql) = prepared.stat_sql(i) { + println!("Layer {} stat: {}", i, sql); + } +} +``` + +#### Warnings Method + +| Method | Signature | Description | +| ---------- | -------------------------------------------- | ------------------------------------ | +| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings from preparation | + +**Example:** + +```rust +let prepared = ggsql::prepare(query, &reader)?; + +// Check for warnings +if !prepared.warnings().is_empty() { + for warning in prepared.warnings() { + eprintln!("Warning: {}", warning.message); + } +} + +// Continue with rendering +let json = prepared.render(&writer)?; +``` + +--- + +### `Metadata` + +Information about the prepared visualization. 
+ +```rust +pub struct Metadata { + pub rows: usize, // Rows in primary data source + pub columns: Vec, // Column names + pub layer_count: usize, // Number of layers in the plot +} +``` + +--- + +### `ValidationError` + +A validation error (fatal issue). + +```rust +pub struct ValidationError { + pub message: String, + pub location: Option, +} +``` + +--- + +### `ValidationWarning` + +A validation warning (non-fatal issue). + +```rust +pub struct ValidationWarning { + pub message: String, + pub location: Option, +} +``` + +--- + +### `Location` + +Location within a query string. + +```rust +pub struct Location { + pub line: usize, // 0-based line number + pub column: usize, // 0-based column number +} +``` + +--- + +## Reader Trait & Implementations + +### `Reader` Trait + +```rust +pub trait Reader { + /// Execute a SQL query and return a DataFrame + fn execute(&self, sql: &str) -> Result; + + /// Register a DataFrame as a queryable table + fn register(&mut self, name: &str, df: DataFrame) -> Result<()>; + + /// Check if this reader supports DataFrame registration + fn supports_register(&self) -> bool; +} +``` + +--- + +## Writer Trait & Implementations + +### `Writer` Trait + +```rust +pub trait Writer { + /// Render a plot specification to output format + fn write(&self, spec: &Plot, data: &HashMap) -> Result; + + /// Get the file extension for this writer's output + fn file_extension(&self) -> &str; +} +``` + +## Python Bindings + +The Python bindings provide the same two-stage API with Pythonic conventions. + +### Classes + +#### `DuckDBReader` + +```python +class DuckDBReader: + def __init__(self, connection: str) -> None: + """Create a DuckDB reader. + + Args: + connection: Connection string (e.g., "duckdb://memory") + """ + + def register(self, name: str, df: Any) -> None: + """Register a DataFrame as a queryable table. 
+ + Args: + name: Table name + df: Polars DataFrame or narwhals-compatible DataFrame + """ + + def execute(self, sql: str) -> polars.DataFrame: + """Execute SQL and return a Polars DataFrame.""" + + def supports_register(self) -> bool: + """Check if registration is supported.""" +``` + +#### `VegaLiteWriter` + +```python +class VegaLiteWriter: + def __init__(self) -> None: + """Create a Vega-Lite writer.""" +``` + +#### `Validated` + +```python +class Validated: + def has_visual(self) -> bool: + """Check if query has VISUALISE clause.""" + + def sql(self) -> str: + """Get the SQL portion.""" + + def visual(self) -> str: + """Get the VISUALISE portion.""" + + def valid(self) -> bool: + """Check if query is valid.""" + + def errors(self) -> list[dict]: + """Get validation errors as list of dicts with 'message', 'location'.""" + + def warnings(self) -> list[dict]: + """Get validation warnings as list of dicts with 'message', 'location'.""" + + # Note: tree() not exposed (tree-sitter nodes are Rust-only) +``` + +#### `Prepared` + +```python +class Prepared: + def render(self, writer: VegaLiteWriter) -> str: + """Render to output format.""" + + def metadata(self) -> dict: + """Get metadata as dict with keys: rows, columns, layer_count.""" + + def sql(self) -> str: + """Get the main SQL query.""" + + def visual(self) -> str: + """Get the VISUALISE text.""" + + def layer_count(self) -> int: + """Get number of layers.""" + + def warnings(self) -> list[dict]: + """Get validation warnings as list of dicts with 'message', 'location'.""" + + def data(self) -> polars.DataFrame | None: + """Get global data.""" + + def layer_data(self, index: int) -> polars.DataFrame | None: + """Get layer-specific data.""" + + def stat_data(self, index: int) -> polars.DataFrame | None: + """Get stat transform data.""" + + def layer_sql(self, index: int) -> str | None: + """Get layer filter query.""" + + def stat_sql(self, index: int) -> str | None: + """Get stat transform query.""" +``` + +### 
Functions + +```python +def validate(query: str) -> Validated: + """Validate query syntax and semantics. + + Returns Validated object with query inspection and validation methods. + """ + +def prepare(query: str, reader: DuckDBReader) -> Prepared: + """Prepare a query for visualization.""" + +def split_query(query: str) -> tuple[str, str]: + """Split query into (sql, visualise) portions.""" +``` diff --git a/src/execute.rs b/src/execute.rs index 33116ceb..e79bb196 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -531,6 +531,23 @@ fn transform_global_sql(sql: &str, materialized_ctes: &HashSet) -> Optio } } +/// Result of building a layer query +/// +/// Contains information about the queries executed for a layer, +/// distinguishing between base filter queries and stat transform queries. +#[derive(Debug, Default)] +pub struct LayerQueryResult { + /// The final query to execute (if any) + /// None means layer uses global data directly + pub query: Option, + /// The base query before stat transform (filter/source only) + /// None if layer uses global data directly without filter + pub layer_sql: Option, + /// The stat transform query (if a stat transform was applied) + /// None if no stat transform was needed + pub stat_sql: Option, +} + /// Build a layer query handling all source types /// /// Handles: @@ -544,8 +561,7 @@ fn transform_global_sql(sql: &str, materialized_ctes: &HashSet) -> Optio /// (e.g., histogram binning, bar counting). 
/// /// Returns: -/// - `Ok(Some(query))` - execute this query and store result -/// - `Ok(None)` - layer uses `__global__` directly (no source, no filter, no constants, no stat transform) +/// - `Ok(LayerQueryResult)` with information about queries executed /// - `Err(...)` - validation error (e.g., filter without global data) /// /// Note: This function takes `&mut Layer` because stat transforms may add new aesthetic mappings @@ -559,7 +575,7 @@ fn build_layer_query( facet: Option<&Facet>, constants: &[(String, LiteralValue)], execute_query: &F, -) -> Result> +) -> Result where F: Fn(&str) -> Result, { @@ -603,7 +619,7 @@ where naming::global_table() } else { // No source, no filter, no constants, no stat transform - use __global__ data directly - return Ok(None); + return Ok(LayerQueryResult::default()); } } }; @@ -635,6 +651,9 @@ where query = format!("{} WHERE {}", query, f); } + // Save the base query (with filter) before stat transform + let base_query = query.clone(); + // Apply statistical transformation (after filter, uses combined group_by) // Returns StatResult::Identity for no transformation, StatResult::Transformed for transformed query let stat_result = layer.geom.apply_stat_transform( @@ -692,11 +711,15 @@ where } // Use the transformed query - let mut final_query = transformed_query; + let mut final_query = transformed_query.clone(); if let Some(o) = order_by { final_query = format!("{} ORDER BY {}", final_query, o); } - Ok(Some(final_query)) + Ok(LayerQueryResult { + query: Some(final_query), + layer_sql: Some(base_query), + stat_sql: Some(transformed_query), + }) } StatResult::Identity => { // Identity - no stat transformation @@ -707,14 +730,18 @@ where && order_by.is_none() && constants.is_empty() { - Ok(None) + Ok(LayerQueryResult::default()) } else { // Layer has filter, order_by, or constants - still need the query let mut final_query = query; if let Some(o) = order_by { final_query = format!("{} ORDER BY {}", final_query, o); } - 
Ok(Some(final_query)) + Ok(LayerQueryResult { + query: Some(final_query.clone()), + layer_sql: Some(final_query), + stat_sql: None, + }) } } } @@ -860,8 +887,16 @@ fn split_color_aesthetic(layers: &mut Vec) { pub struct PreparedData { /// Data map with global and layer-specific DataFrames pub data: HashMap, - /// Parsed and resolved visualization specifications - pub specs: Vec, + /// Parsed and resolved visualization specification + pub spec: Plot, + /// The main SQL query that was executed + pub sql: String, + /// The raw VISUALISE portion text + pub visual: String, + /// Per-layer filter/source queries (None = uses global data directly) + pub layer_sql: Vec>, + /// Per-layer stat transform queries (None = no stat transform) + pub stat_sql: Vec>, } /// Build data map from a query using a custom query executor function @@ -888,6 +923,13 @@ where )); } + // TODO: Support multiple VISUALISE statements in future + if specs.len() > 1 { + return Err(GgsqlError::ValidationError( + "Multiple VISUALISE statements are not yet supported. Please use a single VISUALISE statement.".to_string(), + )); + } + // Check if we have any visualization content if viz_part.trim().is_empty() { return Err(GgsqlError::ValidationError( @@ -1054,6 +1096,10 @@ where // - Layer with no source, no filter, no order_by → returns None (use global directly, constants already injected) let facet = specs[0].facet.clone(); + // Track layer and stat queries for introspection + let mut layer_sql_vec: Vec> = Vec::new(); + let mut stat_sql_vec: Vec> = Vec::new(); + for (idx, layer) in specs[0].layers.iter_mut().enumerate() { // For layers using global data without filter, constants are already in global data // (injected with layer-indexed names). For other layers, extract constants for injection. 
@@ -1064,7 +1110,7 @@ where }; // Get mutable reference to layer for stat transform to update aesthetics - if let Some(layer_query) = build_layer_query( + let query_result = build_layer_query( layer, &layer_schemas[idx], &materialized_ctes, @@ -1073,7 +1119,14 @@ where facet.as_ref(), &constants, &execute_query, - )? { + )?; + + // Store query information for introspection + layer_sql_vec.push(query_result.layer_sql); + stat_sql_vec.push(query_result.stat_sql); + + // Execute the query if one was generated + if let Some(layer_query) = query_result.query { let df = execute_query(&layer_query).map_err(|e| { GgsqlError::ReaderError(format!( "Failed to fetch data for layer {}: {}", @@ -1105,20 +1158,24 @@ where )); } - // Post-process specs: replace literals with column references and compute labels - for spec in &mut specs { - // Replace literal aesthetic values with column references to synthetic constant columns - replace_literals_with_columns(spec); - // Compute aesthetic labels (uses first non-constant column, respects user-specified labels) - spec.compute_aesthetic_labels(); - // Divide 'color' over 'stroke' and 'fill'. This needs to happens after - // literals have associated columns. - split_color_aesthetic(&mut spec.layers); - } + let mut spec = specs.into_iter().next().unwrap(); + + // Post-process spec: replace literals with column references and compute labels + // Replace literal aesthetic values with column references to synthetic constant columns + replace_literals_with_columns(&mut spec); + // Compute aesthetic labels (uses first non-constant column, respects user-specified labels) + spec.compute_aesthetic_labels(); + // Divide 'color' over 'stroke' and 'fill'. This needs to happens after + // literals have associated columns. 
+ split_color_aesthetic(&mut spec.layers); Ok(PreparedData { data: data_map, - specs, + spec, + sql: sql_part, + visual: viz_part, + layer_sql: layer_sql_vec, + stat_sql: stat_sql_vec, }) } @@ -1146,7 +1203,7 @@ mod tests { let result = prepare_data(query, &reader).unwrap(); assert!(result.data.contains_key(naming::GLOBAL_DATA_KEY)); - assert_eq!(result.specs.len(), 1); + assert_eq!(result.spec.layers.len(), 1); } #[cfg(feature = "duckdb")] @@ -1373,7 +1430,8 @@ mod tests { ); // Should use temp table name with session UUID - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_cte_sales_")); assert!(query.ends_with("__")); assert!(query.contains(naming::session_id())); @@ -1401,7 +1459,8 @@ mod tests { ); // Should use temp table name with session UUID and filter - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("__ggsql_cte_sales_")); assert!(query.ends_with(" WHERE year = 2024")); assert!(query.contains(naming::session_id())); @@ -1427,8 +1486,9 @@ mod tests { ); // Should use table name directly + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table".to_string()) ); } @@ -1453,8 +1513,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table WHERE value > 100".to_string()) ); } @@ -1479,8 +1540,9 @@ mod tests { ); // File paths should be wrapped in single quotes + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM 'data/sales.csv'".to_string()) ); } @@ -1505,8 +1567,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM 'data.parquet' WHERE x > 
10".to_string()) ); } @@ -1531,7 +1594,8 @@ mod tests { ); // Should query global table with session UUID and filter - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_global_")); assert!(query.ends_with("__ WHERE category = 'A'")); assert!(query.contains(naming::session_id())); @@ -1555,8 +1619,11 @@ mod tests { &mock_execute, ); - // Should return None - layer uses __global__ directly - assert_eq!(result.unwrap(), None); + // Should return empty result - layer uses __global__ directly + let query_result = result.unwrap(); + assert!(query_result.query.is_none()); + assert!(query_result.layer_sql.is_none()); + assert!(query_result.stat_sql.is_none()); } #[test] @@ -1605,8 +1672,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some("SELECT * FROM some_table ORDER BY date ASC".to_string()) ); } @@ -1632,8 +1700,9 @@ mod tests { &mock_execute, ); + let query_result = result.unwrap(); assert_eq!( - result.unwrap(), + query_result.query, Some( "SELECT * FROM some_table WHERE year = 2024 ORDER BY date DESC, value ASC" .to_string() @@ -1661,7 +1730,8 @@ mod tests { ); // Should query global table with session UUID and order_by - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.starts_with("SELECT * FROM __ggsql_global_")); assert!(query.ends_with("__ ORDER BY x ASC")); assert!(query.contains(naming::session_id())); @@ -1697,7 +1767,8 @@ mod tests { ); // Should inject constants as columns - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("SELECT *")); assert!(query.contains("'value' AS __ggsql_const_color__")); assert!(query.contains("'value2' AS __ggsql_const_size__")); @@ -1727,7 +1798,8 @@ mod tests { 
&mock_execute, ); - let query = result.unwrap().unwrap(); + let query_result = result.unwrap(); + let query = query_result.query.unwrap(); assert!(query.contains("FROM __ggsql_global_")); assert!(query.contains(naming::session_id())); assert!(query.contains("'value' AS __ggsql_const_fill__")); @@ -2259,8 +2331,8 @@ mod tests { assert_eq!(global_df.height(), 3); // Verify spec has x and y aesthetics merged into layer - assert_eq!(result.specs.len(), 1); - let layer = &result.specs[0].layers[0]; + assert_eq!(result.spec.layers.len(), 1); + let layer = &result.spec.layers[0]; assert!( layer.mappings.contains_key("x"), "Layer should have x from global mapping" @@ -2721,7 +2793,7 @@ mod tests { let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; assert!(aes.contains_key("stroke")); assert!(aes.contains_key("fill")); @@ -2739,7 +2811,7 @@ mod tests { "#; let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; let stroke = aes.get("stroke").unwrap(); assert_eq!(stroke.column_name().unwrap(), "island"); @@ -2754,7 +2826,7 @@ mod tests { "#; let result = prepare_data(query, &reader).unwrap(); - let aes = &result.specs[0].layers[0].mappings.aesthetics; + let aes = &result.spec.layers[0].mappings.aesthetics; let stroke = aes.get("stroke").unwrap(); assert_eq!(stroke.column_name().unwrap(), "__ggsql_const_color_0__"); diff --git a/src/lib.rs b/src/lib.rs index 9eec2d49..cf13aaa5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,10 +27,10 @@ ggsql splits queries at the `VISUALISE` boundary: ## Core Components +- [`api`] - High-level API (prepare, parse, validate) - [`parser`] - Query parsing and AST generation -- [`engine`] - Core execution engine -- [`readers`] - Data source abstraction layer -- [`writers`] - Output format abstraction layer +- [`reader`] 
- Data source abstraction layer +- [`writer`] - Output format abstraction layer */ pub mod naming; @@ -46,14 +46,17 @@ pub mod writer; #[cfg(feature = "duckdb")] pub mod execute; +pub mod api; + // Re-export key types for convenience pub use plot::{ AestheticValue, DataSource, Facet, Geom, Layer, Mappings, Plot, Scale, SqlExpression, }; -// Future modules - not yet implemented -// #[cfg(feature = "engine")] -// pub mod engine; +// Re-export API types and functions +pub use api::{ + prepare, validate, Location, Metadata, Prepared, Validated, ValidationError, ValidationWarning, +}; // DataFrame abstraction (wraps Polars) pub use polars::prelude::DataFrame; @@ -547,7 +550,7 @@ mod integration_tests { !prepared.data.contains_key(&naming::layer_key(1)), "Layer 1 should use global data, not layer-specific data" ); - assert_eq!(prepared.specs.len(), 1); + assert_eq!(prepared.spec.layers.len(), 2); // Verify global data contains layer-indexed constant columns let global_df = prepared.data.get(naming::GLOBAL_DATA_KEY).unwrap(); @@ -565,7 +568,7 @@ mod integration_tests { // Generate Vega-Lite let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Verify we have two layers @@ -685,7 +688,7 @@ mod integration_tests { // Generate Vega-Lite and verify faceting structure let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Should have facet structure (row and column) @@ -750,7 +753,7 @@ mod integration_tests { // Generate Vega-Lite and verify it works let writer = VegaLiteWriter::new(); - let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let 
json_str = writer.write(&prepared.spec, &prepared.data).unwrap(); let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); // Both layers should have color field-mapped to their indexed constant columns diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 8ee13ebb..113ceea7 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -6,6 +6,7 @@ use crate::reader::data::init_builtin_data; use crate::reader::{connection::ConnectionInfo, Reader}; use crate::{DataFrame, GgsqlError, Result}; use duckdb::{params, Connection}; +use polars::prelude::*; /// DuckDB database reader /// @@ -413,30 +414,13 @@ impl Reader for DuckDBReader { Ok(df) } - fn validate_columns(&self, sql: &str, columns: &[String]) -> Result<()> { - // Execute the query to get the schema - let df = self.execute(sql)?; - - // Get column names from the DataFrame - let schema_columns: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - - // Check if all required columns exist - for col in columns { - if !schema_columns.contains(col) { - return Err(GgsqlError::ValidationError(format!( - "Column '{}' not found in query result. 
Available columns: {}", - col, - schema_columns.join(", ") - ))); - } - } - + fn register(&mut self, _name: &str, _df: DataFrame) -> Result<()> { Ok(()) } + + fn supports_register(&self) -> bool { + false + } } #[cfg(test)] @@ -481,28 +465,6 @@ mod tests { assert_eq!(df.get_column_names(), vec!["x", "y"]); } - #[test] - fn test_validate_columns_success() { - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let sql = "SELECT 1 as x, 2 as y"; - - let result = reader.validate_columns(sql, &["x".to_string(), "y".to_string()]); - assert!(result.is_ok()); - } - - #[test] - fn test_validate_columns_missing() { - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let sql = "SELECT 1 as x, 2 as y"; - - let result = reader.validate_columns(sql, &["z".to_string()]); - assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("Column 'z' not found")); - } - #[test] fn test_invalid_sql() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 7f3f403a..762c0319 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -7,7 +7,7 @@ //! //! All readers implement the `Reader` trait, which provides: //! - SQL query execution → DataFrame conversion -//! - Column validation for query introspection +//! - Optional DataFrame registration for queryable tables //! - Connection management and error handling //! //! # Example @@ -15,11 +15,17 @@ //! ```rust,ignore //! use ggsql::reader::{Reader, DuckDBReader}; //! +//! // Basic usage //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; //! let df = reader.execute("SELECT * FROM table")?; +//! +//! // With DataFrame registration +//! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; +//! reader.register("my_table", some_dataframe)?; +//! let result = reader.execute("SELECT * FROM my_table")?; //! 
``` -use crate::{DataFrame, Result}; +use crate::{DataFrame, GgsqlError, Result}; #[cfg(feature = "duckdb")] pub mod duckdb; @@ -35,6 +41,20 @@ pub use duckdb::DuckDBReader; /// /// Readers execute SQL queries and return Polars DataFrames. /// They provide a uniform interface for different database backends. +/// +/// # DataFrame Registration +/// +/// Some readers support registering DataFrames as queryable tables using +/// the [`register`](Reader::register) method. This allows you to query +/// in-memory DataFrames with SQL, join them with other tables, etc. +/// +/// ```rust,ignore +/// // Register a DataFrame (takes ownership) +/// reader.register("sales", sales_df)?; +/// +/// // Now you can query it +/// let result = reader.execute("SELECT * FROM sales WHERE amount > 100")?; +/// ``` pub trait Reader { /// Execute a SQL query and return the result as a DataFrame /// @@ -54,18 +74,38 @@ pub trait Reader { /// - The table or columns don't exist fn execute(&self, sql: &str) -> Result; - /// Validate that specified columns exist in a query result + /// Register a DataFrame as a queryable table (takes ownership) /// - /// This is useful for checking column names before visualization - /// to provide better error messages. + /// After registration, the DataFrame can be queried by name in SQL: + /// ```sql + /// SELECT * FROM WHERE ... + /// ``` /// /// # Arguments /// - /// * `sql` - The SQL query to introspect - /// * `columns` - Column names to validate + /// * `name` - The table name to register under + /// * `df` - The DataFrame to register (ownership is transferred) + /// + /// # Returns + /// + /// `Ok(())` on success, error if registration fails or isn't supported. + /// + /// # Default Implementation + /// + /// Returns an error by default. Override for readers that support registration. 
+ fn register(&mut self, name: &str, _df: DataFrame) -> Result<()> { + Err(GgsqlError::ReaderError(format!( + "This reader does not support DataFrame registration for table '{}'", + name + ))) + } + + /// Check if this reader supports DataFrame registration /// /// # Returns /// - /// Ok(()) if all columns exist, otherwise an error - fn validate_columns(&self, sql: &str, columns: &[String]) -> Result<()>; + /// `true` if [`register`](Reader::register) is implemented, `false` otherwise. + fn supports_register(&self) -> bool { + false + } } diff --git a/src/rest.rs b/src/rest.rs index 88fb61a6..45d21963 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -34,12 +34,12 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use ggsql::{parser, GgsqlError, VERSION}; #[cfg(feature = "duckdb")] -use ggsql::execute::prepare_data_with_executor; +use ggsql::reader::DuckDBReader; #[cfg(feature = "duckdb")] -use ggsql::reader::{DuckDBReader, Reader}; +use ggsql::{parse, prepare}; #[cfg(feature = "vegalite")] -use ggsql::writer::{VegaLiteWriter, Writer}; +use ggsql::writer::VegaLiteWriter; /// CLI arguments for the REST API server #[derive(Parser)] @@ -442,61 +442,38 @@ async fn query_handler( #[cfg(feature = "duckdb")] if request.reader.starts_with("duckdb://") { - // Create query executor that handles shared state vs new reader - let execute_query = |sql: &str| -> Result { - if request.reader == "duckdb://memory" && state.reader.is_some() { - let reader_mutex = state.reader.as_ref().unwrap(); - let reader = reader_mutex.lock().map_err(|e| { - GgsqlError::InternalError(format!("Failed to lock reader: {}", e)) - })?; - reader.execute(sql) - } else { - let reader = DuckDBReader::from_connection_string(&request.reader)?; - reader.execute(sql) - } - }; - - // Prepare data using shared execution logic - let prepared = prepare_data_with_executor(&request.query, execute_query)?; - - // Get metadata from available data - let (rows, columns) = if let Some(df) = 
prepared.data.get("__global__") { - let (r, _) = df.shape(); - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (r, cols) + // Use shared reader or create new one + let prepared = if request.reader == "duckdb://memory" && state.reader.is_some() { + let reader_mutex = state.reader.as_ref().unwrap(); + let reader = reader_mutex.lock().map_err(|e| { + GgsqlError::InternalError(format!("Failed to lock reader: {}", e)) + })?; + prepare(&request.query, &*reader)? } else { - // Use first available data for metadata - let df = prepared.data.values().next().unwrap(); - let (r, _) = df.shape(); - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (r, cols) + let reader = DuckDBReader::from_connection_string(&request.reader)?; + prepare(&request.query, &reader)? }; - let first_spec = &prepared.specs[0]; + // Get metadata + let metadata = prepared.metadata(); // Generate visualization output using writer #[cfg(feature = "vegalite")] if request.writer == "vegalite" { let writer = VegaLiteWriter::new(); - let json_output = writer.write(first_spec, &prepared.data)?; + let json_output = prepared.render(&writer)?; let spec_value: serde_json::Value = serde_json::from_str(&json_output) .map_err(|e| GgsqlError::WriterError(format!("Failed to parse JSON: {}", e)))?; + let plot = prepared.plot(); + let result = QueryResult { spec: spec_value, metadata: QueryMetadata { - rows, - columns, - global_mappings: format!("{:?}", first_spec.global_mappings), - layers: first_spec.layers.len(), + rows: metadata.rows, + columns: metadata.columns.clone(), + global_mappings: format!("{:?}", plot.global_mappings), + layers: plot.layers.len(), }, }; @@ -525,6 +502,39 @@ async fn query_handler( } /// POST /api/v1/parse - Parse a ggsql query +#[cfg(feature = "duckdb")] +async fn parse_handler( + Json(request): Json, +) -> Result>, ApiErrorResponse> { + info!("Parsing query: {} chars", request.query.len()); + + // Split 
query (for backwards compatibility) + let (sql_part, viz_part) = parser::split_query(&request.query)?; + + // Parse using new API + let parsed = parse(&request.query)?; + + // Convert specs to JSON + let specs_json: Vec = parsed + .plots() + .iter() + .map(|spec| serde_json::to_value(spec).unwrap_or(serde_json::Value::Null)) + .collect(); + + let result = ParseResult { + sql_portion: sql_part, + viz_portion: viz_part, + specs: specs_json, + }; + + Ok(Json(ApiSuccess { + status: "success".to_string(), + data: result, + })) +} + +/// POST /api/v1/parse - Parse a ggsql query +#[cfg(not(feature = "duckdb"))] async fn parse_handler( Json(request): Json, ) -> Result>, ApiErrorResponse> { From b760081fb550a10a527fc53a8da3610bc9121892 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 29 Jan 2026 09:48:41 +0000 Subject: [PATCH 02/12] Add arrow table registration for duckdb --- Cargo.toml | 3 +- src/Cargo.toml | 3 +- src/reader/duckdb.rs | 210 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 212 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cd5b672c..0339c104 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,8 @@ csscolorparser = "0.8.1" polars = { version = "0.52", features = ["lazy", "sql", "ipc"] } # Readers -duckdb = { version = "1.1", features = ["bundled"] } +duckdb = { version = "1.1", features = ["bundled", "vtab-arrow"] } +arrow = { version = "56", default-features = false, features = ["ipc"] } postgres = "0.19" sqlx = { version = "0.8", features = ["postgres", "runtime-tokio-rustls"] } rusqlite = "0.32" diff --git a/src/Cargo.toml b/src/Cargo.toml index 75cbd1f6..dd60aac1 100644 --- a/src/Cargo.toml +++ b/src/Cargo.toml @@ -32,6 +32,7 @@ polars.workspace = true # Readers duckdb = { workspace = true, optional = true } +arrow = { workspace = true, optional = true } postgres = { workspace = true, optional = true } sqlx = { workspace = true, optional = true } rusqlite = { workspace = true, optional = true } @@ -69,7 +70,7 
@@ proptest.workspace = true
 
 [features]
 default = ["duckdb", "sqlite", "vegalite"]
-duckdb = ["dep:duckdb"]
+duckdb = ["dep:duckdb", "dep:arrow"]
 postgres = ["dep:postgres"]
 sqlite = ["dep:rusqlite"]
 vegalite = []
diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs
index 113ceea7..b3cf46d7 100644
--- a/src/reader/duckdb.rs
+++ b/src/reader/duckdb.rs
@@ -5,8 +5,12 @@ use crate::reader::data::init_builtin_data;
 use crate::reader::{connection::ConnectionInfo, Reader};
 use crate::{DataFrame, GgsqlError, Result};
 
+use arrow::ipc::reader::FileReader;
+use duckdb::vtab::arrow::{arrow_recordbatch_to_query_params, ArrowVTab};
 use duckdb::{params, Connection};
+use polars::io::SerWriter;
 use polars::prelude::*;
+use std::io::Cursor;
 
 /// DuckDB database reader
 ///
@@ -65,6 +69,12 @@ impl DuckDBReader {
             }
         };
 
+        // Register Arrow virtual table function for DataFrame registration
+        conn.register_table_function::<ArrowVTab>("arrow")
+            .map_err(|e| {
+                GgsqlError::ReaderError(format!("Failed to register arrow function: {}", e))
+            })?;
+
         Ok(Self { conn })
     }
 
@@ -74,6 +84,81 @@ impl DuckDBReader {
     pub fn connection(&self) -> &Connection {
         &self.conn
     }
+
+    /// Check if a table exists in the database
+    fn table_exists(&self, name: &str) -> Result<bool> {
+        let sql = "SELECT COUNT(*) FROM information_schema.tables WHERE table_name = ?";
+        let count: i64 = self
+            .conn
+            .query_row(sql, [name], |row| row.get(0))
+            .unwrap_or(0);
+        Ok(count > 0)
+    }
+}
+
+/// Validate a table name
+fn validate_table_name(name: &str) -> Result<()> {
+    if name.is_empty() {
+        return Err(GgsqlError::ReaderError("Table name cannot be empty".into()));
+    }
+
+    // Reject characters that could break double-quoted identifiers or cause issues
+    let forbidden = ['"', '\0', '\n', '\r'];
+    for ch in forbidden {
+        if name.contains(ch) {
+            return Err(GgsqlError::ReaderError(format!(
+                "Table name '{}' contains invalid character '{}'",
+                name,
+                ch.escape_default()
+            )));
+        }
+    }
+
+    // Reasonable length limit
+    if name.len() > 128 {
+ return Err(GgsqlError::ReaderError(format!( + "Table name '{}' exceeds maximum length of 128 characters", + name + ))); + } + + Ok(()) +} + +/// Convert a Polars DataFrame to DuckDB Arrow query parameters via IPC serialization +fn dataframe_to_arrow_params(df: DataFrame) -> Result<[usize; 2]> { + // Serialize DataFrame to IPC format + let mut buffer = Vec::new(); + { + let mut writer = IpcWriter::new(&mut buffer); + writer.finish(&mut df.clone()).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to serialize DataFrame: {}", e)) + })?; + } + + // Read IPC into arrow crate's RecordBatch + let cursor = Cursor::new(buffer); + let reader = FileReader::try_new(cursor, None) + .map_err(|e| GgsqlError::ReaderError(format!("Failed to read IPC: {}", e)))?; + + // Collect all batches and concatenate if needed + let batches: Vec<_> = reader.filter_map(|r| r.ok()).collect(); + + if batches.is_empty() { + return Err(GgsqlError::ReaderError( + "DataFrame produced no Arrow batches".into(), + )); + } + + // For single batch, use directly; for multiple, concatenate + let rb = if batches.len() == 1 { + batches.into_iter().next().unwrap() + } else { + arrow::compute::concat_batches(&batches[0].schema(), &batches) + .map_err(|e| GgsqlError::ReaderError(format!("Failed to concat batches: {}", e)))? + }; + + Ok(arrow_recordbatch_to_query_params(rb)) } /// Helper struct for building typed columns from rows @@ -414,12 +499,35 @@ impl Reader for DuckDBReader { Ok(df) } - fn register(&mut self, _name: &str, _df: DataFrame) -> Result<()> { + fn register(&mut self, name: &str, df: DataFrame) -> Result<()> { + // Validate table name + validate_table_name(name)?; + + // Check for duplicates + if self.table_exists(name)? 
{ + return Err(GgsqlError::ReaderError(format!( + "Table '{}' already exists", + name + ))); + } + + // Convert DataFrame to Arrow query params + let params = dataframe_to_arrow_params(df)?; + + // Create temp table from Arrow data + let sql = format!( + "CREATE TEMP TABLE \"{}\" AS SELECT * FROM arrow(?, ?)", + name + ); + self.conn.execute(&sql, params).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to register table '{}': {}", name, e)) + })?; + Ok(()) } fn supports_register(&self) -> bool { - false + true } } @@ -496,4 +604,102 @@ mod tests { assert_eq!(df.shape(), (2, 2)); assert_eq!(df.get_column_names(), vec!["region", "total"]); } + + #[test] + fn test_register_and_query() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create a DataFrame + let df = DataFrame::new(vec![ + Column::new("x".into(), vec![1i32, 2, 3]), + Column::new("y".into(), vec![10i32, 20, 30]), + ]) + .unwrap(); + + // Register the DataFrame + reader.register("my_table", df).unwrap(); + + // Query the registered table + let result = reader.execute("SELECT * FROM my_table ORDER BY x").unwrap(); + assert_eq!(result.shape(), (3, 2)); + assert_eq!(result.get_column_names(), vec!["x", "y"]); + } + + #[test] + fn test_register_duplicate_name_errors() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let df1 = DataFrame::new(vec![Column::new("a".into(), vec![1i32])]).unwrap(); + let df2 = DataFrame::new(vec![Column::new("b".into(), vec![2i32])]).unwrap(); + + // First registration should succeed + reader.register("dup_table", df1).unwrap(); + + // Second registration with same name should fail + let result = reader.register("dup_table", df2); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("already exists")); + } + + #[test] + fn test_register_invalid_table_names() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let 
df = DataFrame::new(vec![Column::new("a".into(), vec![1i32])]).unwrap(); + + // Empty name + let result = reader.register("", df.clone()); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("cannot be empty")); + + // Name with double quote + let result = reader.register("bad\"name", df.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid character")); + + // Name with null byte + let result = reader.register("bad\0name", df.clone()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid character")); + + // Name too long + let long_name = "a".repeat(200); + let result = reader.register(&long_name, df); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("exceeds maximum length")); + } + + #[test] + fn test_supports_register() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + assert!(reader.supports_register()); + } + + #[test] + fn test_register_empty_dataframe() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create an empty DataFrame with schema + let df = DataFrame::new(vec![ + Column::new("x".into(), Vec::::new()), + Column::new("y".into(), Vec::::new()), + ]) + .unwrap(); + + reader.register("empty_table", df).unwrap(); + + // Query should return empty result with correct schema + let result = reader.execute("SELECT * FROM empty_table").unwrap(); + assert_eq!(result.shape(), (0, 2)); + assert_eq!(result.get_column_names(), vec!["x", "y"]); + } } From 8181fea5cb1518411d36ee9af25e9b2b2190a805 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 29 Jan 2026 12:55:53 +0000 Subject: [PATCH 03/12] Rework Python bindings --- CLAUDE.md | 108 +++- README.md | 43 +- ggsql-python/Cargo.toml | 2 +- ggsql-python/README.md | 249 +++++++-- ggsql-python/python/ggsql/__init__.py | 38 +- ggsql-python/src/lib.rs | 737 
+++++++++++++++++++++++--- ggsql-python/tests/test_ggsql.py | 414 ++++++++++++++- src/doc/API.md | 3 - src/rest.rs | 27 +- 9 files changed, 1440 insertions(+), 181 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index b5bafd99..aa093221 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -864,17 +864,18 @@ When running in Positron IDE, the extension provides enhanced functionality: ### 8. Python Bindings (`ggsql-python/`) -**Responsibility**: Python bindings for ggsql, enabling Python users to render Altair charts using ggsql's VISUALISE syntax. +**Responsibility**: Python bindings for ggsql, enabling Python users to create visualizations using ggsql's VISUALISE syntax. **Features**: - PyO3-based Rust bindings compiled to a native Python extension +- Two-stage API mirroring the Rust API: `prepare()` → `render()` +- DuckDB reader with DataFrame registration +- Custom Python reader support: any object with `execute(sql) -> DataFrame` method - Works with any narwhals-compatible DataFrame (polars, pandas, etc.) 
- LazyFrames are collected automatically -- Returns native `altair.Chart` objects for easy display and customization -- Two-stage API: `prepare()` → `render()` -- DuckDB reader with DataFrame registration -- Query introspection (SQL, layer queries, stat queries) +- Returns native `altair.Chart` objects via `render_altair()` convenience function +- Query validation and introspection (SQL, layer queries, stat queries) **Installation**: @@ -902,8 +903,9 @@ prepared = ggsql.prepare( reader ) -# Inspect +# Inspect metadata print(f"Rows: {prepared.metadata()['rows']}") +print(f"Columns: {prepared.metadata()['columns']}") print(f"SQL: {prepared.sql()}") # Render to Vega-Lite JSON @@ -911,21 +913,95 @@ writer = ggsql.VegaLiteWriter() json_output = prepared.render(writer) ``` +**Convenience Function** (`render_altair`): + +For quick visualizations without explicit reader setup: + +```python +import ggsql +import polars as pl + +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + +# Render DataFrame to Altair chart in one call +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +chart.display() # In Jupyter +``` + +**Query Validation**: + +```python +# Validate syntax without execution +validated = ggsql.validate( + "SELECT x, y FROM data VISUALISE x, y DRAW point" +) +print(f"Valid: {validated.valid()}") +print(f"Has VISUALISE: {validated.has_visual()}") +print(f"SQL portion: {validated.sql()}") +print(f"Errors: {validated.errors()}") +``` + **Classes**: -| Class | Description | -| -------------------------- | ---------------------------- | -| `DuckDBReader(connection)` | Database reader | -| `VegaLiteWriter()` | Vega-Lite JSON output writer | -| `Validated` | Result of `validate()` | +| Class | Description | +| -------------------------- | -------------------------------------------- | +| `DuckDBReader(connection)` | Database reader with DataFrame registration | +| `VegaLiteWriter()` | Vega-Lite JSON output writer | +| `Validated` | Result of `validate()` 
with query inspection | +| `Prepared` | Result of `prepare()`, ready for rendering | **Functions**: -| Function | Description | -| ------------------------ | ------------------------------------------------ | -| `validate(query)` | Syntax/semantic validation with query inspection | -| `prepare(query, reader)` | Full preparation pipeline | -| `render_altair(df, viz)` | Render DataFrame to Altair chart | +| Function | Description | +| ------------------------ | ------------------------------------------------- | +| `validate(query)` | Syntax/semantic validation with query inspection | +| `prepare(query, reader)` | Full preparation (reader can be native or custom) | +| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | + +**Prepared Object Methods**: + +| Method | Description | +| ---------------- | -------------------------------------------- | +| `render(writer)` | Generate Vega-Lite JSON | +| `metadata()` | Get rows, columns, layer_count | +| `sql()` | Get the SQL portion | +| `visual()` | Get the VISUALISE portion | +| `layer_count()` | Number of DRAW layers | +| `data()` | Get the main DataFrame | +| `layer_data(i)` | Get layer-specific DataFrame (if filtered) | +| `stat_data(i)` | Get stat transform DataFrame (if applicable) | +| `layer_sql(i)` | Get layer filter SQL (if applicable) | +| `stat_sql(i)` | Get stat transform SQL (if applicable) | +| `warnings()` | Get validation warnings | + +**Custom Python Readers**: + +Any Python object with an `execute(sql: str) -> polars.DataFrame` method can be used as a reader: + +```python +import ggsql +import polars as pl + +class MyReader: + """Custom reader that returns static data.""" + + def execute(self, sql: str) -> pl.DataFrame: + return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + +# Use custom reader with prepare() +reader = MyReader() +prepared = ggsql.prepare( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) +``` + +Optional methods for custom readers: + +- 
`supports_register() -> bool` - Return `True` if registration is supported +- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a table + +Native readers (e.g., `DuckDBReader`) use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. **Dependencies**: diff --git a/README.md b/README.md index 43d70847..46ff2b50 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ THEME minimal - ✅ REST API server (`ggsql-rest`) with CORS support - ✅ Jupyter kernel (`ggsql-jupyter`) with inline Vega-Lite visualizations - ✅ VS Code extension (`ggsql-vscode`) with syntax highlighting and Positron IDE integration +- ✅ Python bindings (`ggsql-python`) with Altair chart output **Planned:** @@ -93,7 +94,9 @@ ggsql/ │ ├── ggsql-jupyter/ # Jupyter kernel │ -└── ggsql-vscode/ # VS Code extension +├── ggsql-vscode/ # VS Code extension +│ +└── ggsql-python/ # Python bindings ``` ## Development Workflow @@ -297,6 +300,44 @@ When running in Positron IDE, the extension provides additional features: - **Language runtime registration** for executing ggsql queries directly within Positron - **Plot pane integration** - visualizations are automatically routed to Positron's Plots pane +## Python Bindings + +The `ggsql-python` package provides Python bindings for using ggsql with DataFrames. 
+ +### Installation + +```bash +cd ggsql-python +pip install maturin +maturin develop +``` + +### Usage + +```python +import ggsql +import polars as pl + +# Simple usage with render_altair +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +chart.display() + +# Two-stage API for full control +reader = ggsql.DuckDBReader("duckdb://memory") +reader.register("data", df) + +prepared = ggsql.prepare( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) + +writer = ggsql.VegaLiteWriter() +json_output = prepared.render(writer) +``` + +See the [ggsql-python README](ggsql-python/README.md) for complete API documentation. + ## CLI ### Installation diff --git a/ggsql-python/Cargo.toml b/ggsql-python/Cargo.toml index 62229afd..8f73e6f8 100644 --- a/ggsql-python/Cargo.toml +++ b/ggsql-python/Cargo.toml @@ -12,7 +12,7 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.26", features = ["extension-module"] } polars = { workspace = true, features = ["ipc"] } -ggsql = { path = "../src", default-features = false, features = ["vegalite"] } +ggsql = { path = "../src", default-features = false, features = ["duckdb", "vegalite"] } [features] default = [] diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 0d97bbee..3ea2c603 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -2,7 +2,7 @@ Python bindings for [ggsql](https://github.com/georgestagg/ggsql), a SQL extension for declarative data visualization. -This package provides a thin wrapper around the Rust `ggsql` crate, enabling Python users to render Altair charts from DataFrames using ggsql's VISUALISE syntax. +This package provides Python bindings to the Rust `ggsql` crate, enabling Python users to create visualizations using ggsql's VISUALISE syntax with native Altair chart output. 
## Installation @@ -15,6 +15,7 @@ pip install ggsql ### From source Building from source requires: + - Rust toolchain (install via [rustup](https://rustup.rs/)) - Python 3.10+ - [maturin](https://github.com/PyO3/maturin) @@ -39,35 +40,183 @@ maturin build --release pip install target/wheels/ggsql-*.whl ``` -## Usage +## Quick Start + +### Simple Usage with `render_altair` + +For quick visualizations, use the `render_altair` convenience function: ```python import ggsql -import duckdb +import polars as pl + +# Create a DataFrame +df = pl.DataFrame({ + "x": [1, 2, 3, 4, 5], + "y": [10, 20, 15, 30, 25], + "category": ["A", "B", "A", "B", "A"] +}) + +# Render to Altair chart +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + +# Display or save +chart.display() # In Jupyter +chart.save("chart.html") # Save to file +``` + +### Two-Stage API -# Split a ggSQL query into SQL and VISUALISE portions -sql, viz = ggsql.split_query(""" - SELECT date, revenue, region FROM sales - WHERE year = 2024 +For more control, use the two-stage API with explicit reader and writer: + +```python +import ggsql +import polars as pl + +# 1. Create a DuckDB reader +reader = ggsql.DuckDBReader("duckdb://memory") + +# 2. Register your DataFrame as a table +df = pl.DataFrame({ + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "revenue": [100, 150, 120], + "region": ["North", "South", "North"] +}) +reader.register("sales", df) + +# 3. Prepare the visualization +prepared = ggsql.prepare( + """ + SELECT * FROM sales VISUALISE date AS x, revenue AS y, region AS color DRAW line - LABEL title => 'Sales Trends' -""") + LABEL title => 'Sales by Region' + """, + reader +) + +# 4. Inspect metadata +print(f"Rows: {prepared.metadata()['rows']}") +print(f"Columns: {prepared.metadata()['columns']}") +print(f"Layers: {prepared.layer_count()}") + +# 5. 
Inspect SQL/VISUALISE portions and data +print(f"SQL: {prepared.sql()}") +print(f"Visual: {prepared.visual()}") +print(prepared.data()) # Returns polars DataFrame + +# 6. Render to Vega-Lite JSON +writer = ggsql.VegaLiteWriter() +vegalite_json = prepared.render(writer) +print(vegalite_json) +``` -# Execute SQL with DuckDB -df = duckdb.sql(sql).pl() +## API Reference -# Render DataFrame + VISUALISE spec to Altair chart -chart = ggsql.render_altair(df, viz) +### Classes -# Display or save the chart -chart.display() # In Jupyter -chart.save("chart.html") # Save to file +#### `DuckDBReader(connection: str)` + +Database reader that executes SQL and manages DataFrames. + +```python +reader = ggsql.DuckDBReader("duckdb://memory") # In-memory database +reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database ``` -### Mapping styles +**Methods:** + +- `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table +- `execute(sql: str) -> polars.DataFrame` - Execute SQL and return results +- `supports_register() -> bool` - Check if registration is supported -The `render_altair()` function supports various mapping styles: +#### `VegaLiteWriter()` + +Writer that generates Vega-Lite v6 JSON specifications. + +```python +writer = ggsql.VegaLiteWriter() +json_output = prepared.render(writer) +``` + +#### `Validated` + +Result of `validate()` containing query analysis without SQL execution. + +**Methods:** + +- `valid() -> bool` - Whether the query is syntactically and semantically valid +- `has_visual() -> bool` - Whether the query contains a VISUALISE clause +- `sql() -> str` - The SQL portion (before VISUALISE) +- `visual() -> str` - The VISUALISE portion +- `errors() -> list[dict]` - Validation errors with messages and locations +- `warnings() -> list[dict]` - Validation warnings + +#### `Prepared` + +Result of `prepare()`, containing resolved visualization ready for rendering. 
+ +**Methods:** + +- `render(writer: VegaLiteWriter) -> str` - Generate Vega-Lite JSON +- `metadata() -> dict` - Get `{"rows": int, "columns": list[str], "layer_count": int}` +- `sql() -> str` - The executed SQL query +- `visual() -> str` - The VISUALISE clause +- `layer_count() -> int` - Number of DRAW layers +- `data() -> polars.DataFrame | None` - Main query result DataFrame +- `layer_data(index: int) -> polars.DataFrame | None` - Layer-specific data (if filtered) +- `stat_data(index: int) -> polars.DataFrame | None` - Statistical transform data +- `layer_sql(index: int) -> str | None` - Layer filter SQL +- `stat_sql(index: int) -> str | None` - Stat transform SQL +- `warnings() -> list[dict]` - Validation warnings from preparation + +### Functions + +#### `validate(query: str) -> Validated` + +Validate query syntax and semantics without executing SQL. + +```python +validated = ggsql.validate("SELECT x, y FROM data VISUALISE x, y DRAW point") +if validated.valid(): + print("Query is valid!") +else: + for error in validated.errors(): + print(f"Error: {error['message']}") +``` + +#### `prepare(query: str, reader: DuckDBReader) -> Prepared` + +Parse, validate, and execute a ggsql query. + +```python +reader = ggsql.DuckDBReader("duckdb://memory") +prepared = ggsql.prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +``` + +#### `render_altair(df, viz: str, **kwargs) -> altair.Chart` + +Convenience function to render a DataFrame with a VISUALISE spec to an Altair chart. + +**Parameters:** + +- `df` - Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. +- `viz` - The VISUALISE specification string +- `**kwargs` - Additional arguments passed to `altair.Chart.from_json()` (e.g., `validate=False`) + +**Returns:** An Altair chart object (Chart, LayerChart, FacetChart, etc.) 
+ +```python +import polars as pl +import ggsql + +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +``` + +## Examples + +### Mapping Styles ```python df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30], "category": ["A", "B", "A"]}) @@ -85,41 +234,65 @@ ggsql.render_altair(df, "VISUALISE * DRAW point") ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") ``` -## API +### Custom Readers -### `split_query(query: str) -> tuple[str, str]` +You can use any Python object with an `execute(sql: str) -> polars.DataFrame` method as a reader. This enables integration with any data source. -Split a ggSQL query into SQL and VISUALISE portions. +```python +import ggsql +import polars as pl + +class CSVReader: + """Custom reader that loads data from CSV files.""" + + def __init__(self, data_dir: str): + self.data_dir = data_dir + + def execute(self, sql: str) -> pl.DataFrame: + # Simple implementation: ignore SQL and return fixed data + # A real implementation would parse SQL to determine which file to load + return pl.read_csv(f"{self.data_dir}/data.csv") + +# Use custom reader with prepare() +reader = CSVReader("/path/to/data") +prepared = ggsql.prepare( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader +) +writer = ggsql.VegaLiteWriter() +json_output = prepared.render(writer) +``` -**Parameters:** -- `query`: The full ggSQL query string +**Optional methods** for custom readers: -**Returns:** -- Tuple of `(sql_portion, visualise_portion)` +- `supports_register() -> bool` - Return `True` if your reader supports DataFrame registration +- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a queryable table -**Raises:** -- `ValueError`: If the query cannot be parsed +```python +class AdvancedReader: + """Custom reader with registration support.""" -### `render_altair(df, viz, **kwargs) -> altair.Chart` + def __init__(self): + self.tables = {} -Render a 
DataFrame with a VISUALISE specification to an Altair chart. + def execute(self, sql: str) -> pl.DataFrame: + # Your SQL execution logic here + ... -**Parameters:** -- `df`: Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. -- `viz`: The VISUALISE specification string -- `**kwargs`: Additional keyword arguments passed to `altair.Chart.from_json()`. Common options include `validate=False` to skip schema validation. + def supports_register(self) -> bool: + return True -**Returns:** -- An `altair.Chart` object that can be displayed, saved, or further customized + def register(self, name: str, df: pl.DataFrame) -> None: + self.tables[name] = df +``` -**Raises:** -- `ValueError`: If the spec cannot be parsed or rendered +Native readers like `DuckDBReader` use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. ## Development ### Keeping in sync with the monorepo -The `ggsql-python` package is part of the [ggsql monorepo](https://github.com/georgestagg/ggsql) and depends on the Rust `ggsql` crate via a path dependency. When the Rust crate is updated, you may need to rebuild: +The `ggsql-python` package is part of the [ggsql monorepo](https://github.com/posit-dev/ggsql) and depends on the Rust `ggsql` crate via a path dependency. 
When the Rust crate is updated, you may need to rebuild: ```bash cd ggsql-python diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py index dbbb5afb..06b5f720 100644 --- a/ggsql-python/python/ggsql/__init__.py +++ b/ggsql-python/python/ggsql/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations -import io import json from typing import Any, Union @@ -8,9 +7,26 @@ import narwhals as nw from narwhals.typing import IntoFrame -from ggsql._ggsql import split_query, render as _render +from ggsql._ggsql import ( + DuckDBReader, + VegaLiteWriter, + Validated, + Prepared, + validate, + prepare, +) -__all__ = ["split_query", "render_altair"] +__all__ = [ + # Classes + "DuckDBReader", + "VegaLiteWriter", + "Validated", + "Prepared", + # Functions + "validate", + "prepare", + "render_altair", +] __version__ = "0.1.0" # Type alias for any Altair chart type @@ -56,13 +72,19 @@ def render_altair( if not isinstance(df, nw.DataFrame): raise TypeError("df must be a narwhals DataFrame or compatible type") - # Convert to polars and serialize to IPC bytes pl_df = df.to_polars() - buffer = io.BytesIO() - pl_df.write_ipc(buffer) - ipc_bytes = buffer.getvalue() - vegalite_json = _render(ipc_bytes, viz, writer="vegalite") + # Create temporary reader and register data + reader = DuckDBReader("duckdb://memory") + reader.register("__data__", pl_df) + + # Build full query: SELECT * FROM __data__ + VISUALISE clause + query = f"SELECT * FROM __data__ {viz}" + + # Prepare and render + prepared = prepare(query, reader) + writer = VegaLiteWriter() + vegalite_json = prepared.render(writer) # Parse to determine the correct Altair class spec = json.loads(vegalite_json) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 7c472c35..b9d6496d 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -3,100 +3,693 @@ #![allow(clippy::useless_conversion)] use pyo3::prelude::*; -use pyo3::types::PyBytes; -use 
std::collections::{HashMap, HashSet}; +use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::naming::GLOBAL_DATA_KEY; -use ggsql::parser::parse_query; -use ggsql::writer::{VegaLiteWriter, Writer}; -use ggsql::AestheticValue; +use ggsql::api::{prepare as rust_prepare, validate as rust_validate, Prepared, ValidationWarning}; +use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; +use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; +use ggsql::GgsqlError; -use polars::prelude::{DataFrame, IpcReader, SerReader}; +use polars::prelude::{DataFrame, IpcReader, IpcWriter, SerReader, SerWriter}; -#[pyfunction] -fn split_query(query: &str) -> PyResult<(String, String)> { - ggsql::parser::split_query(query) - .map_err(|e| PyErr::new::(e.to_string())) +// ============================================================================ +// Helper Functions for DataFrame Conversion +// ============================================================================ + +/// Convert a Polars DataFrame to a Python polars DataFrame via IPC serialization +fn polars_to_py(py: Python<'_>, df: &DataFrame) -> PyResult> { + let mut buffer = Vec::new(); + IpcWriter::new(&mut buffer) + .finish(&mut df.clone()) + .map_err(|e| { + PyErr::new::(format!( + "Failed to serialize DataFrame: {}", + e + )) + })?; + + let io = py.import("io")?; + let bytes_io = io.call_method1("BytesIO", (PyBytes::new(py, &buffer),))?; + + let polars = py.import("polars")?; + polars + .call_method1("read_ipc", (bytes_io,)) + .map(|obj| obj.into()) } -#[pyfunction] -#[pyo3(signature = (ipc_bytes, viz, *, writer = "vegalite"))] -fn render(ipc_bytes: &Bound<'_, PyBytes>, viz: &str, writer: &str) -> PyResult { - // Read DataFrame from IPC bytes - let bytes = ipc_bytes.as_bytes(); - let cursor = Cursor::new(bytes); - let df: DataFrame = IpcReader::new(cursor).finish().map_err(|e| { - PyErr::new::(format!("Failed to read IPC data: {}", e)) - })?; +/// Convert a Python polars DataFrame to a Rust 
Polars DataFrame via IPC serialization +fn py_to_polars(py: Python<'_>, df: &Bound<'_, PyAny>) -> PyResult { + let io = py.import("io")?; + let bytes_io = io.call_method0("BytesIO")?; + df.call_method1("write_ipc", (&bytes_io,))?; + bytes_io.call_method1("seek", (0i64,))?; - // Parse the visualization spec - // The viz string should be a complete VISUALISE statement - let specs = parse_query(viz) - .map_err(|e| PyErr::new::(e.to_string()))?; + let ipc_bytes: Vec = bytes_io.call_method0("read")?.extract()?; + let cursor = Cursor::new(ipc_bytes); + + IpcReader::new(cursor).finish().map_err(|e| { + PyErr::new::(format!("Failed to read DataFrame: {}", e)) + }) +} - let mut spec = specs.into_iter().next().ok_or_else(|| { - PyErr::new::("No visualization spec found") +/// Convert a Python polars DataFrame to Rust DataFrame - for use inside Python::attach +/// This variant is used by PyReaderBridge where we already hold the GIL. +fn py_to_polars_inner(df: &Bound<'_, PyAny>) -> PyResult { + let py = df.py(); + let io = py.import("io")?; + let bytes_io = io.call_method0("BytesIO")?; + + df.call_method1("write_ipc", (&bytes_io,)).map_err(|_| { + PyErr::new::( + "Reader.execute() must return a polars.DataFrame", + ) })?; - // Get column names for resolving global mappings - let column_names: HashSet<&str> = df.get_column_names().iter().map(|s| s.as_str()).collect(); - - // Merge global mappings into layers and handle wildcards - for layer in &mut spec.layers { - let supported_aesthetics = layer.geom.aesthetics().supported; - - // 1. 
Merge explicit global aesthetics into layer (layer takes precedence) - for (aesthetic, value) in &spec.global_mappings.aesthetics { - if supported_aesthetics.contains(&aesthetic.as_str()) { - layer - .mappings - .aesthetics - .entry(aesthetic.clone()) - .or_insert_with(|| value.clone()); - } + bytes_io.call_method1("seek", (0i64,))?; + let ipc_bytes: Vec = bytes_io.call_method0("read")?.extract()?; + let cursor = Cursor::new(ipc_bytes); + + IpcReader::new(cursor).finish().map_err(|e| { + PyErr::new::(format!( + "Failed to deserialize DataFrame: {}", + e + )) + }) +} + +/// Convert validation errors/warnings to a Python list of dicts +fn errors_to_pylist( + py: Python<'_>, + items: &[(String, Option<(usize, usize)>)], +) -> PyResult> { + let list = PyList::empty(py); + for (message, location) in items { + let dict = PyDict::new(py); + dict.set_item("message", message)?; + if let Some((line, column)) = location { + let loc_dict = PyDict::new(py); + loc_dict.set_item("line", line)?; + loc_dict.set_item("column", column)?; + dict.set_item("location", loc_dict)?; + } else { + dict.set_item("location", py.None())?; } + list.append(dict)?; + } + Ok(list.into()) +} + +/// Convert ValidationWarning slice to Python list format +fn warnings_to_pylist(py: Python<'_>, warnings: &[ValidationWarning]) -> PyResult> { + let items: Vec<_> = warnings + .iter() + .map(|w| { + ( + w.message.clone(), + w.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(); + errors_to_pylist(py, &items) +} + +// ============================================================================ +// PyReaderBridge - Bridges Python reader objects to Rust Reader trait +// ============================================================================ + +/// Bridges a Python reader object to the Rust Reader trait. +/// +/// This allows any Python object with an `execute(sql: str) -> polars.DataFrame` +/// method to be used as a ggsql reader. 
+struct PyReaderBridge { + obj: Py, +} + +impl Reader for PyReaderBridge { + fn execute(&self, sql: &str) -> ggsql::Result { + Python::attach(|py| { + let bound = self.obj.bind(py); + let result = bound + .call_method1("execute", (sql,)) + .map_err(|e| GgsqlError::ReaderError(format!("Reader.execute() failed: {}", e)))?; + py_to_polars_inner(&result).map_err(|e| GgsqlError::ReaderError(e.to_string())) + }) + } + + fn supports_register(&self) -> bool { + Python::attach(|py| { + self.obj + .bind(py) + .call_method0("supports_register") + .and_then(|r| r.extract::()) + .unwrap_or(false) + }) + } + + fn register(&mut self, name: &str, df: DataFrame) -> ggsql::Result<()> { + Python::attach(|py| { + let py_df = + polars_to_py(py, &df).map_err(|e| GgsqlError::ReaderError(e.to_string()))?; + self.obj + .bind(py) + .call_method1("register", (name, py_df)) + .map_err(|e| GgsqlError::ReaderError(format!("Reader.register() failed: {}", e)))?; + Ok(()) + }) + } +} - // 2. Handle wildcard expansion: map columns to aesthetics with matching names - let has_wildcard = layer.mappings.wildcard || spec.global_mappings.wildcard; - if has_wildcard { - for &aes in supported_aesthetics { - // Only create mapping if column exists in the dataframe - if column_names.contains(aes) { - layer - .mappings - .aesthetics - .entry(aes.to_string()) - .or_insert_with(|| AestheticValue::standard_column(aes)); - } +// ============================================================================ +// Native Reader Detection Macro +// ============================================================================ + +/// Macro to try native readers and fall back to bridge. +/// Adding new native readers = add to the macro invocation list. +macro_rules! 
try_native_readers { + ($query:expr, $reader:expr, $($native_type:ty),*) => {{ + $( + if let Ok(native) = $reader.downcast::<$native_type>() { + return rust_prepare($query, &native.borrow().inner) + .map(|p| PyPrepared { inner: p }) + .map_err(|e| PyErr::new::(e.to_string())); } - } + )* + }}; +} + +// ============================================================================ +// PyDuckDBReader +// ============================================================================ + +/// DuckDB database reader for executing SQL queries. +/// +/// Creates an in-memory or file-based DuckDB connection that can execute +/// SQL queries and register DataFrames as queryable tables. +/// +/// Examples +/// -------- +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> df = reader.execute("SELECT 1 as x, 2 as y") +/// +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> reader.register("data", pl.DataFrame({"x": [1, 2, 3]})) +/// >>> df = reader.execute("SELECT * FROM data WHERE x > 1") +#[pyclass(name = "DuckDBReader", unsendable)] +struct PyDuckDBReader { + inner: RustDuckDBReader, +} + +#[pymethods] +impl PyDuckDBReader { + /// Create a new DuckDB reader from a connection string. + /// + /// Parameters + /// ---------- + /// connection : str + /// Connection string. Use "duckdb://memory" for in-memory database + /// or "duckdb://path/to/file.db" for file-based database. + /// + /// Returns + /// ------- + /// DuckDBReader + /// A configured DuckDB reader instance. + /// + /// Raises + /// ------ + /// ValueError + /// If the connection string is invalid or the database cannot be opened. + #[new] + fn new(connection: &str) -> PyResult { + let inner = RustDuckDBReader::from_connection_string(connection) + .map_err(|e| PyErr::new::(e.to_string()))?; + Ok(Self { inner }) } - // Compute aesthetic labels from column names - spec.compute_aesthetic_labels(); + /// Register a DataFrame as a queryable table. 
+ /// + /// After registration, the DataFrame can be queried by name in SQL. + /// + /// Parameters + /// ---------- + /// name : str + /// The table name to register under. + /// df : polars.DataFrame + /// The DataFrame to register. Must be a polars DataFrame. + /// + /// Raises + /// ------ + /// ValueError + /// If registration fails or the table name is invalid. + fn register(&mut self, py: Python<'_>, name: &str, df: &Bound<'_, PyAny>) -> PyResult<()> { + let rust_df = py_to_polars(py, df)?; + self.inner + .register(name, rust_df) + .map_err(|e| PyErr::new::(e.to_string())) + } - // Create data map with the DataFrame as global data - let mut data_map: HashMap = HashMap::new(); - data_map.insert(GLOBAL_DATA_KEY.to_string(), df); + /// Execute a SQL query and return the result as a DataFrame. + /// + /// Parameters + /// ---------- + /// sql : str + /// The SQL query to execute. + /// + /// Returns + /// ------- + /// polars.DataFrame + /// The query result as a polars DataFrame. + /// + /// Raises + /// ------ + /// ValueError + /// If the SQL is invalid or execution fails. + fn execute(&self, py: Python<'_>, sql: &str) -> PyResult> { + let df = self + .inner + .execute(sql) + .map_err(|e| PyErr::new::(e.to_string()))?; + polars_to_py(py, &df) + } - // Write using the specified writer - match writer { - "vegalite" => { - let w = VegaLiteWriter::new(); - w.write(&spec, &data_map) - .map_err(|e| PyErr::new::(e.to_string())) + /// Check if this reader supports DataFrame registration. + /// + /// Returns + /// ------- + /// bool + /// True if register() is supported, False otherwise. + fn supports_register(&self) -> bool { + self.inner.supports_register() + } +} + +// ============================================================================ +// PyVegaLiteWriter +// ============================================================================ + +/// Vega-Lite JSON output writer. +/// +/// Converts prepared visualization specifications to Vega-Lite v6 JSON. 
+/// +/// Examples +/// -------- +/// >>> writer = VegaLiteWriter() +/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> json_output = prepared.render(writer) +#[pyclass(name = "VegaLiteWriter")] +struct PyVegaLiteWriter { + inner: RustVegaLiteWriter, +} + +#[pymethods] +impl PyVegaLiteWriter { + /// Create a new Vega-Lite writer. + /// + /// Returns + /// ------- + /// VegaLiteWriter + /// A configured Vega-Lite writer instance. + #[new] + fn new() -> Self { + Self { + inner: RustVegaLiteWriter::new(), } - _ => Err(PyErr::new::(format!( - "Unknown writer: {}", - writer - ))), } } +// ============================================================================ +// PyValidated +// ============================================================================ + +/// Result of validate() - query inspection and validation without SQL execution. +/// +/// Contains information about query structure and any validation errors/warnings. +/// The tree() method from Rust is not exposed as it's not useful in Python. +#[pyclass(name = "Validated")] +struct PyValidated { + sql: String, + visual: String, + has_visual: bool, + valid: bool, + errors: Vec<(String, Option<(usize, usize)>)>, + warnings: Vec<(String, Option<(usize, usize)>)>, +} + +#[pymethods] +impl PyValidated { + /// Whether the query contains a VISUALISE clause. + /// + /// Returns + /// ------- + /// bool + /// True if the query has a VISUALISE clause. + fn has_visual(&self) -> bool { + self.has_visual + } + + /// The SQL portion (before VISUALISE). + /// + /// Returns + /// ------- + /// str + /// The SQL part of the query. + fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + /// + /// Returns + /// ------- + /// str + /// The VISUALISE part of the query. + fn visual(&self) -> &str { + &self.visual + } + + /// Whether the query is valid (no errors). 
+ /// + /// Returns + /// ------- + /// bool + /// True if the query is syntactically and semantically valid. + fn valid(&self) -> bool { + self.valid + } + + /// Validation errors (fatal issues). + /// + /// Returns + /// ------- + /// list[dict] + /// List of error dictionaries with 'message' and optional 'location' keys. + fn errors(&self, py: Python<'_>) -> PyResult> { + errors_to_pylist(py, &self.errors) + } + + /// Validation warnings (non-fatal issues). + /// + /// Returns + /// ------- + /// list[dict] + /// List of warning dictionaries with 'message' and optional 'location' keys. + fn warnings(&self, py: Python<'_>) -> PyResult> { + errors_to_pylist(py, &self.warnings) + } +} + +// ============================================================================ +// PyPrepared +// ============================================================================ + +/// Result of prepare(), ready for rendering. +/// +/// Contains the resolved plot specification, data, and metadata. +/// Use render() to generate Vega-Lite JSON output. +/// +/// Examples +/// -------- +/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> print(f"Rows: {prepared.metadata()['rows']}") +/// >>> json_output = prepared.render(VegaLiteWriter()) +#[pyclass(name = "Prepared")] +struct PyPrepared { + inner: Prepared, +} + +#[pymethods] +impl PyPrepared { + /// Render to output format (Vega-Lite JSON). + /// + /// Parameters + /// ---------- + /// writer : VegaLiteWriter + /// The writer to use for rendering. + /// + /// Returns + /// ------- + /// str + /// The Vega-Lite JSON specification as a string. + /// + /// Raises + /// ------ + /// ValueError + /// If rendering fails. + fn render(&self, writer: &PyVegaLiteWriter) -> PyResult { + self.inner + .render(&writer.inner) + .map_err(|e| PyErr::new::(e.to_string())) + } + + /// Get visualization metadata. 
+ /// + /// Returns + /// ------- + /// dict + /// Dictionary with 'rows', 'columns', and 'layer_count' keys. + fn metadata(&self, py: Python<'_>) -> PyResult> { + let m = self.inner.metadata(); + let dict = PyDict::new(py); + dict.set_item("rows", m.rows)?; + dict.set_item("columns", m.columns.clone())?; + dict.set_item("layer_count", m.layer_count)?; + Ok(dict.into()) + } + + /// The main SQL query that was executed. + /// + /// Returns + /// ------- + /// str + /// The SQL query string. + fn sql(&self) -> &str { + self.inner.sql() + } + + /// The VISUALISE portion (raw text). + /// + /// Returns + /// ------- + /// str + /// The VISUALISE clause text. + fn visual(&self) -> &str { + self.inner.visual() + } + + /// Number of layers. + /// + /// Returns + /// ------- + /// int + /// The number of DRAW clauses in the visualization. + fn layer_count(&self) -> usize { + self.inner.layer_count() + } + + /// Get global data (main query result). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The main query result DataFrame, or None if not available. + fn data(&self, py: Python<'_>) -> PyResult>> { + self.inner.data().map(|df| polars_to_py(py, df)).transpose() + } + + /// Get layer-specific data (from FILTER or FROM clause). + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The layer-specific DataFrame, or None if the layer uses global data. + fn layer_data(&self, py: Python<'_>, index: usize) -> PyResult>> { + self.inner + .layer_data(index) + .map(|df| polars_to_py(py, df)) + .transpose() + } + + /// Get stat transform data (e.g., histogram bins, density estimates). + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// polars.DataFrame | None + /// The stat transform DataFrame, or None if no stat transform. 
+ fn stat_data(&self, py: Python<'_>, index: usize) -> PyResult>> { + self.inner + .stat_data(index) + .map(|df| polars_to_py(py, df)) + .transpose() + } + + /// Layer filter/source query, or None if using global data. + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// str | None + /// The filter SQL query, or None if the layer uses global data directly. + fn layer_sql(&self, index: usize) -> Option { + self.inner.layer_sql(index).map(|s| s.to_string()) + } + + /// Stat transform query, or None if no stat transform. + /// + /// Parameters + /// ---------- + /// index : int + /// The layer index (0-based). + /// + /// Returns + /// ------- + /// str | None + /// The stat transform SQL query, or None if no stat transform. + fn stat_sql(&self, index: usize) -> Option { + self.inner.stat_sql(index).map(|s| s.to_string()) + } + + /// Validation warnings from preparation. + /// + /// Returns + /// ------- + /// list[dict] + /// List of warning dictionaries with 'message' and optional 'location' keys. + fn warnings(&self, py: Python<'_>) -> PyResult> { + warnings_to_pylist(py, self.inner.warnings()) + } +} + +// ============================================================================ +// Module Functions +// ============================================================================ + +/// Validate query syntax and semantics without executing SQL. +/// +/// Parameters +/// ---------- +/// query : str +/// The ggsql query to validate. +/// +/// Returns +/// ------- +/// Validated +/// Validation result with query inspection methods. +/// +/// Raises +/// ------ +/// ValueError +/// If validation fails unexpectedly (not for syntax errors, which are captured). 
+#[pyfunction] +fn validate(query: &str) -> PyResult { + let v = rust_validate(query) + .map_err(|e| PyErr::new::(e.to_string()))?; + + Ok(PyValidated { + sql: v.sql().to_string(), + visual: v.visual().to_string(), + has_visual: v.has_visual(), + valid: v.valid(), + errors: v + .errors() + .iter() + .map(|e| { + ( + e.message.clone(), + e.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(), + warnings: v + .warnings() + .iter() + .map(|w| { + ( + w.message.clone(), + w.location.as_ref().map(|l| (l.line, l.column)), + ) + }) + .collect(), + }) +} + +/// Prepare a query for visualization. Main entry point for the Rust API. +/// +/// Parameters +/// ---------- +/// query : str +/// The ggsql query to prepare. +/// reader : DuckDBReader | object +/// The database reader to execute SQL against. Can be a native DuckDBReader +/// for optimal performance, or any Python object with an +/// `execute(sql: str) -> polars.DataFrame` method. +/// +/// Returns +/// ------- +/// Prepared +/// A prepared visualization ready for rendering. +/// +/// Raises +/// ------ +/// ValueError +/// If parsing, validation, or SQL execution fails. +/// +/// Examples +/// -------- +/// >>> # Using native reader (fast path) +/// >>> reader = DuckDBReader("duckdb://memory") +/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> json_output = prepared.render(VegaLiteWriter()) +/// +/// >>> # Using custom Python reader +/// >>> class MyReader: +/// ... def execute(self, sql: str) -> pl.DataFrame: +/// ... 
return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +/// >>> reader = MyReader() +/// >>> prepared = prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) +#[pyfunction] +fn prepare(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { + // Fast path: try all known native reader types + // Add new native readers to this list as they're implemented + try_native_readers!(query, reader, PyDuckDBReader); + + // Bridge path: wrap Python object as Reader + let bridge = PyReaderBridge { + obj: reader.clone().unbind(), + }; + rust_prepare(query, &bridge) + .map(|p| PyPrepared { inner: p }) + .map_err(|e| PyErr::new::(e.to_string())) +} + +// ============================================================================ +// Module Registration +// ============================================================================ + #[pymodule] fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_function(wrap_pyfunction!(split_query, m)?)?; - m.add_function(wrap_pyfunction!(render, m)?)?; + // Classes + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + // Functions + m.add_function(wrap_pyfunction!(validate, m)?)?; + m.add_function(wrap_pyfunction!(prepare, m)?)?; + Ok(()) } diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 9df03779..970dcf5a 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -3,10 +3,13 @@ These tests focus on Python-specific logic: - DataFrame conversion via narwhals - Return type handling +- Two-stage API (prepare -> render) Rust logic (parsing, Vega-Lite generation) is tested in the Rust test suite. 
""" +import json + import pytest import polars as pl import altair @@ -14,20 +17,188 @@ import ggsql -class TestSplitQuery: - """Tests for split_query() function.""" +class TestValidate: + """Tests for validate() function.""" + + def test_valid_query_with_visualise(self): + validated = ggsql.validate( + "SELECT 1 AS x, 2 AS y VISUALISE DRAW point MAPPING x AS x, y AS y" + ) + assert validated.has_visual() + assert validated.valid() + assert "SELECT" in validated.sql() + assert "VISUALISE" in validated.visual() + assert len(validated.errors()) == 0 + + def test_valid_query_without_visualise(self): + validated = ggsql.validate("SELECT 1 AS x, 2 AS y") + assert not validated.has_visual() + assert validated.valid() + assert validated.sql() == "SELECT 1 AS x, 2 AS y" + assert validated.visual() == "" + + def test_invalid_query_has_errors(self): + validated = ggsql.validate("SELECT 1 VISUALISE DRAW invalid_geom") + assert not validated.valid() + assert len(validated.errors()) > 0 + + def test_missing_required_aesthetic(self): + # Point requires x and y, only providing x + validated = ggsql.validate( + "SELECT 1 AS x, 2 AS y VISUALISE DRAW point MAPPING x AS x" + ) + assert not validated.valid() + errors = validated.errors() + assert len(errors) > 0 + assert any("y" in e["message"] for e in errors) + + +class TestDuckDBReader: + """Tests for DuckDBReader class.""" + + def test_create_in_memory(self): + reader = ggsql.DuckDBReader("duckdb://memory") + assert reader is not None + + def test_execute_simple_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = reader.execute("SELECT 1 AS x, 2 AS y") + assert isinstance(df, pl.DataFrame) + assert df.shape == (1, 2) + assert list(df.columns) == ["x", "y"] + + def test_register_and_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("my_data", df) + + result = reader.execute("SELECT * FROM my_data WHERE x > 1") + assert 
isinstance(result, pl.DataFrame) + assert result.shape == (2, 2) + + def test_supports_register(self): + reader = ggsql.DuckDBReader("duckdb://memory") + assert reader.supports_register() is True + + def test_invalid_connection_string(self): + with pytest.raises(ValueError): + ggsql.DuckDBReader("invalid://connection") + + +class TestVegaLiteWriter: + """Tests for VegaLiteWriter class.""" + + def test_create_writer(self): + writer = ggsql.VegaLiteWriter() + assert writer is not None + + +class TestPrepare: + """Tests for prepare() function.""" + + def test_prepare_simple_query(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + assert prepared is not None + assert prepared.layer_count() == 1 + + def test_prepare_with_registered_data(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("data", df) + + prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + assert prepared.metadata()["rows"] == 3 + + def test_prepare_metadata(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) " + "VISUALISE x, y DRAW point", + reader, + ) + + metadata = prepared.metadata() + assert metadata["rows"] == 3 + assert "x" in metadata["columns"] + assert "y" in metadata["columns"] + assert metadata["layer_count"] == 1 + + def test_prepare_sql_accessor(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + assert "SELECT" in prepared.sql() + + def test_prepare_visual_accessor(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + assert "VISUALISE" in prepared.visual() - def test_splits_sql_and_visualise(self): 
- sql, viz = ggsql.split_query( - "SELECT x, y FROM data VISUALISE x, y DRAW point" + def test_prepare_data_accessor(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader ) - assert "SELECT" in sql - assert "VISUALISE" in viz + data = prepared.data() + assert isinstance(data, pl.DataFrame) + assert data.shape == (1, 2) + + def test_prepare_without_visualise_fails(self): + reader = ggsql.DuckDBReader("duckdb://memory") + with pytest.raises(ValueError): + ggsql.prepare("SELECT 1 AS x, 2 AS y", reader) + - def test_no_visualise_returns_empty_viz(self): - sql, viz = ggsql.split_query("SELECT * FROM data") - assert sql == "SELECT * FROM data" - assert viz == "" +class TestPreparedRender: + """Tests for Prepared.render() method.""" + + def test_render_to_vegalite(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + writer = ggsql.VegaLiteWriter() + + result = prepared.render(writer) + assert isinstance(result, str) + + spec = json.loads(result) + assert "$schema" in spec + assert "vega-lite" in spec["$schema"] + + def test_render_contains_data(self): + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("data", df) + + prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + writer = ggsql.VegaLiteWriter() + + result = prepared.render(writer) + spec = json.loads(result) + # Data should be in the spec (either inline or in datasets) + assert "data" in spec or "datasets" in spec + + def test_render_multi_layer(self): + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) " + "VISUALISE " + "DRAW point MAPPING x AS x, y AS y " + "DRAW line MAPPING x AS x, y AS y", + reader, + ) + writer = ggsql.VegaLiteWriter() + + result 
= prepared.render(writer) + spec = json.loads(result) + assert "layer" in spec class TestRenderAltairDataFrameConversion: @@ -112,23 +283,31 @@ def test_layered_chart_can_round_trip(self): def test_faceted_chart_returns_facet_chart(self): """FACET WRAP specs produce FacetChart.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4, 5, 6], - "y": [10, 20, 30, 40, 50, 60], - "group": ["A", "A", "A", "B", "B", "B"], - }) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4, 5, 6], + "y": [10, 20, 30, 40, 50, 60], + "group": ["A", "A", "A", "B", "B", "B"], + } + ) # Need validate=False because ggsql produces v6 specs - chart = ggsql.render_altair(df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False) + chart = ggsql.render_altair( + df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + ) assert isinstance(chart, altair.FacetChart) def test_faceted_chart_can_round_trip(self): """FacetChart can be converted to dict and back.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4, 5, 6], - "y": [10, 20, 30, 40, 50, 60], - "group": ["A", "A", "A", "B", "B", "B"], - }) - chart = ggsql.render_altair(df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4, 5, 6], + "y": [10, 20, 30, 40, 50, 60], + "group": ["A", "A", "A", "B", "B", "B"], + } + ) + chart = ggsql.render_altair( + df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + ) # Convert to dict (skip validation for ggsql specs) spec = chart.to_dict(validate=False) @@ -140,11 +319,13 @@ def test_faceted_chart_can_round_trip(self): def test_chart_with_color_encoding(self): """Charts with color encoding still return correct type.""" - df = pl.DataFrame({ - "x": [1, 2, 3, 4], - "y": [10, 20, 30, 40], - "category": ["A", "B", "A", "B"], - }) + df = pl.DataFrame( + { + "x": [1, 2, 3, 4], + "y": [10, 20, 30, 40], + "category": ["A", "B", "A", "B"], + } + ) chart = ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") # Should still be a LayerChart 
(ggsql wraps in layer) assert isinstance(chart, altair.LayerChart) @@ -157,3 +338,180 @@ def test_invalid_viz_raises(self): df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) with pytest.raises(ValueError): ggsql.render_altair(df, "NOT VALID SYNTAX") + + +class TestTwoStageAPIIntegration: + """Integration tests for the two-stage prepare -> render API.""" + + def test_end_to_end_workflow(self): + """Complete workflow: create reader, register data, prepare, render.""" + # Create reader + reader = ggsql.DuckDBReader("duckdb://memory") + + # Register data + df = pl.DataFrame( + { + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "value": [10, 20, 30], + "region": ["North", "South", "North"], + } + ) + reader.register("sales", df) + + # Prepare visualization + prepared = ggsql.prepare( + "SELECT * FROM sales VISUALISE date AS x, value AS y, region AS color DRAW line", + reader, + ) + + # Verify metadata + assert prepared.metadata()["rows"] == 3 + assert prepared.layer_count() == 1 + + # Render to Vega-Lite + writer = ggsql.VegaLiteWriter() + result = prepared.render(writer) + + # Verify output + spec = json.loads(result) + assert "$schema" in spec + assert "line" in json.dumps(spec) + + def test_can_introspect_prepared(self): + """Test all introspection methods on Prepared.""" + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + + # All these should work without error + assert prepared.sql() is not None + assert prepared.visual() is not None + assert prepared.layer_count() >= 1 + assert prepared.metadata() is not None + assert prepared.data() is not None + assert prepared.warnings() is not None + + # Layer-specific accessors (may return None) + _ = prepared.layer_data(0) + _ = prepared.stat_data(0) + _ = prepared.layer_sql(0) + _ = prepared.stat_sql(0) + + +class TestCustomReader: + """Tests for custom Python reader support.""" + + def test_simple_custom_reader(self): + 
"""Custom reader with execute() method works.""" + + class SimpleReader: + def execute(self, sql: str) -> pl.DataFrame: + return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + + reader = SimpleReader() + prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + assert prepared.metadata()["rows"] == 3 + + def test_custom_reader_with_register(self): + """Custom reader with register() support.""" + + class RegisterReader: + def __init__(self): + self.tables = {} + + def execute(self, sql: str) -> pl.DataFrame: + # Simple: just return the first registered table + if self.tables: + return next(iter(self.tables.values())) + return pl.DataFrame({"x": [1], "y": [2]}) + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.tables[name] = df + + reader = RegisterReader() + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + assert prepared is not None + + def test_custom_reader_error_handling(self): + """Custom reader errors are propagated.""" + + class ErrorReader: + def execute(self, sql: str) -> pl.DataFrame: + raise ValueError("Custom reader error") + + reader = ErrorReader() + with pytest.raises(ValueError, match="Custom reader error"): + ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) + + def test_custom_reader_wrong_return_type(self): + """Custom reader returning wrong type raises TypeError.""" + + class WrongTypeReader: + def execute(self, sql: str): + return {"x": [1, 2, 3]} # dict, not DataFrame + + reader = WrongTypeReader() + with pytest.raises((ValueError, TypeError)): + ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) + + def test_native_reader_fast_path(self): + """Native DuckDBReader still works (fast path).""" + reader = ggsql.DuckDBReader("duckdb://memory") + prepared = ggsql.prepare( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + ) + assert prepared.metadata()["rows"] == 1 + + def 
test_custom_reader_can_render(self): + """Custom reader result can be rendered to Vega-Lite.""" + + class StaticReader: + def execute(self, sql: str) -> pl.DataFrame: + return pl.DataFrame( + { + "x": [1, 2, 3, 4, 5], + "y": [10, 40, 20, 50, 30], + "category": ["A", "B", "A", "B", "A"], + } + ) + + reader = StaticReader() + prepared = ggsql.prepare( + "SELECT * FROM data VISUALISE x, y, category AS color DRAW point", + reader, + ) + + writer = ggsql.VegaLiteWriter() + result = prepared.render(writer) + + spec = json.loads(result) + assert "$schema" in spec + assert "vega-lite" in spec["$schema"] + + def test_custom_reader_execute_called(self): + """Verify execute() is called on the custom reader.""" + + class RecordingReader: + def __init__(self): + self.execute_calls = [] + + def execute(self, sql: str) -> pl.DataFrame: + self.execute_calls.append(sql) + return pl.DataFrame({"x": [1], "y": [2]}) + + reader = RecordingReader() + ggsql.prepare( + "SELECT * FROM data VISUALISE x, y DRAW point", + reader, + ) + + # execute() should have been called at least once + assert len(reader.execute_calls) > 0 + # All calls should be valid SQL strings + assert all(isinstance(sql, str) for sql in reader.execute_calls) diff --git a/src/doc/API.md b/src/doc/API.md index 5ccd70e6..a0f97ab6 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -514,7 +514,4 @@ def validate(query: str) -> Validated: def prepare(query: str, reader: DuckDBReader) -> Prepared: """Prepare a query for visualization.""" - -def split_query(query: str) -> tuple[str, str]: - """Split query into (sql, visualise) portions.""" ``` diff --git a/src/rest.rs b/src/rest.rs index 45d21963..3dba85fe 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -31,12 +31,12 @@ use tower_http::cors::{Any, CorsLayer}; use tracing::info; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; -use ggsql::{parser, GgsqlError, VERSION}; +use ggsql::{parser, validate, GgsqlError, VERSION}; #[cfg(feature = "duckdb")] use 
ggsql::reader::DuckDBReader; #[cfg(feature = "duckdb")] -use ggsql::{parse, prepare}; +use ggsql::prepare; #[cfg(feature = "vegalite")] use ggsql::writer::VegaLiteWriter; @@ -508,22 +508,21 @@ async fn parse_handler( ) -> Result>, ApiErrorResponse> { info!("Parsing query: {} chars", request.query.len()); - // Split query (for backwards compatibility) - let (sql_part, viz_part) = parser::split_query(&request.query)?; + // Validate query to get sql/viz portions + let validated = validate(&request.query)?; - // Parse using new API - let parsed = parse(&request.query)?; + // Parse ggsql portion + let specs = parser::parse_query(&request.query)?; // Convert specs to JSON - let specs_json: Vec = parsed - .plots() + let specs_json: Vec = specs .iter() .map(|spec| serde_json::to_value(spec).unwrap_or(serde_json::Value::Null)) .collect(); let result = ParseResult { - sql_portion: sql_part, - viz_portion: viz_part, + sql_portion: validated.sql().to_string(), + viz_portion: validated.visual().to_string(), specs: specs_json, }; @@ -540,8 +539,8 @@ async fn parse_handler( ) -> Result>, ApiErrorResponse> { info!("Parsing query: {} chars", request.query.len()); - // Split query - let (sql_part, viz_part) = parser::split_query(&request.query)?; + // Validate query to get sql/viz portions + let validated = validate(&request.query)?; // Parse ggsql portion let specs = parser::parse_query(&request.query)?; @@ -553,8 +552,8 @@ async fn parse_handler( .collect(); let result = ParseResult { - sql_portion: sql_part, - viz_portion: viz_part, + sql_portion: validated.sql().to_string(), + viz_portion: validated.visual().to_string(), specs: specs_json, }; From 6a60e7121d8606b52dc0c3c333f3594c4ad44a8c Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 29 Jan 2026 13:34:51 +0000 Subject: [PATCH 04/12] Keep clippy happy --- Cargo.toml | 2 +- src/execute.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0339c104..5e98f8aa 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,7 @@ csscolorparser = "0.8.1" polars = { version = "0.52", features = ["lazy", "sql", "ipc"] } # Readers -duckdb = { version = "1.1", features = ["bundled", "vtab-arrow"] } +duckdb = { version = "1.4", features = ["bundled", "vtab-arrow"] } arrow = { version = "56", default-features = false, features = ["ipc"] } postgres = "0.19" sqlx = { version = "0.8", features = ["postgres", "runtime-tokio-rustls"] } diff --git a/src/execute.rs b/src/execute.rs index e79bb196..b7683f56 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -566,6 +566,7 @@ pub struct LayerQueryResult { /// /// Note: This function takes `&mut Layer` because stat transforms may add new aesthetic mappings /// (e.g., mapping y to `__ggsql_stat__count` for histogram or bar count). +#[allow(clippy::too_many_arguments)] fn build_layer_query( layer: &mut Layer, schema: &Schema, From 3bc364590b5a169754899689fadb4c4a2a90741d Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 29 Jan 2026 13:58:53 +0000 Subject: [PATCH 05/12] cargo fmt --- ggsql-jupyter/src/executor.rs | 3 ++- src/rest.rs | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index 0f523ebb..40f74f72 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -5,8 +5,9 @@ use anyhow::Result; use ggsql::{ - prepare, validate, + prepare, reader::{DuckDBReader, Reader}, + validate, writer::VegaLiteWriter, }; use polars::frame::DataFrame; diff --git a/src/rest.rs b/src/rest.rs index 3dba85fe..e87a14f9 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -33,10 +33,10 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use ggsql::{parser, validate, GgsqlError, VERSION}; -#[cfg(feature = "duckdb")] -use ggsql::reader::DuckDBReader; #[cfg(feature = "duckdb")] use ggsql::prepare; +#[cfg(feature = "duckdb")] +use ggsql::reader::DuckDBReader; #[cfg(feature = "vegalite")] use 
ggsql::writer::VegaLiteWriter; @@ -445,9 +445,9 @@ async fn query_handler( // Use shared reader or create new one let prepared = if request.reader == "duckdb://memory" && state.reader.is_some() { let reader_mutex = state.reader.as_ref().unwrap(); - let reader = reader_mutex.lock().map_err(|e| { - GgsqlError::InternalError(format!("Failed to lock reader: {}", e)) - })?; + let reader = reader_mutex + .lock() + .map_err(|e| GgsqlError::InternalError(format!("Failed to lock reader: {}", e)))?; prepare(&request.query, &*reader)? } else { let reader = DuckDBReader::from_connection_string(&request.reader)?; From c1aff572a44f8ac8a16609e2e4ecd2193622ec10 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 11:32:52 +0000 Subject: [PATCH 06/12] Rename reader.execute to reader.execute_sql --- CLAUDE.md | 10 +++++----- ggsql-jupyter/src/executor.rs | 2 +- ggsql-python/README.md | 8 ++++---- ggsql-python/src/lib.rs | 16 ++++++++-------- ggsql-python/tests/test_ggsql.py | 16 ++++++++-------- src/api.rs | 2 +- src/cli.rs | 2 +- src/doc/API.md | 4 ++-- src/execute.rs | 2 +- src/lib.rs | 22 +++++++++++----------- src/reader/duckdb.rs | 18 +++++++++--------- src/reader/mod.rs | 8 ++++---- 12 files changed, 55 insertions(+), 55 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index aa093221..1ac04a87 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -207,7 +207,7 @@ let json = prepared.render(&writer)?; **Reader trait** (data source abstraction): -- `execute(sql)` - Run SQL, return DataFrame +- `execute_sql(sql)` - Run SQL, return DataFrame - `register(name, df)` - Register DataFrame as table - Implementation: `DuckDBReader` @@ -505,7 +505,7 @@ pub type Result = std::result::Result; ```rust pub trait Reader { - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; fn supports_query(&self, sql: &str) -> bool; } ``` @@ -871,7 +871,7 @@ When running in Positron IDE, the extension provides enhanced functionality: - PyO3-based Rust bindings compiled to 
a native Python extension - Two-stage API mirroring the Rust API: `prepare()` → `render()` - DuckDB reader with DataFrame registration -- Custom Python reader support: any object with `execute(sql) -> DataFrame` method +- Custom Python reader support: any object with `execute_sql(sql) -> DataFrame` method - Works with any narwhals-compatible DataFrame (polars, pandas, etc.) - LazyFrames are collected automatically - Returns native `altair.Chart` objects via `render_altair()` convenience function @@ -976,7 +976,7 @@ print(f"Errors: {validated.errors()}") **Custom Python Readers**: -Any Python object with an `execute(sql: str) -> polars.DataFrame` method can be used as a reader: +Any Python object with an `execute_sql(sql: str) -> polars.DataFrame` method can be used as a reader: ```python import ggsql @@ -985,7 +985,7 @@ import polars as pl class MyReader: """Custom reader that returns static data.""" - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) # Use custom reader with prepare() diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index 40f74f72..b2415228 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -60,7 +60,7 @@ impl QueryExecutor { // 2. 
Check if there's a visualization if !validated.has_visual() { // Pure SQL query - execute directly and return DataFrame - let df = self.reader.execute(code)?; + let df = self.reader.execute_sql(code)?; tracing::info!( "Pure SQL executed: {} rows, {} cols", df.height(), diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 3ea2c603..22b5fb8b 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -127,7 +127,7 @@ reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database **Methods:** - `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table -- `execute(sql: str) -> polars.DataFrame` - Execute SQL and return results +- `execute_sql(sql: str) -> polars.DataFrame` - Execute SQL and return results - `supports_register() -> bool` - Check if registration is supported #### `VegaLiteWriter()` @@ -236,7 +236,7 @@ ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") ### Custom Readers -You can use any Python object with an `execute(sql: str) -> polars.DataFrame` method as a reader. This enables integration with any data source. +You can use any Python object with an `execute_sql(sql: str) -> polars.DataFrame` method as a reader. This enables integration with any data source. ```python import ggsql @@ -248,7 +248,7 @@ class CSVReader: def __init__(self, data_dir: str): self.data_dir = data_dir - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: # Simple implementation: ignore SQL and return fixed data # A real implementation would parse SQL to determine which file to load return pl.read_csv(f"{self.data_dir}/data.csv") @@ -275,7 +275,7 @@ class AdvancedReader: def __init__(self): self.tables = {} - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: # Your SQL execution logic here ... 
diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index b9d6496d..726db016 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -62,7 +62,7 @@ fn py_to_polars_inner(df: &Bound<'_, PyAny>) -> PyResult { df.call_method1("write_ipc", (&bytes_io,)).map_err(|_| { PyErr::new::( - "Reader.execute() must return a polars.DataFrame", + "Reader.execute_sql() must return a polars.DataFrame", ) })?; @@ -127,12 +127,12 @@ struct PyReaderBridge { } impl Reader for PyReaderBridge { - fn execute(&self, sql: &str) -> ggsql::Result { + fn execute_sql(&self, sql: &str) -> ggsql::Result { Python::attach(|py| { let bound = self.obj.bind(py); let result = bound - .call_method1("execute", (sql,)) - .map_err(|e| GgsqlError::ReaderError(format!("Reader.execute() failed: {}", e)))?; + .call_method1("execute_sql", (sql,)) + .map_err(|e| GgsqlError::ReaderError(format!("Reader.execute_sql() failed: {}", e)))?; py_to_polars_inner(&result).map_err(|e| GgsqlError::ReaderError(e.to_string())) }) } @@ -190,11 +190,11 @@ macro_rules! try_native_readers { /// Examples /// -------- /// >>> reader = DuckDBReader("duckdb://memory") -/// >>> df = reader.execute("SELECT 1 as x, 2 as y") +/// >>> df = reader.execute_sql("SELECT 1 as x, 2 as y") /// /// >>> reader = DuckDBReader("duckdb://memory") /// >>> reader.register("data", pl.DataFrame({"x": [1, 2, 3]})) -/// >>> df = reader.execute("SELECT * FROM data WHERE x > 1") +/// >>> df = reader.execute_sql("SELECT * FROM data WHERE x > 1") #[pyclass(name = "DuckDBReader", unsendable)] struct PyDuckDBReader { inner: RustDuckDBReader, @@ -264,10 +264,10 @@ impl PyDuckDBReader { /// ------ /// ValueError /// If the SQL is invalid or execution fails. 
- fn execute(&self, py: Python<'_>, sql: &str) -> PyResult> { + fn execute_sql(&self, py: Python<'_>, sql: &str) -> PyResult> { let df = self .inner - .execute(sql) + .execute_sql(sql) .map_err(|e| PyErr::new::(e.to_string()))?; polars_to_py(py, &df) } diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 970dcf5a..b8614d0a 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -62,7 +62,7 @@ def test_create_in_memory(self): def test_execute_simple_query(self): reader = ggsql.DuckDBReader("duckdb://memory") - df = reader.execute("SELECT 1 AS x, 2 AS y") + df = reader.execute_sql("SELECT 1 AS x, 2 AS y") assert isinstance(df, pl.DataFrame) assert df.shape == (1, 2) assert list(df.columns) == ["x", "y"] @@ -72,7 +72,7 @@ def test_register_and_query(self): df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader.register("my_data", df) - result = reader.execute("SELECT * FROM my_data WHERE x > 1") + result = reader.execute_sql("SELECT * FROM my_data WHERE x > 1") assert isinstance(result, pl.DataFrame) assert result.shape == (2, 2) @@ -406,7 +406,7 @@ def test_simple_custom_reader(self): """Custom reader with execute() method works.""" class SimpleReader: - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader = SimpleReader() @@ -420,7 +420,7 @@ class RegisterReader: def __init__(self): self.tables = {} - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: # Simple: just return the first registered table if self.tables: return next(iter(self.tables.values())) @@ -442,7 +442,7 @@ def test_custom_reader_error_handling(self): """Custom reader errors are propagated.""" class ErrorReader: - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: raise ValueError("Custom reader error") reader = ErrorReader() @@ 
-453,7 +453,7 @@ def test_custom_reader_wrong_return_type(self): """Custom reader returning wrong type raises TypeError.""" class WrongTypeReader: - def execute(self, sql: str): + def execute_sql(self, sql: str): return {"x": [1, 2, 3]} # dict, not DataFrame reader = WrongTypeReader() @@ -472,7 +472,7 @@ def test_custom_reader_can_render(self): """Custom reader result can be rendered to Vega-Lite.""" class StaticReader: - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: return pl.DataFrame( { "x": [1, 2, 3, 4, 5], @@ -501,7 +501,7 @@ class RecordingReader: def __init__(self): self.execute_calls = [] - def execute(self, sql: str) -> pl.DataFrame: + def execute_sql(self, sql: str) -> pl.DataFrame: self.execute_calls.append(sql) return pl.DataFrame({"x": [1], "y": [2]}) diff --git a/src/api.rs b/src/api.rs index ecfbdeaf..1158f5ef 100644 --- a/src/api.rs +++ b/src/api.rs @@ -245,7 +245,7 @@ pub fn prepare(query: &str, reader: &dyn Reader) -> Result { let warnings: Vec = validated.warnings().to_vec(); // Prepare data (this also validates, but we want the warnings from above) - let prepared_data = prepare_data_with_executor(query, |sql| reader.execute(sql))?; + let prepared_data = prepare_data_with_executor(query, |sql| reader.execute_sql(sql))?; Ok(Prepared::new( prepared_data.spec, diff --git a/src/cli.rs b/src/cli.rs index 1844ff01..c8ad4e5c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -335,7 +335,7 @@ fn print_table_fallback(query: &str, reader: &DuckDBReader, max_rows: usize) { } let parsed = parsed.unwrap(); - let data = reader.execute(&parsed); + let data = reader.execute_sql(&parsed); if let Err(e) = data { eprintln!("Failed to execute SQL query: {}", e); std::process::exit(1) diff --git a/src/doc/API.md b/src/doc/API.md index a0f97ab6..ed5af4fe 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -374,7 +374,7 @@ pub struct Location { ```rust pub trait Reader { /// Execute a SQL query and return a DataFrame - fn 
execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; /// Register a DataFrame as a queryable table fn register(&mut self, name: &str, df: DataFrame) -> Result<()>; @@ -425,7 +425,7 @@ class DuckDBReader: df: Polars DataFrame or narwhals-compatible DataFrame """ - def execute(self, sql: str) -> polars.DataFrame: + def execute_sql(self, sql: str) -> polars.DataFrame: """Execute SQL and return a Polars DataFrame.""" def supports_register(self) -> bool: diff --git a/src/execute.rs b/src/execute.rs index b7683f56..3bf2be33 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -1185,7 +1185,7 @@ where /// Convenience wrapper around `prepare_data_with_executor` for direct DuckDB reader usage. #[cfg(feature = "duckdb")] pub fn prepare_data(query: &str, reader: &DuckDBReader) -> Result { - prepare_data_with_executor(query, |sql| reader.execute(sql)) + prepare_data_with_executor(query, |sql| reader.execute_sql(sql)) } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index cf13aaa5..c9128f54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -116,7 +116,7 @@ mod integration_tests { FROM generate_series(0, 4) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has temporal type (DuckDB returns Datetime for DATE + INTERVAL) assert_eq!(df.get_column_names(), vec!["date", "revenue"]); @@ -176,7 +176,7 @@ mod integration_tests { FROM generate_series(0, 3) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has Datetime type let timestamp_col = df.column("timestamp").unwrap(); @@ -224,7 +224,7 @@ mod integration_tests { // Real SQL that users would write let sql = "SELECT 1 as int_col, 2.5 as float_col, true as bool_col"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types are preserved // DuckDB treats numeric literals as DECIMAL, which we convert to Float64 @@ -279,7 +279,7 @@ 
mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES (1, 2.5, 'a'), (2, NULL, 'b'), (NULL, 3.5, NULL)) AS t(int_col, float_col, str_col)"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -329,7 +329,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES ('A', 10), ('B', 20), ('A', 15), ('C', 30)) AS t(category, value)"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); let mut spec = Plot::new(); let layer = Layer::new(Geom::bar()) @@ -375,7 +375,7 @@ mod integration_tests { GROUP BY day "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify temporal type is preserved through aggregation // DATE_TRUNC returns Date type (not Datetime) @@ -413,7 +413,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT 0.1 as small, 123.456 as medium, 999999.999999 as large"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // All should be Float64 assert!(matches!( @@ -465,7 +465,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT CAST(1 AS TINYINT) as tiny, CAST(1000 AS SMALLINT) as small, CAST(1000000 AS INTEGER) as int, CAST(1000000000000 AS BIGINT) as big"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -533,7 +533,7 @@ mod integration_tests { // Prepare data - this parses, injects constants into global data, and replaces literals with columns let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| 
reader.execute_sql(sql)).unwrap(); // Verify constants were injected into global data (not layer-specific data) // Both layers share __global__ data for faceting compatibility @@ -641,7 +641,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // All layers should use global data for faceting to work assert!( @@ -729,7 +729,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // Should have global data with the constant injected assert!( diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index b3cf46d7..f67c39f2 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -24,11 +24,11 @@ use std::io::Cursor; /// /// // In-memory database /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -/// let df = reader.execute("SELECT 1 as x, 2 as y")?; +/// let df = reader.execute_sql("SELECT 1 as x, 2 as y")?; /// /// // File-based database /// let reader = DuckDBReader::from_connection_string("duckdb://data.db")?; -/// let df = reader.execute("SELECT * FROM sales")?; +/// let df = reader.execute_sql("SELECT * FROM sales")?; /// ``` pub struct DuckDBReader { conn: Connection, @@ -380,7 +380,7 @@ impl ColumnBuilder { } impl Reader for DuckDBReader { - fn execute(&self, sql: &str) -> Result { + fn execute_sql(&self, sql: &str) -> Result { use polars::prelude::*; // Check if this is a DDL statement (CREATE, DROP, INSERT, UPDATE, DELETE, ALTER) @@ -544,7 +544,7 @@ mod tests { #[test] fn test_simple_query() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let df = reader.execute("SELECT 1 as x, 2 as y").unwrap(); + let df = reader.execute_sql("SELECT 1 as x, 2 as y").unwrap(); 
assert_eq!(df.shape(), (1, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); @@ -567,7 +567,7 @@ mod tests { .unwrap(); // Query data - let df = reader.execute("SELECT * FROM test").unwrap(); + let df = reader.execute_sql("SELECT * FROM test").unwrap(); assert_eq!(df.shape(), (2, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); @@ -576,7 +576,7 @@ mod tests { #[test] fn test_invalid_sql() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let result = reader.execute("INVALID SQL SYNTAX"); + let result = reader.execute_sql("INVALID SQL SYNTAX"); assert!(result.is_err()); } @@ -598,7 +598,7 @@ mod tests { .unwrap(); let df = reader - .execute("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") + .execute_sql("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") .unwrap(); assert_eq!(df.shape(), (2, 2)); @@ -620,7 +620,7 @@ mod tests { reader.register("my_table", df).unwrap(); // Query the registered table - let result = reader.execute("SELECT * FROM my_table ORDER BY x").unwrap(); + let result = reader.execute_sql("SELECT * FROM my_table ORDER BY x").unwrap(); assert_eq!(result.shape(), (3, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } @@ -698,7 +698,7 @@ mod tests { reader.register("empty_table", df).unwrap(); // Query should return empty result with correct schema - let result = reader.execute("SELECT * FROM empty_table").unwrap(); + let result = reader.execute_sql("SELECT * FROM empty_table").unwrap(); assert_eq!(result.shape(), (0, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 762c0319..ed41a62c 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -17,12 +17,12 @@ //! //! // Basic usage //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -//! let df = reader.execute("SELECT * FROM table")?; +//! let df = reader.execute_sql("SELECT * FROM table")?; //! //! 
// With DataFrame registration //! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; //! reader.register("my_table", some_dataframe)?; -//! let result = reader.execute("SELECT * FROM my_table")?; +//! let result = reader.execute_sql("SELECT * FROM my_table")?; //! ``` use crate::{DataFrame, GgsqlError, Result}; @@ -53,7 +53,7 @@ pub use duckdb::DuckDBReader; /// reader.register("sales", sales_df)?; /// /// // Now you can query it -/// let result = reader.execute("SELECT * FROM sales WHERE amount > 100")?; +/// let result = reader.execute_sql("SELECT * FROM sales WHERE amount > 100")?; /// ``` pub trait Reader { /// Execute a SQL query and return the result as a DataFrame @@ -72,7 +72,7 @@ pub trait Reader { /// - The SQL is invalid /// - The connection fails /// - The table or columns don't exist - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; /// Register a DataFrame as a queryable table (takes ownership) /// From d74bca3a269efb2e89e92763d38abdb12c02943c Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 13:35:28 +0000 Subject: [PATCH 07/12] Switch from api.prepare() to reader.execute() --- CLAUDE.md | 66 +++++----- ggsql-jupyter/src/executor.rs | 13 +- ggsql-python/README.md | 41 +++--- ggsql-python/python/ggsql/__init__.py | 14 +- ggsql-python/src/lib.rs | 109 ++++++++++------ ggsql-python/tests/test_ggsql.py | 177 +++++++++++++------------- src/api.rs | 95 +++++--------- src/cli.rs | 18 +-- src/doc/API.md | 94 +++++++------- src/lib.rs | 4 +- src/reader/mod.rs | 69 +++++++++- src/rest.rs | 16 +-- 12 files changed, 392 insertions(+), 324 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 1ac04a87..8c0cba03 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -152,28 +152,28 @@ DRAW line MAPPING month AS x, total AS y ### Quick Start ```rust -use ggsql::{prepare, reader::DuckDBReader, writer::VegaLiteWriter}; +use ggsql::reader::{DuckDBReader, Reader}; +use ggsql::writer::VegaLiteWriter; // 
Create a reader let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -// Prepare the visualization -let prepared = ggsql::prepare( - "SELECT x, y FROM data VISUALISE x, y DRAW point", - &reader +// Execute the ggsql query +let spec = reader.execute( + "SELECT x, y FROM data VISUALISE x, y DRAW point" )?; // Render to Vega-Lite JSON let writer = VegaLiteWriter::new(); -let json = prepared.render(&writer)?; +let json = spec.render(&writer)?; ``` ### Core Functions | Function | Purpose | | ------------------------ | ------------------------------------------------------ | -| `prepare(query, reader)` | Main entry point: parse, execute SQL, resolve mappings | -| `render(writer)` | Generate output (Vega-Lite JSON) from prepared data | +| `reader.execute(query)` | Main entry point: parse, execute SQL, resolve mappings | +| `spec.render(writer)` | Generate output (Vega-Lite JSON) from Spec | | `validate(query)` | Validate syntax + semantics, inspect query structure | ### Key Types @@ -188,12 +188,12 @@ let json = prepared.render(&writer)?; - `errors()` - Validation errors - `warnings()` - Validation warnings -**`Prepared`** - Result of `prepare()`, ready for rendering: +**`Spec`** - Result of `reader.execute()`, ready for rendering: - `render(writer)` - Generate output (Vega-Lite JSON) - `plot()` - Resolved plot specification - `metadata()` - Rows, columns, layer count -- `warnings()` - Validation warnings from preparation +- `warnings()` - Validation warnings from execution - `data()` / `layer_data(i)` / `stat_data(i)` - Access DataFrames - `sql()` / `visual()` / `layer_sql(i)` / `stat_sql(i)` - Query introspection @@ -869,7 +869,7 @@ When running in Positron IDE, the extension provides enhanced functionality: **Features**: - PyO3-based Rust bindings compiled to a native Python extension -- Two-stage API mirroring the Rust API: `prepare()` → `render()` +- Two-stage API mirroring the Rust API: `reader.execute()` → `render()` - DuckDB reader with DataFrame 
registration - Custom Python reader support: any object with `execute_sql(sql) -> DataFrame` method - Works with any narwhals-compatible DataFrame (polars, pandas, etc.) @@ -897,20 +897,19 @@ reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader.register("data", df) -# Prepare visualization -prepared = ggsql.prepare( - "SELECT * FROM data VISUALISE x, y DRAW point", - reader +# Execute visualization +spec = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point" ) # Inspect metadata -print(f"Rows: {prepared.metadata()['rows']}") -print(f"Columns: {prepared.metadata()['columns']}") -print(f"SQL: {prepared.sql()}") +print(f"Rows: {spec.metadata()['rows']}") +print(f"Columns: {spec.metadata()['columns']}") +print(f"SQL: {spec.sql()}") # Render to Vega-Lite JSON writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +json_output = spec.render(writer) ``` **Convenience Function** (`render_altair`): @@ -943,22 +942,23 @@ print(f"Errors: {validated.errors()}") **Classes**: -| Class | Description | -| -------------------------- | -------------------------------------------- | -| `DuckDBReader(connection)` | Database reader with DataFrame registration | -| `VegaLiteWriter()` | Vega-Lite JSON output writer | -| `Validated` | Result of `validate()` with query inspection | -| `Prepared` | Result of `prepare()`, ready for rendering | +| Class | Description | +| -------------------------- | ------------------------------------------------ | +| `DuckDBReader(connection)` | Database reader with DataFrame registration | +| `VegaLiteWriter()` | Vega-Lite JSON output writer | +| `Validated` | Result of `validate()` with query inspection | +| `Spec` | Result of `reader.execute()`, ready for rendering | **Functions**: -| Function | Description | -| ------------------------ | ------------------------------------------------- | -| `validate(query)` | Syntax/semantic validation with query inspection | -| 
`prepare(query, reader)` | Full preparation (reader can be native or custom) | -| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | +| Function | Description | +| -------------------------- | ------------------------------------------------- | +| `validate(query)` | Syntax/semantic validation with query inspection | +| `reader.execute(query)` | Execute ggsql query, return Spec | +| `execute(query, reader)` | Execute with custom reader (bridge path) | +| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | -**Prepared Object Methods**: +**Spec Methods**: | Method | Description | | ---------------- | -------------------------------------------- | @@ -988,9 +988,9 @@ class MyReader: def execute_sql(self, sql: str) -> pl.DataFrame: return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -# Use custom reader with prepare() +# Use custom reader with ggsql.execute() reader = MyReader() -prepared = ggsql.prepare( +spec = ggsql.execute( "SELECT * FROM data VISUALISE x, y DRAW point", reader ) diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index b2415228..42b541d8 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -5,7 +5,6 @@ use anyhow::Result; use ggsql::{ - prepare, reader::{DuckDBReader, Reader}, validate, writer::VegaLiteWriter, @@ -69,17 +68,17 @@ impl QueryExecutor { return Ok(ExecutionResult::DataFrame(df)); } - // 3. Prepare data using the new API - let prepared = prepare(code, &self.reader)?; + // 3. Execute ggsql query using reader + let spec = self.reader.execute(code)?; tracing::info!( - "Data prepared: {} rows, {} layers", - prepared.metadata().rows, - prepared.metadata().layer_count + "Query executed: {} rows, {} layers", + spec.metadata().rows, + spec.metadata().layer_count ); // 4. 
Render to Vega-Lite - let vega_json = prepared.render(&self.writer)?; + let vega_json = spec.render(&self.writer)?; tracing::debug!("Generated Vega-Lite spec: {} chars", vega_json.len()); diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 22b5fb8b..08e9b848 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -84,30 +84,29 @@ df = pl.DataFrame({ }) reader.register("sales", df) -# 3. Prepare the visualization -prepared = ggsql.prepare( +# 3. Execute the ggsql query +spec = reader.execute( """ SELECT * FROM sales VISUALISE date AS x, revenue AS y, region AS color DRAW line LABEL title => 'Sales by Region' - """, - reader + """ ) # 4. Inspect metadata -print(f"Rows: {prepared.metadata()['rows']}") -print(f"Columns: {prepared.metadata()['columns']}") -print(f"Layers: {prepared.layer_count()}") +print(f"Rows: {spec.metadata()['rows']}") +print(f"Columns: {spec.metadata()['columns']}") +print(f"Layers: {spec.layer_count()}") # 5. Inspect SQL/VISUALISE portions and data -print(f"SQL: {prepared.sql()}") -print(f"Visual: {prepared.visual()}") -print(prepared.data()) # Returns polars DataFrame +print(f"SQL: {spec.sql()}") +print(f"Visual: {spec.visual()}") +print(spec.data()) # Returns polars DataFrame # 6. Render to Vega-Lite JSON writer = ggsql.VegaLiteWriter() -vegalite_json = prepared.render(writer) +vegalite_json = spec.render(writer) print(vegalite_json) ``` @@ -136,7 +135,7 @@ Writer that generates Vega-Lite v6 JSON specifications. ```python writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +json_output = spec.render(writer) ``` #### `Validated` @@ -152,9 +151,9 @@ Result of `validate()` containing query analysis without SQL execution. - `errors() -> list[dict]` - Validation errors with messages and locations - `warnings() -> list[dict]` - Validation warnings -#### `Prepared` +#### `Spec` -Result of `prepare()`, containing resolved visualization ready for rendering. 
+Result of `reader.execute()`, containing resolved visualization ready for rendering. **Methods:** @@ -168,7 +167,7 @@ Result of `prepare()`, containing resolved visualization ready for rendering. - `stat_data(index: int) -> polars.DataFrame | None` - Statistical transform data - `layer_sql(index: int) -> str | None` - Layer filter SQL - `stat_sql(index: int) -> str | None` - Stat transform SQL -- `warnings() -> list[dict]` - Validation warnings from preparation +- `warnings() -> list[dict]` - Validation warnings from execution ### Functions @@ -185,13 +184,13 @@ else: print(f"Error: {error['message']}") ``` -#### `prepare(query: str, reader: DuckDBReader) -> Prepared` +#### `reader.execute(query: str) -> Spec` -Parse, validate, and execute a ggsql query. +Execute a ggsql query and return the visualization specification. ```python reader = ggsql.DuckDBReader("duckdb://memory") -prepared = ggsql.prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") ``` #### `render_altair(df, viz: str, **kwargs) -> altair.Chart` @@ -253,14 +252,14 @@ class CSVReader: # A real implementation would parse SQL to determine which file to load return pl.read_csv(f"{self.data_dir}/data.csv") -# Use custom reader with prepare() +# Use custom reader with ggsql.execute() reader = CSVReader("/path/to/data") -prepared = ggsql.prepare( +spec = ggsql.execute( "SELECT * FROM data VISUALISE x, y DRAW point", reader ) writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +json_output = spec.render(writer) ``` **Optional methods** for custom readers: diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py index 06b5f720..de159d2a 100644 --- a/ggsql-python/python/ggsql/__init__.py +++ b/ggsql-python/python/ggsql/__init__.py @@ -11,9 +11,9 @@ DuckDBReader, VegaLiteWriter, Validated, - Prepared, + Spec, validate, - prepare, + execute, ) __all__ = [ @@ -21,10 +21,10 
@@ "DuckDBReader", "VegaLiteWriter", "Validated", - "Prepared", + "Spec", # Functions "validate", - "prepare", + "execute", "render_altair", ] __version__ = "0.1.0" @@ -81,10 +81,10 @@ def render_altair( # Build full query: SELECT * FROM __data__ + VISUALISE clause query = f"SELECT * FROM __data__ {viz}" - # Prepare and render - prepared = prepare(query, reader) + # Execute and render + spec = reader.execute(query) writer = VegaLiteWriter() - vegalite_json = prepared.render(writer) + vegalite_json = spec.render(writer) # Parse to determine the correct Altair class spec = json.loads(vegalite_json) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 726db016..5be68c42 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -6,7 +6,7 @@ use pyo3::prelude::*; use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::api::{prepare as rust_prepare, validate as rust_validate, Prepared, ValidationWarning}; +use ggsql::api::{validate as rust_validate, Spec, ValidationWarning}; use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; use ggsql::GgsqlError; @@ -120,7 +120,7 @@ fn warnings_to_pylist(py: Python<'_>, warnings: &[ValidationWarning]) -> PyResul /// Bridges a Python reader object to the Rust Reader trait. /// -/// This allows any Python object with an `execute(sql: str) -> polars.DataFrame` +/// This allows any Python object with an `execute_sql(sql: str) -> polars.DataFrame` /// method to be used as a ggsql reader. 
struct PyReaderBridge { obj: Py, @@ -130,9 +130,9 @@ impl Reader for PyReaderBridge { fn execute_sql(&self, sql: &str) -> ggsql::Result { Python::attach(|py| { let bound = self.obj.bind(py); - let result = bound - .call_method1("execute_sql", (sql,)) - .map_err(|e| GgsqlError::ReaderError(format!("Reader.execute_sql() failed: {}", e)))?; + let result = bound.call_method1("execute_sql", (sql,)).map_err(|e| { + GgsqlError::ReaderError(format!("Reader.execute_sql() failed: {}", e)) + })?; py_to_polars_inner(&result).map_err(|e| GgsqlError::ReaderError(e.to_string())) }) } @@ -170,8 +170,8 @@ macro_rules! try_native_readers { ($query:expr, $reader:expr, $($native_type:ty),*) => {{ $( if let Ok(native) = $reader.downcast::<$native_type>() { - return rust_prepare($query, &native.borrow().inner) - .map(|p| PyPrepared { inner: p }) + return native.borrow().inner.execute($query) + .map(|s| PySpec { inner: s }) .map_err(|e| PyErr::new::(e.to_string())); } )* @@ -281,6 +281,39 @@ impl PyDuckDBReader { fn supports_register(&self) -> bool { self.inner.supports_register() } + + /// Execute a ggsql query and return the visualization specification. + /// + /// This is the main entry point for creating visualizations. It parses + /// the query, executes the SQL portion, and returns a Spec ready + /// for rendering. + /// + /// Parameters + /// ---------- + /// query : str + /// The ggsql query (SQL + VISUALISE clause). + /// + /// Returns + /// ------- + /// Spec + /// The resolved visualization specification ready for rendering. + /// + /// Raises + /// ------ + /// ValueError + /// If the query syntax is invalid, has no VISUALISE clause, or SQL execution fails. 
+ /// + /// Examples + /// -------- + /// >>> reader = DuckDBReader("duckdb://memory") + /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + /// >>> json_output = spec.render(VegaLiteWriter()) + fn execute(&self, query: &str) -> PyResult { + self.inner + .execute(query) + .map(|s| PySpec { inner: s }) + .map_err(|e| PyErr::new::(e.to_string())) + } } // ============================================================================ @@ -289,13 +322,13 @@ impl PyDuckDBReader { /// Vega-Lite JSON output writer. /// -/// Converts prepared visualization specifications to Vega-Lite v6 JSON. +/// Converts visualization specifications to Vega-Lite v6 JSON. /// /// Examples /// -------- /// >>> writer = VegaLiteWriter() -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> json_output = prepared.render(writer) +/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") +/// >>> json_output = spec.render(writer) #[pyclass(name = "VegaLiteWriter")] struct PyVegaLiteWriter { inner: RustVegaLiteWriter, @@ -399,26 +432,26 @@ impl PyValidated { } // ============================================================================ -// PyPrepared +// PySpec // ============================================================================ -/// Result of prepare(), ready for rendering. +/// Result of reader.execute(), ready for rendering. /// /// Contains the resolved plot specification, data, and metadata. /// Use render() to generate Vega-Lite JSON output. 
/// /// Examples /// -------- -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> print(f"Rows: {prepared.metadata()['rows']}") -/// >>> json_output = prepared.render(VegaLiteWriter()) -#[pyclass(name = "Prepared")] -struct PyPrepared { - inner: Prepared, +/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") +/// >>> print(f"Rows: {spec.metadata()['rows']}") +/// >>> json_output = spec.render(VegaLiteWriter()) +#[pyclass(name = "Spec")] +struct PySpec { + inner: Spec, } #[pymethods] -impl PyPrepared { +impl PySpec { /// Render to output format (Vega-Lite JSON). /// /// Parameters @@ -626,21 +659,24 @@ fn validate(query: &str) -> PyResult { }) } -/// Prepare a query for visualization. Main entry point for the Rust API. +/// Execute a ggsql query using a custom Python reader. +/// +/// This is a convenience function for custom readers. For native readers, +/// prefer using `reader.execute()` directly. /// /// Parameters /// ---------- /// query : str -/// The ggsql query to prepare. -/// reader : DuckDBReader | object -/// The database reader to execute SQL against. Can be a native DuckDBReader +/// The ggsql query to execute. +/// reader : Reader | object +/// The database reader to execute SQL against. Can be a native Reader /// for optimal performance, or any Python object with an -/// `execute(sql: str) -> polars.DataFrame` method. +/// `execute_sql(sql: str) -> polars.DataFrame` method. /// /// Returns /// ------- -/// Prepared -/// A prepared visualization ready for rendering. +/// Spec +/// The resolved visualization specification ready for rendering. 
/// /// Raises /// ------ @@ -649,19 +685,19 @@ fn validate(query: &str) -> PyResult { /// /// Examples /// -------- -/// >>> # Using native reader (fast path) +/// >>> # Using native reader (prefer reader.execute() instead) /// >>> reader = DuckDBReader("duckdb://memory") -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> json_output = prepared.render(VegaLiteWriter()) +/// >>> spec = execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> json_output = spec.render(VegaLiteWriter()) /// /// >>> # Using custom Python reader /// >>> class MyReader: -/// ... def execute(self, sql: str) -> pl.DataFrame: +/// ... def execute_sql(self, sql: str) -> pl.DataFrame: /// ... return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) /// >>> reader = MyReader() -/// >>> prepared = prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) +/// >>> spec = execute("SELECT * FROM data VISUALISE x, y DRAW point", reader) #[pyfunction] -fn prepare(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { +fn execute(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { // Fast path: try all known native reader types // Add new native readers to this list as they're implemented try_native_readers!(query, reader, PyDuckDBReader); @@ -670,8 +706,9 @@ fn prepare(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { let bridge = PyReaderBridge { obj: reader.clone().unbind(), }; - rust_prepare(query, &bridge) - .map(|p| PyPrepared { inner: p }) + bridge + .execute(query) + .map(|s| PySpec { inner: s }) .map_err(|e| PyErr::new::(e.to_string())) } @@ -685,11 +722,11 @@ fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; + m.add_class::()?; // Functions m.add_function(wrap_pyfunction!(validate, m)?)?; - m.add_function(wrap_pyfunction!(prepare, m)?)?; + m.add_function(wrap_pyfunction!(execute, m)?)?; Ok(()) } diff --git a/ggsql-python/tests/test_ggsql.py 
b/ggsql-python/tests/test_ggsql.py index b8614d0a..e54df2e8 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -3,7 +3,7 @@ These tests focus on Python-specific logic: - DataFrame conversion via narwhals - Return type handling -- Two-stage API (prepare -> render) +- Two-stage API (reader.execute() -> render) Rust logic (parsing, Vega-Lite generation) is tested in the Rust test suite. """ @@ -93,112 +93,110 @@ def test_create_writer(self): assert writer is not None -class TestPrepare: - """Tests for prepare() function.""" +class TestExecute: + """Tests for reader.execute() method.""" - def test_prepare_simple_query(self): + def test_execute_simple_query(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) - assert prepared is not None - assert prepared.layer_count() == 1 + assert spec is not None + assert spec.layer_count() == 1 - def test_prepare_with_registered_data(self): + def test_execute_with_registered_data(self): reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader.register("data", df) - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) - assert prepared.metadata()["rows"] == 3 + spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") + assert spec.metadata()["rows"] == 3 - def test_prepare_metadata(self): + def test_execute_metadata(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( + spec = reader.execute( "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) " "VISUALISE x, y DRAW point", - reader, ) - metadata = prepared.metadata() + metadata = spec.metadata() assert metadata["rows"] == 3 assert "x" in metadata["columns"] assert "y" in metadata["columns"] assert metadata["layer_count"] == 1 - def 
test_prepare_sql_accessor(self): + def test_execute_sql_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) - assert "SELECT" in prepared.sql() + assert "SELECT" in spec.sql() - def test_prepare_visual_accessor(self): + def test_execute_visual_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) - assert "VISUALISE" in prepared.visual() + assert "VISUALISE" in spec.visual() - def test_prepare_data_accessor(self): + def test_execute_data_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) - data = prepared.data() + data = spec.data() assert isinstance(data, pl.DataFrame) assert data.shape == (1, 2) - def test_prepare_without_visualise_fails(self): + def test_execute_without_visualise_fails(self): reader = ggsql.DuckDBReader("duckdb://memory") with pytest.raises(ValueError): - ggsql.prepare("SELECT 1 AS x, 2 AS y", reader) + reader.execute("SELECT 1 AS x, 2 AS y") -class TestPreparedRender: - """Tests for Prepared.render() method.""" +class TestSpecRender: + """Tests for Spec.render() method.""" def test_render_to_vegalite(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) + result = spec.render(writer) assert isinstance(result, str) - spec = json.loads(result) - assert "$schema" in spec - assert "vega-lite" in 
spec["$schema"] + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "vega-lite" in spec_dict["$schema"] def test_render_contains_data(self): reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader.register("data", df) - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) - spec = json.loads(result) + result = spec.render(writer) + spec_dict = json.loads(result) # Data should be in the spec (either inline or in datasets) - assert "data" in spec or "datasets" in spec + assert "data" in spec_dict or "datasets" in spec_dict def test_render_multi_layer(self): reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( + spec = reader.execute( "SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) " "VISUALISE " "DRAW point MAPPING x AS x, y AS y " "DRAW line MAPPING x AS x, y AS y", - reader, ) writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) - spec = json.loads(result) - assert "layer" in spec + result = spec.render(writer) + spec_dict = json.loads(result) + assert "layer" in spec_dict class TestRenderAltairDataFrameConversion: @@ -341,10 +339,10 @@ def test_invalid_viz_raises(self): class TestTwoStageAPIIntegration: - """Integration tests for the two-stage prepare -> render API.""" + """Integration tests for the two-stage reader.execute() -> render API.""" def test_end_to_end_workflow(self): - """Complete workflow: create reader, register data, prepare, render.""" + """Complete workflow: create reader, register data, execute, render.""" # Create reader reader = ggsql.DuckDBReader("duckdb://memory") @@ -358,60 +356,59 @@ def test_end_to_end_workflow(self): ) reader.register("sales", df) - # Prepare visualization - prepared = ggsql.prepare( + # Execute visualization + spec = reader.execute( "SELECT * 
FROM sales VISUALISE date AS x, value AS y, region AS color DRAW line", - reader, ) # Verify metadata - assert prepared.metadata()["rows"] == 3 - assert prepared.layer_count() == 1 + assert spec.metadata()["rows"] == 3 + assert spec.layer_count() == 1 # Render to Vega-Lite writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) + result = spec.render(writer) # Verify output - spec = json.loads(result) - assert "$schema" in spec - assert "line" in json.dumps(spec) + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "line" in json.dumps(spec_dict) - def test_can_introspect_prepared(self): - """Test all introspection methods on Prepared.""" + def test_can_introspect_spec(self): + """Test all introspection methods on Spec.""" reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) # All these should work without error - assert prepared.sql() is not None - assert prepared.visual() is not None - assert prepared.layer_count() >= 1 - assert prepared.metadata() is not None - assert prepared.data() is not None - assert prepared.warnings() is not None + assert spec.sql() is not None + assert spec.visual() is not None + assert spec.layer_count() >= 1 + assert spec.metadata() is not None + assert spec.data() is not None + assert spec.warnings() is not None # Layer-specific accessors (may return None) - _ = prepared.layer_data(0) - _ = prepared.stat_data(0) - _ = prepared.layer_sql(0) - _ = prepared.stat_sql(0) + _ = spec.layer_data(0) + _ = spec.stat_data(0) + _ = spec.layer_sql(0) + _ = spec.stat_sql(0) class TestCustomReader: """Tests for custom Python reader support.""" def test_simple_custom_reader(self): - """Custom reader with execute() method works.""" + """Custom reader with execute_sql() method works.""" class SimpleReader: def execute_sql(self, sql: str) -> pl.DataFrame: return 
pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader = SimpleReader() - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) - assert prepared.metadata()["rows"] == 3 + spec = ggsql.execute("SELECT * FROM data VISUALISE x, y DRAW point", reader) + assert spec.metadata()["rows"] == 3 def test_custom_reader_with_register(self): """Custom reader with register() support.""" @@ -433,10 +430,10 @@ def register(self, name: str, df: pl.DataFrame) -> None: self.tables[name] = df reader = RegisterReader() - prepared = ggsql.prepare( + spec = ggsql.execute( "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader ) - assert prepared is not None + assert spec is not None def test_custom_reader_error_handling(self): """Custom reader errors are propagated.""" @@ -447,7 +444,7 @@ def execute_sql(self, sql: str) -> pl.DataFrame: reader = ErrorReader() with pytest.raises(ValueError, match="Custom reader error"): - ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) + ggsql.execute("SELECT 1 VISUALISE x, y DRAW point", reader) def test_custom_reader_wrong_return_type(self): """Custom reader returning wrong type raises TypeError.""" @@ -458,15 +455,15 @@ def execute_sql(self, sql: str): reader = WrongTypeReader() with pytest.raises((ValueError, TypeError)): - ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) + ggsql.execute("SELECT 1 VISUALISE x, y DRAW point", reader) def test_native_reader_fast_path(self): """Native DuckDBReader still works (fast path).""" reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" ) - assert prepared.metadata()["rows"] == 1 + assert spec.metadata()["rows"] == 1 def test_custom_reader_can_render(self): """Custom reader result can be rendered to Vega-Lite.""" @@ -482,20 +479,20 @@ def execute_sql(self, sql: str) -> pl.DataFrame: ) reader = 
StaticReader() - prepared = ggsql.prepare( + spec = ggsql.execute( "SELECT * FROM data VISUALISE x, y, category AS color DRAW point", reader, ) writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) + result = spec.render(writer) - spec = json.loads(result) - assert "$schema" in spec - assert "vega-lite" in spec["$schema"] + spec_dict = json.loads(result) + assert "$schema" in spec_dict + assert "vega-lite" in spec_dict["$schema"] - def test_custom_reader_execute_called(self): - """Verify execute() is called on the custom reader.""" + def test_custom_reader_execute_sql_called(self): + """Verify execute_sql() is called on the custom reader.""" class RecordingReader: def __init__(self): @@ -506,12 +503,12 @@ def execute_sql(self, sql: str) -> pl.DataFrame: return pl.DataFrame({"x": [1], "y": [2]}) reader = RecordingReader() - ggsql.prepare( + ggsql.execute( "SELECT * FROM data VISUALISE x, y DRAW point", reader, ) - # execute() should have been called at least once + # execute_sql() should have been called at least once assert len(reader.execute_calls) > 0 # All calls should be valid SQL strings assert all(isinstance(sql, str) for sql in reader.execute_calls) diff --git a/src/api.rs b/src/api.rs index 1158f5ef..b715c459 100644 --- a/src/api.rs +++ b/src/api.rs @@ -1,6 +1,6 @@ //! High-level ggsql API. //! -//! Two-stage API: `prepare()` → `render()`. +//! Two-stage API: `reader.execute()` → `render()`. use crate::naming; use crate::parser; @@ -8,11 +8,6 @@ use crate::plot::Plot; use crate::{DataFrame, Result}; use std::collections::HashMap; -#[cfg(feature = "duckdb")] -use crate::execute::prepare_data_with_executor; -#[cfg(feature = "duckdb")] -use crate::reader::Reader; - #[cfg(feature = "vegalite")] use crate::writer::Writer; @@ -20,8 +15,8 @@ use crate::writer::Writer; // Core Types // ============================================================================ -/// Result of `prepare()`, ready for rendering. 
-pub struct Prepared { +/// Result of `reader.execute()`, ready for rendering. +pub struct Spec { /// Single resolved plot specification plot: Plot, /// Internal data map (global + layer-specific DataFrames) @@ -40,8 +35,8 @@ pub struct Prepared { warnings: Vec, } -impl Prepared { - /// Create a new Prepared from PreparedData +impl Spec { + /// Create a new Spec from PreparedData pub(crate) fn new( plot: Plot, data: HashMap, @@ -237,27 +232,6 @@ pub struct Location { // High-Level API Functions // ============================================================================ -/// Prepare a query for visualization. Main entry point for the two-stage API. -#[cfg(feature = "duckdb")] -pub fn prepare(query: &str, reader: &dyn Reader) -> Result { - // Run validation first to capture warnings - let validated = validate(query)?; - let warnings: Vec = validated.warnings().to_vec(); - - // Prepare data (this also validates, but we want the warnings from above) - let prepared_data = prepare_data_with_executor(query, |sql| reader.execute_sql(sql))?; - - Ok(Prepared::new( - prepared_data.spec, - prepared_data.data, - prepared_data.sql, - prepared_data.visual, - prepared_data.layer_sql, - prepared_data.stat_sql, - warnings, - )) -} - /// Validate query syntax and semantics without executing SQL. 
pub fn validate(query: &str) -> Result { let mut errors = Vec::new(); @@ -427,35 +401,34 @@ mod tests { #[cfg(all(feature = "duckdb", feature = "vegalite"))] #[test] - fn test_prepare_and_render() { - use crate::reader::DuckDBReader; + fn test_execute_and_render() { + use crate::reader::{DuckDBReader, Reader}; use crate::writer::VegaLiteWriter; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let prepared = prepare("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point", &reader).unwrap(); + let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point").unwrap(); - assert_eq!(prepared.plot().layers.len(), 1); - assert_eq!(prepared.metadata().layer_count, 1); - assert!(prepared.data().is_some()); + assert_eq!(spec.plot().layers.len(), 1); + assert_eq!(spec.metadata().layer_count, 1); + assert!(spec.data().is_some()); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = spec.render(&writer).unwrap(); assert!(result.contains("point")); } #[cfg(all(feature = "duckdb", feature = "vegalite"))] #[test] - fn test_prepare_metadata() { - use crate::reader::DuckDBReader; + fn test_execute_metadata() { + use crate::reader::{DuckDBReader, Reader}; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let prepared = prepare( + let spec = reader.execute( "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) VISUALISE x, y DRAW point", - &reader, ) .unwrap(); - let metadata = prepared.metadata(); + let metadata = spec.metadata(); assert_eq!(metadata.rows, 3); assert_eq!(metadata.columns.len(), 2); assert!(metadata.columns.contains(&"x".to_string())); @@ -465,8 +438,8 @@ mod tests { #[cfg(all(feature = "duckdb", feature = "vegalite"))] #[test] - fn test_prepare_with_cte() { - use crate::reader::DuckDBReader; + fn test_execute_with_cte() { + use crate::reader::{DuckDBReader, Reader}; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); 
let query = r#" @@ -477,18 +450,18 @@ mod tests { VISUALISE x, y DRAW point "#; - let prepared = prepare(query, &reader).unwrap(); + let spec = reader.execute(query).unwrap(); - assert_eq!(prepared.plot().layers.len(), 1); - assert!(prepared.data().is_some()); - let df = prepared.data().unwrap(); + assert_eq!(spec.plot().layers.len(), 1); + assert!(spec.data().is_some()); + let df = spec.data().unwrap(); assert_eq!(df.height(), 2); } #[cfg(all(feature = "duckdb", feature = "vegalite"))] #[test] fn test_render_multi_layer() { - use crate::reader::DuckDBReader; + use crate::reader::{DuckDBReader, Reader}; use crate::writer::VegaLiteWriter; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); @@ -499,9 +472,9 @@ mod tests { DRAW line MAPPING x AS x, y AS y "#; - let prepared = prepare(query, &reader).unwrap(); + let spec = reader.execute(query).unwrap(); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = spec.render(&writer).unwrap(); assert!(result.contains("layer")); } @@ -524,13 +497,13 @@ mod tests { reader.register("my_data", df).unwrap(); let query = "SELECT * FROM my_data VISUALISE x, y DRAW point"; - let prepared = prepare(query, &reader).unwrap(); + let spec = reader.execute(query).unwrap(); - assert_eq!(prepared.metadata().rows, 3); - assert!(prepared.metadata().columns.contains(&"x".to_string())); + assert_eq!(spec.metadata().rows, 3); + assert!(spec.metadata().columns.contains(&"x".to_string())); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = spec.render(&writer).unwrap(); assert!(result.contains("point")); } @@ -566,19 +539,19 @@ mod tests { DRAW bar "#; - let prepared = prepare(query, &reader).unwrap(); - assert_eq!(prepared.metadata().rows, 3); + let spec = reader.execute(query).unwrap(); + assert_eq!(spec.metadata().rows, 3); } #[cfg(feature = "duckdb")] #[test] - fn test_prepare_no_viz_fails() { - use 
crate::reader::DuckDBReader; + fn test_execute_no_viz_fails() { + use crate::reader::{DuckDBReader, Reader}; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let query = "SELECT 1 as x, 2 as y"; - let result = prepare(query, &reader); + let result = reader.execute(query); assert!(result.is_err()); } diff --git a/src/cli.rs b/src/cli.rs index c8ad4e5c..ee97eca1 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -12,7 +12,7 @@ use std::path::PathBuf; #[cfg(feature = "duckdb")] use ggsql::reader::{DuckDBReader, Reader}; #[cfg(feature = "duckdb")] -use ggsql::{prepare, validate}; +use ggsql::validate; #[cfg(feature = "vegalite")] use ggsql::writer::VegaLiteWriter; @@ -186,24 +186,24 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option p, + // Execute ggsql query + let spec = match db_reader.execute(&query) { + Ok(s) => s, Err(e) => { - eprintln!("Failed to prepare data: {}", e); + eprintln!("Failed to execute query: {}", e); std::process::exit(1); } }; if verbose { - let metadata = prepared.metadata(); - eprintln!("\nData prepared:"); + let metadata = spec.metadata(); + eprintln!("\nQuery executed:"); eprintln!(" Rows: {}", metadata.rows); eprintln!(" Columns: {}", metadata.columns.join(", ")); eprintln!(" Layers: {}", metadata.layer_count); } - if prepared.plot().layers.is_empty() { + if spec.plot().layers.is_empty() { eprintln!("No visualization specifications found"); std::process::exit(1); } @@ -222,7 +222,7 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option r, Err(e) => { eprintln!("Failed to generate Vega-Lite output: {}", e); diff --git a/src/doc/API.md b/src/doc/API.md index ed5af4fe..3cbf9c71 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -4,33 +4,33 @@ This document provides a comprehensive reference for the ggsql public API. 
## Overview -- **Stage 1: `prepare()`** - Parse query, execute SQL, resolve mappings, prepare data -- **Stage 2: `render()`** - Generate output (Vega-Lite JSON, etc.) +- **Stage 1: `reader.execute()`** - Parse query, execute SQL, resolve mappings, create Spec +- **Stage 2: `spec.render()`** - Generate output (Vega-Lite JSON, etc.) ### API Functions -| Function | Use Case | -| ------------ | ---------------------------------------------------- | -| `prepare()` | Main entry point - full visualization pipeline | -| `render()` | Generate output from prepared data | -| `validate()` | Validate syntax + semantics, inspect query structure | +| Function | Use Case | +| ------------------ | ---------------------------------------------------- | +| `reader.execute()` | Main entry point - full visualization pipeline | +| `spec.render()` | Generate output from Spec | +| `validate()` | Validate syntax + semantics, inspect query structure | --- ## Core Functions -### `prepare` +### `Reader::execute` ```rust -pub fn prepare(query: &str, reader: &dyn Reader) -> Result +fn execute(&self, query: &str) -> Result ``` -Prepare a ggsql query for visualization. This is the main entry point for the two-stage API. +Execute a ggsql query for visualization. This is the main entry point - a default method on the Reader trait. -**What happens during preparation:** +**What happens during execution:** 1. Parses the query (SQL + VISUALISE portions) -2. Executes the main SQL query using the provided reader +2. Executes the main SQL query using the reader 3. Resolves wildcards (`VISUALISE *`) against actual columns 4. Merges global mappings into each layer 5. Executes layer-specific queries (filters, stats) @@ -40,31 +40,30 @@ Prepare a ggsql query for visualization. 
This is the main entry point for the tw **Arguments:** - `query` - The full ggsql query string -- `reader` - A reader implementing the `Reader` trait **Returns:** -- `Ok(Prepared)` - Ready for rendering +- `Ok(Spec)` - Ready for rendering - `Err(GgsqlError)` - Parse, validation, or execution error **Example:** ```rust -use ggsql::{prepare, reader::DuckDBReader, writer::VegaLiteWriter}; +use ggsql::reader::{DuckDBReader, Reader}; +use ggsql::writer::VegaLiteWriter; let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -let prepared = prepare( - "SELECT x, y FROM data VISUALISE x, y DRAW point", - &reader +let spec = reader.execute( + "SELECT x, y FROM data VISUALISE x, y DRAW point" )?; // Access metadata -println!("Rows: {}", prepared.metadata().rows); -println!("Columns: {:?}", prepared.metadata().columns); +println!("Rows: {}", spec.metadata().rows); +println!("Columns: {:?}", spec.metadata().columns); // Render to Vega-Lite let writer = VegaLiteWriter::new(); -let result = prepared.render(&writer)?; +let result = spec.render(&writer)?; ``` **Error Conditions:** @@ -184,9 +183,9 @@ if let Some(tree) = validated.tree() { --- -### `Prepared` +### `Spec` -Result of preparing a visualization, ready for rendering. +Result of executing a ggsql query, ready for rendering. #### Rendering Methods @@ -198,7 +197,7 @@ Result of preparing a visualization, ready for rendering. 
```rust let writer = VegaLiteWriter::new(); -let json = prepared.render(&writer)?; +let json = spec.render(&writer)?; println!("{}", json); ``` @@ -212,9 +211,9 @@ println!("{}", json); **Example:** ```rust -println!("Layers: {}", prepared.layer_count()); +println!("Layers: {}", spec.layer_count()); -let plot = prepared.plot(); +let plot = spec.plot(); for (i, layer) in plot.layers.iter().enumerate() { println!("Layer {}: {:?}", i, layer.geom); } @@ -229,7 +228,7 @@ for (i, layer) in plot.layers.iter().enumerate() { **Example:** ```rust -let meta = prepared.metadata(); +let meta = spec.metadata(); println!("Rows: {}", meta.rows); println!("Columns: {:?}", meta.columns); println!("Layer count: {}", meta.layer_count); @@ -248,17 +247,17 @@ println!("Layer count: {}", meta.layer_count); ```rust // Global data -if let Some(df) = prepared.data() { +if let Some(df) = spec.data() { println!("Global data: {} rows", df.height()); } // Layer-specific data (from FILTER or FROM clause) -if let Some(df) = prepared.layer_data(0) { +if let Some(df) = spec.layer_data(0) { println!("Layer 0 has filtered data: {} rows", df.height()); } // Stat data (histogram bins, density estimates, etc.) 
-if let Some(df) = prepared.stat_data(1) { +if let Some(df) = spec.stat_data(1) { println!("Layer 1 stat data: {} rows", df.height()); } ``` @@ -276,15 +275,15 @@ if let Some(df) = prepared.stat_data(1) { ```rust // Main query -println!("SQL: {}", prepared.sql()); -println!("Visual: {}", prepared.visual()); +println!("SQL: {}", spec.sql()); +println!("Visual: {}", spec.visual()); // Per-layer queries -for i in 0..prepared.layer_count() { - if let Some(sql) = prepared.layer_sql(i) { +for i in 0..spec.layer_count() { + if let Some(sql) = spec.layer_sql(i) { println!("Layer {} filter: {}", i, sql); } - if let Some(sql) = prepared.stat_sql(i) { + if let Some(sql) = spec.stat_sql(i) { println!("Layer {} stat: {}", i, sql); } } @@ -292,24 +291,24 @@ for i in 0..prepared.layer_count() { #### Warnings Method -| Method | Signature | Description | -| ---------- | -------------------------------------------- | ------------------------------------ | -| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings from preparation | +| Method | Signature | Description | +| ---------- | -------------------------------------------- | ---------------------------------- | +| `warnings` | `fn warnings(&self) -> &[ValidationWarning]` | Validation warnings from execution | **Example:** ```rust -let prepared = ggsql::prepare(query, &reader)?; +let spec = reader.execute(query)?; // Check for warnings -if !prepared.warnings().is_empty() { - for warning in prepared.warnings() { +if !spec.warnings().is_empty() { + for warning in spec.warnings() { eprintln!("Warning: {}", warning.message); } } // Continue with rendering -let json = prepared.render(&writer)?; +let json = spec.render(&writer)?; ``` --- @@ -465,10 +464,10 @@ class Validated: # Note: tree() not exposed (tree-sitter nodes are Rust-only) ``` -#### `Prepared` +#### `Spec` ```python -class Prepared: +class Spec: def render(self, writer: VegaLiteWriter) -> str: """Render to output format.""" @@ -512,6 +511,9 @@ def 
validate(query: str) -> Validated: Returns Validated object with query inspection and validation methods. """ -def prepare(query: str, reader: DuckDBReader) -> Prepared: - """Prepare a query for visualization.""" +def execute(query: str, reader: Any) -> Spec: + """Execute a ggsql query with a custom Python reader. + + For native readers, use reader.execute() method instead. + """ ``` diff --git a/src/lib.rs b/src/lib.rs index c9128f54..15523596 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,7 +27,7 @@ ggsql splits queries at the `VISUALISE` boundary: ## Core Components -- [`api`] - High-level API (prepare, parse, validate) +- [`api`] - High-level API (validate, Spec) - [`parser`] - Query parsing and AST generation - [`reader`] - Data source abstraction layer - [`writer`] - Output format abstraction layer @@ -55,7 +55,7 @@ pub use plot::{ // Re-export API types and functions pub use api::{ - prepare, validate, Location, Metadata, Prepared, Validated, ValidationError, ValidationWarning, + validate, Location, Metadata, Spec, Validated, ValidationError, ValidationWarning, }; // DataFrame abstraction (wraps Polars) diff --git a/src/reader/mod.rs b/src/reader/mod.rs index ed41a62c..0ed80949 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -7,6 +7,7 @@ //! //! All readers implement the `Reader` trait, which provides: //! - SQL query execution → DataFrame conversion +//! - Visualization query execution → Spec //! - Optional DataFrame registration for queryable tables //! - Connection management and error handling //! @@ -14,19 +15,29 @@ //! //! ```rust,ignore //! use ggsql::reader::{Reader, DuckDBReader}; +//! use ggsql::writer::VegaLiteWriter; //! -//! // Basic usage +//! // Execute a ggsql query //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -//! let df = reader.execute_sql("SELECT * FROM table")?; +//! let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; +//! +//! // Render to Vega-Lite JSON +//! 
let writer = VegaLiteWriter::new(); +//! let json = spec.render(&writer)?; //! //! // With DataFrame registration //! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; //! reader.register("my_table", some_dataframe)?; -//! let result = reader.execute_sql("SELECT * FROM my_table")?; +//! let spec = reader.execute("SELECT * FROM my_table VISUALISE x, y DRAW point")?; //! ``` use crate::{DataFrame, GgsqlError, Result}; +#[cfg(feature = "duckdb")] +use crate::api::{validate, Spec, ValidationWarning}; +#[cfg(feature = "duckdb")] +use crate::execute::prepare_data_with_executor; + #[cfg(feature = "duckdb")] pub mod duckdb; @@ -108,4 +119,56 @@ pub trait Reader { fn supports_register(&self) -> bool { false } + + /// Execute a ggsql query and return the visualization specification. + /// + /// This is the main entry point for creating visualizations. It parses the query, + /// executes the SQL portion, and returns a `Spec` ready for rendering. + /// + /// # Arguments + /// + /// * `query` - The ggsql query (SQL + VISUALISE clause) + /// + /// # Returns + /// + /// A `Spec` containing the resolved visualization specification and data. 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The query syntax is invalid + /// - The query has no VISUALISE clause + /// - The SQL execution fails + /// + /// # Example + /// + /// ```rust,ignore + /// use ggsql::reader::{Reader, DuckDBReader}; + /// use ggsql::writer::VegaLiteWriter; + /// + /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; + /// let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; + /// + /// let writer = VegaLiteWriter::new(); + /// let json = spec.render(&writer)?; + /// ``` + #[cfg(feature = "duckdb")] + fn execute(&self, query: &str) -> Result { + // Run validation first to capture warnings + let validated = validate(query)?; + let warnings: Vec = validated.warnings().to_vec(); + + // Prepare data (this also validates, but we want the warnings from above) + let prepared_data = prepare_data_with_executor(query, |sql| self.execute_sql(sql))?; + + Ok(Spec::new( + prepared_data.spec, + prepared_data.data, + prepared_data.sql, + prepared_data.visual, + prepared_data.layer_sql, + prepared_data.stat_sql, + warnings, + )) + } } diff --git a/src/rest.rs b/src/rest.rs index e87a14f9..68d59a54 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -34,9 +34,7 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use ggsql::{parser, validate, GgsqlError, VERSION}; #[cfg(feature = "duckdb")] -use ggsql::prepare; -#[cfg(feature = "duckdb")] -use ggsql::reader::DuckDBReader; +use ggsql::reader::{DuckDBReader, Reader}; #[cfg(feature = "vegalite")] use ggsql::writer::VegaLiteWriter; @@ -443,29 +441,29 @@ async fn query_handler( #[cfg(feature = "duckdb")] if request.reader.starts_with("duckdb://") { // Use shared reader or create new one - let prepared = if request.reader == "duckdb://memory" && state.reader.is_some() { + let spec = if request.reader == "duckdb://memory" && state.reader.is_some() { let reader_mutex = state.reader.as_ref().unwrap(); let reader = reader_mutex 
.lock() .map_err(|e| GgsqlError::InternalError(format!("Failed to lock reader: {}", e)))?; - prepare(&request.query, &*reader)? + reader.execute(&request.query)? } else { let reader = DuckDBReader::from_connection_string(&request.reader)?; - prepare(&request.query, &reader)? + reader.execute(&request.query)? }; // Get metadata - let metadata = prepared.metadata(); + let metadata = spec.metadata(); // Generate visualization output using writer #[cfg(feature = "vegalite")] if request.writer == "vegalite" { let writer = VegaLiteWriter::new(); - let json_output = prepared.render(&writer)?; + let json_output = spec.render(&writer)?; let spec_value: serde_json::Value = serde_json::from_str(&json_output) .map_err(|e| GgsqlError::WriterError(format!("Failed to parse JSON: {}", e)))?; - let plot = prepared.plot(); + let plot = spec.plot(); let result = QueryResult { spec: spec_value, From f652acce4d0bbb0491302ff2595895a95c633057 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 13:53:45 +0000 Subject: [PATCH 08/12] Move Spec out of api.rs --- ggsql-python/src/lib.rs | 3 +- src/api.rs | 154 +--------------------------------------- src/lib.rs | 9 +-- src/reader/mod.rs | 46 ++++++++++-- src/reader/spec.rs | 126 ++++++++++++++++++++++++++++++++ 5 files changed, 176 insertions(+), 162 deletions(-) create mode 100644 src/reader/spec.rs diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 5be68c42..1a4a8922 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -6,7 +6,8 @@ use pyo3::prelude::*; use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::api::{validate as rust_validate, Spec, ValidationWarning}; +use ggsql::api::{validate as rust_validate, ValidationWarning}; +use ggsql::reader::Spec; use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; use ggsql::GgsqlError; diff --git a/src/api.rs b/src/api.rs index b715c459..ab613f2c 100644 --- 
a/src/api.rs +++ b/src/api.rs @@ -1,164 +1,14 @@ //! High-level ggsql API. //! -//! Two-stage API: `reader.execute()` → `render()`. +//! Validation and query inspection without SQL execution. -use crate::naming; use crate::parser; -use crate::plot::Plot; -use crate::{DataFrame, Result}; -use std::collections::HashMap; - -#[cfg(feature = "vegalite")] -use crate::writer::Writer; +use crate::Result; // ============================================================================ // Core Types // ============================================================================ -/// Result of `reader.execute()`, ready for rendering. -pub struct Spec { - /// Single resolved plot specification - plot: Plot, - /// Internal data map (global + layer-specific DataFrames) - data: HashMap, - /// Cached metadata about the prepared visualization - metadata: Metadata, - /// The main SQL query that was executed - sql: String, - /// The raw VISUALISE portion text - visual: String, - /// Per-layer filter/source queries (None = uses global data directly) - layer_sql: Vec>, - /// Per-layer stat transform queries (None = no stat transform) - stat_sql: Vec>, - /// Validation warnings from preparation - warnings: Vec, -} - -impl Spec { - /// Create a new Spec from PreparedData - pub(crate) fn new( - plot: Plot, - data: HashMap, - sql: String, - visual: String, - layer_sql: Vec>, - stat_sql: Vec>, - warnings: Vec, - ) -> Self { - // Compute metadata from data - let (rows, columns) = if let Some(df) = data.get(naming::GLOBAL_DATA_KEY) { - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (df.height(), cols) - } else if let Some(df) = data.values().next() { - let cols: Vec = df - .get_column_names() - .iter() - .map(|s| s.to_string()) - .collect(); - (df.height(), cols) - } else { - (0, Vec::new()) - }; - - let layer_count = plot.layers.len(); - let metadata = Metadata { - rows, - columns, - layer_count, - }; - - Self { - plot, - data, - metadata, - sql, 
- visual, - layer_sql, - stat_sql, - warnings, - } - } - - /// Render to output format (e.g., Vega-Lite JSON). - #[cfg(feature = "vegalite")] - pub fn render(&self, writer: &dyn Writer) -> Result { - writer.write(&self.plot, &self.data) - } - - /// Get the resolved plot specification. - pub fn plot(&self) -> &Plot { - &self.plot - } - - /// Get visualization metadata. - pub fn metadata(&self) -> &Metadata { - &self.metadata - } - - /// Number of layers. - pub fn layer_count(&self) -> usize { - self.plot.layers.len() - } - - /// Get global data (main query result). - pub fn data(&self) -> Option<&DataFrame> { - self.data.get(naming::GLOBAL_DATA_KEY) - } - - /// Get layer-specific data (from FILTER or FROM clause). - pub fn layer_data(&self, layer_index: usize) -> Option<&DataFrame> { - self.data.get(&naming::layer_key(layer_index)) - } - - /// Get stat transform data (e.g., histogram bins, density estimates). - pub fn stat_data(&self, layer_index: usize) -> Option<&DataFrame> { - self.layer_data(layer_index) - } - - /// Get internal data map (all DataFrames by key). - pub fn data_map(&self) -> &HashMap { - &self.data - } - - /// The main SQL query that was executed. - pub fn sql(&self) -> &str { - &self.sql - } - - /// The VISUALISE portion (raw text). - pub fn visual(&self) -> &str { - &self.visual - } - - /// Layer filter/source query, or `None` if using global data. - pub fn layer_sql(&self, layer_index: usize) -> Option<&str> { - self.layer_sql.get(layer_index).and_then(|s| s.as_deref()) - } - - /// Stat transform query, or `None` if no stat transform. - pub fn stat_sql(&self, layer_index: usize) -> Option<&str> { - self.stat_sql.get(layer_index).and_then(|s| s.as_deref()) - } - - /// Validation warnings from preparation. - pub fn warnings(&self) -> &[ValidationWarning] { - &self.warnings - } -} - -/// Metadata about the prepared visualization. 
-#[derive(Debug, Clone)] -pub struct Metadata { - pub rows: usize, - pub columns: Vec, - pub layer_count: usize, -} - /// Result of `validate()` - query inspection and validation without SQL execution. pub struct Validated { sql: String, diff --git a/src/lib.rs b/src/lib.rs index 15523596..a0dd8cb7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,7 +27,7 @@ ggsql splits queries at the `VISUALISE` boundary: ## Core Components -- [`api`] - High-level API (validate, Spec) +- [`api`] - Validation API (validate, Validated) - [`parser`] - Query parsing and AST generation - [`reader`] - Data source abstraction layer - [`writer`] - Output format abstraction layer @@ -54,9 +54,10 @@ pub use plot::{ }; // Re-export API types and functions -pub use api::{ - validate, Location, Metadata, Spec, Validated, ValidationError, ValidationWarning, -}; +pub use api::{validate, Location, Validated, ValidationError, ValidationWarning}; + +// Re-export reader types +pub use reader::{Metadata, Spec}; // DataFrame abstraction (wraps Polars) pub use polars::prelude::DataFrame; diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 0ed80949..7da9130b 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -31,23 +31,59 @@ //! let spec = reader.execute("SELECT * FROM my_table VISUALISE x, y DRAW point")?; //! 
``` -use crate::{DataFrame, GgsqlError, Result}; +use std::collections::HashMap; -#[cfg(feature = "duckdb")] -use crate::api::{validate, Spec, ValidationWarning}; -#[cfg(feature = "duckdb")] +use crate::api::{validate, ValidationWarning}; use crate::execute::prepare_data_with_executor; +use crate::plot::Plot; +use crate::{DataFrame, GgsqlError, Result}; #[cfg(feature = "duckdb")] pub mod duckdb; pub mod connection; - pub mod data; +mod spec; #[cfg(feature = "duckdb")] pub use duckdb::DuckDBReader; +// ============================================================================ +// Spec - Result of reader.execute() +// ============================================================================ + +/// Result of executing a ggsql query, ready for rendering. +pub struct Spec { + /// Single resolved plot specification + pub(crate) plot: Plot, + /// Internal data map (global + layer-specific DataFrames) + pub(crate) data: HashMap, + /// Cached metadata about the prepared visualization + pub(crate) metadata: Metadata, + /// The main SQL query that was executed + pub(crate) sql: String, + /// The raw VISUALISE portion text + pub(crate) visual: String, + /// Per-layer filter/source queries (None = uses global data directly) + pub(crate) layer_sql: Vec>, + /// Per-layer stat transform queries (None = no stat transform) + pub(crate) stat_sql: Vec>, + /// Validation warnings from preparation + pub(crate) warnings: Vec, +} + +/// Metadata about the prepared visualization. +#[derive(Debug, Clone)] +pub struct Metadata { + pub rows: usize, + pub columns: Vec, + pub layer_count: usize, +} + +// ============================================================================ +// Reader Trait +// ============================================================================ + /// Trait for data source readers /// /// Readers execute SQL queries and return Polars DataFrames. 
diff --git a/src/reader/spec.rs b/src/reader/spec.rs new file mode 100644 index 00000000..a8c08914 --- /dev/null +++ b/src/reader/spec.rs @@ -0,0 +1,126 @@ +//! Implementation of Spec methods. + +use std::collections::HashMap; + +use crate::api::ValidationWarning; +use crate::naming; +use crate::plot::Plot; +use crate::writer::Writer; +use crate::{DataFrame, Result}; + +use super::{Metadata, Spec}; + +impl Spec { + /// Create a new Spec from PreparedData + pub(crate) fn new( + plot: Plot, + data: HashMap, + sql: String, + visual: String, + layer_sql: Vec>, + stat_sql: Vec>, + warnings: Vec, + ) -> Self { + // Compute metadata from data + let (rows, columns) = if let Some(df) = data.get(naming::GLOBAL_DATA_KEY) { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else if let Some(df) = data.values().next() { + let cols: Vec = df + .get_column_names() + .iter() + .map(|s| s.to_string()) + .collect(); + (df.height(), cols) + } else { + (0, Vec::new()) + }; + + let layer_count = plot.layers.len(); + let metadata = Metadata { + rows, + columns, + layer_count, + }; + + Self { + plot, + data, + metadata, + sql, + visual, + layer_sql, + stat_sql, + warnings, + } + } + + /// Render to output format (e.g., Vega-Lite JSON). + pub fn render(&self, writer: &dyn Writer) -> Result { + writer.write(&self.plot, &self.data) + } + + /// Get the resolved plot specification. + pub fn plot(&self) -> &Plot { + &self.plot + } + + /// Get visualization metadata. + pub fn metadata(&self) -> &Metadata { + &self.metadata + } + + /// Number of layers. + pub fn layer_count(&self) -> usize { + self.plot.layers.len() + } + + /// Get global data (main query result). + pub fn data(&self) -> Option<&DataFrame> { + self.data.get(naming::GLOBAL_DATA_KEY) + } + + /// Get layer-specific data (from FILTER or FROM clause). 
+ pub fn layer_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.data.get(&naming::layer_key(layer_index)) + } + + /// Get stat transform data (e.g., histogram bins, density estimates). + pub fn stat_data(&self, layer_index: usize) -> Option<&DataFrame> { + self.layer_data(layer_index) + } + + /// Get internal data map (all DataFrames by key). + pub fn data_map(&self) -> &HashMap { + &self.data + } + + /// The main SQL query that was executed. + pub fn sql(&self) -> &str { + &self.sql + } + + /// The VISUALISE portion (raw text). + pub fn visual(&self) -> &str { + &self.visual + } + + /// Layer filter/source query, or `None` if using global data. + pub fn layer_sql(&self, layer_index: usize) -> Option<&str> { + self.layer_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Stat transform query, or `None` if no stat transform. + pub fn stat_sql(&self, layer_index: usize) -> Option<&str> { + self.stat_sql.get(layer_index).and_then(|s| s.as_deref()) + } + + /// Validation warnings from preparation. 
+ pub fn warnings(&self) -> &[ValidationWarning] { + &self.warnings + } +} From 7eaab8e11c62159f92ade35322fb93de97d55717 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 14:06:56 +0000 Subject: [PATCH 09/12] Move validation to validate.rs --- ggsql-python/src/lib.rs | 2 +- src/lib.rs | 7 +- src/reader/mod.rs | 146 +++++++++++++++++++++++++++++++- src/reader/spec.rs | 2 +- src/{api.rs => validate.rs} | 163 +----------------------------------- 5 files changed, 155 insertions(+), 165 deletions(-) rename src/{api.rs => validate.rs} (60%) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 1a4a8922..45587266 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -6,7 +6,7 @@ use pyo3::prelude::*; use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::api::{validate as rust_validate, ValidationWarning}; +use ggsql::validate::{validate as rust_validate, ValidationWarning}; use ggsql::reader::Spec; use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; diff --git a/src/lib.rs b/src/lib.rs index a0dd8cb7..9bb21554 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,15 +46,15 @@ pub mod writer; #[cfg(feature = "duckdb")] pub mod execute; -pub mod api; +pub mod validate; // Re-export key types for convenience pub use plot::{ AestheticValue, DataSource, Facet, Geom, Layer, Mappings, Plot, Scale, SqlExpression, }; -// Re-export API types and functions -pub use api::{validate, Location, Validated, ValidationError, ValidationWarning}; +// Re-export validation types and functions +pub use validate::{validate, Location, Validated, ValidationError, ValidationWarning}; // Re-export reader types pub use reader::{Metadata, Spec}; @@ -779,4 +779,5 @@ mod integration_tests { assert_eq!(data["__ggsql_const_stroke_0__"], "value"); assert_eq!(data["__ggsql_const_stroke_1__"], "value"); } + } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 
7da9130b..34afb559 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -33,7 +33,7 @@ use std::collections::HashMap; -use crate::api::{validate, ValidationWarning}; +use crate::validate::{validate, ValidationWarning}; use crate::execute::prepare_data_with_executor; use crate::plot::Plot; use crate::{DataFrame, GgsqlError, Result}; @@ -208,3 +208,147 @@ pub trait Reader { )) } } + +#[cfg(test)] +#[cfg(all(feature = "duckdb", feature = "vegalite"))] +mod tests { + use super::*; + use crate::writer::VegaLiteWriter; + + #[test] + fn test_execute_and_render() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let spec = reader + .execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point") + .unwrap(); + + assert_eq!(spec.plot().layers.len(), 1); + assert_eq!(spec.metadata().layer_count, 1); + assert!(spec.data().is_some()); + + let writer = VegaLiteWriter::new(); + let result = spec.render(&writer).unwrap(); + assert!(result.contains("point")); + } + + #[test] + fn test_execute_metadata() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let spec = reader + .execute( + "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) VISUALISE x, y DRAW point", + ) + .unwrap(); + + let metadata = spec.metadata(); + assert_eq!(metadata.rows, 3); + assert_eq!(metadata.columns.len(), 2); + assert!(metadata.columns.contains(&"x".to_string())); + assert!(metadata.columns.contains(&"y".to_string())); + assert_eq!(metadata.layer_count, 1); + } + + #[test] + fn test_execute_with_cte() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + WITH data AS ( + SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) + ) + SELECT * FROM data + VISUALISE x, y DRAW point + "#; + + let spec = reader.execute(query).unwrap(); + + assert_eq!(spec.plot().layers.len(), 1); + assert!(spec.data().is_some()); + let df = spec.data().unwrap(); + assert_eq!(df.height(), 2); + } + + 
#[test] + fn test_render_multi_layer() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = r#" + SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) + VISUALISE + DRAW point MAPPING x AS x, y AS y + DRAW line MAPPING x AS x, y AS y + "#; + + let spec = reader.execute(query).unwrap(); + let writer = VegaLiteWriter::new(); + let result = spec.render(&writer).unwrap(); + + assert!(result.contains("layer")); + } + + #[test] + fn test_register_and_query() { + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let df = df! { + "x" => [1i32, 2, 3], + "y" => [10i32, 20, 30], + } + .unwrap(); + + reader.register("my_data", df).unwrap(); + + let query = "SELECT * FROM my_data VISUALISE x, y DRAW point"; + let spec = reader.execute(query).unwrap(); + + assert_eq!(spec.metadata().rows, 3); + assert!(spec.metadata().columns.contains(&"x".to_string())); + + let writer = VegaLiteWriter::new(); + let result = spec.render(&writer).unwrap(); + assert!(result.contains("point")); + } + + #[test] + fn test_register_and_join() { + use polars::prelude::*; + + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let sales = df! { + "id" => [1i32, 2, 3], + "amount" => [100i32, 200, 300], + "product_id" => [1i32, 1, 2], + } + .unwrap(); + + let products = df! 
{ + "id" => [1i32, 2], + "name" => ["Widget", "Gadget"], + } + .unwrap(); + + reader.register("sales", sales).unwrap(); + reader.register("products", products).unwrap(); + + let query = r#" + SELECT s.id, s.amount, p.name + FROM sales s + JOIN products p ON s.product_id = p.id + VISUALISE id AS x, amount AS y + DRAW bar + "#; + + let spec = reader.execute(query).unwrap(); + assert_eq!(spec.metadata().rows, 3); + } + + #[test] + fn test_execute_no_viz_fails() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let query = "SELECT 1 as x, 2 as y"; + + let result = reader.execute(query); + assert!(result.is_err()); + } +} diff --git a/src/reader/spec.rs b/src/reader/spec.rs index a8c08914..92c45781 100644 --- a/src/reader/spec.rs +++ b/src/reader/spec.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; -use crate::api::ValidationWarning; +use crate::validate::ValidationWarning; use crate::naming; use crate::plot::Plot; use crate::writer::Writer; diff --git a/src/api.rs b/src/validate.rs similarity index 60% rename from src/api.rs rename to src/validate.rs index ab613f2c..79bf4ed1 100644 --- a/src/api.rs +++ b/src/validate.rs @@ -1,6 +1,7 @@ -//! High-level ggsql API. +//! Query validation without SQL execution. //! -//! Validation and query inspection without SQL execution. +//! This module provides query syntax and semantic validation without executing +//! any SQL. Use this for IDE integration, syntax checking, and query inspection. use crate::parser; use crate::Result; @@ -79,7 +80,7 @@ pub struct Location { } // ============================================================================ -// High-Level API Functions +// Validation Function // ============================================================================ /// Validate query syntax and semantics without executing SQL. 
@@ -249,162 +250,6 @@ mod tests { assert!(!validated.errors().is_empty()); } - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_execute_and_render() { - use crate::reader::{DuckDBReader, Reader}; - use crate::writer::VegaLiteWriter; - - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point").unwrap(); - - assert_eq!(spec.plot().layers.len(), 1); - assert_eq!(spec.metadata().layer_count, 1); - assert!(spec.data().is_some()); - - let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); - assert!(result.contains("point")); - } - - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_execute_metadata() { - use crate::reader::{DuckDBReader, Reader}; - - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let spec = reader.execute( - "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) VISUALISE x, y DRAW point", - ) - .unwrap(); - - let metadata = spec.metadata(); - assert_eq!(metadata.rows, 3); - assert_eq!(metadata.columns.len(), 2); - assert!(metadata.columns.contains(&"x".to_string())); - assert!(metadata.columns.contains(&"y".to_string())); - assert_eq!(metadata.layer_count, 1); - } - - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_execute_with_cte() { - use crate::reader::{DuckDBReader, Reader}; - - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let query = r#" - WITH data AS ( - SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) - ) - SELECT * FROM data - VISUALISE x, y DRAW point - "#; - - let spec = reader.execute(query).unwrap(); - - assert_eq!(spec.plot().layers.len(), 1); - assert!(spec.data().is_some()); - let df = spec.data().unwrap(); - assert_eq!(df.height(), 2); - } - - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_render_multi_layer() { - use 
crate::reader::{DuckDBReader, Reader}; - use crate::writer::VegaLiteWriter; - - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let query = r#" - SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) - VISUALISE - DRAW point MAPPING x AS x, y AS y - DRAW line MAPPING x AS x, y AS y - "#; - - let spec = reader.execute(query).unwrap(); - let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); - - assert!(result.contains("layer")); - } - - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_register_and_query() { - use crate::reader::{DuckDBReader, Reader}; - use crate::writer::VegaLiteWriter; - use polars::prelude::*; - - let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - - let df = df! { - "x" => [1i32, 2, 3], - "y" => [10i32, 20, 30], - } - .unwrap(); - - reader.register("my_data", df).unwrap(); - - let query = "SELECT * FROM my_data VISUALISE x, y DRAW point"; - let spec = reader.execute(query).unwrap(); - - assert_eq!(spec.metadata().rows, 3); - assert!(spec.metadata().columns.contains(&"x".to_string())); - - let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); - assert!(result.contains("point")); - } - - #[cfg(all(feature = "duckdb", feature = "vegalite"))] - #[test] - fn test_register_and_join() { - use crate::reader::{DuckDBReader, Reader}; - use polars::prelude::*; - - let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - - let sales = df! { - "id" => [1i32, 2, 3], - "amount" => [100i32, 200, 300], - "product_id" => [1i32, 1, 2], - } - .unwrap(); - - let products = df! 
{ - "id" => [1i32, 2], - "name" => ["Widget", "Gadget"], - } - .unwrap(); - - reader.register("sales", sales).unwrap(); - reader.register("products", products).unwrap(); - - let query = r#" - SELECT s.id, s.amount, p.name - FROM sales s - JOIN products p ON s.product_id = p.id - VISUALISE id AS x, amount AS y - DRAW bar - "#; - - let spec = reader.execute(query).unwrap(); - assert_eq!(spec.metadata().rows, 3); - } - - #[cfg(feature = "duckdb")] - #[test] - fn test_execute_no_viz_fails() { - use crate::reader::{DuckDBReader, Reader}; - - let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let query = "SELECT 1 as x, 2 as y"; - - let result = reader.execute(query); - assert!(result.is_err()); - } - #[test] fn test_validate_sql_and_visual_content() { let query = "SELECT 1 as x, 2 as y VISUALISE DRAW point MAPPING x AS x, y AS y DRAW line MAPPING x AS x, y AS y"; From b588a4ee26bfddd471dd1bb4eda4bd1e349e57a1 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 15:55:51 +0000 Subject: [PATCH 10/12] Switch rendering to writer.render() --- CLAUDE.md | 36 +++++++------- README.md | 7 +-- ggsql-jupyter/src/executor.rs | 6 +-- ggsql-python/README.md | 6 +-- ggsql-python/python/ggsql/__init__.py | 2 +- ggsql-python/src/lib.rs | 70 +++++++++++++++------------ ggsql-python/tests/test_ggsql.py | 46 ++++++------------ src/cli.rs | 4 +- src/doc/API.md | 19 ++++---- src/lib.rs | 1 - src/reader/duckdb.rs | 4 +- src/reader/mod.rs | 18 +++---- src/reader/spec.rs | 10 +--- src/rest.rs | 4 +- src/writer/mod.rs | 47 ++++++++++++++++-- src/writer/vegalite.rs | 2 + 16 files changed, 155 insertions(+), 127 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 8c0cba03..ca8c0e09 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -165,16 +165,16 @@ let spec = reader.execute( // Render to Vega-Lite JSON let writer = VegaLiteWriter::new(); -let json = spec.render(&writer)?; +let json = writer.render(&spec)?; ``` ### Core Functions -| Function | Purpose | -| 
------------------------ | ------------------------------------------------------ | -| `reader.execute(query)` | Main entry point: parse, execute SQL, resolve mappings | -| `spec.render(writer)` | Generate output (Vega-Lite JSON) from Spec | -| `validate(query)` | Validate syntax + semantics, inspect query structure | +| Function | Purpose | +| ----------------------- | ------------------------------------------------------ | +| `reader.execute(query)` | Main entry point: parse, execute SQL, resolve mappings | +| `writer.render(spec)` | Generate output from a Spec | +| `validate(query)` | Validate syntax + semantics, inspect query structure | ### Key Types @@ -909,7 +909,7 @@ print(f"SQL: {spec.sql()}") # Render to Vega-Lite JSON writer = ggsql.VegaLiteWriter() -json_output = spec.render(writer) +json_output = writer.render(spec) ``` **Convenience Function** (`render_altair`): @@ -942,21 +942,21 @@ print(f"Errors: {validated.errors()}") **Classes**: -| Class | Description | -| -------------------------- | ------------------------------------------------ | -| `DuckDBReader(connection)` | Database reader with DataFrame registration | -| `VegaLiteWriter()` | Vega-Lite JSON output writer | -| `Validated` | Result of `validate()` with query inspection | +| Class | Description | +| -------------------------- | ------------------------------------------------- | +| `DuckDBReader(connection)` | Database reader with DataFrame registration | +| `VegaLiteWriter()` | Vega-Lite JSON output writer | +| `Validated` | Result of `validate()` with query inspection | | `Spec` | Result of `reader.execute()`, ready for rendering | **Functions**: -| Function | Description | -| -------------------------- | ------------------------------------------------- | -| `validate(query)` | Syntax/semantic validation with query inspection | -| `reader.execute(query)` | Execute ggsql query, return Spec | -| `execute(query, reader)` | Execute with custom reader (bridge path) | -| `render_altair(df, 
viz)` | Convenience: render DataFrame to Altair chart | +| Function | Description | +| ------------------------ | ------------------------------------------------ | +| `validate(query)` | Syntax/semantic validation with query inspection | +| `reader.execute(query)` | Execute ggsql query, return Spec | +| `execute(query, reader)` | Execute with custom reader (bridge path) | +| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | **Spec Methods**: diff --git a/README.md b/README.md index 46ff2b50..8af476f9 100644 --- a/README.md +++ b/README.md @@ -327,13 +327,10 @@ chart.display() reader = ggsql.DuckDBReader("duckdb://memory") reader.register("data", df) -prepared = ggsql.prepare( - "SELECT * FROM data VISUALISE x, y DRAW point", - reader -) +spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +json_output = writer.render(spec) ``` See the [ggsql-python README](ggsql-python/README.md) for complete API documentation. diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index 42b541d8..d91b223a 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -7,7 +7,7 @@ use anyhow::Result; use ggsql::{ reader::{DuckDBReader, Reader}, validate, - writer::VegaLiteWriter, + writer::{VegaLiteWriter, Writer}, }; use polars::frame::DataFrame; @@ -77,8 +77,8 @@ impl QueryExecutor { spec.metadata().layer_count ); - // 4. Render to Vega-Lite - let vega_json = spec.render(&self.writer)?; + // 4. Render to output format + let vega_json = self.writer.render(&spec)?; tracing::debug!("Generated Vega-Lite spec: {} chars", vega_json.len()); diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 08e9b848..f69dd073 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -106,7 +106,7 @@ print(spec.data()) # Returns polars DataFrame # 6. 
Render to Vega-Lite JSON writer = ggsql.VegaLiteWriter() -vegalite_json = spec.render(writer) +vegalite_json = writer.render(spec) print(vegalite_json) ``` @@ -135,7 +135,7 @@ Writer that generates Vega-Lite v6 JSON specifications. ```python writer = ggsql.VegaLiteWriter() -json_output = spec.render(writer) +json_output = writer.render(spec) ``` #### `Validated` @@ -259,7 +259,7 @@ spec = ggsql.execute( reader ) writer = ggsql.VegaLiteWriter() -json_output = spec.render(writer) +json_output = writer.render(spec) ``` **Optional methods** for custom readers: diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py index de159d2a..d69c84ef 100644 --- a/ggsql-python/python/ggsql/__init__.py +++ b/ggsql-python/python/ggsql/__init__.py @@ -84,7 +84,7 @@ def render_altair( # Execute and render spec = reader.execute(query) writer = VegaLiteWriter() - vegalite_json = spec.render(writer) + vegalite_json = writer.render(spec) # Parse to determine the correct Altair class spec = json.loads(vegalite_json) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 45587266..1a9d0efc 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -6,10 +6,10 @@ use pyo3::prelude::*; use pyo3::types::{PyBytes, PyDict, PyList}; use std::io::Cursor; -use ggsql::validate::{validate as rust_validate, ValidationWarning}; use ggsql::reader::Spec; use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; -use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; +use ggsql::validate::{validate as rust_validate, ValidationWarning}; +use ggsql::writer::{VegaLiteWriter as RustVegaLiteWriter, Writer as RustWriter}; use ggsql::GgsqlError; use polars::prelude::{DataFrame, IpcReader, IpcWriter, SerReader, SerWriter}; @@ -308,7 +308,8 @@ impl PyDuckDBReader { /// -------- /// >>> reader = DuckDBReader("duckdb://memory") /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") - /// >>> json_output = 
spec.render(VegaLiteWriter()) + /// >>> writer = VegaLiteWriter() + /// >>> json_output = writer.render(spec) fn execute(&self, query: &str) -> PyResult { self.inner .execute(query) @@ -329,7 +330,7 @@ impl PyDuckDBReader { /// -------- /// >>> writer = VegaLiteWriter() /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") -/// >>> json_output = spec.render(writer) +/// >>> json_output = writer.render(spec) #[pyclass(name = "VegaLiteWriter")] struct PyVegaLiteWriter { inner: RustVegaLiteWriter, @@ -349,6 +350,35 @@ impl PyVegaLiteWriter { inner: RustVegaLiteWriter::new(), } } + + /// Render a Spec to Vega-Lite JSON output + /// + /// Parameters + /// ---------- + /// spec : Spec + /// The visualization specification from reader.execute(). + /// + /// Returns + /// ------- + /// str + /// The output (i.e., Vega-Lite JSON string). + /// + /// Raises + /// ------ + /// ValueError + /// If rendering fails. + /// + /// Examples + /// -------- + /// >>> reader = DuckDBReader("duckdb://memory") + /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + /// >>> writer = VegaLiteWriter() + /// >>> json_output = writer.render(spec) + fn render(&self, spec: &PySpec) -> PyResult { + self.inner + .render(&spec.inner) + .map_err(|e| PyErr::new::(e.to_string())) + } } // ============================================================================ @@ -439,13 +469,14 @@ impl PyValidated { /// Result of reader.execute(), ready for rendering. /// /// Contains the resolved plot specification, data, and metadata. -/// Use render() to generate Vega-Lite JSON output. +/// Use writer.render(spec) to generate output. 
/// /// Examples /// -------- /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") /// >>> print(f"Rows: {spec.metadata()['rows']}") -/// >>> json_output = spec.render(VegaLiteWriter()) +/// >>> writer = VegaLiteWriter() +/// >>> json_output = writer.render(spec) #[pyclass(name = "Spec")] struct PySpec { inner: Spec, @@ -453,28 +484,6 @@ struct PySpec { #[pymethods] impl PySpec { - /// Render to output format (Vega-Lite JSON). - /// - /// Parameters - /// ---------- - /// writer : VegaLiteWriter - /// The writer to use for rendering. - /// - /// Returns - /// ------- - /// str - /// The Vega-Lite JSON specification as a string. - /// - /// Raises - /// ------ - /// ValueError - /// If rendering fails. - fn render(&self, writer: &PyVegaLiteWriter) -> PyResult { - self.inner - .render(&writer.inner) - .map_err(|e| PyErr::new::(e.to_string())) - } - /// Get visualization metadata. /// /// Returns @@ -688,8 +697,9 @@ fn validate(query: &str) -> PyResult { /// -------- /// >>> # Using native reader (prefer reader.execute() instead) /// >>> reader = DuckDBReader("duckdb://memory") -/// >>> spec = execute("SELECT 1 AS x, 2 AS Y VISUALISE x, y DRAW point", reader) -/// >>> json_output = spec.render(VegaLiteWriter()) +/// >>> spec = execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) +/// >>> writer = VegaLiteWriter() +/// >>> json_output = writer.render(spec) /// /// >>> # Using custom Python reader /// >>> class MyReader: diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index e54df2e8..8c7c1f19 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -98,9 +98,7 @@ class TestExecute: def test_execute_simple_query(self): reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert spec is not None assert spec.layer_count() 
== 1 @@ -127,23 +125,17 @@ def test_execute_metadata(self): def test_execute_sql_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert "SELECT" in spec.sql() def test_execute_visual_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert "VISUALISE" in spec.visual() def test_execute_data_accessor(self): reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") data = spec.data() assert isinstance(data, pl.DataFrame) assert data.shape == (1, 2) @@ -154,17 +146,15 @@ def test_execute_without_visualise_fails(self): reader.execute("SELECT 1 AS x, 2 AS y") -class TestSpecRender: - """Tests for Spec.render() method.""" +class TestWriterRender: + """Tests for VegaLiteWriter.render() method.""" def test_render_to_vegalite(self): reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") writer = ggsql.VegaLiteWriter() - result = spec.render(writer) + result = writer.render(spec) assert isinstance(result, str) spec_dict = json.loads(result) @@ -179,7 +169,7 @@ def test_render_contains_data(self): spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point") writer = ggsql.VegaLiteWriter() - result = spec.render(writer) + result = writer.render(spec) spec_dict = json.loads(result) # Data should be in the spec (either inline or in datasets) assert "data" in spec_dict or "datasets" in spec_dict @@ -194,7 +184,7 @@ def test_render_multi_layer(self): ) 
writer = ggsql.VegaLiteWriter() - result = spec.render(writer) + result = writer.render(spec) spec_dict = json.loads(result) assert "layer" in spec_dict @@ -367,7 +357,7 @@ def test_end_to_end_workflow(self): # Render to Vega-Lite writer = ggsql.VegaLiteWriter() - result = spec.render(writer) + result = writer.render(spec) # Verify output spec_dict = json.loads(result) @@ -377,9 +367,7 @@ def test_end_to_end_workflow(self): def test_can_introspect_spec(self): """Test all introspection methods on Spec.""" reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") # All these should work without error assert spec.sql() is not None @@ -430,9 +418,7 @@ def register(self, name: str, df: pl.DataFrame) -> None: self.tables[name] = df reader = RegisterReader() - spec = ggsql.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + spec = ggsql.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) assert spec is not None def test_custom_reader_error_handling(self): @@ -460,9 +446,7 @@ def execute_sql(self, sql: str): def test_native_reader_fast_path(self): """Native DuckDBReader still works (fast path).""" reader = ggsql.DuckDBReader("duckdb://memory") - spec = reader.execute( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point" - ) + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert spec.metadata()["rows"] == 1 def test_custom_reader_can_render(self): @@ -485,7 +469,7 @@ def execute_sql(self, sql: str) -> pl.DataFrame: ) writer = ggsql.VegaLiteWriter() - result = spec.render(writer) + result = writer.render(spec) spec_dict = json.loads(result) assert "$schema" in spec_dict diff --git a/src/cli.rs b/src/cli.rs index ee97eca1..bb6d4df9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -15,7 +15,7 @@ use ggsql::reader::{DuckDBReader, Reader}; use ggsql::validate; #[cfg(feature = 
"vegalite")] -use ggsql::writer::VegaLiteWriter; +use ggsql::writer::{VegaLiteWriter, Writer}; #[derive(Parser)] #[command(name = "ggsql")] @@ -222,7 +222,7 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option r, Err(e) => { eprintln!("Failed to generate Vega-Lite output: {}", e); diff --git a/src/doc/API.md b/src/doc/API.md index 3cbf9c71..89753bc5 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -5,14 +5,14 @@ This document provides a comprehensive reference for the ggsql public API. ## Overview - **Stage 1: `reader.execute()`** - Parse query, execute SQL, resolve mappings, create Spec -- **Stage 2: `spec.render()`** - Generate output (Vega-Lite JSON, etc.) +- **Stage 2: `writer.render()`** - Generate output (Vega-Lite JSON, etc.) ### API Functions | Function | Use Case | | ------------------ | ---------------------------------------------------- | | `reader.execute()` | Main entry point - full visualization pipeline | -| `spec.render()` | Generate output from Spec | +| `writer.render()` | Generate output from Spec | | `validate()` | Validate syntax + semantics, inspect query structure | --- @@ -50,7 +50,7 @@ Execute a ggsql query for visualization. This is the main entry point - a defaul ```rust use ggsql::reader::{DuckDBReader, Reader}; -use ggsql::writer::VegaLiteWriter; +use ggsql::writer::{VegaLiteWriter, Writer}; let reader = DuckDBReader::from_connection_string("duckdb://memory")?; let spec = reader.execute( @@ -63,7 +63,7 @@ println!("Columns: {:?}", spec.metadata().columns); // Render to Vega-Lite let writer = VegaLiteWriter::new(); -let result = spec.render(&writer)?; +let result = writer.render(&spec)?; ``` **Error Conditions:** @@ -187,17 +187,15 @@ if let Some(tree) = validated.tree() { Result of executing a ggsql query, ready for rendering. 
-#### Rendering Methods +#### Rendering -| Method | Signature | Description | -| -------- | --------------------------------------------------------- | ----------------------- | -| `render` | `fn render(&self, writer: &dyn Writer) -> Result` | Render to output format | +Use `writer.render(&spec)` to generate output. **Example:** ```rust let writer = VegaLiteWriter::new(); -let json = spec.render(&writer)?; +let json = writer.render(&spec)?; println!("{}", json); ``` @@ -308,7 +306,8 @@ if !spec.warnings().is_empty() { } // Continue with rendering -let json = spec.render(&writer)?; +let writer = VegaLiteWriter::new(); +let json = writer.render(&spec)?; ``` --- diff --git a/src/lib.rs b/src/lib.rs index 9bb21554..61273bd6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -779,5 +779,4 @@ mod integration_tests { assert_eq!(data["__ggsql_const_stroke_0__"], "value"); assert_eq!(data["__ggsql_const_stroke_1__"], "value"); } - } diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index f67c39f2..de1cf2eb 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -620,7 +620,9 @@ mod tests { reader.register("my_table", df).unwrap(); // Query the registered table - let result = reader.execute_sql("SELECT * FROM my_table ORDER BY x").unwrap(); + let result = reader + .execute_sql("SELECT * FROM my_table ORDER BY x") + .unwrap(); assert_eq!(result.shape(), (3, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 34afb559..5ac40828 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -15,7 +15,7 @@ //! //! ```rust,ignore //! use ggsql::reader::{Reader, DuckDBReader}; -//! use ggsql::writer::VegaLiteWriter; +//! use ggsql::writer::{Writer, VegaLiteWriter}; //! //! // Execute a ggsql query //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; @@ -23,7 +23,7 @@ //! //! // Render to Vega-Lite JSON //! let writer = VegaLiteWriter::new(); -//! let json = spec.render(&writer)?; +//! 
let json = writer.render(&spec)?; //! //! // With DataFrame registration //! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; @@ -33,9 +33,9 @@ use std::collections::HashMap; -use crate::validate::{validate, ValidationWarning}; use crate::execute::prepare_data_with_executor; use crate::plot::Plot; +use crate::validate::{validate, ValidationWarning}; use crate::{DataFrame, GgsqlError, Result}; #[cfg(feature = "duckdb")] @@ -180,13 +180,13 @@ pub trait Reader { /// /// ```rust,ignore /// use ggsql::reader::{Reader, DuckDBReader}; - /// use ggsql::writer::VegaLiteWriter; + /// use ggsql::writer::{Writer, VegaLiteWriter}; /// /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; /// let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; /// /// let writer = VegaLiteWriter::new(); - /// let json = spec.render(&writer)?; + /// let json = writer.render(&spec)?; /// ``` #[cfg(feature = "duckdb")] fn execute(&self, query: &str) -> Result { @@ -213,7 +213,7 @@ pub trait Reader { #[cfg(all(feature = "duckdb", feature = "vegalite"))] mod tests { use super::*; - use crate::writer::VegaLiteWriter; + use crate::writer::{VegaLiteWriter, Writer}; #[test] fn test_execute_and_render() { @@ -227,7 +227,7 @@ mod tests { assert!(spec.data().is_some()); let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); + let result = writer.render(&spec).unwrap(); assert!(result.contains("point")); } @@ -279,7 +279,7 @@ mod tests { let spec = reader.execute(query).unwrap(); let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); + let result = writer.render(&spec).unwrap(); assert!(result.contains("layer")); } @@ -305,7 +305,7 @@ mod tests { assert!(spec.metadata().columns.contains(&"x".to_string())); let writer = VegaLiteWriter::new(); - let result = spec.render(&writer).unwrap(); + let result = writer.render(&spec).unwrap(); assert!(result.contains("point")); } diff --git 
a/src/reader/spec.rs b/src/reader/spec.rs index 92c45781..4b1fc5bd 100644 --- a/src/reader/spec.rs +++ b/src/reader/spec.rs @@ -2,11 +2,10 @@ use std::collections::HashMap; -use crate::validate::ValidationWarning; use crate::naming; use crate::plot::Plot; -use crate::writer::Writer; -use crate::{DataFrame, Result}; +use crate::validate::ValidationWarning; +use crate::DataFrame; use super::{Metadata, Spec}; @@ -59,11 +58,6 @@ impl Spec { } } - /// Render to output format (e.g., Vega-Lite JSON). - pub fn render(&self, writer: &dyn Writer) -> Result { - writer.write(&self.plot, &self.data) - } - /// Get the resolved plot specification. pub fn plot(&self) -> &Plot { &self.plot diff --git a/src/rest.rs b/src/rest.rs index 68d59a54..8f2338c4 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -37,7 +37,7 @@ use ggsql::{parser, validate, GgsqlError, VERSION}; use ggsql::reader::{DuckDBReader, Reader}; #[cfg(feature = "vegalite")] -use ggsql::writer::VegaLiteWriter; +use ggsql::writer::{VegaLiteWriter, Writer}; /// CLI arguments for the REST API server #[derive(Parser)] @@ -459,7 +459,7 @@ async fn query_handler( #[cfg(feature = "vegalite")] if request.writer == "vegalite" { let writer = VegaLiteWriter::new(); - let json_output = spec.render(&writer)?; + let json_output = writer.render(&spec)?; let spec_value: serde_json::Value = serde_json::from_str(&json_output) .map_err(|e| GgsqlError::WriterError(format!("Failed to parse JSON: {}", e)))?; diff --git a/src/writer/mod.rs b/src/writer/mod.rs index 7f026e6b..b06bf332 100644 --- a/src/writer/mod.rs +++ b/src/writer/mod.rs @@ -14,12 +14,17 @@ //! //! ```rust,ignore //! use ggsql::writer::{Writer, VegaLiteWriter}; +//! use ggsql::reader::{Reader, DuckDBReader}; +//! +//! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; +//! let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; //! //! let writer = VegaLiteWriter::new(); -//! let json = writer.write(&spec, &dataframe)?; +//! 
let json = writer.render(&spec)?; //! println!("{}", json); //! ``` +use crate::reader::Spec; use crate::{DataFrame, Plot, Result}; use std::collections::HashMap; @@ -33,7 +38,15 @@ pub use vegalite::VegaLiteWriter; /// /// Writers take a Plot and data sources and produce formatted output /// (JSON, R code, PNG bytes, etc.). +/// +/// # Associated Types +/// +/// * `Output` - The type returned by `write()` and `render()`. Use `Option` +/// for text output, `Option>` for binary, `()` for void writers, etc. pub trait Writer { + /// The output type produced by this writer. + type Output; + /// Generate output from a visualization specification and data sources /// /// # Arguments @@ -44,7 +57,7 @@ pub trait Writer { /// /// # Returns /// - /// A string containing the formatted output (JSON, code, etc.) + /// The writer's output, depends on writer implementation. /// /// # Errors /// @@ -52,7 +65,7 @@ pub trait Writer { /// - The spec is incompatible with this writer /// - The data doesn't match the spec's requirements /// - Output generation fails - fn write(&self, spec: &Plot, data: &HashMap) -> Result; + fn write(&self, spec: &Plot, data: &HashMap) -> Result; /// Validate that a spec is compatible with this writer /// @@ -67,4 +80,32 @@ pub trait Writer { /// /// Ok(()) if the spec is compatible, otherwise an error fn validate(&self, spec: &Plot) -> Result<()>; + + /// Render a Spec to output format + /// + /// This is the main entry point for generating visualization output. 
+ /// + /// # Arguments + /// + /// * `spec` - The prepared visualization specification from `reader.execute()` + /// + /// # Returns + /// + /// The writer's output (type depends on writer implementation) + /// + /// # Example + /// + /// ```rust,ignore + /// use ggsql::reader::{Reader, DuckDBReader}; + /// use ggsql::writer::{Writer, VegaLiteWriter}; + /// + /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; + /// let spec = reader.execute("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point")?; + /// + /// let writer = VegaLiteWriter::new(); + /// let json = writer.render(&spec)?; + /// ``` + fn render(&self, spec: &Spec) -> Result { + self.write(spec.plot(), spec.data_map()) + } } diff --git a/src/writer/vegalite.rs b/src/writer/vegalite.rs index 01bf884b..ec86589a 100644 --- a/src/writer/vegalite.rs +++ b/src/writer/vegalite.rs @@ -999,6 +999,8 @@ impl VegaLiteWriter { } impl Writer for VegaLiteWriter { + type Output = String; + fn write(&self, spec: &Plot, data: &HashMap) -> Result { // Validate spec before processing self.validate(spec)?; From d0b585b5b83c336585340de44ced547c6c164484 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 16:09:30 +0000 Subject: [PATCH 11/12] Add unregister method --- CLAUDE.md | 2 + ggsql-python/README.md | 5 +++ ggsql-python/src/lib.rs | 29 +++++++++++++++ src/doc/API.md | 10 +++++ src/reader/duckdb.rs | 81 ++++++++++++++++++++++++++++++++++++++++- src/reader/mod.rs | 20 ++++++++++ 6 files changed, 146 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index ca8c0e09..29149b76 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -209,6 +209,7 @@ let json = writer.render(&spec)?; - `execute_sql(sql)` - Run SQL, return DataFrame - `register(name, df)` - Register DataFrame as table +- `unregister(name)` - Unregister a previously registered table - Implementation: `DuckDBReader` **Writer trait** (output format abstraction): @@ -1000,6 +1001,7 @@ Optional methods for custom readers: - 
`supports_register() -> bool` - Return `True` if registration is supported - `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a table +- `unregister(name: str) -> None` - Unregister a previously registered table Native readers (e.g., `DuckDBReader`) use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. diff --git a/ggsql-python/README.md b/ggsql-python/README.md index f69dd073..f08afe34 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -126,6 +126,7 @@ reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database **Methods:** - `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table +- `unregister(name: str)` - Unregister a previously registered table - `execute_sql(sql: str) -> polars.DataFrame` - Execute SQL and return results - `supports_register() -> bool` - Check if registration is supported @@ -266,6 +267,7 @@ json_output = writer.render(spec) - `supports_register() -> bool` - Return `True` if your reader supports DataFrame registration - `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a queryable table +- `unregister(name: str) -> None` - Unregister a previously registered table ```python class AdvancedReader: @@ -283,6 +285,9 @@ class AdvancedReader: def register(self, name: str, df: pl.DataFrame) -> None: self.tables[name] = df + + def unregister(self, name: str) -> None: + del self.tables[name] ``` Native readers like `DuckDBReader` use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. 
diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 1a9d0efc..d2eb0ec0 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -159,6 +159,18 @@ impl Reader for PyReaderBridge { Ok(()) }) } + + fn unregister(&mut self, name: &str) -> ggsql::Result<()> { + Python::attach(|py| { + self.obj + .bind(py) + .call_method1("unregister", (name,)) + .map_err(|e| { + GgsqlError::ReaderError(format!("Reader.unregister() failed: {}", e)) + })?; + Ok(()) + }) + } } // ============================================================================ @@ -249,6 +261,23 @@ impl PyDuckDBReader { .map_err(|e| PyErr::new::(e.to_string())) } + /// Unregister a previously registered table. + /// + /// Parameters + /// ---------- + /// name : str + /// The table name to unregister. + /// + /// Raises + /// ------ + /// ValueError + /// If the table wasn't registered via this reader or unregistration fails. + fn unregister(&mut self, name: &str) -> PyResult<()> { + self.inner + .unregister(name) + .map_err(|e| PyErr::new::(e.to_string())) + } + /// Execute a SQL query and return the result as a DataFrame. /// /// Parameters diff --git a/src/doc/API.md b/src/doc/API.md index 89753bc5..0676ac5a 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -377,6 +377,9 @@ pub trait Reader { /// Register a DataFrame as a queryable table fn register(&mut self, name: &str, df: DataFrame) -> Result<()>; + /// Unregister a previously registered table + fn unregister(&mut self, name: &str) -> Result<()>; + /// Check if this reader supports DataFrame registration fn supports_register(&self) -> bool; } @@ -423,6 +426,13 @@ class DuckDBReader: df: Polars DataFrame or narwhals-compatible DataFrame """ + def unregister(self, name: str) -> None: + """Unregister a previously registered table. 
+ + Args: + name: Table name to unregister + """ + def execute_sql(self, sql: str) -> polars.DataFrame: """Execute SQL and return a Polars DataFrame.""" diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index de1cf2eb..1824d6dc 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -10,6 +10,7 @@ use duckdb::vtab::arrow::{arrow_recordbatch_to_query_params, ArrowVTab}; use duckdb::{params, Connection}; use polars::io::SerWriter; use polars::prelude::*; +use std::collections::HashSet; use std::io::Cursor; /// DuckDB database reader @@ -32,6 +33,7 @@ use std::io::Cursor; /// ``` pub struct DuckDBReader { conn: Connection, + registered_tables: HashSet, } impl DuckDBReader { @@ -75,7 +77,10 @@ impl DuckDBReader { GgsqlError::ReaderError(format!("Failed to register arrow function: {}", e)) })?; - Ok(Self { conn }) + Ok(Self { + conn, + registered_tables: HashSet::new(), + }) } /// Get a reference to the underlying DuckDB connection @@ -523,6 +528,30 @@ impl Reader for DuckDBReader { GgsqlError::ReaderError(format!("Failed to register table '{}': {}", name, e)) })?; + // Track the table so we can unregister it later + self.registered_tables.insert(name.to_string()); + + Ok(()) + } + + fn unregister(&mut self, name: &str) -> Result<()> { + // Only allow unregistering tables we created via register() + if !self.registered_tables.contains(name) { + return Err(GgsqlError::ReaderError(format!( + "Table '{}' was not registered via this reader", + name + ))); + } + + // Drop the temp table + let sql = format!("DROP TABLE IF EXISTS \"{}\"", name); + self.conn.execute(&sql, []).map_err(|e| { + GgsqlError::ReaderError(format!("Failed to unregister table '{}': {}", name, e)) + })?; + + // Remove from tracking + self.registered_tables.remove(name); + Ok(()) } @@ -704,4 +733,54 @@ mod tests { assert_eq!(result.shape(), (0, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } + + #[test] + fn test_unregister() { + let mut reader = 
DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = DataFrame::new(vec![Column::new("x".into(), vec![1i32, 2, 3])]).unwrap(); + + reader.register("test_data", df).unwrap(); + + // Should be queryable + let result = reader.execute_sql("SELECT * FROM test_data").unwrap(); + assert_eq!(result.height(), 3); + + // Unregister + reader.unregister("test_data").unwrap(); + + // Should no longer exist + let result = reader.execute_sql("SELECT * FROM test_data"); + assert!(result.is_err()); + } + + #[test] + fn test_unregister_not_registered() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create a table directly (not via register) + reader + .connection() + .execute("CREATE TABLE user_table (x INT)", params![]) + .unwrap(); + + // Should fail - we didn't register this via register() + let result = reader.unregister("user_table"); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("was not registered via this reader")); + } + + #[test] + fn test_reregister_after_unregister() { + let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + let df = DataFrame::new(vec![Column::new("x".into(), vec![1i32, 2, 3])]).unwrap(); + + reader.register("data", df.clone()).unwrap(); + reader.unregister("data").unwrap(); + + // Should be able to register again + reader.register("data", df).unwrap(); + let result = reader.execute_sql("SELECT * FROM data").unwrap(); + assert_eq!(result.height(), 3); + } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 5ac40828..cfbd271a 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -147,6 +147,26 @@ pub trait Reader { ))) } + /// Unregister a previously registered table + /// + /// # Arguments + /// + /// * `name` - The table name to unregister + /// + /// # Returns + /// + /// `Ok(())` on success. + /// + /// # Default Implementation + /// + /// Returns an error by default. 
Override for readers that support registration. + fn unregister(&mut self, name: &str) -> Result<()> { + Err(GgsqlError::ReaderError(format!( + "This reader does not support unregistering table '{}'", + name + ))) + } + /// Check if this reader supports DataFrame registration /// /// # Returns From ba2d56689dcfd4e6c20bce4c84bbf2e51f15622d Mon Sep 17 00:00:00 2001 From: George Stagg Date: Mon, 2 Feb 2026 16:43:34 +0000 Subject: [PATCH 12/12] Add ibis SQL example --- CLAUDE.md | 3 +- ggsql-python/README.md | 48 +++++++++++++++++++++++++++++++- ggsql-python/tests/test_ggsql.py | 42 ++++++++++++++++++++++++++++ src/doc/API.md | 3 -- 4 files changed, 90 insertions(+), 6 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 29149b76..45d32fb7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -147,7 +147,7 @@ DRAW line MAPPING month AS x, total AS y --- -## Public API (`src/api.rs`) +## Public API ### Quick Start @@ -190,7 +190,6 @@ let json = writer.render(&spec)?; **`Spec`** - Result of `reader.execute()`, ready for rendering: -- `render(writer)` - Generate output (Vega-Lite JSON) - `plot()` - Resolved plot specification - `metadata()` - Rows, columns, layer count - `warnings()` - Validation warnings from execution diff --git a/ggsql-python/README.md b/ggsql-python/README.md index f08afe34..7a2148f1 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -158,7 +158,6 @@ Result of `reader.execute()`, containing resolved visualization ready for render **Methods:** -- `render(writer: VegaLiteWriter) -> str` - Generate Vega-Lite JSON - `metadata() -> dict` - Get `{"rows": int, "columns": list[str], "layer_count": int}` - `sql() -> str` - The executed SQL query - `visual() -> str` - The VISUALISE clause @@ -292,6 +291,53 @@ class AdvancedReader: Native readers like `DuckDBReader` use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. 
+### Ibis Reader Example + +[Ibis](https://ibis-project.org/) provides a unified Python API for SQL operations across multiple backends. Here's how to create an ibis-based custom reader: + +```python +import ggsql +import polars as pl +import ibis + +class IbisReader: + """Custom reader using ibis as the SQL backend.""" + + def __init__(self, backend="duckdb"): + if backend == "duckdb": + self.con = ibis.duckdb.connect() + elif backend == "sqlite": + self.con = ibis.sqlite.connect() + # Add other backends as needed + + def execute_sql(self, sql: str) -> pl.DataFrame: + return self.con.con.execute(sql).pl() + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.con.create_table(name, df.to_arrow(), overwrite=True) + + def unregister(self, name: str) -> None: + self.con.drop_table(name) + +# Usage +reader = IbisReader() +df = pl.DataFrame({ + "date": ["2024-01-01", "2024-01-02", "2024-01-03"], + "revenue": [100, 150, 120], +}) +reader.register("sales", df) + +spec = ggsql.execute( + "SELECT * FROM sales VISUALISE date AS x, revenue AS y DRAW line", + reader +) +writer = ggsql.VegaLiteWriter() +print(writer.render(spec)) +``` + ## Development ### Keeping in sync with the monorepo diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 8c7c1f19..f5c666fc 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -16,6 +16,14 @@ import ggsql +# Optional dependency for ibis test +try: + import ibis + + HAS_IBIS = hasattr(ibis, "duckdb") +except ImportError: + HAS_IBIS = False + class TestValidate: """Tests for validate() function.""" @@ -496,3 +504,37 @@ def execute_sql(self, sql: str) -> pl.DataFrame: assert len(reader.execute_calls) > 0 # All calls should be valid SQL strings assert all(isinstance(sql, str) for sql in reader.execute_calls) + + @pytest.mark.skipif(not HAS_IBIS, reason="ibis not installed") + def test_custom_reader_ibis(self): + 
"""Test custom reader using ibis as backend.""" + + class IbisReader: + def __init__(self): + self.con = ibis.duckdb.connect() + + def execute_sql(self, sql: str) -> pl.DataFrame: + return self.con.con.execute(sql).pl() + + def supports_register(self) -> bool: + return True + + def register(self, name: str, df: pl.DataFrame) -> None: + self.con.create_table(name, df.to_arrow(), overwrite=True) + + def unregister(self, name: str) -> None: + self.con.drop_table(name) + + reader = IbisReader() + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.register("mydata", df) + + spec = ggsql.execute( + "SELECT * FROM mydata VISUALISE x, y DRAW point", + reader, + ) + + assert spec.metadata()["rows"] == 3 + writer = ggsql.VegaLiteWriter() + json_output = writer.render(spec) + assert "point" in json_output diff --git a/src/doc/API.md b/src/doc/API.md index 0676ac5a..1327960e 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -477,9 +477,6 @@ class Validated: ```python class Spec: - def render(self, writer: VegaLiteWriter) -> str: - """Render to output format.""" - def metadata(self) -> dict: """Get metadata as dict with keys: rows, columns, layer_count."""