From 7cb225e9407cf1e46afc4651366cfb4c6cc287cc Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 29 Jan 2026 19:40:13 -0600 Subject: [PATCH 1/7] Refactor Rust API: Writer::render(), Reader::execute_sql(), unregister() API changes: - Move render() from Prepared to Writer trait for better separation of concerns - Rename Reader::execute() to execute_sql() for clarity - Add Reader::unregister() method for table cleanup - Add GgsqlError::NoVisualise variant for queries without VISUALISE clause The Writer now has primary responsibility for rendering, with render() as the main entry point that delegates to write() internally. This makes the API more intuitive: writer.render(&prepared) instead of prepared.render(&writer). Co-Authored-By: Claude Opus 4.5 --- ggsql-jupyter/src/executor.rs | 6 +++--- src/api.rs | 23 +++++++---------------- src/cli.rs | 6 +++--- src/execute.rs | 2 +- src/lib.rs | 25 ++++++++++++++----------- src/reader/duckdb.rs | 24 +++++++++++++++--------- src/reader/mod.rs | 27 +++++++++++++++++++++++---- src/rest.rs | 2 +- src/writer/mod.rs | 34 +++++++++++++++++++++++++++++----- 9 files changed, 96 insertions(+), 53 deletions(-) diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index 40f74f72..d1a2db89 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -8,7 +8,7 @@ use ggsql::{ prepare, reader::{DuckDBReader, Reader}, validate, - writer::VegaLiteWriter, + writer::{VegaLiteWriter, Writer}, }; use polars::frame::DataFrame; @@ -60,7 +60,7 @@ impl QueryExecutor { // 2. Check if there's a visualization if !validated.has_visual() { // Pure SQL query - execute directly and return DataFrame - let df = self.reader.execute(code)?; + let df = self.reader.execute_sql(code)?; tracing::info!( "Pure SQL executed: {} rows, {} cols", df.height(), @@ -79,7 +79,7 @@ impl QueryExecutor { ); // 4. 
Render to Vega-Lite - let vega_json = prepared.render(&self.writer)?; + let vega_json = self.writer.render(&prepared)?; tracing::debug!("Generated Vega-Lite spec: {} chars", vega_json.len()); diff --git a/src/api.rs b/src/api.rs index ecfbdeaf..0cf18448 100644 --- a/src/api.rs +++ b/src/api.rs @@ -13,9 +13,6 @@ use crate::execute::prepare_data_with_executor; #[cfg(feature = "duckdb")] use crate::reader::Reader; -#[cfg(feature = "vegalite")] -use crate::writer::Writer; - // ============================================================================ // Core Types // ============================================================================ @@ -89,12 +86,6 @@ impl Prepared { } } - /// Render to output format (e.g., Vega-Lite JSON). - #[cfg(feature = "vegalite")] - pub fn render(&self, writer: &dyn Writer) -> Result { - writer.write(&self.plot, &self.data) - } - /// Get the resolved plot specification. pub fn plot(&self) -> &Plot { &self.plot @@ -245,7 +236,7 @@ pub fn prepare(query: &str, reader: &dyn Reader) -> Result { let warnings: Vec = validated.warnings().to_vec(); // Prepare data (this also validates, but we want the warnings from above) - let prepared_data = prepare_data_with_executor(query, |sql| reader.execute(sql))?; + let prepared_data = prepare_data_with_executor(query, |sql| reader.execute_sql(sql))?; Ok(Prepared::new( prepared_data.spec, @@ -429,7 +420,7 @@ mod tests { #[test] fn test_prepare_and_render() { use crate::reader::DuckDBReader; - use crate::writer::VegaLiteWriter; + use crate::writer::{VegaLiteWriter, Writer}; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let prepared = prepare("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point", &reader).unwrap(); @@ -439,7 +430,7 @@ mod tests { assert!(prepared.data().is_some()); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = writer.render(&prepared).unwrap(); assert!(result.contains("point")); } @@ -489,7 +480,7 @@ mod 
tests { #[test] fn test_render_multi_layer() { use crate::reader::DuckDBReader; - use crate::writer::VegaLiteWriter; + use crate::writer::{VegaLiteWriter, Writer}; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let query = r#" @@ -501,7 +492,7 @@ mod tests { let prepared = prepare(query, &reader).unwrap(); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = writer.render(&prepared).unwrap(); assert!(result.contains("layer")); } @@ -510,7 +501,7 @@ mod tests { #[test] fn test_register_and_query() { use crate::reader::{DuckDBReader, Reader}; - use crate::writer::VegaLiteWriter; + use crate::writer::{VegaLiteWriter, Writer}; use polars::prelude::*; let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); @@ -530,7 +521,7 @@ mod tests { assert!(prepared.metadata().columns.contains(&"x".to_string())); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = writer.render(&prepared).unwrap(); assert!(result.contains("point")); } diff --git a/src/cli.rs b/src/cli.rs index 1844ff01..73f14789 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -15,7 +15,7 @@ use ggsql::reader::{DuckDBReader, Reader}; use ggsql::{prepare, validate}; #[cfg(feature = "vegalite")] -use ggsql::writer::VegaLiteWriter; +use ggsql::writer::{VegaLiteWriter, Writer}; #[derive(Parser)] #[command(name = "ggsql")] @@ -222,7 +222,7 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option r, Err(e) => { eprintln!("Failed to generate Vega-Lite output: {}", e); @@ -335,7 +335,7 @@ fn print_table_fallback(query: &str, reader: &DuckDBReader, max_rows: usize) { } let parsed = parsed.unwrap(); - let data = reader.execute(&parsed); + let data = reader.execute_sql(&parsed); if let Err(e) = data { eprintln!("Failed to execute SQL query: {}", e); std::process::exit(1) diff --git a/src/execute.rs b/src/execute.rs index b7683f56..3bf2be33 100644 --- 
a/src/execute.rs +++ b/src/execute.rs @@ -1185,7 +1185,7 @@ where /// Convenience wrapper around `prepare_data_with_executor` for direct DuckDB reader usage. #[cfg(feature = "duckdb")] pub fn prepare_data(query: &str, reader: &DuckDBReader) -> Result { - prepare_data_with_executor(query, |sql| reader.execute(sql)) + prepare_data_with_executor(query, |sql| reader.execute_sql(sql)) } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index cf13aaa5..8edad501 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,6 +78,9 @@ pub enum GgsqlError { #[error("Internal error: {0}")] InternalError(String), + + #[error("Query has no VISUALISE clause")] + NoVisualise, } pub type Result = std::result::Result; @@ -116,7 +119,7 @@ mod integration_tests { FROM generate_series(0, 4) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has temporal type (DuckDB returns Datetime for DATE + INTERVAL) assert_eq!(df.get_column_names(), vec!["date", "revenue"]); @@ -176,7 +179,7 @@ mod integration_tests { FROM generate_series(0, 3) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has Datetime type let timestamp_col = df.column("timestamp").unwrap(); @@ -224,7 +227,7 @@ mod integration_tests { // Real SQL that users would write let sql = "SELECT 1 as int_col, 2.5 as float_col, true as bool_col"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types are preserved // DuckDB treats numeric literals as DECIMAL, which we convert to Float64 @@ -279,7 +282,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES (1, 2.5, 'a'), (2, NULL, 'b'), (NULL, 3.5, NULL)) AS t(int_col, float_col, str_col)"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -329,7 +332,7 @@ mod 
integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES ('A', 10), ('B', 20), ('A', 15), ('C', 30)) AS t(category, value)"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); let mut spec = Plot::new(); let layer = Layer::new(Geom::bar()) @@ -375,7 +378,7 @@ mod integration_tests { GROUP BY day "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify temporal type is preserved through aggregation // DATE_TRUNC returns Date type (not Datetime) @@ -413,7 +416,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT 0.1 as small, 123.456 as medium, 999999.999999 as large"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // All should be Float64 assert!(matches!( @@ -465,7 +468,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT CAST(1 AS TINYINT) as tiny, CAST(1000 AS SMALLINT) as small, CAST(1000000 AS INTEGER) as int, CAST(1000000000000 AS BIGINT) as big"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -533,7 +536,7 @@ mod integration_tests { // Prepare data - this parses, injects constants into global data, and replaces literals with columns let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // Verify constants were injected into global data (not layer-specific data) // Both layers share __global__ data for faceting compatibility @@ -641,7 +644,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| 
reader.execute_sql(sql)).unwrap(); // All layers should use global data for faceting to work assert!( @@ -729,7 +732,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // Should have global data with the constant injected assert!( diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index b3cf46d7..75475717 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -24,11 +24,11 @@ use std::io::Cursor; /// /// // In-memory database /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -/// let df = reader.execute("SELECT 1 as x, 2 as y")?; +/// let df = reader.execute_sql("SELECT 1 as x, 2 as y")?; /// /// // File-based database /// let reader = DuckDBReader::from_connection_string("duckdb://data.db")?; -/// let df = reader.execute("SELECT * FROM sales")?; +/// let df = reader.execute_sql("SELECT * FROM sales")?; /// ``` pub struct DuckDBReader { conn: Connection, @@ -380,7 +380,7 @@ impl ColumnBuilder { } impl Reader for DuckDBReader { - fn execute(&self, sql: &str) -> Result { + fn execute_sql(&self, sql: &str) -> Result { use polars::prelude::*; // Check if this is a DDL statement (CREATE, DROP, INSERT, UPDATE, DELETE, ALTER) @@ -526,6 +526,12 @@ impl Reader for DuckDBReader { Ok(()) } + fn unregister(&mut self, name: &str) { + // Fail silently if table doesn't exist (DROP TABLE IF EXISTS) + let sql = format!("DROP TABLE IF EXISTS \"{}\"", name); + let _ = self.conn.execute(&sql, duckdb::params![]); + } + fn supports_register(&self) -> bool { true } @@ -544,7 +550,7 @@ mod tests { #[test] fn test_simple_query() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let df = reader.execute("SELECT 1 as x, 2 as y").unwrap(); + let df = reader.execute_sql("SELECT 1 as x, 2 as y").unwrap(); assert_eq!(df.shape(), (1, 2)); 
assert_eq!(df.get_column_names(), vec!["x", "y"]); @@ -567,7 +573,7 @@ mod tests { .unwrap(); // Query data - let df = reader.execute("SELECT * FROM test").unwrap(); + let df = reader.execute_sql("SELECT * FROM test").unwrap(); assert_eq!(df.shape(), (2, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); @@ -576,7 +582,7 @@ mod tests { #[test] fn test_invalid_sql() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let result = reader.execute("INVALID SQL SYNTAX"); + let result = reader.execute_sql("INVALID SQL SYNTAX"); assert!(result.is_err()); } @@ -598,7 +604,7 @@ mod tests { .unwrap(); let df = reader - .execute("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") + .execute_sql("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") .unwrap(); assert_eq!(df.shape(), (2, 2)); @@ -620,7 +626,7 @@ mod tests { reader.register("my_table", df).unwrap(); // Query the registered table - let result = reader.execute("SELECT * FROM my_table ORDER BY x").unwrap(); + let result = reader.execute_sql("SELECT * FROM my_table ORDER BY x").unwrap(); assert_eq!(result.shape(), (3, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } @@ -698,7 +704,7 @@ mod tests { reader.register("empty_table", df).unwrap(); // Query should return empty result with correct schema - let result = reader.execute("SELECT * FROM empty_table").unwrap(); + let result = reader.execute_sql("SELECT * FROM empty_table").unwrap(); assert_eq!(result.shape(), (0, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 762c0319..519d1581 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -17,12 +17,12 @@ //! //! // Basic usage //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -//! let df = reader.execute("SELECT * FROM table")?; +//! let df = reader.execute_sql("SELECT * FROM table")?; //! //! // With DataFrame registration //! 
let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; //! reader.register("my_table", some_dataframe)?; -//! let result = reader.execute("SELECT * FROM my_table")?; +//! let result = reader.execute_sql("SELECT * FROM my_table")?; //! ``` use crate::{DataFrame, GgsqlError, Result}; @@ -53,7 +53,10 @@ pub use duckdb::DuckDBReader; /// reader.register("sales", sales_df)?; /// /// // Now you can query it -/// let result = reader.execute("SELECT * FROM sales WHERE amount > 100")?; +/// let result = reader.execute_sql("SELECT * FROM sales WHERE amount > 100")?; +/// +/// // Unregister when done (fails silently if table doesn't exist) +/// reader.unregister("sales"); /// ``` pub trait Reader { /// Execute a SQL query and return the result as a DataFrame @@ -72,7 +75,7 @@ pub trait Reader { /// - The SQL is invalid /// - The connection fails /// - The table or columns don't exist - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; /// Register a DataFrame as a queryable table (takes ownership) /// @@ -100,6 +103,22 @@ pub trait Reader { ))) } + /// Unregister a table by name. + /// + /// This removes a previously registered DataFrame from the reader. + /// Fails silently if the table doesn't exist. + /// + /// # Arguments + /// + /// * `name` - The table name to unregister + /// + /// # Default Implementation + /// + /// Does nothing by default. Override for readers that support registration. 
+ fn unregister(&mut self, _name: &str) { + // Default: fail silently + } + /// Check if this reader supports DataFrame registration /// /// # Returns diff --git a/src/rest.rs b/src/rest.rs index e87a14f9..dd894440 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -461,7 +461,7 @@ async fn query_handler( #[cfg(feature = "vegalite")] if request.writer == "vegalite" { let writer = VegaLiteWriter::new(); - let json_output = prepared.render(&writer)?; + let json_output = writer.render(&prepared)?; let spec_value: serde_json::Value = serde_json::from_str(&json_output) .map_err(|e| GgsqlError::WriterError(format!("Failed to parse JSON: {}", e)))?; diff --git a/src/writer/mod.rs b/src/writer/mod.rs index 7f026e6b..e1928e60 100644 --- a/src/writer/mod.rs +++ b/src/writer/mod.rs @@ -1,13 +1,13 @@ //! Output writer abstraction layer for ggsql //! //! The writer module provides a pluggable interface for generating visualization -//! outputs from Plot + DataFrame combinations. +//! outputs from Prepared specifications. //! //! # Architecture //! //! All writers implement the `Writer` trait, which provides: -//! - Spec + Data → Output conversion -//! - Validation for writer compatibility +//! - Prepared → Output conversion via `render()` +//! - Low-level Plot + Data → Output via `write()` //! - Format-specific rendering logic //! //! # Example @@ -16,10 +16,11 @@ //! use ggsql::writer::{Writer, VegaLiteWriter}; //! //! let writer = VegaLiteWriter::new(); -//! let json = writer.write(&spec, &dataframe)?; +//! let json = writer.render(&prepared)?; //! println!("{}", json); //! ``` +use crate::api::Prepared; use crate::{DataFrame, Plot, Result}; use std::collections::HashMap; @@ -31,11 +32,34 @@ pub use vegalite::VegaLiteWriter; /// Trait for visualization output writers /// -/// Writers take a Plot and data sources and produce formatted output +/// Writers take a Prepared specification and produce formatted output /// (JSON, R code, PNG bytes, etc.). 
pub trait Writer { + /// Render a prepared visualization to output format + /// + /// This is the primary rendering method. It extracts the plot and data + /// from the Prepared object and generates the output. + /// + /// # Arguments + /// + /// * `prepared` - The prepared visualization (from `prepare()`) + /// + /// # Returns + /// + /// A string containing the formatted output (JSON, code, etc.) + /// + /// # Errors + /// + /// Returns `GgsqlError::WriterError` if rendering fails + fn render(&self, prepared: &Prepared) -> Result { + self.write(prepared.plot(), prepared.data_map()) + } + /// Generate output from a visualization specification and data sources /// + /// This is a lower-level method that takes the plot and data separately. + /// Most callers should use `render()` instead. + /// /// # Arguments /// /// * `spec` - The parsed ggsql specification From 9d522401f2f2e94a01b0ef102523611d3af6f76a Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 29 Jan 2026 19:40:40 -0600 Subject: [PATCH 2/7] Restructure Python bindings with cleaner module hierarchy New module structure: - ggsql.readers: DuckDB reader class with execute() and execute_sql() - ggsql.writers: VegaLite writer with render_json() and render_chart() - ggsql.types: Prepared, Validated, and exception classes Key improvements: - Proper exception hierarchy: GgsqlError base with ParseError, ValidationError, ReaderError, WriterError, NoVisualiseError - DuckDB.execute() auto-registers/unregisters DataFrames for clean API - Narwhals integration moved to Python layer for DataFrame conversion - Type stubs (_ggsql.pyi) for IDE support and type checking - Context manager support for DuckDB reader - Removed render_altair() and prepare() - replaced by cleaner two-stage API Breaking changes: - ggsql.DuckDBReader -> ggsql.readers.DuckDB - ggsql.VegaLiteWriter -> ggsql.writers.VegaLite - ggsql.prepare(query, reader) -> reader.execute(query, data_dict) - prepared.render(writer) -> writer.render_json(prepared)
- Custom Python readers no longer supported (use DuckDB with registration) Co-Authored-By: Claude Opus 4.5 --- ggsql-python/pyproject.toml | 4 +- ggsql-python/python/ggsql/__init__.py | 107 +----- ggsql-python/python/ggsql/_ggsql.pyi | 313 +++++++++++++++ ggsql-python/python/ggsql/readers.py | 156 ++++++++ ggsql-python/python/ggsql/types.py | 26 ++ ggsql-python/python/ggsql/writers.py | 112 ++++++ ggsql-python/src/lib.rs | 525 ++++++++------------------ 7 files changed, 772 insertions(+), 471 deletions(-) create mode 100644 ggsql-python/python/ggsql/_ggsql.pyi create mode 100644 ggsql-python/python/ggsql/readers.py create mode 100644 ggsql-python/python/ggsql/types.py create mode 100644 ggsql-python/python/ggsql/writers.py diff --git a/ggsql-python/pyproject.toml b/ggsql-python/pyproject.toml index 1a0ff8ef..1039f0c4 100644 --- a/ggsql-python/pyproject.toml +++ b/ggsql-python/pyproject.toml @@ -16,12 +16,12 @@ classifiers = [ ] dependencies = [ "altair>=5.0", - "narwhals>=2.15.0", + "narwhals>=1.0", "polars>=1.0", ] [project.optional-dependencies] -test = ["pytest>=7.0"] +test = ["pytest>=7.0", "pandas>=2.0"] dev = ["maturin>=1.4"] [tool.maturin] diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py index 06b5f720..cbcb16f7 100644 --- a/ggsql-python/python/ggsql/__init__.py +++ b/ggsql-python/python/ggsql/__init__.py @@ -1,106 +1,17 @@ -from __future__ import annotations - -import json -from typing import Any, Union +"""ggsql - SQL extension for declarative data visualization.""" -import altair -import narwhals as nw -from narwhals.typing import IntoFrame +from __future__ import annotations -from ggsql._ggsql import ( - DuckDBReader, - VegaLiteWriter, - Validated, - Prepared, - validate, - prepare, -) +from ggsql import readers, types, writers +from ggsql._ggsql import validate __all__ = [ - # Classes - "DuckDBReader", - "VegaLiteWriter", - "Validated", - "Prepared", + # Submodules + "readers", + "writers", + "types", # 
Functions "validate", - "prepare", - "render_altair", ] __version__ = "0.1.0" - -# Type alias for any Altair chart type -AltairChart = Union[ - altair.Chart, - altair.LayerChart, - altair.FacetChart, - altair.ConcatChart, - altair.HConcatChart, - altair.VConcatChart, - altair.RepeatChart, -] - - -def render_altair( - df: IntoFrame, - viz: str, - **kwargs: Any, -) -> AltairChart: - """Render a DataFrame with a VISUALISE spec to an Altair chart. - - Parameters - ---------- - df - Data to visualize. Accepts polars, pandas, or any narwhals-compatible - DataFrame. LazyFrames are collected automatically. - viz - VISUALISE spec string (e.g., "VISUALISE x, y DRAW point") - **kwargs - Additional keyword arguments passed to `from_json()`. - Common options include `validate=False` to skip schema validation. - - Returns - ------- - AltairChart - An Altair chart object (Chart, LayerChart, FacetChart, etc.). - """ - df = nw.from_native(df, pass_through=True) - - if isinstance(df, nw.LazyFrame): - df = df.collect() - - if not isinstance(df, nw.DataFrame): - raise TypeError("df must be a narwhals DataFrame or compatible type") - - pl_df = df.to_polars() - - # Create temporary reader and register data - reader = DuckDBReader("duckdb://memory") - reader.register("__data__", pl_df) - - # Build full query: SELECT * FROM __data__ + VISUALISE clause - query = f"SELECT * FROM __data__ {viz}" - - # Prepare and render - prepared = prepare(query, reader) - writer = VegaLiteWriter() - vegalite_json = prepared.render(writer) - - # Parse to determine the correct Altair class - spec = json.loads(vegalite_json) - - # Determine the correct Altair class based on spec structure - if "layer" in spec: - return altair.LayerChart.from_json(vegalite_json, **kwargs) - elif "facet" in spec or "spec" in spec: - return altair.FacetChart.from_json(vegalite_json, **kwargs) - elif "concat" in spec: - return altair.ConcatChart.from_json(vegalite_json, **kwargs) - elif "hconcat" in spec: - return 
altair.HConcatChart.from_json(vegalite_json, **kwargs) - elif "vconcat" in spec: - return altair.VConcatChart.from_json(vegalite_json, **kwargs) - elif "repeat" in spec: - return altair.RepeatChart.from_json(vegalite_json, **kwargs) - else: - return altair.Chart.from_json(vegalite_json, **kwargs) +version_info = (0, 1, 0) diff --git a/ggsql-python/python/ggsql/_ggsql.pyi b/ggsql-python/python/ggsql/_ggsql.pyi new file mode 100644 index 00000000..7ce1976c --- /dev/null +++ b/ggsql-python/python/ggsql/_ggsql.pyi @@ -0,0 +1,313 @@ +"""Type stubs for the ggsql native extension module.""" + +from typing import Any + +import polars as pl + +# ============================================================================ +# Exception Types +# ============================================================================ + + +class GgsqlError(Exception): + """Base exception for all ggsql errors.""" + + ... + + +class ParseError(GgsqlError): + """Raised when query parsing fails.""" + + ... + + +class ValidationError(GgsqlError): + """Raised when query validation fails (semantic errors).""" + + ... + + +class ReaderError(GgsqlError): + """Raised when database/data source operations fail.""" + + ... + + +class WriterError(GgsqlError): + """Raised when output generation fails.""" + + ... + + +class NoVisualiseError(GgsqlError): + """Raised when execute() is called on a query without VISUALISE clause.""" + + ... + + +# ============================================================================ +# Classes +# ============================================================================ + + +class DuckDBReader: + """DuckDB database reader for executing SQL queries and ggsql visualizations.""" + + def __init__(self, connection: str) -> None: + """Create a new DuckDB reader from a connection string. + + Parameters + ---------- + connection + Connection string. Use "duckdb://memory" for in-memory database + or "duckdb://path/to/file.db" for file-based database. + """ + ... 
+ + def __repr__(self) -> str: ... + + def execute( + self, query: str, data: dict[str, pl.DataFrame] | None = None + ) -> Prepared: + """Execute a ggsql query with optional DataFrame registration. + + DataFrames are registered before query execution and automatically + unregistered afterward (even on error) to avoid polluting the namespace. + + Parameters + ---------- + query + The ggsql query to execute. Must contain a VISUALISE clause. + data + DataFrames to register as queryable tables. Keys are table names. + + Returns + ------- + Prepared + A prepared visualization ready for rendering. + + Raises + ------ + NoVisualiseError + If the query has no VISUALISE clause. + ParseError + If query parsing fails. + ValidationError + If query validation fails. + ReaderError + If SQL execution fails. + """ + ... + + def execute_sql(self, sql: str) -> pl.DataFrame: + """Execute a SQL query and return the result as a DataFrame. + + This is for plain SQL queries without visualization. For ggsql queries + with VISUALISE clauses, use execute() instead. + + Parameters + ---------- + sql + The SQL query to execute. + + Returns + ------- + polars.DataFrame + The query result as a polars DataFrame. + """ + ... + + def register(self, name: str, df: pl.DataFrame) -> None: + """Register a DataFrame as a queryable table. + + After registration, the DataFrame can be queried by name in SQL. + Note: When using execute(), DataFrames are automatically registered + and unregistered, so manual registration is usually unnecessary. + + Parameters + ---------- + name + The table name to register under. + df + The DataFrame to register. + """ + ... + + def unregister(self, name: str) -> None: + """Unregister a table by name. + + Fails silently if the table doesn't exist. + + Parameters + ---------- + name + The table name to unregister. + """ + ... + + +class _VegaLiteWriter: + """Vega-Lite JSON output writer (internal). 
+ + Use the Python VegaLiteWriter class which wraps this and adds render_chart(). + """ + + def __init__(self) -> None: + """Create a new Vega-Lite writer.""" + ... + + def __repr__(self) -> str: ... + + def render(self, spec: Prepared) -> str: + """Render a prepared visualization to Vega-Lite JSON. + + Parameters + ---------- + spec + The prepared visualization (from reader.execute()). + + Returns + ------- + str + The Vega-Lite JSON specification as a string. + """ + ... + + +class Validated: + """Result of validate() - query inspection and validation without SQL execution.""" + + def __repr__(self) -> str: ... + + def has_visual(self) -> bool: + """Whether the query contains a VISUALISE clause.""" + ... + + def sql(self) -> str: + """The SQL portion (before VISUALISE).""" + ... + + def visual(self) -> str: + """The VISUALISE portion (raw text).""" + ... + + def valid(self) -> bool: + """Whether the query is valid (no errors).""" + ... + + def errors(self) -> list[dict[str, Any]]: + """Validation errors (fatal issues). + + Returns + ------- + list[dict] + List of error dictionaries with 'message' and optional 'location' keys. + """ + ... + + def warnings(self) -> list[dict[str, Any]]: + """Validation warnings (non-fatal issues). + + Returns + ------- + list[dict] + List of warning dictionaries with 'message' and optional 'location' keys. + """ + ... + + +class Prepared: + """Result of reader.execute(), ready for rendering.""" + + def __repr__(self) -> str: ... + + def metadata(self) -> dict[str, Any]: + """Get visualization metadata. + + Returns + ------- + dict + Dictionary with 'rows', 'columns', and 'layer_count' keys. + """ + ... + + def sql(self) -> str: + """The main SQL query that was executed.""" + ... + + def visual(self) -> str: + """The VISUALISE portion (raw text).""" + ... + + def layer_count(self) -> int: + """Number of layers.""" + ... + + def data(self) -> pl.DataFrame | None: + """Get global data (main query result).""" + ... 
+ + def layer_data(self, index: int) -> pl.DataFrame | None: + """Get layer-specific data (from FILTER or FROM clause). + + Parameters + ---------- + index + The layer index (0-based). + """ + ... + + def stat_data(self, index: int) -> pl.DataFrame | None: + """Get stat transform data (e.g., histogram bins, density estimates). + + Parameters + ---------- + index + The layer index (0-based). + """ + ... + + def layer_sql(self, index: int) -> str | None: + """Layer filter/source query, or None if using global data. + + Parameters + ---------- + index + The layer index (0-based). + """ + ... + + def stat_sql(self, index: int) -> str | None: + """Stat transform query, or None if no stat transform. + + Parameters + ---------- + index + The layer index (0-based). + """ + ... + + def warnings(self) -> list[dict[str, Any]]: + """Validation warnings from preparation.""" + ... + + +# ============================================================================ +# Functions +# ============================================================================ + + +def validate(query: str) -> Validated: + """Validate query syntax and semantics without executing SQL. + + Parameters + ---------- + query + The ggsql query to validate. + + Returns + ------- + Validated + Validation result with query inspection methods. + """ + ... 
diff --git a/ggsql-python/python/ggsql/readers.py b/ggsql-python/python/ggsql/readers.py new file mode 100644 index 00000000..7a5e5436 --- /dev/null +++ b/ggsql-python/python/ggsql/readers.py @@ -0,0 +1,156 @@ +"""Reader classes for ggsql.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import narwhals as nw +from narwhals.typing import IntoDataFrame + +from ggsql._ggsql import DuckDBReader as _DuckDBReader + +if TYPE_CHECKING: + import polars as pl + from ggsql._ggsql import Prepared + +__all__ = ["DuckDB"] + + +def _to_polars(df: IntoDataFrame) -> "pl.DataFrame": + """Convert any narwhals-compatible DataFrame to polars.""" + nw_df = nw.from_native(df, pass_through=True) + + if isinstance(nw_df, nw.LazyFrame): + nw_df = nw_df.collect() + + if not isinstance(nw_df, nw.DataFrame): + raise TypeError("df must be a DataFrame (polars, pandas, pyarrow, etc.)") + + return nw_df.to_polars() + + +class DuckDB: + """DuckDB database reader for executing SQL queries and ggsql visualizations. + + Creates an in-memory or file-based DuckDB connection that can execute + SQL queries and register DataFrames as queryable tables. + + Accepts any narwhals-compatible DataFrame (polars, pandas, pyarrow, etc.) + for data registration. + + Examples + -------- + >>> import ggsql.readers + >>> reader = ggsql.readers.DuckDB("duckdb://memory") + >>> reader = ggsql.readers.DuckDB("duckdb:///path/to/file.db") + """ + + def __init__(self, connection: str) -> None: + """Create a new DuckDB reader from a connection string. + + Parameters + ---------- + connection + Connection string. Use "duckdb://memory" for in-memory database + or "duckdb:///path/to/file.db" for file-based database. 
+ """ + self._inner = _DuckDBReader(connection) + self._connection = connection + + def __repr__(self) -> str: + return f"" + + def execute( + self, + query: str, + data: dict[str, IntoDataFrame] | None = None, + ) -> "Prepared": + """Execute a ggsql query with optional DataFrame registration. + + DataFrames are registered before query execution and automatically + unregistered afterward (even on error) to avoid polluting the namespace. + + Parameters + ---------- + query + The ggsql query to execute. Must contain a VISUALISE clause. + data + DataFrames to register as queryable tables. Keys are table names. + Accepts any narwhals-compatible DataFrame (polars, pandas, pyarrow, etc.). + + Returns + ------- + Prepared + A prepared visualization ready for rendering. + + Raises + ------ + NoVisualiseError + If the query has no VISUALISE clause. + ValueError + If parsing, validation, or SQL execution fails. + """ + polars_data: dict[str, "pl.DataFrame"] | None = None + if data is not None: + polars_data = {name: _to_polars(df) for name, df in data.items()} + + return self._inner.execute(query, polars_data) + + def execute_sql(self, sql: str) -> "pl.DataFrame": + """Execute a SQL query and return the result as a DataFrame. + + This is for plain SQL queries without visualization. For ggsql queries + with VISUALISE clauses, use execute() instead. + + Parameters + ---------- + sql + The SQL query to execute. + + Returns + ------- + polars.DataFrame + The query result as a polars DataFrame. + """ + return self._inner.execute_sql(sql) + + def register(self, name: str, df: IntoDataFrame) -> None: + """Register a DataFrame as a queryable table. + + After registration, the DataFrame can be queried by name in SQL. + Note: When using execute(), DataFrames are automatically registered + and unregistered, so manual registration is usually unnecessary. + + Parameters + ---------- + name + The table name to register under. + df + The DataFrame to register. 
Accepts any narwhals-compatible + DataFrame (polars, pandas, pyarrow, etc.). + """ + self._inner.register(name, _to_polars(df)) + + def unregister(self, name: str) -> None: + """Unregister a table by name. + + Fails silently if the table doesn't exist. + + Parameters + ---------- + name + The table name to unregister. + """ + self._inner.unregister(name) + + def __enter__(self) -> "DuckDB": + """Enter context manager.""" + return self + + def __exit__(self, _exc_type, _exc_val, _exc_tb) -> None: + """Exit context manager. + + Currently a no-op since DuckDB connections don't require explicit cleanup, + but future-proofs the API for connection management. + """ + pass diff --git a/ggsql-python/python/ggsql/types.py b/ggsql-python/python/ggsql/types.py new file mode 100644 index 00000000..a56fbccd --- /dev/null +++ b/ggsql-python/python/ggsql/types.py @@ -0,0 +1,26 @@ +"""Type classes and exceptions for ggsql.""" + +from ggsql._ggsql import ( + GgsqlError, + NoVisualiseError, + ParseError, + Prepared, + ReaderError, + Validated, + ValidationError, + WriterError, +) + +__all__ = [ + # Base exception + "GgsqlError", + # Specific exceptions + "ParseError", + "ValidationError", + "ReaderError", + "WriterError", + "NoVisualiseError", + # Type classes + "Prepared", + "Validated", +] diff --git a/ggsql-python/python/ggsql/writers.py b/ggsql-python/python/ggsql/writers.py new file mode 100644 index 00000000..a84d96a2 --- /dev/null +++ b/ggsql-python/python/ggsql/writers.py @@ -0,0 +1,112 @@ +"""Writer classes for ggsql.""" + +from __future__ import annotations + +import json +from typing import Any, Union + +import altair + +from ggsql._ggsql import _VegaLiteWriter, Prepared + +__all__ = ["VegaLite", "AltairChart"] + +# Type alias for any Altair chart type +AltairChart = Union[ + altair.Chart, + altair.LayerChart, + altair.FacetChart, + altair.ConcatChart, + altair.HConcatChart, + altair.VConcatChart, + altair.RepeatChart, +] + + +class VegaLite: + """Vega-Lite JSON output 
writer. + + Converts prepared visualization specifications to Vega-Lite v6 JSON. + + Examples + -------- + >>> writer = ggsql.writers.VegaLite() + >>> json_str = writer.render_json(spec) + >>> chart = writer.render_chart(spec) + """ + + def __init__(self) -> None: + """Create a new Vega-Lite writer.""" + self._inner = _VegaLiteWriter() + + def __repr__(self) -> str: + return "" + + def render_json(self, spec: Prepared) -> str: + """Render a prepared visualization to Vega-Lite JSON. + + Parameters + ---------- + spec : Prepared + The prepared visualization (from reader.execute()). + + Returns + ------- + str + The Vega-Lite JSON specification as a string. + + Raises + ------ + WriterError + If rendering fails. + """ + return self._inner.render(spec) + + def render_chart(self, spec: Prepared, **kwargs: Any) -> AltairChart: + """Render a prepared visualization to an Altair chart object. + + Parameters + ---------- + spec : Prepared + The prepared visualization (from reader.execute()). + **kwargs + Additional keyword arguments passed to Altair's `from_json()`. + Common options include `validate=False` to skip schema validation. + Note: `validate=False` is used by default since ggsql produces + Vega-Lite v6 specs. + + Returns + ------- + AltairChart + An Altair chart object (Chart, LayerChart, FacetChart, etc.) + appropriate for the visualization structure. + + Raises + ------ + WriterError + If rendering fails. 
+ """ + json_str = self._inner.render(spec) + + # Default to validate=False since ggsql produces v6 specs + if "validate" not in kwargs: + kwargs["validate"] = False + + # Parse the JSON to determine the chart type + spec_dict = json.loads(json_str) + + # Determine the correct Altair class based on spec structure + if "layer" in spec_dict: + return altair.LayerChart.from_json(json_str, **kwargs) + elif "facet" in spec_dict or "spec" in spec_dict: + return altair.FacetChart.from_json(json_str, **kwargs) + elif "concat" in spec_dict: + return altair.ConcatChart.from_json(json_str, **kwargs) + elif "hconcat" in spec_dict: + return altair.HConcatChart.from_json(json_str, **kwargs) + elif "vconcat" in spec_dict: + return altair.VConcatChart.from_json(json_str, **kwargs) + elif "repeat" in spec_dict: + return altair.RepeatChart.from_json(json_str, **kwargs) + else: + return altair.Chart.from_json(json_str, **kwargs) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index b9d6496d..aefc5ffb 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -8,11 +8,41 @@ use std::io::Cursor; use ggsql::api::{prepare as rust_prepare, validate as rust_validate, Prepared, ValidationWarning}; use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; -use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; -use ggsql::GgsqlError; +use ggsql::writer::{VegaLiteWriter as RustVegaLiteWriter, Writer}; use polars::prelude::{DataFrame, IpcReader, IpcWriter, SerReader, SerWriter}; +// ============================================================================ +// Custom Exception Types +// ============================================================================ + +// Base exception for all ggsql errors +pyo3::create_exception!(ggsql, PyGgsqlError, pyo3::exceptions::PyException); + +// Specific exception types +pyo3::create_exception!(ggsql, PyParseError, PyGgsqlError); +pyo3::create_exception!(ggsql, PyValidationError, PyGgsqlError); 
+pyo3::create_exception!(ggsql, PyReaderError, PyGgsqlError); +pyo3::create_exception!(ggsql, PyWriterError, PyGgsqlError); +pyo3::create_exception!(ggsql, NoVisualiseError, PyGgsqlError); + +/// Convert a GgsqlError to the appropriate Python exception +fn ggsql_error_to_pyerr(e: ggsql::GgsqlError) -> PyErr { + use ggsql::GgsqlError; + match e { + GgsqlError::ParseError(msg) => PyParseError::new_err(msg), + GgsqlError::ValidationError(msg) => PyValidationError::new_err(msg), + GgsqlError::ReaderError(msg) => PyReaderError::new_err(msg), + GgsqlError::WriterError(msg) => PyWriterError::new_err(msg), + GgsqlError::NoVisualise => { + NoVisualiseError::new_err("Query has no VISUALISE clause".to_string()) + } + GgsqlError::InternalError(msg) => { + PyGgsqlError::new_err(format!("Internal error: {}", msg)) + } + } +} + // ============================================================================ // Helper Functions for DataFrame Conversion // ============================================================================ @@ -53,31 +83,6 @@ fn py_to_polars(py: Python<'_>, df: &Bound<'_, PyAny>) -> PyResult { }) } -/// Convert a Python polars DataFrame to Rust DataFrame - for use inside Python::attach -/// This variant is used by PyReaderBridge where we already hold the GIL. 
-fn py_to_polars_inner(df: &Bound<'_, PyAny>) -> PyResult { - let py = df.py(); - let io = py.import("io")?; - let bytes_io = io.call_method0("BytesIO")?; - - df.call_method1("write_ipc", (&bytes_io,)).map_err(|_| { - PyErr::new::( - "Reader.execute() must return a polars.DataFrame", - ) - })?; - - bytes_io.call_method1("seek", (0i64,))?; - let ipc_bytes: Vec = bytes_io.call_method0("read")?.extract()?; - let cursor = Cursor::new(ipc_bytes); - - IpcReader::new(cursor).finish().map_err(|e| { - PyErr::new::(format!( - "Failed to deserialize DataFrame: {}", - e - )) - }) -} - /// Convert validation errors/warnings to a Python list of dicts fn errors_to_pylist( py: Python<'_>, @@ -114,172 +119,116 @@ fn warnings_to_pylist(py: Python<'_>, warnings: &[ValidationWarning]) -> PyResul errors_to_pylist(py, &items) } -// ============================================================================ -// PyReaderBridge - Bridges Python reader objects to Rust Reader trait -// ============================================================================ - -/// Bridges a Python reader object to the Rust Reader trait. -/// -/// This allows any Python object with an `execute(sql: str) -> polars.DataFrame` -/// method to be used as a ggsql reader. 
-struct PyReaderBridge { - obj: Py, -} - -impl Reader for PyReaderBridge { - fn execute(&self, sql: &str) -> ggsql::Result { - Python::attach(|py| { - let bound = self.obj.bind(py); - let result = bound - .call_method1("execute", (sql,)) - .map_err(|e| GgsqlError::ReaderError(format!("Reader.execute() failed: {}", e)))?; - py_to_polars_inner(&result).map_err(|e| GgsqlError::ReaderError(e.to_string())) - }) - } - - fn supports_register(&self) -> bool { - Python::attach(|py| { - self.obj - .bind(py) - .call_method0("supports_register") - .and_then(|r| r.extract::()) - .unwrap_or(false) - }) - } - - fn register(&mut self, name: &str, df: DataFrame) -> ggsql::Result<()> { - Python::attach(|py| { - let py_df = - polars_to_py(py, &df).map_err(|e| GgsqlError::ReaderError(e.to_string()))?; - self.obj - .bind(py) - .call_method1("register", (name, py_df)) - .map_err(|e| GgsqlError::ReaderError(format!("Reader.register() failed: {}", e)))?; - Ok(()) - }) - } -} - -// ============================================================================ -// Native Reader Detection Macro -// ============================================================================ - -/// Macro to try native readers and fall back to bridge. -/// Adding new native readers = add to the macro invocation list. -macro_rules! try_native_readers { - ($query:expr, $reader:expr, $($native_type:ty),*) => {{ - $( - if let Ok(native) = $reader.downcast::<$native_type>() { - return rust_prepare($query, &native.borrow().inner) - .map(|p| PyPrepared { inner: p }) - .map_err(|e| PyErr::new::(e.to_string())); - } - )* - }}; -} - // ============================================================================ // PyDuckDBReader // ============================================================================ -/// DuckDB database reader for executing SQL queries. +/// DuckDB database reader for executing SQL queries and ggsql visualizations. 
/// /// Creates an in-memory or file-based DuckDB connection that can execute /// SQL queries and register DataFrames as queryable tables. -/// -/// Examples -/// -------- -/// >>> reader = DuckDBReader("duckdb://memory") -/// >>> df = reader.execute("SELECT 1 as x, 2 as y") -/// -/// >>> reader = DuckDBReader("duckdb://memory") -/// >>> reader.register("data", pl.DataFrame({"x": [1, 2, 3]})) -/// >>> df = reader.execute("SELECT * FROM data WHERE x > 1") #[pyclass(name = "DuckDBReader", unsendable)] struct PyDuckDBReader { inner: RustDuckDBReader, + connection: String, } #[pymethods] impl PyDuckDBReader { /// Create a new DuckDB reader from a connection string. - /// - /// Parameters - /// ---------- - /// connection : str - /// Connection string. Use "duckdb://memory" for in-memory database - /// or "duckdb://path/to/file.db" for file-based database. - /// - /// Returns - /// ------- - /// DuckDBReader - /// A configured DuckDB reader instance. - /// - /// Raises - /// ------ - /// ValueError - /// If the connection string is invalid or the database cannot be opened. #[new] fn new(connection: &str) -> PyResult { let inner = RustDuckDBReader::from_connection_string(connection) - .map_err(|e| PyErr::new::(e.to_string()))?; - Ok(Self { inner }) + .map_err(ggsql_error_to_pyerr)?; + Ok(Self { + inner, + connection: connection.to_string(), + }) } - /// Register a DataFrame as a queryable table. - /// - /// After registration, the DataFrame can be queried by name in SQL. - /// - /// Parameters - /// ---------- - /// name : str - /// The table name to register under. - /// df : polars.DataFrame - /// The DataFrame to register. Must be a polars DataFrame. - /// - /// Raises - /// ------ - /// ValueError - /// If registration fails or the table name is invalid. 
- fn register(&mut self, py: Python<'_>, name: &str, df: &Bound<'_, PyAny>) -> PyResult<()> { - let rust_df = py_to_polars(py, df)?; - self.inner - .register(name, rust_df) - .map_err(|e| PyErr::new::(e.to_string())) + fn __repr__(&self) -> String { + format!("", self.connection) + } + + /// Execute a ggsql query with optional DataFrame registration. + /// + /// DataFrames are registered before query execution and automatically + /// unregistered afterward (even on error) to avoid polluting the namespace. + #[pyo3(signature = (query, data=None))] + fn execute( + &mut self, + py: Python<'_>, + query: &str, + data: Option<&Bound<'_, PyDict>>, + ) -> PyResult { + // First, validate that the query has a VISUALISE clause + let validated = rust_validate(query).map_err(ggsql_error_to_pyerr)?; + + if !validated.has_visual() { + return Err(NoVisualiseError::new_err( + "Query has no VISUALISE clause. Use execute_sql() for plain SQL queries.", + )); + } + + // Collect table names to register + let mut table_names: Vec = Vec::new(); + + // Register DataFrames + if let Some(data_dict) = data { + for (key, value) in data_dict.iter() { + let name: String = key.extract()?; + let rust_df = py_to_polars(py, &value)?; + self.inner + .register(&name, rust_df) + .map_err(ggsql_error_to_pyerr)?; + table_names.push(name); + } + } + + // Execute the query, ensuring cleanup happens even on error + let result = rust_prepare(query, &self.inner); + + // Always unregister tables (cleanup in finally-style) + for name in &table_names { + self.inner.unregister(name); + } + + // Return the result (or propagate the error) + result + .map(|p| PyPrepared { inner: p }) + .map_err(ggsql_error_to_pyerr) } /// Execute a SQL query and return the result as a DataFrame. /// - /// Parameters - /// ---------- - /// sql : str - /// The SQL query to execute. - /// - /// Returns - /// ------- - /// polars.DataFrame - /// The query result as a polars DataFrame. 
- /// - /// Raises - /// ------ - /// ValueError - /// If the SQL is invalid or execution fails. - fn execute(&self, py: Python<'_>, sql: &str) -> PyResult> { + /// This is for plain SQL queries without visualization. For ggsql queries + /// with VISUALISE clauses, use execute() instead. + #[pyo3(name = "execute_sql")] + fn execute_sql(&self, py: Python<'_>, sql: &str) -> PyResult> { let df = self .inner - .execute(sql) - .map_err(|e| PyErr::new::(e.to_string()))?; + .execute_sql(sql) + .map_err(ggsql_error_to_pyerr)?; polars_to_py(py, &df) } - /// Check if this reader supports DataFrame registration. + /// Register a DataFrame as a queryable table. + /// + /// After registration, the DataFrame can be queried by name in SQL. + /// Note: When using execute(), DataFrames are automatically registered + /// and unregistered, so manual registration is usually unnecessary. + fn register(&mut self, py: Python<'_>, name: &str, df: &Bound<'_, PyAny>) -> PyResult<()> { + let rust_df = py_to_polars(py, df)?; + self.inner + .register(name, rust_df) + .map_err(ggsql_error_to_pyerr) + } + + /// Unregister a table by name. /// - /// Returns - /// ------- - /// bool - /// True if register() is supported, False otherwise. - fn supports_register(&self) -> bool { - self.inner.supports_register() + /// Fails silently if the table doesn't exist. + fn unregister(&mut self, name: &str) { + self.inner.unregister(name); } } @@ -287,16 +236,11 @@ impl PyDuckDBReader { // PyVegaLiteWriter // ============================================================================ -/// Vega-Lite JSON output writer. +/// Vega-Lite JSON output writer (internal). /// /// Converts prepared visualization specifications to Vega-Lite v6 JSON. 
-/// -/// Examples -/// -------- -/// >>> writer = VegaLiteWriter() -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> json_output = prepared.render(writer) -#[pyclass(name = "VegaLiteWriter")] +/// Use the Python VegaLiteWriter class which wraps this and adds render_chart(). +#[pyclass(name = "_VegaLiteWriter")] struct PyVegaLiteWriter { inner: RustVegaLiteWriter, } @@ -304,17 +248,23 @@ struct PyVegaLiteWriter { #[pymethods] impl PyVegaLiteWriter { /// Create a new Vega-Lite writer. - /// - /// Returns - /// ------- - /// VegaLiteWriter - /// A configured Vega-Lite writer instance. #[new] fn new() -> Self { Self { inner: RustVegaLiteWriter::new(), } } + + fn __repr__(&self) -> &'static str { + "" + } + + /// Render a prepared visualization to Vega-Lite JSON. + fn render(&self, spec: &PyPrepared) -> PyResult { + self.inner + .render(&spec.inner) + .map_err(ggsql_error_to_pyerr) + } } // ============================================================================ @@ -337,62 +287,41 @@ struct PyValidated { #[pymethods] impl PyValidated { + fn __repr__(&self) -> String { + format!( + "", + self.valid, + self.has_visual, + self.errors.len() + ) + } + /// Whether the query contains a VISUALISE clause. - /// - /// Returns - /// ------- - /// bool - /// True if the query has a VISUALISE clause. fn has_visual(&self) -> bool { self.has_visual } /// The SQL portion (before VISUALISE). - /// - /// Returns - /// ------- - /// str - /// The SQL part of the query. fn sql(&self) -> &str { &self.sql } /// The VISUALISE portion (raw text). - /// - /// Returns - /// ------- - /// str - /// The VISUALISE part of the query. fn visual(&self) -> &str { &self.visual } /// Whether the query is valid (no errors). - /// - /// Returns - /// ------- - /// bool - /// True if the query is syntactically and semantically valid. fn valid(&self) -> bool { self.valid } /// Validation errors (fatal issues). 
- /// - /// Returns - /// ------- - /// list[dict] - /// List of error dictionaries with 'message' and optional 'location' keys. fn errors(&self, py: Python<'_>) -> PyResult> { errors_to_pylist(py, &self.errors) } /// Validation warnings (non-fatal issues). - /// - /// Returns - /// ------- - /// list[dict] - /// List of warning dictionaries with 'message' and optional 'location' keys. fn warnings(&self, py: Python<'_>) -> PyResult> { errors_to_pylist(py, &self.warnings) } @@ -402,16 +331,10 @@ impl PyValidated { // PyPrepared // ============================================================================ -/// Result of prepare(), ready for rendering. +/// Result of reader.execute(), ready for rendering. /// /// Contains the resolved plot specification, data, and metadata. -/// Use render() to generate Vega-Lite JSON output. -/// -/// Examples -/// -------- -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> print(f"Rows: {prepared.metadata()['rows']}") -/// >>> json_output = prepared.render(VegaLiteWriter()) +/// Use writer.render(spec) or writer.render_chart(spec) to generate output. #[pyclass(name = "Prepared")] struct PyPrepared { inner: Prepared, @@ -419,34 +342,17 @@ struct PyPrepared { #[pymethods] impl PyPrepared { - /// Render to output format (Vega-Lite JSON). - /// - /// Parameters - /// ---------- - /// writer : VegaLiteWriter - /// The writer to use for rendering. - /// - /// Returns - /// ------- - /// str - /// The Vega-Lite JSON specification as a string. - /// - /// Raises - /// ------ - /// ValueError - /// If rendering fails. - fn render(&self, writer: &PyVegaLiteWriter) -> PyResult { - self.inner - .render(&writer.inner) - .map_err(|e| PyErr::new::(e.to_string())) + fn __repr__(&self) -> String { + let m = self.inner.metadata(); + format!( + "", + m.rows, + m.columns.len(), + m.layer_count + ) } /// Get visualization metadata. 
- /// - /// Returns - /// ------- - /// dict - /// Dictionary with 'rows', 'columns', and 'layer_count' keys. fn metadata(&self, py: Python<'_>) -> PyResult> { let m = self.inner.metadata(); let dict = PyDict::new(py); @@ -457,56 +363,26 @@ impl PyPrepared { } /// The main SQL query that was executed. - /// - /// Returns - /// ------- - /// str - /// The SQL query string. fn sql(&self) -> &str { self.inner.sql() } /// The VISUALISE portion (raw text). - /// - /// Returns - /// ------- - /// str - /// The VISUALISE clause text. fn visual(&self) -> &str { self.inner.visual() } /// Number of layers. - /// - /// Returns - /// ------- - /// int - /// The number of DRAW clauses in the visualization. fn layer_count(&self) -> usize { self.inner.layer_count() } /// Get global data (main query result). - /// - /// Returns - /// ------- - /// polars.DataFrame | None - /// The main query result DataFrame, or None if not available. fn data(&self, py: Python<'_>) -> PyResult>> { self.inner.data().map(|df| polars_to_py(py, df)).transpose() } /// Get layer-specific data (from FILTER or FROM clause). - /// - /// Parameters - /// ---------- - /// index : int - /// The layer index (0-based). - /// - /// Returns - /// ------- - /// polars.DataFrame | None - /// The layer-specific DataFrame, or None if the layer uses global data. fn layer_data(&self, py: Python<'_>, index: usize) -> PyResult>> { self.inner .layer_data(index) @@ -515,16 +391,6 @@ impl PyPrepared { } /// Get stat transform data (e.g., histogram bins, density estimates). - /// - /// Parameters - /// ---------- - /// index : int - /// The layer index (0-based). - /// - /// Returns - /// ------- - /// polars.DataFrame | None - /// The stat transform DataFrame, or None if no stat transform. fn stat_data(&self, py: Python<'_>, index: usize) -> PyResult>> { self.inner .stat_data(index) @@ -533,41 +399,16 @@ impl PyPrepared { } /// Layer filter/source query, or None if using global data. 
- /// - /// Parameters - /// ---------- - /// index : int - /// The layer index (0-based). - /// - /// Returns - /// ------- - /// str | None - /// The filter SQL query, or None if the layer uses global data directly. fn layer_sql(&self, index: usize) -> Option { self.inner.layer_sql(index).map(|s| s.to_string()) } /// Stat transform query, or None if no stat transform. - /// - /// Parameters - /// ---------- - /// index : int - /// The layer index (0-based). - /// - /// Returns - /// ------- - /// str | None - /// The stat transform SQL query, or None if no stat transform. fn stat_sql(&self, index: usize) -> Option { self.inner.stat_sql(index).map(|s| s.to_string()) } /// Validation warnings from preparation. - /// - /// Returns - /// ------- - /// list[dict] - /// List of warning dictionaries with 'message' and optional 'location' keys. fn warnings(&self, py: Python<'_>) -> PyResult> { warnings_to_pylist(py, self.inner.warnings()) } @@ -578,25 +419,9 @@ impl PyPrepared { // ============================================================================ /// Validate query syntax and semantics without executing SQL. -/// -/// Parameters -/// ---------- -/// query : str -/// The ggsql query to validate. -/// -/// Returns -/// ------- -/// Validated -/// Validation result with query inspection methods. -/// -/// Raises -/// ------ -/// ValueError -/// If validation fails unexpectedly (not for syntax errors, which are captured). #[pyfunction] fn validate(query: &str) -> PyResult { - let v = rust_validate(query) - .map_err(|e| PyErr::new::(e.to_string()))?; + let v = rust_validate(query).map_err(ggsql_error_to_pyerr)?; Ok(PyValidated { sql: v.sql().to_string(), @@ -626,61 +451,20 @@ fn validate(query: &str) -> PyResult { }) } -/// Prepare a query for visualization. Main entry point for the Rust API. -/// -/// Parameters -/// ---------- -/// query : str -/// The ggsql query to prepare. -/// reader : DuckDBReader | object -/// The database reader to execute SQL against. 
Can be a native DuckDBReader -/// for optimal performance, or any Python object with an -/// `execute(sql: str) -> polars.DataFrame` method. -/// -/// Returns -/// ------- -/// Prepared -/// A prepared visualization ready for rendering. -/// -/// Raises -/// ------ -/// ValueError -/// If parsing, validation, or SQL execution fails. -/// -/// Examples -/// -------- -/// >>> # Using native reader (fast path) -/// >>> reader = DuckDBReader("duckdb://memory") -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> json_output = prepared.render(VegaLiteWriter()) -/// -/// >>> # Using custom Python reader -/// >>> class MyReader: -/// ... def execute(self, sql: str) -> pl.DataFrame: -/// ... return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -/// >>> reader = MyReader() -/// >>> prepared = prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) -#[pyfunction] -fn prepare(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { - // Fast path: try all known native reader types - // Add new native readers to this list as they're implemented - try_native_readers!(query, reader, PyDuckDBReader); - - // Bridge path: wrap Python object as Reader - let bridge = PyReaderBridge { - obj: reader.clone().unbind(), - }; - rust_prepare(query, &bridge) - .map(|p| PyPrepared { inner: p }) - .map_err(|e| PyErr::new::(e.to_string())) -} - // ============================================================================ // Module Registration // ============================================================================ #[pymodule] fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { + // Exception classes (exported without the Py prefix) + m.add("GgsqlError", m.py().get_type::())?; + m.add("ParseError", m.py().get_type::())?; + m.add("ValidationError", m.py().get_type::())?; + m.add("ReaderError", m.py().get_type::())?; + m.add("WriterError", m.py().get_type::())?; + m.add("NoVisualiseError", m.py().get_type::())?; + // Classes 
m.add_class::()?; m.add_class::()?; @@ -689,7 +473,6 @@ fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { // Functions m.add_function(wrap_pyfunction!(validate, m)?)?; - m.add_function(wrap_pyfunction!(prepare, m)?)?; Ok(()) } From 3fa8574cce8355b1818f190b19c96c080238944d Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 29 Jan 2026 19:40:57 -0600 Subject: [PATCH 3/7] Update Python tests for new API structure Test coverage for: - New module paths (ggsql.readers.DuckDB, ggsql.writers.VegaLite) - execute() with data dict registration and auto-cleanup - execute_sql() for plain SQL queries - NoVisualiseError exception handling - Exception hierarchy (GgsqlError as base) - Context manager support - Narwhals DataFrame support (pandas, polars) - __repr__ methods for debugging - render_json() and render_chart() methods Removed tests for: - render_altair() convenience function (removed from API) - Custom Python reader support (removed from API) Co-Authored-By: Claude Opus 4.5 --- ggsql-python/tests/test_ggsql.py | 634 +++++++++++++++++-------------- 1 file changed, 341 insertions(+), 293 deletions(-) diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 970dcf5a..2c642154 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -1,9 +1,10 @@ """Tests for ggsql Python bindings. These tests focus on Python-specific logic: -- DataFrame conversion via narwhals -- Return type handling -- Two-stage API (prepare -> render) +- DataFrame conversion +- New API: reader.execute() -> writer.render_json() +- NoVisualiseError handling +- Two-stage API (execute -> render) Rust logic (parsing, Vega-Lite generation) is tested in the Rust test suite. 
""" @@ -53,71 +54,129 @@ def test_missing_required_aesthetic(self): assert any("y" in e["message"] for e in errors) -class TestDuckDBReader: - """Tests for DuckDBReader class.""" +class TestDuckDB: + """Tests for DuckDB class.""" def test_create_in_memory(self): - reader = ggsql.DuckDBReader("duckdb://memory") + reader = ggsql.readers.DuckDB("duckdb://memory") assert reader is not None - def test_execute_simple_query(self): - reader = ggsql.DuckDBReader("duckdb://memory") - df = reader.execute("SELECT 1 AS x, 2 AS y") + def test_execute_sql_simple_query(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + df = reader.execute_sql("SELECT 1 AS x, 2 AS y") assert isinstance(df, pl.DataFrame) assert df.shape == (1, 2) assert list(df.columns) == ["x", "y"] def test_register_and_query(self): - reader = ggsql.DuckDBReader("duckdb://memory") + reader = ggsql.readers.DuckDB("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader.register("my_data", df) - result = reader.execute("SELECT * FROM my_data WHERE x > 1") + result = reader.execute_sql("SELECT * FROM my_data WHERE x > 1") assert isinstance(result, pl.DataFrame) assert result.shape == (2, 2) - def test_supports_register(self): - reader = ggsql.DuckDBReader("duckdb://memory") - assert reader.supports_register() is True + def test_unregister(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3]}) + reader.register("test_table", df) + + # Table should exist + result = reader.execute_sql("SELECT * FROM test_table") + assert result.shape[0] == 3 + + # Unregister + reader.unregister("test_table") + + # Table should no longer exist + with pytest.raises(ggsql.types.ReaderError): + reader.execute_sql("SELECT * FROM test_table") + + def test_unregister_nonexistent_silent(self): + """Unregistering a non-existent table should not raise.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + # Should not raise + reader.unregister("nonexistent_table") def 
test_invalid_connection_string(self): - with pytest.raises(ValueError): - ggsql.DuckDBReader("invalid://connection") + with pytest.raises(ggsql.types.ReaderError): + ggsql.readers.DuckDB("invalid://connection") -class TestVegaLiteWriter: - """Tests for VegaLiteWriter class.""" +class TestVegaLite: + """Tests for VegaLite class.""" def test_create_writer(self): - writer = ggsql.VegaLiteWriter() + writer = ggsql.writers.VegaLite() assert writer is not None -class TestPrepare: - """Tests for prepare() function.""" +class TestExecute: + """Tests for reader.execute() method.""" - def test_prepare_simple_query(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + def test_execute_simple_query(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert prepared is not None assert prepared.layer_count() == 1 - def test_prepare_with_registered_data(self): - reader = ggsql.DuckDBReader("duckdb://memory") + def test_execute_with_data_dict(self): + reader = ggsql.readers.DuckDB("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - reader.register("data", df) - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) assert prepared.metadata()["rows"] == 3 - def test_prepare_metadata(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( + def test_execute_with_multiple_tables(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + sales = pl.DataFrame({"id": [1, 2], "product_id": [1, 1]}) + products = pl.DataFrame({"id": [1], "name": ["Widget"]}) + + prepared = reader.execute( + """ + SELECT s.id, p.name FROM sales s + JOIN products p ON s.product_id = p.id + VISUALISE id AS x, name AS color DRAW bar + """, + {"sales": sales, 
"products": products}, + ) + assert prepared.metadata()["rows"] == 2 + + def test_execute_tables_unregistered_after(self): + """Tables should be unregistered after execute().""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + + # Execute with data dict + reader.execute("SELECT * FROM data VISUALISE x, y DRAW point", {"data": df}) + + # Table should no longer exist + with pytest.raises(ggsql.types.ReaderError): + reader.execute_sql("SELECT * FROM data") + + def test_execute_tables_unregistered_on_error(self): + """Tables should be unregistered even if execute() fails.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3]}) # Missing 'y' column + + # This should fail because we reference 'y' which doesn't exist + with pytest.raises(ggsql.types.ValidationError): + reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + + # Table should still be unregistered + with pytest.raises(ggsql.types.ReaderError): + reader.execute_sql("SELECT * FROM data") + + def test_execute_metadata(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute( "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) " - "VISUALISE x, y DRAW point", - reader, + "VISUALISE x, y DRAW point" ) metadata = prepared.metadata() @@ -126,46 +185,53 @@ def test_prepare_metadata(self): assert "y" in metadata["columns"] assert metadata["layer_count"] == 1 - def test_prepare_sql_accessor(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + def test_execute_sql_accessor(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert "SELECT" in prepared.sql() - def test_prepare_visual_accessor(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - 
"SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + def test_execute_visual_accessor(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert "VISUALISE" in prepared.visual() - def test_prepare_data_accessor(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + def test_execute_data_accessor(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") data = prepared.data() assert isinstance(data, pl.DataFrame) assert data.shape == (1, 2) - def test_prepare_without_visualise_fails(self): - reader = ggsql.DuckDBReader("duckdb://memory") - with pytest.raises(ValueError): - ggsql.prepare("SELECT 1 AS x, 2 AS y", reader) +class TestNoVisualiseError: + """Tests for NoVisualiseError exception.""" -class TestPreparedRender: - """Tests for Prepared.render() method.""" + def test_execute_without_visualise_raises(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + with pytest.raises(ggsql.types.NoVisualiseError): + reader.execute("SELECT 1 AS x, 2 AS y") + + def test_novisualise_error_message(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + with pytest.raises(ggsql.types.NoVisualiseError) as exc_info: + reader.execute("SELECT 1 AS x, 2 AS y") + assert "VISUALISE" in str(exc_info.value) + assert "execute_sql" in str(exc_info.value) + + def test_novisualise_error_is_exception(self): + """NoVisualiseError should be a proper exception type.""" + assert issubclass(ggsql.types.NoVisualiseError, Exception) + + +class TestWriterRender: + """Tests for VegaLite.render_json() method.""" def test_render_to_vegalite(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) - writer = ggsql.VegaLiteWriter() + reader 
= ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + writer = ggsql.writers.VegaLite() - result = prepared.render(writer) + result = writer.render_json(prepared) assert isinstance(result, str) spec = json.loads(result) @@ -173,116 +239,71 @@ def test_render_to_vegalite(self): assert "vega-lite" in spec["$schema"] def test_render_contains_data(self): - reader = ggsql.DuckDBReader("duckdb://memory") + reader = ggsql.readers.DuckDB("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - reader.register("data", df) - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) - writer = ggsql.VegaLiteWriter() + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + writer = ggsql.writers.VegaLite() - result = prepared.render(writer) + result = writer.render_json(prepared) spec = json.loads(result) # Data should be in the spec (either inline or in datasets) assert "data" in spec or "datasets" in spec def test_render_multi_layer(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute( "SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) " "VISUALISE " "DRAW point MAPPING x AS x, y AS y " - "DRAW line MAPPING x AS x, y AS y", - reader, + "DRAW line MAPPING x AS x, y AS y" ) - writer = ggsql.VegaLiteWriter() + writer = ggsql.writers.VegaLite() - result = prepared.render(writer) + result = writer.render_json(prepared) spec = json.loads(result) assert "layer" in spec -class TestRenderAltairDataFrameConversion: - """Tests for DataFrame handling in render_altair().""" +class TestWriterRenderChart: + """Tests for VegaLite.render_chart() method.""" - def test_accepts_polars_dataframe(self): - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") - assert 
isinstance(chart, altair.TopLevelMixin) + def test_render_chart_returns_altair(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + writer = ggsql.writers.VegaLite() - def test_accepts_polars_lazyframe(self): - lf = pl.LazyFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(lf, "VISUALISE x, y DRAW point") + chart = writer.render_chart(prepared) assert isinstance(chart, altair.TopLevelMixin) - def test_accepts_narwhals_dataframe(self): - import narwhals as nw - - pl_df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - nw_df = nw.from_native(pl_df) - - chart = ggsql.render_altair(nw_df, "VISUALISE x, y DRAW point") - assert isinstance(chart, altair.TopLevelMixin) - - def test_accepts_pandas_dataframe(self): - pd = pytest.importorskip("pandas") - - pd_df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(pd_df, "VISUALISE x, y DRAW point") - assert isinstance(chart, altair.TopLevelMixin) - - def test_rejects_invalid_dataframe_type(self): - with pytest.raises(TypeError, match="must be a narwhals DataFrame"): - ggsql.render_altair({"x": [1, 2, 3]}, "VISUALISE x, y DRAW point") - - -class TestRenderAltairReturnType: - """Tests for render_altair() return type.""" + def test_render_chart_layer_chart(self): + """Simple DRAW specs produce LayerChart (ggsql always wraps in layer).""" + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + writer = ggsql.writers.VegaLite() - def test_returns_altair_chart(self): - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") - assert isinstance(chart, altair.TopLevelMixin) + chart = writer.render_chart(prepared) + # ggsql wraps all charts in a layer + assert isinstance(chart, altair.LayerChart) - def test_chart_has_data(self): + def 
test_render_chart_can_serialize(self): + reader = ggsql.readers.DuckDB("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") - spec = chart.to_dict() - # Data should be embedded in datasets - assert "datasets" in spec + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + writer = ggsql.writers.VegaLite() - def test_chart_can_be_serialized(self): - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + chart = writer.render_chart(prepared) # Should not raise json_str = chart.to_json() assert len(json_str) > 0 - -class TestRenderAltairChartTypeDetection: - """Tests for correct Altair chart type detection based on spec structure.""" - - def test_simple_chart_returns_layer_chart(self): - """Simple DRAW specs produce LayerChart (ggsql always wraps in layer).""" - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") - # ggsql wraps all charts in a layer - assert isinstance(chart, altair.LayerChart) - - def test_layered_chart_can_round_trip(self): - """LayerChart can be converted to dict and back.""" - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") - - # Convert to dict and back - spec = chart.to_dict() - assert "layer" in spec - - # Should be able to recreate from dict - recreated = altair.LayerChart.from_dict(spec) - assert isinstance(recreated, altair.LayerChart) - - def test_faceted_chart_returns_facet_chart(self): + def test_render_chart_faceted(self): """FACET WRAP specs produce FacetChart.""" + reader = ggsql.readers.DuckDB("duckdb://memory") df = pl.DataFrame( { "x": [1, 2, 3, 4, 5, 6], @@ -291,64 +312,25 @@ def test_faceted_chart_returns_facet_chart(self): } ) # Need validate=False because ggsql produces v6 specs - chart = 
ggsql.render_altair( - df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False - ) - assert isinstance(chart, altair.FacetChart) - - def test_faceted_chart_can_round_trip(self): - """FacetChart can be converted to dict and back.""" - df = pl.DataFrame( - { - "x": [1, 2, 3, 4, 5, 6], - "y": [10, 20, 30, 40, 50, 60], - "group": ["A", "A", "A", "B", "B", "B"], - } - ) - chart = ggsql.render_altair( - df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y FACET WRAP group DRAW point", + {"data": df}, ) + writer = ggsql.writers.VegaLite() - # Convert to dict (skip validation for ggsql specs) - spec = chart.to_dict(validate=False) - assert "facet" in spec or "spec" in spec - - # Should be able to recreate from dict (with validation disabled) - recreated = altair.FacetChart.from_dict(spec, validate=False) - assert isinstance(recreated, altair.FacetChart) - - def test_chart_with_color_encoding(self): - """Charts with color encoding still return correct type.""" - df = pl.DataFrame( - { - "x": [1, 2, 3, 4], - "y": [10, 20, 30, 40], - "category": ["A", "B", "A", "B"], - } - ) - chart = ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") - # Should still be a LayerChart (ggsql wraps in layer) - assert isinstance(chart, altair.LayerChart) - - -class TestRenderAltairErrorHandling: - """Tests for error handling in render_altair().""" - - def test_invalid_viz_raises(self): - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - with pytest.raises(ValueError): - ggsql.render_altair(df, "NOT VALID SYNTAX") + chart = writer.render_chart(prepared) + assert isinstance(chart, altair.FacetChart) class TestTwoStageAPIIntegration: - """Integration tests for the two-stage prepare -> render API.""" + """Integration tests for the two-stage execute -> render API.""" def test_end_to_end_workflow(self): - """Complete workflow: create reader, register data, prepare, render.""" + 
"""Complete workflow: create reader, execute with data, render.""" # Create reader - reader = ggsql.DuckDBReader("duckdb://memory") + reader = ggsql.readers.DuckDB("duckdb://memory") - # Register data + # Create data df = pl.DataFrame( { "date": ["2024-01-01", "2024-01-02", "2024-01-03"], @@ -356,12 +338,11 @@ def test_end_to_end_workflow(self): "region": ["North", "South", "North"], } ) - reader.register("sales", df) - # Prepare visualization - prepared = ggsql.prepare( + # Execute visualization + prepared = reader.execute( "SELECT * FROM sales VISUALISE date AS x, value AS y, region AS color DRAW line", - reader, + {"sales": df}, ) # Verify metadata @@ -369,8 +350,8 @@ def test_end_to_end_workflow(self): assert prepared.layer_count() == 1 # Render to Vega-Lite - writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) + writer = ggsql.writers.VegaLite() + result = writer.render_json(prepared) # Verify output spec = json.loads(result) @@ -379,10 +360,8 @@ def test_end_to_end_workflow(self): def test_can_introspect_prepared(self): """Test all introspection methods on Prepared.""" - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") # All these should work without error assert prepared.sql() is not None @@ -398,120 +377,189 @@ def test_can_introspect_prepared(self): _ = prepared.layer_sql(0) _ = prepared.stat_sql(0) + def test_visualise_from_shorthand(self): + """Test VISUALISE FROM syntax.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -class TestCustomReader: - """Tests for custom Python reader support.""" + prepared = reader.execute( + "VISUALISE FROM data DRAW point MAPPING x AS x, y AS y", {"data": df} + ) + assert prepared.metadata()["rows"] == 3 - def 
test_simple_custom_reader(self): - """Custom reader with execute() method works.""" + def test_render_chart_workflow(self): + """Test workflow using render_chart().""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - class SimpleReader: - def execute(self, sql: str) -> pl.DataFrame: - return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + spec = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + writer = ggsql.writers.VegaLite() + chart = writer.render_chart(spec) + + # Should be able to convert to dict + spec_dict = chart.to_dict() + assert "layer" in spec_dict + + +class TestVersionInfo: + """Tests for version information.""" + + def test_version_string(self): + """__version__ should be a string.""" + assert isinstance(ggsql.__version__, str) + assert ggsql.__version__ == "0.1.0" + + def test_version_info_tuple(self): + """version_info should be a tuple.""" + assert hasattr(ggsql, "version_info") + assert isinstance(ggsql.version_info, tuple) + assert ggsql.version_info == (0, 1, 0) + + +class TestReprMethods: + """Tests for __repr__ methods.""" + + def test_duckdb_repr(self): + """DuckDB should have a useful repr.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + repr_str = repr(reader) + assert "DuckDB" in repr_str + assert "duckdb://memory" in repr_str + + def test_vegalite_repr(self): + """VegaLite should have a useful repr.""" + writer = ggsql.writers.VegaLite() + repr_str = repr(writer) + assert "VegaLite" in repr_str + + def test_validated_repr(self): + """Validated should have a useful repr.""" + validated = ggsql.validate("SELECT 1 AS x VISUALISE x DRAW point") + repr_str = repr(validated) + assert "Validated" in repr_str + assert "valid=" in repr_str + + def test_prepared_repr(self): + """Prepared should have a useful repr.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW 
point") + repr_str = repr(prepared) + assert "Prepared" in repr_str + assert "rows=" in repr_str + assert "layers=" in repr_str + + +class TestNarwhalsSupport: + """Tests for narwhals DataFrame support.""" + + def test_execute_with_pandas_dataframe(self): + """execute() should accept pandas DataFrames.""" + import pandas as pd + + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + assert prepared.metadata()["rows"] == 3 + + def test_register_with_pandas_dataframe(self): + """register() should accept pandas DataFrames.""" + import pandas as pd + + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + + reader.register("my_data", df) + result = reader.execute_sql("SELECT * FROM my_data") + assert result.shape == (3, 2) + + def test_execute_with_polars_dataframe(self): + """execute() should still work with polars DataFrames.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - reader = SimpleReader() - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) assert prepared.metadata()["rows"] == 3 - def test_custom_reader_with_register(self): - """Custom reader with register() support.""" - class RegisterReader: - def __init__(self): - self.tables = {} +class TestRenderJsonMethod: + """Tests for render_json() method.""" - def execute(self, sql: str) -> pl.DataFrame: - # Simple: just return the first registered table - if self.tables: - return next(iter(self.tables.values())) - return pl.DataFrame({"x": [1], "y": [2]}) + def test_render_json_returns_json(self): + """render_json() should return a valid JSON string.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = 
reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + writer = ggsql.writers.VegaLite() - def supports_register(self) -> bool: - return True + result = writer.render_json(prepared) + assert isinstance(result, str) - def register(self, name: str, df: pl.DataFrame) -> None: - self.tables[name] = df + spec = json.loads(result) + assert "$schema" in spec - reader = RegisterReader() - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) - assert prepared is not None - def test_custom_reader_error_handling(self): - """Custom reader errors are propagated.""" +class TestContextManager: + """Tests for context manager protocol.""" - class ErrorReader: - def execute(self, sql: str) -> pl.DataFrame: - raise ValueError("Custom reader error") + def test_context_manager_basic(self): + """DuckDB should work as context manager.""" + with ggsql.readers.DuckDB("duckdb://memory") as reader: + df = reader.execute_sql("SELECT 1 AS x, 2 AS y") + assert df.shape == (1, 2) - reader = ErrorReader() - with pytest.raises(ValueError, match="Custom reader error"): - ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) + def test_context_manager_with_execute(self): + """execute() should work inside context manager.""" + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - def test_custom_reader_wrong_return_type(self): - """Custom reader returning wrong type raises TypeError.""" + with ggsql.readers.DuckDB("duckdb://memory") as reader: + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + assert prepared.metadata()["rows"] == 3 - class WrongTypeReader: - def execute(self, sql: str): - return {"x": [1, 2, 3]} # dict, not DataFrame - reader = WrongTypeReader() - with pytest.raises((ValueError, TypeError)): - ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) +class TestExceptionHierarchy: + """Tests for exception type hierarchy.""" - def test_native_reader_fast_path(self): - """Native 
DuckDBReader still works (fast path).""" - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) - assert prepared.metadata()["rows"] == 1 - - def test_custom_reader_can_render(self): - """Custom reader result can be rendered to Vega-Lite.""" - - class StaticReader: - def execute(self, sql: str) -> pl.DataFrame: - return pl.DataFrame( - { - "x": [1, 2, 3, 4, 5], - "y": [10, 40, 20, 50, 30], - "category": ["A", "B", "A", "B", "A"], - } - ) - - reader = StaticReader() - prepared = ggsql.prepare( - "SELECT * FROM data VISUALISE x, y, category AS color DRAW point", - reader, - ) + def test_ggsql_error_is_base(self): + """All exceptions should inherit from GgsqlError.""" + assert issubclass(ggsql.types.ParseError, ggsql.types.GgsqlError) + assert issubclass(ggsql.types.ValidationError, ggsql.types.GgsqlError) + assert issubclass(ggsql.types.ReaderError, ggsql.types.GgsqlError) + assert issubclass(ggsql.types.WriterError, ggsql.types.GgsqlError) + assert issubclass(ggsql.types.NoVisualiseError, ggsql.types.GgsqlError) - writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) + def test_ggsql_error_is_exception(self): + """GgsqlError should be a proper exception type.""" + assert issubclass(ggsql.types.GgsqlError, Exception) - spec = json.loads(result) - assert "$schema" in spec - assert "vega-lite" in spec["$schema"] + def test_catch_all_ggsql_errors(self): + """Should be able to catch all errors with GgsqlError.""" + reader = ggsql.readers.DuckDB("duckdb://memory") - def test_custom_reader_execute_called(self): - """Verify execute() is called on the custom reader.""" + # This should raise ReaderError (missing table) + with pytest.raises(ggsql.types.GgsqlError): + reader.execute_sql("SELECT * FROM nonexistent_table") - class RecordingReader: - def __init__(self): - self.execute_calls = [] + def test_reader_error_for_sql_failure(self): + """ReaderError should be raised for SQL 
execution failures.""" + reader = ggsql.readers.DuckDB("duckdb://memory") - def execute(self, sql: str) -> pl.DataFrame: - self.execute_calls.append(sql) - return pl.DataFrame({"x": [1], "y": [2]}) + with pytest.raises(ggsql.types.ReaderError): + reader.execute_sql("SELECT * FROM nonexistent_table") - reader = RecordingReader() - ggsql.prepare( - "SELECT * FROM data VISUALISE x, y DRAW point", - reader, - ) + def test_validation_error_for_missing_column(self): + """ValidationError should be raised for missing column references.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3]}) # Missing 'y' column - # execute() should have been called at least once - assert len(reader.execute_calls) > 0 - # All calls should be valid SQL strings - assert all(isinstance(sql, str) for sql in reader.execute_calls) + with pytest.raises(ggsql.types.ValidationError): + reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) From cfa3114a10196924d504653764414a2844eb6d0b Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 29 Jan 2026 19:41:14 -0600 Subject: [PATCH 4/7] Update documentation for new API structure - CLAUDE.md: Updated Python bindings section with new module structure, exception hierarchy, and API examples - src/doc/API.md: Updated Rust and Python API reference to reflect Writer::render() pattern and new Python module paths - ggsql-python/README.md: Complete rewrite with new API, examples for execute() with data dicts, exception handling, and narwhals support Co-Authored-By: Claude Opus 4.5 --- CLAUDE.md | 145 ++++++++++------------ ggsql-python/README.md | 268 +++++++++++++++++++++-------------------- src/doc/API.md | 145 ++++++++++++++++------ 3 files changed, 305 insertions(+), 253 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index aa093221..275ba1ab 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -165,7 +165,7 @@ let prepared = ggsql::prepare( // Render to Vega-Lite JSON let writer = VegaLiteWriter::new(); -let 
json = prepared.render(&writer)?; +let json = writer.render(&prepared)?; ``` ### Core Functions @@ -869,13 +869,12 @@ When running in Positron IDE, the extension provides enhanced functionality: **Features**: - PyO3-based Rust bindings compiled to a native Python extension -- Two-stage API mirroring the Rust API: `prepare()` → `render()` -- DuckDB reader with DataFrame registration -- Custom Python reader support: any object with `execute(sql) -> DataFrame` method -- Works with any narwhals-compatible DataFrame (polars, pandas, etc.) -- LazyFrames are collected automatically -- Returns native `altair.Chart` objects via `render_altair()` convenience function +- Two-stage API: `reader.execute()` → `writer.render()` +- DuckDB reader with inline DataFrame registration via `execute(query, data_dict)` +- Automatic table cleanup after query execution +- Returns native `altair.Chart` objects via `writer.render_chart()` - Query validation and introspection (SQL, layer queries, stat queries) +- `NoVisualiseError` exception for queries without VISUALISE clause **Installation**: @@ -892,40 +891,24 @@ maturin develop import ggsql import polars as pl -# Create reader and register data -reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -reader.register("data", df) -# Prepare visualization -prepared = ggsql.prepare( +# Execute with inline data registration (auto-registers and unregisters) +reader = ggsql.DuckDBReader("duckdb://memory") +spec = reader.execute( "SELECT * FROM data VISUALISE x, y DRAW point", - reader + {"data": df} ) # Inspect metadata -print(f"Rows: {prepared.metadata()['rows']}") -print(f"Columns: {prepared.metadata()['columns']}") -print(f"SQL: {prepared.sql()}") +print(f"Rows: {spec.metadata()['rows']}") +print(f"Columns: {spec.metadata()['columns']}") +print(f"SQL: {spec.sql()}") -# Render to Vega-Lite JSON +# Render to Vega-Lite JSON or Altair chart writer = ggsql.VegaLiteWriter() -json_output = 
prepared.render(writer) -``` - -**Convenience Function** (`render_altair`): - -For quick visualizations without explicit reader setup: - -```python -import ggsql -import polars as pl - -df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - -# Render DataFrame to Altair chart in one call -chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") -chart.display() # In Jupyter +json_output = writer.render(spec) +chart = writer.render_chart(spec) ``` **Query Validation**: @@ -941,73 +924,71 @@ print(f"SQL portion: {validated.sql()}") print(f"Errors: {validated.errors()}") ``` -**Classes**: - -| Class | Description | -| -------------------------- | -------------------------------------------- | -| `DuckDBReader(connection)` | Database reader with DataFrame registration | -| `VegaLiteWriter()` | Vega-Lite JSON output writer | -| `Validated` | Result of `validate()` with query inspection | -| `Prepared` | Result of `prepare()`, ready for rendering | +**Handling Plain SQL**: -**Functions**: +```python +try: + spec = reader.execute("SELECT * FROM data", {"data": df}) +except ggsql.NoVisualiseError: + # Use execute_sql() for queries without VISUALISE + result_df = reader.execute_sql("SELECT * FROM data") +``` -| Function | Description | -| ------------------------ | ------------------------------------------------- | -| `validate(query)` | Syntax/semantic validation with query inspection | -| `prepare(query, reader)` | Full preparation (reader can be native or custom) | -| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | +**Classes**: -**Prepared Object Methods**: +| Class | Description | +| -------------------------- | --------------------------------------------------- | +| `DuckDBReader(connection)` | Database reader with DataFrame registration | +| `VegaLiteWriter()` | Vega-Lite JSON output writer with render methods | +| `Validated` | Result of `validate()` with query inspection | +| `Prepared` | Result of `reader.execute()`, ready for 
rendering | +| `NoVisualiseError` | Exception for queries without VISUALISE clause | -| Method | Description | -| ---------------- | -------------------------------------------- | -| `render(writer)` | Generate Vega-Lite JSON | -| `metadata()` | Get rows, columns, layer_count | -| `sql()` | Get the SQL portion | -| `visual()` | Get the VISUALISE portion | -| `layer_count()` | Number of DRAW layers | -| `data()` | Get the main DataFrame | -| `layer_data(i)` | Get layer-specific DataFrame (if filtered) | -| `stat_data(i)` | Get stat transform DataFrame (if applicable) | -| `layer_sql(i)` | Get layer filter SQL (if applicable) | -| `stat_sql(i)` | Get stat transform SQL (if applicable) | -| `warnings()` | Get validation warnings | +**Functions**: -**Custom Python Readers**: +| Function | Description | +| ----------------- | ------------------------------------------------ | +| `validate(query)` | Syntax/semantic validation with query inspection | -Any Python object with an `execute(sql: str) -> polars.DataFrame` method can be used as a reader: +**DuckDBReader Methods**: -```python -import ggsql -import polars as pl +| Method | Description | +| ---------------------------- | -------------------------------------------------------- | +| `execute(query, data=None)` | Execute ggsql query with optional data dict registration | +| `execute_sql(sql)` | Execute plain SQL, return DataFrame | +| `register(name, df)` | Manually register DataFrame as table | +| `unregister(name)` | Unregister table (fails silently if not found) | -class MyReader: - """Custom reader that returns static data.""" +**VegaLiteWriter Methods**: - def execute(self, sql: str) -> pl.DataFrame: - return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +| Method | Description | +| ------------------------- | ------------------------------------ | +| `render(spec)` | Render to Vega-Lite JSON string | +| `render_chart(spec)` | Render to Altair chart object | -# Use custom reader with prepare() -reader = 
MyReader() -prepared = ggsql.prepare( - "SELECT * FROM data VISUALISE x, y DRAW point", - reader -) -``` +**Prepared Object Methods**: -Optional methods for custom readers: +| Method | Description | +| --------------- | -------------------------------------------- | +| `metadata()` | Get rows, columns, layer_count | +| `sql()` | Get the SQL portion | +| `visual()` | Get the VISUALISE portion | +| `layer_count()` | Number of DRAW layers | +| `data()` | Get the main DataFrame | +| `layer_data(i)` | Get layer-specific DataFrame (if filtered) | +| `stat_data(i)` | Get stat transform DataFrame (if applicable) | +| `layer_sql(i)` | Get layer filter SQL (if applicable) | +| `stat_sql(i)` | Get stat transform SQL (if applicable) | +| `warnings()` | Get validation warnings | -- `supports_register() -> bool` - Return `True` if registration is supported -- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a table +**Type Stubs**: -Native readers (e.g., `DuckDBReader`) use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. +The Python package includes manually maintained type stubs (`ggsql-python/python/ggsql/_ggsql.pyi`) that provide IDE support and type checking for the native Rust extension. When making API changes to `ggsql-python/src/lib.rs`, always update the corresponding stubs to keep them in sync. The stubs include detailed docstrings that appear in IDE tooltips, so they provide significant value beyond just type information. 
**Dependencies**: - Python >= 3.10 - altair >= 5.0 -- narwhals >= 2.15 - polars >= 1.0 --- diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 3ea2c603..cf9b408b 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -42,13 +42,9 @@ pip install target/wheels/ggsql-*.whl ## Quick Start -### Simple Usage with `render_altair` - -For quick visualizations, use the `render_altair` convenience function: - ```python -import ggsql import polars as pl +import ggsql # Create a DataFrame df = pl.DataFrame({ @@ -57,89 +53,83 @@ df = pl.DataFrame({ "category": ["A", "B", "A", "B", "A"] }) -# Render to Altair chart -chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +# Create reader and execute query with inline data registration +reader = ggsql.readers.DuckDB("duckdb://memory") +spec = reader.execute( + "SELECT * FROM data VISUALISE x, y, category AS color DRAW point", + {"data": df} +) -# Display or save +# Render to Vega-Lite JSON +writer = ggsql.writers.VegaLite() +json_str = writer.render_json(spec) + +# Or render to Altair chart +chart = writer.render_chart(spec) chart.display() # In Jupyter -chart.save("chart.html") # Save to file ``` -### Two-Stage API - -For more control, use the two-stage API with explicit reader and writer: +## API Reference -```python -import ggsql -import polars as pl +### Modules -# 1. Create a DuckDB reader -reader = ggsql.DuckDBReader("duckdb://memory") +#### `ggsql.readers` -# 2. Register your DataFrame as a table -df = pl.DataFrame({ - "date": ["2024-01-01", "2024-01-02", "2024-01-03"], - "revenue": [100, 150, 120], - "region": ["North", "South", "North"] -}) -reader.register("sales", df) +Database reader classes. -# 3. Prepare the visualization -prepared = ggsql.prepare( - """ - SELECT * FROM sales - VISUALISE date AS x, revenue AS y, region AS color - DRAW line - LABEL title => 'Sales by Region' - """, - reader -) +##### `DuckDB(connection: str)` -# 4. 
Inspect metadata -print(f"Rows: {prepared.metadata()['rows']}") -print(f"Columns: {prepared.metadata()['columns']}") -print(f"Layers: {prepared.layer_count()}") +Database reader that executes SQL and manages DataFrames. -# 5. Inspect SQL/VISUALISE portions and data -print(f"SQL: {prepared.sql()}") -print(f"Visual: {prepared.visual()}") -print(prepared.data()) # Returns polars DataFrame +```python +import ggsql -# 6. Render to Vega-Lite JSON -writer = ggsql.VegaLiteWriter() -vegalite_json = prepared.render(writer) -print(vegalite_json) +reader = ggsql.readers.DuckDB("duckdb://memory") # In-memory database +reader = ggsql.readers.DuckDB("duckdb:///path/to/file.db") # File database ``` -## API Reference - -### Classes +**Methods:** -#### `DuckDBReader(connection: str)` +- `execute(query: str, data: dict[str, DataFrame] | None = None) -> Prepared` - Execute a ggsql query with optional DataFrame registration. DataFrames are automatically registered before execution and unregistered afterward. Raises `NoVisualiseError` if query has no VISUALISE clause. +- `execute_sql(sql: str) -> pl.DataFrame` - Execute plain SQL and return results (no VISUALISE clause needed) +- `register(name: str, df: DataFrame) -> None` - Manually register a DataFrame as a queryable table +- `unregister(name: str) -> None` - Unregister a table (fails silently if not found) -Database reader that executes SQL and manages DataFrames. +**Context manager:** DuckDB supports the context manager protocol for use with `with` statements: ```python -reader = ggsql.DuckDBReader("duckdb://memory") # In-memory database -reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database +with ggsql.readers.DuckDB("duckdb://memory") as reader: + spec = reader.execute(query, {"data": df}) ``` -**Methods:** +**DataFrame support:** Accepts any [narwhals](https://narwhals-dev.github.io/narwhals/)-compatible DataFrame (polars, pandas, pyarrow, etc.). 
+ +#### `ggsql.writers` -- `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table -- `execute(sql: str) -> polars.DataFrame` - Execute SQL and return results -- `supports_register() -> bool` - Check if registration is supported +Output writer classes. -#### `VegaLiteWriter()` +##### `VegaLite()` Writer that generates Vega-Lite v6 JSON specifications. ```python -writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +import ggsql + +writer = ggsql.writers.VegaLite() +json_str = writer.render_json(spec) +chart = writer.render_chart(spec) ``` -#### `Validated` +**Methods:** + +- `render_json(spec: Prepared) -> str` - Render to Vega-Lite JSON string +- `render_chart(spec: Prepared, **kwargs) -> AltairChart` - Render to Altair chart object + +#### `ggsql.types` + +Type classes returned by ggsql functions. + +##### `Validated` Result of `validate()` containing query analysis without SQL execution. @@ -152,24 +142,54 @@ Result of `validate()` containing query analysis without SQL execution. - `errors() -> list[dict]` - Validation errors with messages and locations - `warnings() -> list[dict]` - Validation warnings -#### `Prepared` +##### `Prepared` -Result of `prepare()`, containing resolved visualization ready for rendering. +Result of `reader.execute()`, containing resolved visualization ready for rendering. 
**Methods:** -- `render(writer: VegaLiteWriter) -> str` - Generate Vega-Lite JSON - `metadata() -> dict` - Get `{"rows": int, "columns": list[str], "layer_count": int}` - `sql() -> str` - The executed SQL query - `visual() -> str` - The VISUALISE clause - `layer_count() -> int` - Number of DRAW layers -- `data() -> polars.DataFrame | None` - Main query result DataFrame -- `layer_data(index: int) -> polars.DataFrame | None` - Layer-specific data (if filtered) -- `stat_data(index: int) -> polars.DataFrame | None` - Statistical transform data +- `data() -> pl.DataFrame | None` - Main query result DataFrame +- `layer_data(index: int) -> pl.DataFrame | None` - Layer-specific data (if filtered) +- `stat_data(index: int) -> pl.DataFrame | None` - Statistical transform data - `layer_sql(index: int) -> str | None` - Layer filter SQL - `stat_sql(index: int) -> str | None` - Stat transform SQL - `warnings() -> list[dict]` - Validation warnings from preparation +### Exceptions + +All ggsql exceptions inherit from `GgsqlError`, allowing you to catch all ggsql-specific errors: + +```python +try: + spec = reader.execute(query) +except ggsql.types.GgsqlError as e: + print(f"ggsql error: {e}") +``` + +#### Exception Hierarchy + +- `GgsqlError` - Base exception for all ggsql errors + - `ParseError` - Query parsing failed + - `ValidationError` - Query validation failed (e.g., missing required aesthetics) + - `ReaderError` - Database/SQL execution failed + - `WriterError` - Output generation failed + - `NoVisualiseError` - Query has no VISUALISE clause + +#### `NoVisualiseError` + +Raised when `reader.execute()` is called on a query without a VISUALISE clause. Use `reader.execute_sql()` for plain SQL queries. 
+ +```python +try: + spec = reader.execute("SELECT * FROM data") # No VISUALISE +except ggsql.types.NoVisualiseError: + df = reader.execute_sql("SELECT * FROM data") # Use this instead +``` + ### Functions #### `validate(query: str) -> Validated` @@ -185,36 +205,48 @@ else: print(f"Error: {error['message']}") ``` -#### `prepare(query: str, reader: DuckDBReader) -> Prepared` +## Examples -Parse, validate, and execute a ggsql query. +### Basic Usage ```python -reader = ggsql.DuckDBReader("duckdb://memory") -prepared = ggsql.prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -``` - -#### `render_altair(df, viz: str, **kwargs) -> altair.Chart` +import polars as pl +import ggsql -Convenience function to render a DataFrame with a VISUALISE spec to an Altair chart. +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -**Parameters:** +reader = ggsql.readers.DuckDB("duckdb://memory") +spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point", {"data": df}) -- `df` - Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. -- `viz` - The VISUALISE specification string -- `**kwargs` - Additional arguments passed to `altair.Chart.from_json()` (e.g., `validate=False`) +writer = ggsql.writers.VegaLite() +chart = writer.render_chart(spec) +``` -**Returns:** An Altair chart object (Chart, LayerChart, FacetChart, etc.) 
+### Multiple Tables ```python -import polars as pl -import ggsql +sales = pl.DataFrame({"id": [1, 2], "product_id": [1, 1], "amount": [100, 200]}) +products = pl.DataFrame({"id": [1], "name": ["Widget"]}) -df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +spec = reader.execute( + """ + SELECT s.id, s.amount, p.name + FROM sales s JOIN products p ON s.product_id = p.id + VISUALISE id AS x, amount AS y, name AS color + DRAW bar + """, + {"sales": sales, "products": products} +) ``` -## Examples +### VISUALISE FROM Shorthand + +```python +spec = reader.execute( + "VISUALISE FROM data DRAW point MAPPING x AS x, y AS y", + {"data": df} +) +``` ### Mapping Styles @@ -222,72 +254,46 @@ chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30], "category": ["A", "B", "A"]}) # Explicit mapping -ggsql.render_altair(df, "VISUALISE x AS x, y AS y DRAW point") +spec = reader.execute("SELECT * FROM df VISUALISE x AS x, y AS y DRAW point", {"df": df}) # Implicit mapping (column name = aesthetic name) -ggsql.render_altair(df, "VISUALISE x, y DRAW point") +spec = reader.execute("SELECT * FROM df VISUALISE x, y DRAW point", {"df": df}) # Wildcard mapping (map all matching columns) -ggsql.render_altair(df, "VISUALISE * DRAW point") +spec = reader.execute("SELECT * FROM df VISUALISE * DRAW point", {"df": df}) # With color encoding -ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") +spec = reader.execute("SELECT * FROM df VISUALISE x, y, category AS color DRAW point", {"df": df}) ``` -### Custom Readers - -You can use any Python object with an `execute(sql: str) -> polars.DataFrame` method as a reader. This enables integration with any data source. 
+### Using Pandas DataFrames ```python +import pandas as pd import ggsql -import polars as pl - -class CSVReader: - """Custom reader that loads data from CSV files.""" - def __init__(self, data_dir: str): - self.data_dir = data_dir +# Works with pandas DataFrames (via narwhals) +df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - def execute(self, sql: str) -> pl.DataFrame: - # Simple implementation: ignore SQL and return fixed data - # A real implementation would parse SQL to determine which file to load - return pl.read_csv(f"{self.data_dir}/data.csv") +reader = ggsql.readers.DuckDB("duckdb://memory") +spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point", {"data": df}) -# Use custom reader with prepare() -reader = CSVReader("/path/to/data") -prepared = ggsql.prepare( - "SELECT * FROM data VISUALISE x, y DRAW point", - reader -) -writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +writer = ggsql.writers.VegaLite() +chart = writer.render_chart(spec) ``` -**Optional methods** for custom readers: - -- `supports_register() -> bool` - Return `True` if your reader supports DataFrame registration -- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a queryable table +### Handling Plain SQL ```python -class AdvancedReader: - """Custom reader with registration support.""" - - def __init__(self): - self.tables = {} - - def execute(self, sql: str) -> pl.DataFrame: - # Your SQL execution logic here - ... - - def supports_register(self) -> bool: - return True +import ggsql - def register(self, name: str, df: pl.DataFrame) -> None: - self.tables[name] = df +try: + spec = reader.execute("SELECT * FROM data", {"data": df}) +except ggsql.types.NoVisualiseError: + # Use execute_sql() for queries without VISUALISE + result_df = reader.execute_sql("SELECT * FROM data") ``` -Native readers like `DuckDBReader` use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. 
- ## Development ### Keeping in sync with the monorepo @@ -319,7 +325,7 @@ pytest tests/ -v - Python >= 3.10 - altair >= 5.0 -- narwhals >= 2.15 +- narwhals >= 1.0 - polars >= 1.0 ## License diff --git a/src/doc/API.md b/src/doc/API.md index a0f97ab6..5dd50b20 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -5,14 +5,14 @@ This document provides a comprehensive reference for the ggsql public API. ## Overview - **Stage 1: `prepare()`** - Parse query, execute SQL, resolve mappings, prepare data -- **Stage 2: `render()`** - Generate output (Vega-Lite JSON, etc.) +- **Stage 2: `writer.render()`** - Generate output (Vega-Lite JSON, etc.) ### API Functions | Function | Use Case | | ------------ | ---------------------------------------------------- | | `prepare()` | Main entry point - full visualization pipeline | -| `render()` | Generate output from prepared data | +| `writer.render()` | Generate output from prepared data | | `validate()` | Validate syntax + semantics, inspect query structure | --- @@ -50,7 +50,7 @@ Prepare a ggsql query for visualization. This is the main entry point for the tw **Example:** ```rust -use ggsql::{prepare, reader::DuckDBReader, writer::VegaLiteWriter}; +use ggsql::{prepare, reader::DuckDBReader, writer::{VegaLiteWriter, Writer}}; let reader = DuckDBReader::from_connection_string("duckdb://memory")?; let prepared = prepare( @@ -64,7 +64,7 @@ println!("Columns: {:?}", prepared.metadata().columns); // Render to Vega-Lite let writer = VegaLiteWriter::new(); -let result = prepared.render(&writer)?; +let result = writer.render(&prepared)?; ``` **Error Conditions:** @@ -188,20 +188,6 @@ if let Some(tree) = validated.tree() { Result of preparing a visualization, ready for rendering. 
-#### Rendering Methods - -| Method | Signature | Description | -| -------- | --------------------------------------------------------- | ----------------------- | -| `render` | `fn render(&self, writer: &dyn Writer) -> Result` | Render to output format | - -**Example:** - -```rust -let writer = VegaLiteWriter::new(); -let json = prepared.render(&writer)?; -println!("{}", json); -``` - #### Plot Access Methods | Method | Signature | Description | @@ -309,7 +295,8 @@ if !prepared.warnings().is_empty() { } // Continue with rendering -let json = prepared.render(&writer)?; +let writer = VegaLiteWriter::new(); +let json = writer.render(&prepared)?; ``` --- @@ -374,11 +361,14 @@ pub struct Location { ```rust pub trait Reader { /// Execute a SQL query and return a DataFrame - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; /// Register a DataFrame as a queryable table fn register(&mut self, name: &str, df: DataFrame) -> Result<()>; + /// Unregister a table (fails silently if not found) + fn unregister(&mut self, name: &str); + /// Check if this reader supports DataFrame registration fn supports_register(&self) -> bool; } @@ -392,24 +382,45 @@ pub trait Reader { ```rust pub trait Writer { - /// Render a plot specification to output format + /// Render a prepared visualization to output format + fn render(&self, prepared: &Prepared) -> Result; + + /// Lower-level: render from plot specification and data map fn write(&self, spec: &Plot, data: &HashMap) -> Result; - /// Get the file extension for this writer's output - fn file_extension(&self) -> &str; + /// Validate that a spec is compatible with this writer + fn validate(&self, spec: &Plot) -> Result<()>; } ``` +**Example:** + +```rust +use ggsql::writer::{VegaLiteWriter, Writer}; + +let writer = VegaLiteWriter::new(); +let json = writer.render(&prepared)?; +println!("{}", json); +``` + +--- + ## Python Bindings The Python bindings provide the same two-stage API with Pythonic 
conventions. +### Module Structure + +- `ggsql.readers` - Reader classes (`DuckDB`) +- `ggsql.writers` - Writer classes (`VegaLite`) +- `ggsql` - Types (`Validated`, `Prepared`), exceptions (`NoVisualiseError`), and functions (`validate`) + ### Classes -#### `DuckDBReader` +#### `ggsql.readers.DuckDB` ```python -class DuckDBReader: +class DuckDB: def __init__(self, connection: str) -> None: """Create a DuckDB reader. @@ -417,27 +428,58 @@ class DuckDBReader: connection: Connection string (e.g., "duckdb://memory") """ - def register(self, name: str, df: Any) -> None: - """Register a DataFrame as a queryable table. + def execute( + self, + query: str, + data: dict[str, polars.DataFrame] | None = None + ) -> Prepared: + """Execute a ggsql query with optional DataFrame registration. + + DataFrames are registered before execution and automatically + unregistered afterward (even on error). Args: - name: Table name - df: Polars DataFrame or narwhals-compatible DataFrame + query: The ggsql query (must contain VISUALISE clause) + data: DataFrames to register as tables (keys are table names) + + Returns: + Prepared visualization ready for rendering + + Raises: + NoVisualiseError: If query has no VISUALISE clause + ValueError: If parsing or execution fails """ - def execute(self, sql: str) -> polars.DataFrame: - """Execute SQL and return a Polars DataFrame.""" + def execute_sql(self, sql: str) -> polars.DataFrame: + """Execute plain SQL and return a Polars DataFrame.""" - def supports_register(self) -> bool: - """Check if registration is supported.""" + def register(self, name: str, df: polars.DataFrame) -> None: + """Manually register a DataFrame as a queryable table.""" + + def unregister(self, name: str) -> None: + """Unregister a table (fails silently if not found).""" ``` -#### `VegaLiteWriter` +#### `ggsql.writers.VegaLite` ```python -class VegaLiteWriter: +class VegaLite: def __init__(self) -> None: """Create a Vega-Lite writer.""" + + def render(self, spec: Prepared) 
-> str: + """Render to Vega-Lite JSON string.""" + + def render_chart(self, spec: Prepared, **kwargs) -> AltairChart: + """Render to Altair chart object. + + Args: + spec: Prepared visualization from reader.execute() + **kwargs: Additional args for altair.Chart.from_json() + + Returns: + Altair chart (Chart, LayerChart, FacetChart, etc.) + """ ``` #### `Validated` @@ -469,9 +511,6 @@ class Validated: ```python class Prepared: - def render(self, writer: VegaLiteWriter) -> str: - """Render to output format.""" - def metadata(self) -> dict: """Get metadata as dict with keys: rows, columns, layer_count.""" @@ -503,6 +542,13 @@ class Prepared: """Get stat transform query.""" ``` +### Exceptions + +```python +class NoVisualiseError(Exception): + """Raised when execute() is called on a query without VISUALISE clause.""" +``` + ### Functions ```python @@ -511,7 +557,26 @@ def validate(query: str) -> Validated: Returns Validated object with query inspection and validation methods. """ +``` -def prepare(query: str, reader: DuckDBReader) -> Prepared: - """Prepare a query for visualization.""" +### Usage Example + +```python +import polars as pl +from ggsql.readers import DuckDB +from ggsql.writers import VegaLite + +# Create data +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + +# Execute with inline data registration +reader = DuckDB("duckdb://memory") +spec = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", + {"data": df} +) + +# Render to Altair chart +writer = VegaLite() +chart = writer.render_chart(spec) ``` From 471602ecaf1ce0418ec7462aff972738ee4b9762 Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 29 Jan 2026 19:51:03 -0600 Subject: [PATCH 5/7] Fix formatting (cargo fmt) Co-Authored-By: Claude Opus 4.5 --- ggsql-python/src/lib.rs | 17 +++++------------ src/reader/duckdb.rs | 4 +++- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index aefc5ffb..c688f412 100644 --- 
a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -37,9 +37,7 @@ fn ggsql_error_to_pyerr(e: ggsql::GgsqlError) -> PyErr { GgsqlError::NoVisualise => { NoVisualiseError::new_err("Query has no VISUALISE clause".to_string()) } - GgsqlError::InternalError(msg) => { - PyGgsqlError::new_err(format!("Internal error: {}", msg)) - } + GgsqlError::InternalError(msg) => PyGgsqlError::new_err(format!("Internal error: {}", msg)), } } @@ -138,8 +136,8 @@ impl PyDuckDBReader { /// Create a new DuckDB reader from a connection string. #[new] fn new(connection: &str) -> PyResult { - let inner = RustDuckDBReader::from_connection_string(connection) - .map_err(ggsql_error_to_pyerr)?; + let inner = + RustDuckDBReader::from_connection_string(connection).map_err(ggsql_error_to_pyerr)?; Ok(Self { inner, connection: connection.to_string(), @@ -205,10 +203,7 @@ impl PyDuckDBReader { /// with VISUALISE clauses, use execute() instead. #[pyo3(name = "execute_sql")] fn execute_sql(&self, py: Python<'_>, sql: &str) -> PyResult> { - let df = self - .inner - .execute_sql(sql) - .map_err(ggsql_error_to_pyerr)?; + let df = self.inner.execute_sql(sql).map_err(ggsql_error_to_pyerr)?; polars_to_py(py, &df) } @@ -261,9 +256,7 @@ impl PyVegaLiteWriter { /// Render a prepared visualization to Vega-Lite JSON. 
fn render(&self, spec: &PyPrepared) -> PyResult { - self.inner - .render(&spec.inner) - .map_err(ggsql_error_to_pyerr) + self.inner.render(&spec.inner).map_err(ggsql_error_to_pyerr) } } diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 75475717..a8d35cfd 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -626,7 +626,9 @@ mod tests { reader.register("my_table", df).unwrap(); // Query the registered table - let result = reader.execute_sql("SELECT * FROM my_table ORDER BY x").unwrap(); + let result = reader + .execute_sql("SELECT * FROM my_table ORDER BY x") + .unwrap(); assert_eq!(result.shape(), (3, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } From 3bafd5fc696bd9d2bbae0b03a889164f12a4719d Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 29 Jan 2026 19:54:23 -0600 Subject: [PATCH 6/7] Fix REST API for NoVisualise error and clippy warnings - Add Writer trait import for render() method - Handle NoVisualise variant in error response mapping - Fix collapsible-str-replace: combine consecutive replace() calls - Allow vec_init_then_push for feature-flag dependent version handler Co-Authored-By: Claude Opus 4.5 --- src/rest.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/rest.rs b/src/rest.rs index dd894440..82c46542 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -39,7 +39,7 @@ use ggsql::prepare; use ggsql::reader::DuckDBReader; #[cfg(feature = "vegalite")] -use ggsql::writer::VegaLiteWriter; +use ggsql::writer::{VegaLiteWriter, Writer}; /// CLI arguments for the REST API server #[derive(Parser)] @@ -196,6 +196,7 @@ impl From for ApiErrorResponse { GgsqlError::ReaderError(_) => (StatusCode::BAD_REQUEST, "ReaderError"), GgsqlError::WriterError(_) => (StatusCode::INTERNAL_SERVER_ERROR, "WriterError"), GgsqlError::InternalError(_) => (StatusCode::INTERNAL_SERVER_ERROR, "InternalError"), + GgsqlError::NoVisualise => (StatusCode::BAD_REQUEST, "NoVisualise"), }; ApiErrorResponse { @@ -258,8 +259,7 @@ fn 
load_data_files(reader: &DuckDBReader, files: &[String]) -> Result<(), GgsqlE .file_stem() .and_then(|s| s.to_str()) .unwrap_or("data") - .replace('-', "_") - .replace(' ', "_"); + .replace(['-', ' '], "_"); info!("Loading {} into table '{}'", file_path, table_name); @@ -572,6 +572,7 @@ async fn health_handler() -> Json { } /// GET /api/v1/version - Version information +#[allow(clippy::vec_init_then_push)] // Feature-flag dependent pushes async fn version_handler() -> Json { let mut features = Vec::new(); From 84091f6fb7eafcf9399755f91d0ebb945264cb5b Mon Sep 17 00:00:00 2001 From: Carson Date: Thu, 29 Jan 2026 20:13:57 -0600 Subject: [PATCH 7/7] Add Reader ABC for custom reader implementations The Reader abstract base class defines the interface that custom readers must implement: - execute(query, data) - Execute ggsql query with optional data registration - execute_sql(sql) - Execute plain SQL and return DataFrame - register(name, df) - Register a DataFrame as a table - unregister(name) - Unregister a table DuckDB now inherits from Reader, providing a complete reference implementation. Co-Authored-By: Claude Opus 4.5 --- ggsql-python/python/ggsql/readers.py | 120 ++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 2 deletions(-) diff --git a/ggsql-python/python/ggsql/readers.py b/ggsql-python/python/ggsql/readers.py index 7a5e5436..b317eef8 100644 --- a/ggsql-python/python/ggsql/readers.py +++ b/ggsql-python/python/ggsql/readers.py @@ -2,6 +2,7 @@ from __future__ import annotations +from abc import ABC, abstractmethod from typing import TYPE_CHECKING import narwhals as nw @@ -13,7 +14,122 @@ import polars as pl from ggsql._ggsql import Prepared -__all__ = ["DuckDB"] +__all__ = ["Reader", "DuckDB"] + + +class Reader(ABC): + """Abstract base class for ggsql readers. + + Custom reader implementations should subclass this and implement + the required abstract methods. The built-in `DuckDB` reader provides + a complete implementation. 
+ + Examples + -------- + >>> from ggsql.readers import Reader + >>> import polars as pl + >>> + >>> class MyReader(Reader): + ... def __init__(self): + ... self._tables = {} + ... + ... def execute(self, query: str, data=None) -> "Prepared": + ... # Register tables, execute query, cleanup + ... ... + ... + ... def execute_sql(self, sql: str) -> pl.DataFrame: + ... # Your SQL execution logic here + ... return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + ... + ... def register(self, name: str, df) -> None: + ... self._tables[name] = df + ... + ... def unregister(self, name: str) -> None: + ... self._tables.pop(name, None) + """ + + @abstractmethod + def execute( + self, + query: str, + data: dict[str, IntoDataFrame] | None = None, + ) -> "Prepared": + """Execute a ggsql query with optional DataFrame registration. + + DataFrames should be registered before query execution and automatically + unregistered afterward (even on error) to avoid polluting the namespace. + + Parameters + ---------- + query + The ggsql query to execute. Must contain a VISUALISE clause. + data + DataFrames to register as queryable tables. Keys are table names. + + Returns + ------- + Prepared + A prepared visualization ready for rendering. + + Raises + ------ + NoVisualiseError + If the query has no VISUALISE clause. + """ + ... + + @abstractmethod + def execute_sql(self, sql: str) -> "pl.DataFrame": + """Execute a SQL query and return the result as a DataFrame. + + This is for plain SQL queries without visualization. For ggsql queries + with VISUALISE clauses, use execute() instead. + + Parameters + ---------- + sql + The SQL query to execute. + + Returns + ------- + polars.DataFrame + The query result as a polars DataFrame. + """ + ... + + @abstractmethod + def register(self, name: str, df: IntoDataFrame) -> None: + """Register a DataFrame as a queryable table. + + Parameters + ---------- + name + The table name to register under. + df + The DataFrame to register. + """ + ... 
+ + @abstractmethod + def unregister(self, name: str) -> None: + """Unregister a table by name. + + Should fail silently if the table doesn't exist. + + Parameters + ---------- + name + The table name to unregister. + """ + ... + + def __enter__(self) -> "Reader": + """Enter context manager.""" + return self + + def __exit__(self, _exc_type, _exc_val, _exc_tb) -> None: + """Exit context manager.""" + pass def _to_polars(df: IntoDataFrame) -> "pl.DataFrame": @@ -29,7 +145,7 @@ def _to_polars(df: IntoDataFrame) -> "pl.DataFrame": return nw_df.to_polars() -class DuckDB: +class DuckDB(Reader): """DuckDB database reader for executing SQL queries and ggsql visualizations. Creates an in-memory or file-based DuckDB connection that can execute