diff --git a/CLAUDE.md b/CLAUDE.md index aa093221..275ba1ab 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -165,7 +165,7 @@ let prepared = ggsql::prepare( // Render to Vega-Lite JSON let writer = VegaLiteWriter::new(); -let json = prepared.render(&writer)?; +let json = writer.render(&prepared)?; ``` ### Core Functions @@ -869,13 +869,12 @@ When running in Positron IDE, the extension provides enhanced functionality: **Features**: - PyO3-based Rust bindings compiled to a native Python extension -- Two-stage API mirroring the Rust API: `prepare()` → `render()` -- DuckDB reader with DataFrame registration -- Custom Python reader support: any object with `execute(sql) -> DataFrame` method -- Works with any narwhals-compatible DataFrame (polars, pandas, etc.) -- LazyFrames are collected automatically -- Returns native `altair.Chart` objects via `render_altair()` convenience function +- Two-stage API: `reader.execute()` → `writer.render()` +- DuckDB reader with inline DataFrame registration via `execute(query, data_dict)` +- Automatic table cleanup after query execution +- Returns native `altair.Chart` objects via `writer.render_chart()` - Query validation and introspection (SQL, layer queries, stat queries) +- `NoVisualiseError` exception for queries without VISUALISE clause **Installation**: @@ -892,40 +891,24 @@ maturin develop import ggsql import polars as pl -# Create reader and register data -reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -reader.register("data", df) -# Prepare visualization -prepared = ggsql.prepare( +# Execute with inline data registration (auto-registers and unregisters) +reader = ggsql.DuckDBReader("duckdb://memory") +spec = reader.execute( "SELECT * FROM data VISUALISE x, y DRAW point", - reader + {"data": df} ) # Inspect metadata -print(f"Rows: {prepared.metadata()['rows']}") -print(f"Columns: {prepared.metadata()['columns']}") -print(f"SQL: {prepared.sql()}") +print(f"Rows: 
{spec.metadata()['rows']}") +print(f"Columns: {spec.metadata()['columns']}") +print(f"SQL: {spec.sql()}") -# Render to Vega-Lite JSON +# Render to Vega-Lite JSON or Altair chart writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) -``` - -**Convenience Function** (`render_altair`): - -For quick visualizations without explicit reader setup: - -```python -import ggsql -import polars as pl - -df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - -# Render DataFrame to Altair chart in one call -chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") -chart.display() # In Jupyter +json_output = writer.render(spec) +chart = writer.render_chart(spec) ``` **Query Validation**: @@ -941,73 +924,71 @@ print(f"SQL portion: {validated.sql()}") print(f"Errors: {validated.errors()}") ``` -**Classes**: - -| Class | Description | -| -------------------------- | -------------------------------------------- | -| `DuckDBReader(connection)` | Database reader with DataFrame registration | -| `VegaLiteWriter()` | Vega-Lite JSON output writer | -| `Validated` | Result of `validate()` with query inspection | -| `Prepared` | Result of `prepare()`, ready for rendering | +**Handling Plain SQL**: -**Functions**: +```python +try: + spec = reader.execute("SELECT * FROM data", {"data": df}) +except ggsql.NoVisualiseError: + # Use execute_sql() for queries without VISUALISE + result_df = reader.execute_sql("SELECT * FROM data") +``` -| Function | Description | -| ------------------------ | ------------------------------------------------- | -| `validate(query)` | Syntax/semantic validation with query inspection | -| `prepare(query, reader)` | Full preparation (reader can be native or custom) | -| `render_altair(df, viz)` | Convenience: render DataFrame to Altair chart | +**Classes**: -**Prepared Object Methods**: +| Class | Description | +| -------------------------- | --------------------------------------------------- | +| `DuckDBReader(connection)` | Database reader 
with DataFrame registration | +| `VegaLiteWriter()` | Vega-Lite JSON output writer with render methods | +| `Validated` | Result of `validate()` with query inspection | +| `Prepared` | Result of `reader.execute()`, ready for rendering | +| `NoVisualiseError` | Exception for queries without VISUALISE clause | -| Method | Description | -| ---------------- | -------------------------------------------- | -| `render(writer)` | Generate Vega-Lite JSON | -| `metadata()` | Get rows, columns, layer_count | -| `sql()` | Get the SQL portion | -| `visual()` | Get the VISUALISE portion | -| `layer_count()` | Number of DRAW layers | -| `data()` | Get the main DataFrame | -| `layer_data(i)` | Get layer-specific DataFrame (if filtered) | -| `stat_data(i)` | Get stat transform DataFrame (if applicable) | -| `layer_sql(i)` | Get layer filter SQL (if applicable) | -| `stat_sql(i)` | Get stat transform SQL (if applicable) | -| `warnings()` | Get validation warnings | +**Functions**: -**Custom Python Readers**: +| Function | Description | +| ----------------- | ------------------------------------------------ | +| `validate(query)` | Syntax/semantic validation with query inspection | -Any Python object with an `execute(sql: str) -> polars.DataFrame` method can be used as a reader: +**DuckDBReader Methods**: -```python -import ggsql -import polars as pl +| Method | Description | +| ---------------------------- | -------------------------------------------------------- | +| `execute(query, data=None)` | Execute ggsql query with optional data dict registration | +| `execute_sql(sql)` | Execute plain SQL, return DataFrame | +| `register(name, df)` | Manually register DataFrame as table | +| `unregister(name)` | Unregister table (fails silently if not found) | -class MyReader: - """Custom reader that returns static data.""" +**VegaLiteWriter Methods**: - def execute(self, sql: str) -> pl.DataFrame: - return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +| Method | Description | +| 
------------------------- | ------------------------------------ | +| `render(spec)` | Render to Vega-Lite JSON string | +| `render_chart(spec)` | Render to Altair chart object | -# Use custom reader with prepare() -reader = MyReader() -prepared = ggsql.prepare( - "SELECT * FROM data VISUALISE x, y DRAW point", - reader -) -``` +**Prepared Object Methods**: -Optional methods for custom readers: +| Method | Description | +| --------------- | -------------------------------------------- | +| `metadata()` | Get rows, columns, layer_count | +| `sql()` | Get the SQL portion | +| `visual()` | Get the VISUALISE portion | +| `layer_count()` | Number of DRAW layers | +| `data()` | Get the main DataFrame | +| `layer_data(i)` | Get layer-specific DataFrame (if filtered) | +| `stat_data(i)` | Get stat transform DataFrame (if applicable) | +| `layer_sql(i)` | Get layer filter SQL (if applicable) | +| `stat_sql(i)` | Get stat transform SQL (if applicable) | +| `warnings()` | Get validation warnings | -- `supports_register() -> bool` - Return `True` if registration is supported -- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a table +**Type Stubs**: -Native readers (e.g., `DuckDBReader`) use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. +The Python package includes manually maintained type stubs (`ggsql-python/python/ggsql/_ggsql.pyi`) that provide IDE support and type checking for the native Rust extension. When making API changes to `ggsql-python/src/lib.rs`, always update the corresponding stubs to keep them in sync. The stubs include detailed docstrings that appear in IDE tooltips, so they provide significant value beyond just type information. 
**Dependencies**: - Python >= 3.10 - altair >= 5.0 -- narwhals >= 2.15 - polars >= 1.0 --- diff --git a/ggsql-jupyter/src/executor.rs b/ggsql-jupyter/src/executor.rs index 40f74f72..d1a2db89 100644 --- a/ggsql-jupyter/src/executor.rs +++ b/ggsql-jupyter/src/executor.rs @@ -8,7 +8,7 @@ use ggsql::{ prepare, reader::{DuckDBReader, Reader}, validate, - writer::VegaLiteWriter, + writer::{VegaLiteWriter, Writer}, }; use polars::frame::DataFrame; @@ -60,7 +60,7 @@ impl QueryExecutor { // 2. Check if there's a visualization if !validated.has_visual() { // Pure SQL query - execute directly and return DataFrame - let df = self.reader.execute(code)?; + let df = self.reader.execute_sql(code)?; tracing::info!( "Pure SQL executed: {} rows, {} cols", df.height(), @@ -79,7 +79,7 @@ impl QueryExecutor { ); // 4. Render to Vega-Lite - let vega_json = prepared.render(&self.writer)?; + let vega_json = self.writer.render(&prepared)?; tracing::debug!("Generated Vega-Lite spec: {} chars", vega_json.len()); diff --git a/ggsql-python/README.md b/ggsql-python/README.md index 3ea2c603..cf9b408b 100644 --- a/ggsql-python/README.md +++ b/ggsql-python/README.md @@ -42,13 +42,9 @@ pip install target/wheels/ggsql-*.whl ## Quick Start -### Simple Usage with `render_altair` - -For quick visualizations, use the `render_altair` convenience function: - ```python -import ggsql import polars as pl +import ggsql # Create a DataFrame df = pl.DataFrame({ @@ -57,89 +53,83 @@ df = pl.DataFrame({ "category": ["A", "B", "A", "B", "A"] }) -# Render to Altair chart -chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +# Create reader and execute query with inline data registration +reader = ggsql.readers.DuckDB("duckdb://memory") +spec = reader.execute( + "SELECT * FROM data VISUALISE x, y, category AS color DRAW point", + {"data": df} +) -# Display or save +# Render to Vega-Lite JSON +writer = ggsql.writers.VegaLite() +json_str = writer.render_json(spec) + +# Or render to Altair chart +chart = 
writer.render_chart(spec) chart.display() # In Jupyter -chart.save("chart.html") # Save to file ``` -### Two-Stage API - -For more control, use the two-stage API with explicit reader and writer: +## API Reference -```python -import ggsql -import polars as pl +### Modules -# 1. Create a DuckDB reader -reader = ggsql.DuckDBReader("duckdb://memory") +#### `ggsql.readers` -# 2. Register your DataFrame as a table -df = pl.DataFrame({ - "date": ["2024-01-01", "2024-01-02", "2024-01-03"], - "revenue": [100, 150, 120], - "region": ["North", "South", "North"] -}) -reader.register("sales", df) +Database reader classes. -# 3. Prepare the visualization -prepared = ggsql.prepare( - """ - SELECT * FROM sales - VISUALISE date AS x, revenue AS y, region AS color - DRAW line - LABEL title => 'Sales by Region' - """, - reader -) +##### `DuckDB(connection: str)` -# 4. Inspect metadata -print(f"Rows: {prepared.metadata()['rows']}") -print(f"Columns: {prepared.metadata()['columns']}") -print(f"Layers: {prepared.layer_count()}") +Database reader that executes SQL and manages DataFrames. -# 5. Inspect SQL/VISUALISE portions and data -print(f"SQL: {prepared.sql()}") -print(f"Visual: {prepared.visual()}") -print(prepared.data()) # Returns polars DataFrame +```python +import ggsql -# 6. Render to Vega-Lite JSON -writer = ggsql.VegaLiteWriter() -vegalite_json = prepared.render(writer) -print(vegalite_json) +reader = ggsql.readers.DuckDB("duckdb://memory") # In-memory database +reader = ggsql.readers.DuckDB("duckdb:///path/to/file.db") # File database ``` -## API Reference - -### Classes +**Methods:** -#### `DuckDBReader(connection: str)` +- `execute(query: str, data: dict[str, DataFrame] | None = None) -> Prepared` - Execute a ggsql query with optional DataFrame registration. DataFrames are automatically registered before execution and unregistered afterward. Raises `NoVisualiseError` if query has no VISUALISE clause. 
+- `execute_sql(sql: str) -> pl.DataFrame` - Execute plain SQL and return results (no VISUALISE clause needed) +- `register(name: str, df: DataFrame) -> None` - Manually register a DataFrame as a queryable table +- `unregister(name: str) -> None` - Unregister a table (fails silently if not found) -Database reader that executes SQL and manages DataFrames. +**Context manager:** DuckDB supports the context manager protocol for use with `with` statements: ```python -reader = ggsql.DuckDBReader("duckdb://memory") # In-memory database -reader = ggsql.DuckDBReader("duckdb:///path/to/file.db") # File database +with ggsql.readers.DuckDB("duckdb://memory") as reader: + spec = reader.execute(query, {"data": df}) ``` -**Methods:** +**DataFrame support:** Accepts any [narwhals](https://narwhals-dev.github.io/narwhals/)-compatible DataFrame (polars, pandas, pyarrow, etc.). + +#### `ggsql.writers` -- `register(name: str, df: polars.DataFrame)` - Register a DataFrame as a queryable table -- `execute(sql: str) -> polars.DataFrame` - Execute SQL and return results -- `supports_register() -> bool` - Check if registration is supported +Output writer classes. -#### `VegaLiteWriter()` +##### `VegaLite()` Writer that generates Vega-Lite v6 JSON specifications. ```python -writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +import ggsql + +writer = ggsql.writers.VegaLite() +json_str = writer.render_json(spec) +chart = writer.render_chart(spec) ``` -#### `Validated` +**Methods:** + +- `render_json(spec: Prepared) -> str` - Render to Vega-Lite JSON string +- `render_chart(spec: Prepared, **kwargs) -> AltairChart` - Render to Altair chart object + +#### `ggsql.types` + +Type classes returned by ggsql functions. + +##### `Validated` Result of `validate()` containing query analysis without SQL execution. @@ -152,24 +142,54 @@ Result of `validate()` containing query analysis without SQL execution. 
- `errors() -> list[dict]` - Validation errors with messages and locations - `warnings() -> list[dict]` - Validation warnings -#### `Prepared` +##### `Prepared` -Result of `prepare()`, containing resolved visualization ready for rendering. +Result of `reader.execute()`, containing resolved visualization ready for rendering. **Methods:** -- `render(writer: VegaLiteWriter) -> str` - Generate Vega-Lite JSON - `metadata() -> dict` - Get `{"rows": int, "columns": list[str], "layer_count": int}` - `sql() -> str` - The executed SQL query - `visual() -> str` - The VISUALISE clause - `layer_count() -> int` - Number of DRAW layers -- `data() -> polars.DataFrame | None` - Main query result DataFrame -- `layer_data(index: int) -> polars.DataFrame | None` - Layer-specific data (if filtered) -- `stat_data(index: int) -> polars.DataFrame | None` - Statistical transform data +- `data() -> pl.DataFrame | None` - Main query result DataFrame +- `layer_data(index: int) -> pl.DataFrame | None` - Layer-specific data (if filtered) +- `stat_data(index: int) -> pl.DataFrame | None` - Statistical transform data - `layer_sql(index: int) -> str | None` - Layer filter SQL - `stat_sql(index: int) -> str | None` - Stat transform SQL - `warnings() -> list[dict]` - Validation warnings from preparation +### Exceptions + +All ggsql exceptions inherit from `GgsqlError`, allowing you to catch all ggsql-specific errors: + +```python +try: + spec = reader.execute(query) +except ggsql.types.GgsqlError as e: + print(f"ggsql error: {e}") +``` + +#### Exception Hierarchy + +- `GgsqlError` - Base exception for all ggsql errors + - `ParseError` - Query parsing failed + - `ValidationError` - Query validation failed (e.g., missing required aesthetics) + - `ReaderError` - Database/SQL execution failed + - `WriterError` - Output generation failed + - `NoVisualiseError` - Query has no VISUALISE clause + +#### `NoVisualiseError` + +Raised when `reader.execute()` is called on a query without a VISUALISE clause. 
Use `reader.execute_sql()` for plain SQL queries. + +```python +try: + spec = reader.execute("SELECT * FROM data") # No VISUALISE +except ggsql.types.NoVisualiseError: + df = reader.execute_sql("SELECT * FROM data") # Use this instead +``` + ### Functions #### `validate(query: str) -> Validated` @@ -185,36 +205,48 @@ else: print(f"Error: {error['message']}") ``` -#### `prepare(query: str, reader: DuckDBReader) -> Prepared` +## Examples -Parse, validate, and execute a ggsql query. +### Basic Usage ```python -reader = ggsql.DuckDBReader("duckdb://memory") -prepared = ggsql.prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -``` - -#### `render_altair(df, viz: str, **kwargs) -> altair.Chart` +import polars as pl +import ggsql -Convenience function to render a DataFrame with a VISUALISE spec to an Altair chart. +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -**Parameters:** +reader = ggsql.readers.DuckDB("duckdb://memory") +spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point", {"data": df}) -- `df` - Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. -- `viz` - The VISUALISE specification string -- `**kwargs` - Additional arguments passed to `altair.Chart.from_json()` (e.g., `validate=False`) +writer = ggsql.writers.VegaLite() +chart = writer.render_chart(spec) +``` -**Returns:** An Altair chart object (Chart, LayerChart, FacetChart, etc.) 
+### Multiple Tables ```python -import polars as pl -import ggsql +sales = pl.DataFrame({"id": [1, 2], "product_id": [1, 1], "amount": [100, 200]}) +products = pl.DataFrame({"id": [1], "name": ["Widget"]}) -df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") +spec = reader.execute( + """ + SELECT s.id, s.amount, p.name + FROM sales s JOIN products p ON s.product_id = p.id + VISUALISE id AS x, amount AS y, name AS color + DRAW bar + """, + {"sales": sales, "products": products} +) ``` -## Examples +### VISUALISE FROM Shorthand + +```python +spec = reader.execute( + "VISUALISE FROM data DRAW point MAPPING x AS x, y AS y", + {"data": df} +) +``` ### Mapping Styles @@ -222,72 +254,46 @@ chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30], "category": ["A", "B", "A"]}) # Explicit mapping -ggsql.render_altair(df, "VISUALISE x AS x, y AS y DRAW point") +spec = reader.execute("SELECT * FROM df VISUALISE x AS x, y AS y DRAW point", {"df": df}) # Implicit mapping (column name = aesthetic name) -ggsql.render_altair(df, "VISUALISE x, y DRAW point") +spec = reader.execute("SELECT * FROM df VISUALISE x, y DRAW point", {"df": df}) # Wildcard mapping (map all matching columns) -ggsql.render_altair(df, "VISUALISE * DRAW point") +spec = reader.execute("SELECT * FROM df VISUALISE * DRAW point", {"df": df}) # With color encoding -ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") +spec = reader.execute("SELECT * FROM df VISUALISE x, y, category AS color DRAW point", {"df": df}) ``` -### Custom Readers - -You can use any Python object with an `execute(sql: str) -> polars.DataFrame` method as a reader. This enables integration with any data source. 
+### Using Pandas DataFrames ```python +import pandas as pd import ggsql -import polars as pl - -class CSVReader: - """Custom reader that loads data from CSV files.""" - def __init__(self, data_dir: str): - self.data_dir = data_dir +# Works with pandas DataFrames (via narwhals) +df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - def execute(self, sql: str) -> pl.DataFrame: - # Simple implementation: ignore SQL and return fixed data - # A real implementation would parse SQL to determine which file to load - return pl.read_csv(f"{self.data_dir}/data.csv") +reader = ggsql.readers.DuckDB("duckdb://memory") +spec = reader.execute("SELECT * FROM data VISUALISE x, y DRAW point", {"data": df}) -# Use custom reader with prepare() -reader = CSVReader("/path/to/data") -prepared = ggsql.prepare( - "SELECT * FROM data VISUALISE x, y DRAW point", - reader -) -writer = ggsql.VegaLiteWriter() -json_output = prepared.render(writer) +writer = ggsql.writers.VegaLite() +chart = writer.render_chart(spec) ``` -**Optional methods** for custom readers: - -- `supports_register() -> bool` - Return `True` if your reader supports DataFrame registration -- `register(name: str, df: polars.DataFrame) -> None` - Register a DataFrame as a queryable table +### Handling Plain SQL ```python -class AdvancedReader: - """Custom reader with registration support.""" - - def __init__(self): - self.tables = {} - - def execute(self, sql: str) -> pl.DataFrame: - # Your SQL execution logic here - ... - - def supports_register(self) -> bool: - return True +import ggsql - def register(self, name: str, df: pl.DataFrame) -> None: - self.tables[name] = df +try: + spec = reader.execute("SELECT * FROM data", {"data": df}) +except ggsql.types.NoVisualiseError: + # Use execute_sql() for queries without VISUALISE + result_df = reader.execute_sql("SELECT * FROM data") ``` -Native readers like `DuckDBReader` use an optimized fast path, while custom Python readers are automatically bridged via IPC serialization. 
- ## Development ### Keeping in sync with the monorepo @@ -319,7 +325,7 @@ pytest tests/ -v - Python >= 3.10 - altair >= 5.0 -- narwhals >= 2.15 +- narwhals >= 1.0 - polars >= 1.0 ## License diff --git a/ggsql-python/pyproject.toml b/ggsql-python/pyproject.toml index 1a0ff8ef..1039f0c4 100644 --- a/ggsql-python/pyproject.toml +++ b/ggsql-python/pyproject.toml @@ -16,12 +16,12 @@ classifiers = [ ] dependencies = [ "altair>=5.0", - "narwhals>=2.15.0", + "narwhals>=1.0", "polars>=1.0", ] [project.optional-dependencies] -test = ["pytest>=7.0"] +test = ["pytest>=7.0", "pandas>=2.0"] dev = ["maturin>=1.4"] [tool.maturin] diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py index 06b5f720..cbcb16f7 100644 --- a/ggsql-python/python/ggsql/__init__.py +++ b/ggsql-python/python/ggsql/__init__.py @@ -1,106 +1,17 @@ -from __future__ import annotations - -import json -from typing import Any, Union +"""ggsql - SQL extension for declarative data visualization.""" -import altair -import narwhals as nw -from narwhals.typing import IntoFrame +from __future__ import annotations -from ggsql._ggsql import ( - DuckDBReader, - VegaLiteWriter, - Validated, - Prepared, - validate, - prepare, -) +from ggsql import readers, types, writers +from ggsql._ggsql import validate __all__ = [ - # Classes - "DuckDBReader", - "VegaLiteWriter", - "Validated", - "Prepared", + # Submodules + "readers", + "writers", + "types", # Functions "validate", - "prepare", - "render_altair", ] __version__ = "0.1.0" - -# Type alias for any Altair chart type -AltairChart = Union[ - altair.Chart, - altair.LayerChart, - altair.FacetChart, - altair.ConcatChart, - altair.HConcatChart, - altair.VConcatChart, - altair.RepeatChart, -] - - -def render_altair( - df: IntoFrame, - viz: str, - **kwargs: Any, -) -> AltairChart: - """Render a DataFrame with a VISUALISE spec to an Altair chart. - - Parameters - ---------- - df - Data to visualize. 
Accepts polars, pandas, or any narwhals-compatible - DataFrame. LazyFrames are collected automatically. - viz - VISUALISE spec string (e.g., "VISUALISE x, y DRAW point") - **kwargs - Additional keyword arguments passed to `from_json()`. - Common options include `validate=False` to skip schema validation. - - Returns - ------- - AltairChart - An Altair chart object (Chart, LayerChart, FacetChart, etc.). - """ - df = nw.from_native(df, pass_through=True) - - if isinstance(df, nw.LazyFrame): - df = df.collect() - - if not isinstance(df, nw.DataFrame): - raise TypeError("df must be a narwhals DataFrame or compatible type") - - pl_df = df.to_polars() - - # Create temporary reader and register data - reader = DuckDBReader("duckdb://memory") - reader.register("__data__", pl_df) - - # Build full query: SELECT * FROM __data__ + VISUALISE clause - query = f"SELECT * FROM __data__ {viz}" - - # Prepare and render - prepared = prepare(query, reader) - writer = VegaLiteWriter() - vegalite_json = prepared.render(writer) - - # Parse to determine the correct Altair class - spec = json.loads(vegalite_json) - - # Determine the correct Altair class based on spec structure - if "layer" in spec: - return altair.LayerChart.from_json(vegalite_json, **kwargs) - elif "facet" in spec or "spec" in spec: - return altair.FacetChart.from_json(vegalite_json, **kwargs) - elif "concat" in spec: - return altair.ConcatChart.from_json(vegalite_json, **kwargs) - elif "hconcat" in spec: - return altair.HConcatChart.from_json(vegalite_json, **kwargs) - elif "vconcat" in spec: - return altair.VConcatChart.from_json(vegalite_json, **kwargs) - elif "repeat" in spec: - return altair.RepeatChart.from_json(vegalite_json, **kwargs) - else: - return altair.Chart.from_json(vegalite_json, **kwargs) +version_info = (0, 1, 0) diff --git a/ggsql-python/python/ggsql/_ggsql.pyi b/ggsql-python/python/ggsql/_ggsql.pyi new file mode 100644 index 00000000..7ce1976c --- /dev/null +++ b/ggsql-python/python/ggsql/_ggsql.pyi 
@@ -0,0 +1,313 @@ +"""Type stubs for the ggsql native extension module.""" + +from typing import Any + +import polars as pl + +# ============================================================================ +# Exception Types +# ============================================================================ + + +class GgsqlError(Exception): + """Base exception for all ggsql errors.""" + + ... + + +class ParseError(GgsqlError): + """Raised when query parsing fails.""" + + ... + + +class ValidationError(GgsqlError): + """Raised when query validation fails (semantic errors).""" + + ... + + +class ReaderError(GgsqlError): + """Raised when database/data source operations fail.""" + + ... + + +class WriterError(GgsqlError): + """Raised when output generation fails.""" + + ... + + +class NoVisualiseError(GgsqlError): + """Raised when execute() is called on a query without VISUALISE clause.""" + + ... + + +# ============================================================================ +# Classes +# ============================================================================ + + +class DuckDBReader: + """DuckDB database reader for executing SQL queries and ggsql visualizations.""" + + def __init__(self, connection: str) -> None: + """Create a new DuckDB reader from a connection string. + + Parameters + ---------- + connection + Connection string. Use "duckdb://memory" for in-memory database + or "duckdb://path/to/file.db" for file-based database. + """ + ... + + def __repr__(self) -> str: ... + + def execute( + self, query: str, data: dict[str, pl.DataFrame] | None = None + ) -> Prepared: + """Execute a ggsql query with optional DataFrame registration. + + DataFrames are registered before query execution and automatically + unregistered afterward (even on error) to avoid polluting the namespace. + + Parameters + ---------- + query + The ggsql query to execute. Must contain a VISUALISE clause. + data + DataFrames to register as queryable tables. Keys are table names. 
+ + Returns + ------- + Prepared + A prepared visualization ready for rendering. + + Raises + ------ + NoVisualiseError + If the query has no VISUALISE clause. + ParseError + If query parsing fails. + ValidationError + If query validation fails. + ReaderError + If SQL execution fails. + """ + ... + + def execute_sql(self, sql: str) -> pl.DataFrame: + """Execute a SQL query and return the result as a DataFrame. + + This is for plain SQL queries without visualization. For ggsql queries + with VISUALISE clauses, use execute() instead. + + Parameters + ---------- + sql + The SQL query to execute. + + Returns + ------- + polars.DataFrame + The query result as a polars DataFrame. + """ + ... + + def register(self, name: str, df: pl.DataFrame) -> None: + """Register a DataFrame as a queryable table. + + After registration, the DataFrame can be queried by name in SQL. + Note: When using execute(), DataFrames are automatically registered + and unregistered, so manual registration is usually unnecessary. + + Parameters + ---------- + name + The table name to register under. + df + The DataFrame to register. + """ + ... + + def unregister(self, name: str) -> None: + """Unregister a table by name. + + Fails silently if the table doesn't exist. + + Parameters + ---------- + name + The table name to unregister. + """ + ... + + +class _VegaLiteWriter: + """Vega-Lite JSON output writer (internal). + + Use the Python VegaLiteWriter class which wraps this and adds render_chart(). + """ + + def __init__(self) -> None: + """Create a new Vega-Lite writer.""" + ... + + def __repr__(self) -> str: ... + + def render(self, spec: Prepared) -> str: + """Render a prepared visualization to Vega-Lite JSON. + + Parameters + ---------- + spec + The prepared visualization (from reader.execute()). + + Returns + ------- + str + The Vega-Lite JSON specification as a string. + """ + ... 
+ + +class Validated: + """Result of validate() - query inspection and validation without SQL execution.""" + + def __repr__(self) -> str: ... + + def has_visual(self) -> bool: + """Whether the query contains a VISUALISE clause.""" + ... + + def sql(self) -> str: + """The SQL portion (before VISUALISE).""" + ... + + def visual(self) -> str: + """The VISUALISE portion (raw text).""" + ... + + def valid(self) -> bool: + """Whether the query is valid (no errors).""" + ... + + def errors(self) -> list[dict[str, Any]]: + """Validation errors (fatal issues). + + Returns + ------- + list[dict] + List of error dictionaries with 'message' and optional 'location' keys. + """ + ... + + def warnings(self) -> list[dict[str, Any]]: + """Validation warnings (non-fatal issues). + + Returns + ------- + list[dict] + List of warning dictionaries with 'message' and optional 'location' keys. + """ + ... + + +class Prepared: + """Result of reader.execute(), ready for rendering.""" + + def __repr__(self) -> str: ... + + def metadata(self) -> dict[str, Any]: + """Get visualization metadata. + + Returns + ------- + dict + Dictionary with 'rows', 'columns', and 'layer_count' keys. + """ + ... + + def sql(self) -> str: + """The main SQL query that was executed.""" + ... + + def visual(self) -> str: + """The VISUALISE portion (raw text).""" + ... + + def layer_count(self) -> int: + """Number of layers.""" + ... + + def data(self) -> pl.DataFrame | None: + """Get global data (main query result).""" + ... + + def layer_data(self, index: int) -> pl.DataFrame | None: + """Get layer-specific data (from FILTER or FROM clause). + + Parameters + ---------- + index + The layer index (0-based). + """ + ... + + def stat_data(self, index: int) -> pl.DataFrame | None: + """Get stat transform data (e.g., histogram bins, density estimates). + + Parameters + ---------- + index + The layer index (0-based). + """ + ... 
+ + def layer_sql(self, index: int) -> str | None: + """Layer filter/source query, or None if using global data. + + Parameters + ---------- + index + The layer index (0-based). + """ + ... + + def stat_sql(self, index: int) -> str | None: + """Stat transform query, or None if no stat transform. + + Parameters + ---------- + index + The layer index (0-based). + """ + ... + + def warnings(self) -> list[dict[str, Any]]: + """Validation warnings from preparation.""" + ... + + +# ============================================================================ +# Functions +# ============================================================================ + + +def validate(query: str) -> Validated: + """Validate query syntax and semantics without executing SQL. + + Parameters + ---------- + query + The ggsql query to validate. + + Returns + ------- + Validated + Validation result with query inspection methods. + """ + ... diff --git a/ggsql-python/python/ggsql/readers.py b/ggsql-python/python/ggsql/readers.py new file mode 100644 index 00000000..b317eef8 --- /dev/null +++ b/ggsql-python/python/ggsql/readers.py @@ -0,0 +1,272 @@ +"""Reader classes for ggsql.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +import narwhals as nw +from narwhals.typing import IntoDataFrame + +from ggsql._ggsql import DuckDBReader as _DuckDBReader + +if TYPE_CHECKING: + import polars as pl + from ggsql._ggsql import Prepared + +__all__ = ["Reader", "DuckDB"] + + +class Reader(ABC): + """Abstract base class for ggsql readers. + + Custom reader implementations should subclass this and implement + the required abstract methods. The built-in `DuckDB` reader provides + a complete implementation. + + Examples + -------- + >>> from ggsql.readers import Reader + >>> import polars as pl + >>> + >>> class MyReader(Reader): + ... def __init__(self): + ... self._tables = {} + ... + ... 
def execute(self, query: str, data=None) -> "Prepared": + ... # Register tables, execute query, cleanup + ... ... + ... + ... def execute_sql(self, sql: str) -> pl.DataFrame: + ... # Your SQL execution logic here + ... return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + ... + ... def register(self, name: str, df) -> None: + ... self._tables[name] = df + ... + ... def unregister(self, name: str) -> None: + ... self._tables.pop(name, None) + """ + + @abstractmethod + def execute( + self, + query: str, + data: dict[str, IntoDataFrame] | None = None, + ) -> "Prepared": + """Execute a ggsql query with optional DataFrame registration. + + DataFrames should be registered before query execution and automatically + unregistered afterward (even on error) to avoid polluting the namespace. + + Parameters + ---------- + query + The ggsql query to execute. Must contain a VISUALISE clause. + data + DataFrames to register as queryable tables. Keys are table names. + + Returns + ------- + Prepared + A prepared visualization ready for rendering. + + Raises + ------ + NoVisualiseError + If the query has no VISUALISE clause. + """ + ... + + @abstractmethod + def execute_sql(self, sql: str) -> "pl.DataFrame": + """Execute a SQL query and return the result as a DataFrame. + + This is for plain SQL queries without visualization. For ggsql queries + with VISUALISE clauses, use execute() instead. + + Parameters + ---------- + sql + The SQL query to execute. + + Returns + ------- + polars.DataFrame + The query result as a polars DataFrame. + """ + ... + + @abstractmethod + def register(self, name: str, df: IntoDataFrame) -> None: + """Register a DataFrame as a queryable table. + + Parameters + ---------- + name + The table name to register under. + df + The DataFrame to register. + """ + ... + + @abstractmethod + def unregister(self, name: str) -> None: + """Unregister a table by name. + + Should fail silently if the table doesn't exist. 
+
+        Parameters
+        ----------
+        name
+            The table name to unregister.
+        """
+        ...
+
+    def __enter__(self) -> "Reader":
+        """Enter context manager."""
+        return self
+
+    def __exit__(self, _exc_type, _exc_val, _exc_tb) -> None:
+        """Exit context manager."""
+        pass
+
+
+def _to_polars(df: IntoDataFrame) -> "pl.DataFrame":
+    """Convert any narwhals-compatible DataFrame to polars."""
+    nw_df = nw.from_native(df, pass_through=True)
+
+    if isinstance(nw_df, nw.LazyFrame):
+        nw_df = nw_df.collect()
+
+    if not isinstance(nw_df, nw.DataFrame):
+        raise TypeError("df must be a DataFrame (polars, pandas, pyarrow, etc.)")
+
+    return nw_df.to_polars()
+
+
+class DuckDB(Reader):
+    """DuckDB database reader for executing SQL queries and ggsql visualizations.
+
+    Creates an in-memory or file-based DuckDB connection that can execute
+    SQL queries and register DataFrames as queryable tables.
+
+    Accepts any narwhals-compatible DataFrame (polars, pandas, pyarrow, etc.)
+    for data registration.
+
+    Examples
+    --------
+    >>> import ggsql.readers
+    >>> reader = ggsql.readers.DuckDB("duckdb://memory")
+    >>> reader = ggsql.readers.DuckDB("duckdb:///path/to/file.db")
+    """
+
+    def __init__(self, connection: str) -> None:
+        """Create a new DuckDB reader from a connection string.
+
+        Parameters
+        ----------
+        connection
+            Connection string. Use "duckdb://memory" for in-memory database
+            or "duckdb:///path/to/file.db" for file-based database.
+        """
+        self._inner = _DuckDBReader(connection)
+        self._connection = connection
+
+    def __repr__(self) -> str:
+        return f"<DuckDB connection='{self._connection}'>"
+
+    def execute(
+        self,
+        query: str,
+        data: dict[str, IntoDataFrame] | None = None,
+    ) -> "Prepared":
+        """Execute a ggsql query with optional DataFrame registration.
+
+        DataFrames are registered before query execution and automatically
+        unregistered afterward (even on error) to avoid polluting the namespace.
+
+        Parameters
+        ----------
+        query
+            The ggsql query to execute. Must contain a VISUALISE clause.
+ data + DataFrames to register as queryable tables. Keys are table names. + Accepts any narwhals-compatible DataFrame (polars, pandas, pyarrow, etc.). + + Returns + ------- + Prepared + A prepared visualization ready for rendering. + + Raises + ------ + NoVisualiseError + If the query has no VISUALISE clause. + ValueError + If parsing, validation, or SQL execution fails. + """ + polars_data: dict[str, "pl.DataFrame"] | None = None + if data is not None: + polars_data = {name: _to_polars(df) for name, df in data.items()} + + return self._inner.execute(query, polars_data) + + def execute_sql(self, sql: str) -> "pl.DataFrame": + """Execute a SQL query and return the result as a DataFrame. + + This is for plain SQL queries without visualization. For ggsql queries + with VISUALISE clauses, use execute() instead. + + Parameters + ---------- + sql + The SQL query to execute. + + Returns + ------- + polars.DataFrame + The query result as a polars DataFrame. + """ + return self._inner.execute_sql(sql) + + def register(self, name: str, df: IntoDataFrame) -> None: + """Register a DataFrame as a queryable table. + + After registration, the DataFrame can be queried by name in SQL. + Note: When using execute(), DataFrames are automatically registered + and unregistered, so manual registration is usually unnecessary. + + Parameters + ---------- + name + The table name to register under. + df + The DataFrame to register. Accepts any narwhals-compatible + DataFrame (polars, pandas, pyarrow, etc.). + """ + self._inner.register(name, _to_polars(df)) + + def unregister(self, name: str) -> None: + """Unregister a table by name. + + Fails silently if the table doesn't exist. + + Parameters + ---------- + name + The table name to unregister. + """ + self._inner.unregister(name) + + def __enter__(self) -> "DuckDB": + """Enter context manager.""" + return self + + def __exit__(self, _exc_type, _exc_val, _exc_tb) -> None: + """Exit context manager. 
+
+
+        Currently a no-op since DuckDB connections don't require explicit cleanup,
+        but future-proofs the API for connection management.
+        """
+        pass
diff --git a/ggsql-python/python/ggsql/types.py b/ggsql-python/python/ggsql/types.py
new file mode 100644
index 00000000..a56fbccd
--- /dev/null
+++ b/ggsql-python/python/ggsql/types.py
@@ -0,0 +1,26 @@
+"""Type classes and exceptions for ggsql."""
+
+from ggsql._ggsql import (
+    GgsqlError,
+    NoVisualiseError,
+    ParseError,
+    Prepared,
+    ReaderError,
+    Validated,
+    ValidationError,
+    WriterError,
+)
+
+__all__ = [
+    # Base exception
+    "GgsqlError",
+    # Specific exceptions
+    "ParseError",
+    "ValidationError",
+    "ReaderError",
+    "WriterError",
+    "NoVisualiseError",
+    # Type classes
+    "Prepared",
+    "Validated",
+]
diff --git a/ggsql-python/python/ggsql/writers.py b/ggsql-python/python/ggsql/writers.py
new file mode 100644
index 00000000..a84d96a2
--- /dev/null
+++ b/ggsql-python/python/ggsql/writers.py
@@ -0,0 +1,112 @@
+"""Writer classes for ggsql."""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Union
+
+import altair
+
+from ggsql._ggsql import _VegaLiteWriter, Prepared
+
+__all__ = ["VegaLite", "AltairChart"]
+
+# Type alias for any Altair chart type
+AltairChart = Union[
+    altair.Chart,
+    altair.LayerChart,
+    altair.FacetChart,
+    altair.ConcatChart,
+    altair.HConcatChart,
+    altair.VConcatChart,
+    altair.RepeatChart,
+]
+
+
+class VegaLite:
+    """Vega-Lite JSON output writer.
+
+    Converts prepared visualization specifications to Vega-Lite v6 JSON.
+
+    Examples
+    --------
+    >>> writer = ggsql.writers.VegaLite()
+    >>> json_str = writer.render_json(spec)
+    >>> chart = writer.render_chart(spec)
+    """
+
+    def __init__(self) -> None:
+        """Create a new Vega-Lite writer."""
+        self._inner = _VegaLiteWriter()
+
+    def __repr__(self) -> str:
+        return "<VegaLite>"
+
+    def render_json(self, spec: Prepared) -> str:
+        """Render a prepared visualization to Vega-Lite JSON.
+ + Parameters + ---------- + spec : Prepared + The prepared visualization (from reader.execute()). + + Returns + ------- + str + The Vega-Lite JSON specification as a string. + + Raises + ------ + WriterError + If rendering fails. + """ + return self._inner.render(spec) + + def render_chart(self, spec: Prepared, **kwargs: Any) -> AltairChart: + """Render a prepared visualization to an Altair chart object. + + Parameters + ---------- + spec : Prepared + The prepared visualization (from reader.execute()). + **kwargs + Additional keyword arguments passed to Altair's `from_json()`. + Common options include `validate=False` to skip schema validation. + Note: `validate=False` is used by default since ggsql produces + Vega-Lite v6 specs. + + Returns + ------- + AltairChart + An Altair chart object (Chart, LayerChart, FacetChart, etc.) + appropriate for the visualization structure. + + Raises + ------ + WriterError + If rendering fails. + """ + json_str = self._inner.render(spec) + + # Default to validate=False since ggsql produces v6 specs + if "validate" not in kwargs: + kwargs["validate"] = False + + # Parse the JSON to determine the chart type + spec_dict = json.loads(json_str) + + # Determine the correct Altair class based on spec structure + if "layer" in spec_dict: + return altair.LayerChart.from_json(json_str, **kwargs) + elif "facet" in spec_dict or "spec" in spec_dict: + return altair.FacetChart.from_json(json_str, **kwargs) + elif "concat" in spec_dict: + return altair.ConcatChart.from_json(json_str, **kwargs) + elif "hconcat" in spec_dict: + return altair.HConcatChart.from_json(json_str, **kwargs) + elif "vconcat" in spec_dict: + return altair.VConcatChart.from_json(json_str, **kwargs) + elif "repeat" in spec_dict: + return altair.RepeatChart.from_json(json_str, **kwargs) + else: + return altair.Chart.from_json(json_str, **kwargs) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index b9d6496d..c688f412 100644 --- a/ggsql-python/src/lib.rs +++ 
b/ggsql-python/src/lib.rs @@ -8,11 +8,39 @@ use std::io::Cursor; use ggsql::api::{prepare as rust_prepare, validate as rust_validate, Prepared, ValidationWarning}; use ggsql::reader::{DuckDBReader as RustDuckDBReader, Reader}; -use ggsql::writer::VegaLiteWriter as RustVegaLiteWriter; -use ggsql::GgsqlError; +use ggsql::writer::{VegaLiteWriter as RustVegaLiteWriter, Writer}; use polars::prelude::{DataFrame, IpcReader, IpcWriter, SerReader, SerWriter}; +// ============================================================================ +// Custom Exception Types +// ============================================================================ + +// Base exception for all ggsql errors +pyo3::create_exception!(ggsql, PyGgsqlError, pyo3::exceptions::PyException); + +// Specific exception types +pyo3::create_exception!(ggsql, PyParseError, PyGgsqlError); +pyo3::create_exception!(ggsql, PyValidationError, PyGgsqlError); +pyo3::create_exception!(ggsql, PyReaderError, PyGgsqlError); +pyo3::create_exception!(ggsql, PyWriterError, PyGgsqlError); +pyo3::create_exception!(ggsql, NoVisualiseError, PyGgsqlError); + +/// Convert a GgsqlError to the appropriate Python exception +fn ggsql_error_to_pyerr(e: ggsql::GgsqlError) -> PyErr { + use ggsql::GgsqlError; + match e { + GgsqlError::ParseError(msg) => PyParseError::new_err(msg), + GgsqlError::ValidationError(msg) => PyValidationError::new_err(msg), + GgsqlError::ReaderError(msg) => PyReaderError::new_err(msg), + GgsqlError::WriterError(msg) => PyWriterError::new_err(msg), + GgsqlError::NoVisualise => { + NoVisualiseError::new_err("Query has no VISUALISE clause".to_string()) + } + GgsqlError::InternalError(msg) => PyGgsqlError::new_err(format!("Internal error: {}", msg)), + } +} + // ============================================================================ // Helper Functions for DataFrame Conversion // ============================================================================ @@ -53,31 +81,6 @@ fn py_to_polars(py: Python<'_>, df: 
&Bound<'_, PyAny>) -> PyResult { }) } -/// Convert a Python polars DataFrame to Rust DataFrame - for use inside Python::attach -/// This variant is used by PyReaderBridge where we already hold the GIL. -fn py_to_polars_inner(df: &Bound<'_, PyAny>) -> PyResult { - let py = df.py(); - let io = py.import("io")?; - let bytes_io = io.call_method0("BytesIO")?; - - df.call_method1("write_ipc", (&bytes_io,)).map_err(|_| { - PyErr::new::( - "Reader.execute() must return a polars.DataFrame", - ) - })?; - - bytes_io.call_method1("seek", (0i64,))?; - let ipc_bytes: Vec = bytes_io.call_method0("read")?.extract()?; - let cursor = Cursor::new(ipc_bytes); - - IpcReader::new(cursor).finish().map_err(|e| { - PyErr::new::(format!( - "Failed to deserialize DataFrame: {}", - e - )) - }) -} - /// Convert validation errors/warnings to a Python list of dicts fn errors_to_pylist( py: Python<'_>, @@ -114,172 +117,113 @@ fn warnings_to_pylist(py: Python<'_>, warnings: &[ValidationWarning]) -> PyResul errors_to_pylist(py, &items) } -// ============================================================================ -// PyReaderBridge - Bridges Python reader objects to Rust Reader trait -// ============================================================================ - -/// Bridges a Python reader object to the Rust Reader trait. -/// -/// This allows any Python object with an `execute(sql: str) -> polars.DataFrame` -/// method to be used as a ggsql reader. 
-struct PyReaderBridge { - obj: Py, -} - -impl Reader for PyReaderBridge { - fn execute(&self, sql: &str) -> ggsql::Result { - Python::attach(|py| { - let bound = self.obj.bind(py); - let result = bound - .call_method1("execute", (sql,)) - .map_err(|e| GgsqlError::ReaderError(format!("Reader.execute() failed: {}", e)))?; - py_to_polars_inner(&result).map_err(|e| GgsqlError::ReaderError(e.to_string())) - }) - } - - fn supports_register(&self) -> bool { - Python::attach(|py| { - self.obj - .bind(py) - .call_method0("supports_register") - .and_then(|r| r.extract::()) - .unwrap_or(false) - }) - } - - fn register(&mut self, name: &str, df: DataFrame) -> ggsql::Result<()> { - Python::attach(|py| { - let py_df = - polars_to_py(py, &df).map_err(|e| GgsqlError::ReaderError(e.to_string()))?; - self.obj - .bind(py) - .call_method1("register", (name, py_df)) - .map_err(|e| GgsqlError::ReaderError(format!("Reader.register() failed: {}", e)))?; - Ok(()) - }) - } -} - -// ============================================================================ -// Native Reader Detection Macro -// ============================================================================ - -/// Macro to try native readers and fall back to bridge. -/// Adding new native readers = add to the macro invocation list. -macro_rules! try_native_readers { - ($query:expr, $reader:expr, $($native_type:ty),*) => {{ - $( - if let Ok(native) = $reader.downcast::<$native_type>() { - return rust_prepare($query, &native.borrow().inner) - .map(|p| PyPrepared { inner: p }) - .map_err(|e| PyErr::new::(e.to_string())); - } - )* - }}; -} - // ============================================================================ // PyDuckDBReader // ============================================================================ -/// DuckDB database reader for executing SQL queries. +/// DuckDB database reader for executing SQL queries and ggsql visualizations. 
 ///
 /// Creates an in-memory or file-based DuckDB connection that can execute
 /// SQL queries and register DataFrames as queryable tables.
-///
-/// Examples
-/// --------
-/// >>> reader = DuckDBReader("duckdb://memory")
-/// >>> df = reader.execute("SELECT 1 as x, 2 as y")
-///
-/// >>> reader = DuckDBReader("duckdb://memory")
-/// >>> reader.register("data", pl.DataFrame({"x": [1, 2, 3]}))
-/// >>> df = reader.execute("SELECT * FROM data WHERE x > 1")
 #[pyclass(name = "DuckDBReader", unsendable)]
 struct PyDuckDBReader {
     inner: RustDuckDBReader,
+    connection: String,
 }
 
 #[pymethods]
 impl PyDuckDBReader {
     /// Create a new DuckDB reader from a connection string.
-    ///
-    /// Parameters
-    /// ----------
-    /// connection : str
-    ///     Connection string. Use "duckdb://memory" for in-memory database
-    ///     or "duckdb://path/to/file.db" for file-based database.
-    ///
-    /// Returns
-    /// -------
-    /// DuckDBReader
-    ///     A configured DuckDB reader instance.
-    ///
-    /// Raises
-    /// ------
-    /// ValueError
-    ///     If the connection string is invalid or the database cannot be opened.
     #[new]
     fn new(connection: &str) -> PyResult<Self> {
-        let inner = RustDuckDBReader::from_connection_string(connection)
-            .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?;
-        Ok(Self { inner })
+        let inner =
+            RustDuckDBReader::from_connection_string(connection).map_err(ggsql_error_to_pyerr)?;
+        Ok(Self {
+            inner,
+            connection: connection.to_string(),
+        })
+    }
+
+    fn __repr__(&self) -> String {
+        format!("<DuckDBReader connection='{}'>", self.connection)
+    }
+
+    /// Execute a ggsql query with optional DataFrame registration.
+    ///
+    /// DataFrames are registered before query execution and automatically
+    /// unregistered afterward (even on error) to avoid polluting the namespace.
+    #[pyo3(signature = (query, data=None))]
+    fn execute(
+        &mut self,
+        py: Python<'_>,
+        query: &str,
+        data: Option<&Bound<'_, PyDict>>,
+    ) -> PyResult<PyPrepared> {
+        // First, validate that the query has a VISUALISE clause
+        let validated = rust_validate(query).map_err(ggsql_error_to_pyerr)?;
+
+        if !validated.has_visual() {
+            return Err(NoVisualiseError::new_err(
+                "Query has no VISUALISE clause. Use execute_sql() for plain SQL queries.",
+            ));
+        }
+
+        // Collect table names to register
+        let mut table_names: Vec<String> = Vec::new();
+
+        // Register DataFrames
+        if let Some(data_dict) = data {
+            for (key, value) in data_dict.iter() {
+                let name: String = key.extract()?;
+                let rust_df = py_to_polars(py, &value)?;
+                self.inner
+                    .register(&name, rust_df)
+                    .map_err(ggsql_error_to_pyerr)?;
+                table_names.push(name);
+            }
+        }
+
+        // Execute the query, ensuring cleanup happens even on error
+        let result = rust_prepare(query, &self.inner);
+
+        // Always unregister tables (cleanup in finally-style)
+        for name in &table_names {
+            self.inner.unregister(name);
+        }
+
+        // Return the result (or propagate the error)
+        result
+            .map(|p| PyPrepared { inner: p })
+            .map_err(ggsql_error_to_pyerr)
+    }
+
+    /// Execute a SQL query and return the result as a DataFrame.
+    ///
+    /// This is for plain SQL queries without visualization. For ggsql queries
+    /// with VISUALISE clauses, use execute() instead.
+    #[pyo3(name = "execute_sql")]
+    fn execute_sql(&self, py: Python<'_>, sql: &str) -> PyResult<Py<PyAny>> {
+        let df = self.inner.execute_sql(sql).map_err(ggsql_error_to_pyerr)?;
+        polars_to_py(py, &df)
+    }
 
     /// Register a DataFrame as a queryable table.
     ///
     /// After registration, the DataFrame can be queried by name in SQL.
-    ///
-    /// Parameters
-    /// ----------
-    /// name : str
-    ///     The table name to register under.
-    /// df : polars.DataFrame
-    ///     The DataFrame to register. Must be a polars DataFrame.
-    ///
-    /// Raises
-    /// ------
-    /// ValueError
-    ///     If registration fails or the table name is invalid.
+ /// Note: When using execute(), DataFrames are automatically registered + /// and unregistered, so manual registration is usually unnecessary. fn register(&mut self, py: Python<'_>, name: &str, df: &Bound<'_, PyAny>) -> PyResult<()> { let rust_df = py_to_polars(py, df)?; self.inner .register(name, rust_df) - .map_err(|e| PyErr::new::(e.to_string())) - } - - /// Execute a SQL query and return the result as a DataFrame. - /// - /// Parameters - /// ---------- - /// sql : str - /// The SQL query to execute. - /// - /// Returns - /// ------- - /// polars.DataFrame - /// The query result as a polars DataFrame. - /// - /// Raises - /// ------ - /// ValueError - /// If the SQL is invalid or execution fails. - fn execute(&self, py: Python<'_>, sql: &str) -> PyResult> { - let df = self - .inner - .execute(sql) - .map_err(|e| PyErr::new::(e.to_string()))?; - polars_to_py(py, &df) + .map_err(ggsql_error_to_pyerr) } - /// Check if this reader supports DataFrame registration. + /// Unregister a table by name. /// - /// Returns - /// ------- - /// bool - /// True if register() is supported, False otherwise. - fn supports_register(&self) -> bool { - self.inner.supports_register() + /// Fails silently if the table doesn't exist. + fn unregister(&mut self, name: &str) { + self.inner.unregister(name); } } @@ -287,16 +231,11 @@ impl PyDuckDBReader { // PyVegaLiteWriter // ============================================================================ -/// Vega-Lite JSON output writer. +/// Vega-Lite JSON output writer (internal). /// /// Converts prepared visualization specifications to Vega-Lite v6 JSON. -/// -/// Examples -/// -------- -/// >>> writer = VegaLiteWriter() -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> json_output = prepared.render(writer) -#[pyclass(name = "VegaLiteWriter")] +/// Use the Python VegaLiteWriter class which wraps this and adds render_chart(). 
+#[pyclass(name = "_VegaLiteWriter")]
 struct PyVegaLiteWriter {
     inner: RustVegaLiteWriter,
 }
@@ -304,17 +243,21 @@ struct PyVegaLiteWriter {
 #[pymethods]
 impl PyVegaLiteWriter {
     /// Create a new Vega-Lite writer.
-    ///
-    /// Returns
-    /// -------
-    /// VegaLiteWriter
-    ///     A configured Vega-Lite writer instance.
     #[new]
     fn new() -> Self {
         Self {
            inner: RustVegaLiteWriter::new(),
        }
    }
+
+    fn __repr__(&self) -> &'static str {
+        "<VegaLiteWriter>"
+    }
+
+    /// Render a prepared visualization to Vega-Lite JSON.
+    fn render(&self, spec: &PyPrepared) -> PyResult<String> {
+        self.inner.render(&spec.inner).map_err(ggsql_error_to_pyerr)
+    }
 }
 
 // ============================================================================
@@ -337,62 +280,41 @@ struct PyValidated {
 
 #[pymethods]
 impl PyValidated {
+    fn __repr__(&self) -> String {
+        format!(
+            "<Validated valid={} has_visual={} errors={}>",
+            self.valid,
+            self.has_visual,
+            self.errors.len()
+        )
+    }
+
     /// Whether the query contains a VISUALISE clause.
-    ///
-    /// Returns
-    /// -------
-    /// bool
-    ///     True if the query has a VISUALISE clause.
     fn has_visual(&self) -> bool {
         self.has_visual
     }
 
     /// The SQL portion (before VISUALISE).
-    ///
-    /// Returns
-    /// -------
-    /// str
-    ///     The SQL part of the query.
     fn sql(&self) -> &str {
         &self.sql
     }
 
     /// The VISUALISE portion (raw text).
-    ///
-    /// Returns
-    /// -------
-    /// str
-    ///     The VISUALISE part of the query.
     fn visual(&self) -> &str {
         &self.visual
     }
 
     /// Whether the query is valid (no errors).
-    ///
-    /// Returns
-    /// -------
-    /// bool
-    ///     True if the query is syntactically and semantically valid.
     fn valid(&self) -> bool {
         self.valid
     }
 
     /// Validation errors (fatal issues).
-    ///
-    /// Returns
-    /// -------
-    /// list[dict]
-    ///     List of error dictionaries with 'message' and optional 'location' keys.
     fn errors(&self, py: Python<'_>) -> PyResult<Py<PyList>> {
         errors_to_pylist(py, &self.errors)
     }
 
     /// Validation warnings (non-fatal issues).
-    ///
-    /// Returns
-    /// -------
-    /// list[dict]
-    ///     List of warning dictionaries with 'message' and optional 'location' keys.
     fn warnings(&self, py: Python<'_>) -> PyResult<Py<PyList>> {
         errors_to_pylist(py, &self.warnings)
     }
 }
@@ -402,16 +324,10 @@ impl PyValidated {
 // ============================================================================
 // PyPrepared
 // ============================================================================
 
-/// Result of prepare(), ready for rendering.
+/// Result of reader.execute(), ready for rendering.
 ///
 /// Contains the resolved plot specification, data, and metadata.
-/// Use render() to generate Vega-Lite JSON output.
-///
-/// Examples
-/// --------
-/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader)
-/// >>> print(f"Rows: {prepared.metadata()['rows']}")
-/// >>> json_output = prepared.render(VegaLiteWriter())
+/// Use writer.render(spec) or writer.render_chart(spec) to generate output.
 #[pyclass(name = "Prepared")]
 struct PyPrepared {
     inner: Prepared,
@@ -419,34 +335,17 @@ struct PyPrepared {
 
 #[pymethods]
 impl PyPrepared {
-    /// Render to output format (Vega-Lite JSON).
-    ///
-    /// Parameters
-    /// ----------
-    /// writer : VegaLiteWriter
-    ///     The writer to use for rendering.
-    ///
-    /// Returns
-    /// -------
-    /// str
-    ///     The Vega-Lite JSON specification as a string.
-    ///
-    /// Raises
-    /// ------
-    /// ValueError
-    ///     If rendering fails.
-    fn render(&self, writer: &PyVegaLiteWriter) -> PyResult<String> {
-        self.inner
-            .render(&writer.inner)
-            .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))
+    fn __repr__(&self) -> String {
+        let m = self.inner.metadata();
+        format!(
+            "<Prepared rows={} columns={} layers={}>",
+            m.rows,
+            m.columns.len(),
+            m.layer_count
+        )
     }
 
     /// Get visualization metadata.
-    ///
-    /// Returns
-    /// -------
-    /// dict
-    ///     Dictionary with 'rows', 'columns', and 'layer_count' keys.
     fn metadata(&self, py: Python<'_>) -> PyResult<Py<PyDict>> {
         let m = self.inner.metadata();
         let dict = PyDict::new(py);
@@ -457,56 +356,26 @@ impl PyPrepared {
     }
 
     /// The main SQL query that was executed.
- /// - /// Returns - /// ------- - /// str - /// The SQL query string. fn sql(&self) -> &str { self.inner.sql() } /// The VISUALISE portion (raw text). - /// - /// Returns - /// ------- - /// str - /// The VISUALISE clause text. fn visual(&self) -> &str { self.inner.visual() } /// Number of layers. - /// - /// Returns - /// ------- - /// int - /// The number of DRAW clauses in the visualization. fn layer_count(&self) -> usize { self.inner.layer_count() } /// Get global data (main query result). - /// - /// Returns - /// ------- - /// polars.DataFrame | None - /// The main query result DataFrame, or None if not available. fn data(&self, py: Python<'_>) -> PyResult>> { self.inner.data().map(|df| polars_to_py(py, df)).transpose() } /// Get layer-specific data (from FILTER or FROM clause). - /// - /// Parameters - /// ---------- - /// index : int - /// The layer index (0-based). - /// - /// Returns - /// ------- - /// polars.DataFrame | None - /// The layer-specific DataFrame, or None if the layer uses global data. fn layer_data(&self, py: Python<'_>, index: usize) -> PyResult>> { self.inner .layer_data(index) @@ -515,16 +384,6 @@ impl PyPrepared { } /// Get stat transform data (e.g., histogram bins, density estimates). - /// - /// Parameters - /// ---------- - /// index : int - /// The layer index (0-based). - /// - /// Returns - /// ------- - /// polars.DataFrame | None - /// The stat transform DataFrame, or None if no stat transform. fn stat_data(&self, py: Python<'_>, index: usize) -> PyResult>> { self.inner .stat_data(index) @@ -533,41 +392,16 @@ impl PyPrepared { } /// Layer filter/source query, or None if using global data. - /// - /// Parameters - /// ---------- - /// index : int - /// The layer index (0-based). - /// - /// Returns - /// ------- - /// str | None - /// The filter SQL query, or None if the layer uses global data directly. 
fn layer_sql(&self, index: usize) -> Option { self.inner.layer_sql(index).map(|s| s.to_string()) } /// Stat transform query, or None if no stat transform. - /// - /// Parameters - /// ---------- - /// index : int - /// The layer index (0-based). - /// - /// Returns - /// ------- - /// str | None - /// The stat transform SQL query, or None if no stat transform. fn stat_sql(&self, index: usize) -> Option { self.inner.stat_sql(index).map(|s| s.to_string()) } /// Validation warnings from preparation. - /// - /// Returns - /// ------- - /// list[dict] - /// List of warning dictionaries with 'message' and optional 'location' keys. fn warnings(&self, py: Python<'_>) -> PyResult> { warnings_to_pylist(py, self.inner.warnings()) } @@ -578,25 +412,9 @@ impl PyPrepared { // ============================================================================ /// Validate query syntax and semantics without executing SQL. -/// -/// Parameters -/// ---------- -/// query : str -/// The ggsql query to validate. -/// -/// Returns -/// ------- -/// Validated -/// Validation result with query inspection methods. -/// -/// Raises -/// ------ -/// ValueError -/// If validation fails unexpectedly (not for syntax errors, which are captured). #[pyfunction] fn validate(query: &str) -> PyResult { - let v = rust_validate(query) - .map_err(|e| PyErr::new::(e.to_string()))?; + let v = rust_validate(query).map_err(ggsql_error_to_pyerr)?; Ok(PyValidated { sql: v.sql().to_string(), @@ -626,61 +444,20 @@ fn validate(query: &str) -> PyResult { }) } -/// Prepare a query for visualization. Main entry point for the Rust API. -/// -/// Parameters -/// ---------- -/// query : str -/// The ggsql query to prepare. -/// reader : DuckDBReader | object -/// The database reader to execute SQL against. Can be a native DuckDBReader -/// for optimal performance, or any Python object with an -/// `execute(sql: str) -> polars.DataFrame` method. 
-/// -/// Returns -/// ------- -/// Prepared -/// A prepared visualization ready for rendering. -/// -/// Raises -/// ------ -/// ValueError -/// If parsing, validation, or SQL execution fails. -/// -/// Examples -/// -------- -/// >>> # Using native reader (fast path) -/// >>> reader = DuckDBReader("duckdb://memory") -/// >>> prepared = prepare("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader) -/// >>> json_output = prepared.render(VegaLiteWriter()) -/// -/// >>> # Using custom Python reader -/// >>> class MyReader: -/// ... def execute(self, sql: str) -> pl.DataFrame: -/// ... return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -/// >>> reader = MyReader() -/// >>> prepared = prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) -#[pyfunction] -fn prepare(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { - // Fast path: try all known native reader types - // Add new native readers to this list as they're implemented - try_native_readers!(query, reader, PyDuckDBReader); - - // Bridge path: wrap Python object as Reader - let bridge = PyReaderBridge { - obj: reader.clone().unbind(), - }; - rust_prepare(query, &bridge) - .map(|p| PyPrepared { inner: p }) - .map_err(|e| PyErr::new::(e.to_string())) -} - // ============================================================================ // Module Registration // ============================================================================ #[pymodule] fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { + // Exception classes (exported without the Py prefix) + m.add("GgsqlError", m.py().get_type::())?; + m.add("ParseError", m.py().get_type::())?; + m.add("ValidationError", m.py().get_type::())?; + m.add("ReaderError", m.py().get_type::())?; + m.add("WriterError", m.py().get_type::())?; + m.add("NoVisualiseError", m.py().get_type::())?; + // Classes m.add_class::()?; m.add_class::()?; @@ -689,7 +466,6 @@ fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { // Functions 
m.add_function(wrap_pyfunction!(validate, m)?)?; - m.add_function(wrap_pyfunction!(prepare, m)?)?; Ok(()) } diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 970dcf5a..2c642154 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -1,9 +1,10 @@ """Tests for ggsql Python bindings. These tests focus on Python-specific logic: -- DataFrame conversion via narwhals -- Return type handling -- Two-stage API (prepare -> render) +- DataFrame conversion +- New API: reader.execute() -> writer.render_json() +- NoVisualiseError handling +- Two-stage API (execute -> render) Rust logic (parsing, Vega-Lite generation) is tested in the Rust test suite. """ @@ -53,71 +54,129 @@ def test_missing_required_aesthetic(self): assert any("y" in e["message"] for e in errors) -class TestDuckDBReader: - """Tests for DuckDBReader class.""" +class TestDuckDB: + """Tests for DuckDB class.""" def test_create_in_memory(self): - reader = ggsql.DuckDBReader("duckdb://memory") + reader = ggsql.readers.DuckDB("duckdb://memory") assert reader is not None - def test_execute_simple_query(self): - reader = ggsql.DuckDBReader("duckdb://memory") - df = reader.execute("SELECT 1 AS x, 2 AS y") + def test_execute_sql_simple_query(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + df = reader.execute_sql("SELECT 1 AS x, 2 AS y") assert isinstance(df, pl.DataFrame) assert df.shape == (1, 2) assert list(df.columns) == ["x", "y"] def test_register_and_query(self): - reader = ggsql.DuckDBReader("duckdb://memory") + reader = ggsql.readers.DuckDB("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) reader.register("my_data", df) - result = reader.execute("SELECT * FROM my_data WHERE x > 1") + result = reader.execute_sql("SELECT * FROM my_data WHERE x > 1") assert isinstance(result, pl.DataFrame) assert result.shape == (2, 2) - def test_supports_register(self): - reader = ggsql.DuckDBReader("duckdb://memory") - assert 
reader.supports_register() is True + def test_unregister(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3]}) + reader.register("test_table", df) + + # Table should exist + result = reader.execute_sql("SELECT * FROM test_table") + assert result.shape[0] == 3 + + # Unregister + reader.unregister("test_table") + + # Table should no longer exist + with pytest.raises(ggsql.types.ReaderError): + reader.execute_sql("SELECT * FROM test_table") + + def test_unregister_nonexistent_silent(self): + """Unregistering a non-existent table should not raise.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + # Should not raise + reader.unregister("nonexistent_table") def test_invalid_connection_string(self): - with pytest.raises(ValueError): - ggsql.DuckDBReader("invalid://connection") + with pytest.raises(ggsql.types.ReaderError): + ggsql.readers.DuckDB("invalid://connection") -class TestVegaLiteWriter: - """Tests for VegaLiteWriter class.""" +class TestVegaLite: + """Tests for VegaLite class.""" def test_create_writer(self): - writer = ggsql.VegaLiteWriter() + writer = ggsql.writers.VegaLite() assert writer is not None -class TestPrepare: - """Tests for prepare() function.""" +class TestExecute: + """Tests for reader.execute() method.""" - def test_prepare_simple_query(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + def test_execute_simple_query(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert prepared is not None assert prepared.layer_count() == 1 - def test_prepare_with_registered_data(self): - reader = ggsql.DuckDBReader("duckdb://memory") + def test_execute_with_data_dict(self): + reader = ggsql.readers.DuckDB("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - reader.register("data", df) - prepared = 
ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) assert prepared.metadata()["rows"] == 3 - def test_prepare_metadata(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( + def test_execute_with_multiple_tables(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + sales = pl.DataFrame({"id": [1, 2], "product_id": [1, 1]}) + products = pl.DataFrame({"id": [1], "name": ["Widget"]}) + + prepared = reader.execute( + """ + SELECT s.id, p.name FROM sales s + JOIN products p ON s.product_id = p.id + VISUALISE id AS x, name AS color DRAW bar + """, + {"sales": sales, "products": products}, + ) + assert prepared.metadata()["rows"] == 2 + + def test_execute_tables_unregistered_after(self): + """Tables should be unregistered after execute().""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + + # Execute with data dict + reader.execute("SELECT * FROM data VISUALISE x, y DRAW point", {"data": df}) + + # Table should no longer exist + with pytest.raises(ggsql.types.ReaderError): + reader.execute_sql("SELECT * FROM data") + + def test_execute_tables_unregistered_on_error(self): + """Tables should be unregistered even if execute() fails.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3]}) # Missing 'y' column + + # This should fail because we reference 'y' which doesn't exist + with pytest.raises(ggsql.types.ValidationError): + reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + + # Table should still be unregistered + with pytest.raises(ggsql.types.ReaderError): + reader.execute_sql("SELECT * FROM data") + + def test_execute_metadata(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute( "SELECT * FROM (VALUES (1, 10), (2, 20), (3, 30)) AS t(x, y) " - "VISUALISE x, 
y DRAW point", - reader, + "VISUALISE x, y DRAW point" ) metadata = prepared.metadata() @@ -126,46 +185,53 @@ def test_prepare_metadata(self): assert "y" in metadata["columns"] assert metadata["layer_count"] == 1 - def test_prepare_sql_accessor(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + def test_execute_sql_accessor(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert "SELECT" in prepared.sql() - def test_prepare_visual_accessor(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + def test_execute_visual_accessor(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") assert "VISUALISE" in prepared.visual() - def test_prepare_data_accessor(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + def test_execute_data_accessor(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") data = prepared.data() assert isinstance(data, pl.DataFrame) assert data.shape == (1, 2) - def test_prepare_without_visualise_fails(self): - reader = ggsql.DuckDBReader("duckdb://memory") - with pytest.raises(ValueError): - ggsql.prepare("SELECT 1 AS x, 2 AS y", reader) +class TestNoVisualiseError: + """Tests for NoVisualiseError exception.""" -class TestPreparedRender: - """Tests for Prepared.render() method.""" + def test_execute_without_visualise_raises(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + with pytest.raises(ggsql.types.NoVisualiseError): + reader.execute("SELECT 1 AS x, 2 AS y") + + def 
test_novisualise_error_message(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + with pytest.raises(ggsql.types.NoVisualiseError) as exc_info: + reader.execute("SELECT 1 AS x, 2 AS y") + assert "VISUALISE" in str(exc_info.value) + assert "execute_sql" in str(exc_info.value) + + def test_novisualise_error_is_exception(self): + """NoVisualiseError should be a proper exception type.""" + assert issubclass(ggsql.types.NoVisualiseError, Exception) + + +class TestWriterRender: + """Tests for VegaLite.render_json() method.""" def test_render_to_vegalite(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) - writer = ggsql.VegaLiteWriter() + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + writer = ggsql.writers.VegaLite() - result = prepared.render(writer) + result = writer.render_json(prepared) assert isinstance(result, str) spec = json.loads(result) @@ -173,116 +239,71 @@ def test_render_to_vegalite(self): assert "vega-lite" in spec["$schema"] def test_render_contains_data(self): - reader = ggsql.DuckDBReader("duckdb://memory") + reader = ggsql.readers.DuckDB("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - reader.register("data", df) - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW point", reader) - writer = ggsql.VegaLiteWriter() + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + writer = ggsql.writers.VegaLite() - result = prepared.render(writer) + result = writer.render_json(prepared) spec = json.loads(result) # Data should be in the spec (either inline or in datasets) assert "data" in spec or "datasets" in spec def test_render_multi_layer(self): - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = 
reader.execute( "SELECT * FROM (VALUES (1, 10), (2, 20)) AS t(x, y) " "VISUALISE " "DRAW point MAPPING x AS x, y AS y " - "DRAW line MAPPING x AS x, y AS y", - reader, + "DRAW line MAPPING x AS x, y AS y" ) - writer = ggsql.VegaLiteWriter() + writer = ggsql.writers.VegaLite() - result = prepared.render(writer) + result = writer.render_json(prepared) spec = json.loads(result) assert "layer" in spec -class TestRenderAltairDataFrameConversion: - """Tests for DataFrame handling in render_altair().""" +class TestWriterRenderChart: + """Tests for VegaLite.render_chart() method.""" - def test_accepts_polars_dataframe(self): - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") - assert isinstance(chart, altair.TopLevelMixin) + def test_render_chart_returns_altair(self): + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + writer = ggsql.writers.VegaLite() - def test_accepts_polars_lazyframe(self): - lf = pl.LazyFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(lf, "VISUALISE x, y DRAW point") + chart = writer.render_chart(prepared) assert isinstance(chart, altair.TopLevelMixin) - def test_accepts_narwhals_dataframe(self): - import narwhals as nw - - pl_df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - nw_df = nw.from_native(pl_df) - - chart = ggsql.render_altair(nw_df, "VISUALISE x, y DRAW point") - assert isinstance(chart, altair.TopLevelMixin) - - def test_accepts_pandas_dataframe(self): - pd = pytest.importorskip("pandas") - - pd_df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(pd_df, "VISUALISE x, y DRAW point") - assert isinstance(chart, altair.TopLevelMixin) - - def test_rejects_invalid_dataframe_type(self): - with pytest.raises(TypeError, match="must be a narwhals DataFrame"): - ggsql.render_altair({"x": [1, 2, 3]}, "VISUALISE x, y DRAW point") - - -class 
TestRenderAltairReturnType: - """Tests for render_altair() return type.""" + def test_render_chart_layer_chart(self): + """Simple DRAW specs produce LayerChart (ggsql always wraps in layer).""" + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + writer = ggsql.writers.VegaLite() - def test_returns_altair_chart(self): - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") - assert isinstance(chart, altair.TopLevelMixin) + chart = writer.render_chart(prepared) + # ggsql wraps all charts in a layer + assert isinstance(chart, altair.LayerChart) - def test_chart_has_data(self): + def test_render_chart_can_serialize(self): + reader = ggsql.readers.DuckDB("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") - spec = chart.to_dict() - # Data should be embedded in datasets - assert "datasets" in spec + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + writer = ggsql.writers.VegaLite() - def test_chart_can_be_serialized(self): - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + chart = writer.render_chart(prepared) # Should not raise json_str = chart.to_json() assert len(json_str) > 0 - -class TestRenderAltairChartTypeDetection: - """Tests for correct Altair chart type detection based on spec structure.""" - - def test_simple_chart_returns_layer_chart(self): - """Simple DRAW specs produce LayerChart (ggsql always wraps in layer).""" - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") - # ggsql wraps all charts in a layer - assert isinstance(chart, altair.LayerChart) - - def test_layered_chart_can_round_trip(self): - """LayerChart can be converted to dict and back.""" - 
df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") - - # Convert to dict and back - spec = chart.to_dict() - assert "layer" in spec - - # Should be able to recreate from dict - recreated = altair.LayerChart.from_dict(spec) - assert isinstance(recreated, altair.LayerChart) - - def test_faceted_chart_returns_facet_chart(self): + def test_render_chart_faceted(self): """FACET WRAP specs produce FacetChart.""" + reader = ggsql.readers.DuckDB("duckdb://memory") df = pl.DataFrame( { "x": [1, 2, 3, 4, 5, 6], @@ -291,64 +312,25 @@ def test_faceted_chart_returns_facet_chart(self): } ) # Need validate=False because ggsql produces v6 specs - chart = ggsql.render_altair( - df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False - ) - assert isinstance(chart, altair.FacetChart) - - def test_faceted_chart_can_round_trip(self): - """FacetChart can be converted to dict and back.""" - df = pl.DataFrame( - { - "x": [1, 2, 3, 4, 5, 6], - "y": [10, 20, 30, 40, 50, 60], - "group": ["A", "A", "A", "B", "B", "B"], - } - ) - chart = ggsql.render_altair( - df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y FACET WRAP group DRAW point", + {"data": df}, ) + writer = ggsql.writers.VegaLite() - # Convert to dict (skip validation for ggsql specs) - spec = chart.to_dict(validate=False) - assert "facet" in spec or "spec" in spec - - # Should be able to recreate from dict (with validation disabled) - recreated = altair.FacetChart.from_dict(spec, validate=False) - assert isinstance(recreated, altair.FacetChart) - - def test_chart_with_color_encoding(self): - """Charts with color encoding still return correct type.""" - df = pl.DataFrame( - { - "x": [1, 2, 3, 4], - "y": [10, 20, 30, 40], - "category": ["A", "B", "A", "B"], - } - ) - chart = ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") - # Should still be a LayerChart 
(ggsql wraps in layer) - assert isinstance(chart, altair.LayerChart) - - -class TestRenderAltairErrorHandling: - """Tests for error handling in render_altair().""" - - def test_invalid_viz_raises(self): - df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - with pytest.raises(ValueError): - ggsql.render_altair(df, "NOT VALID SYNTAX") + chart = writer.render_chart(prepared) + assert isinstance(chart, altair.FacetChart) class TestTwoStageAPIIntegration: - """Integration tests for the two-stage prepare -> render API.""" + """Integration tests for the two-stage execute -> render API.""" def test_end_to_end_workflow(self): - """Complete workflow: create reader, register data, prepare, render.""" + """Complete workflow: create reader, execute with data, render.""" # Create reader - reader = ggsql.DuckDBReader("duckdb://memory") + reader = ggsql.readers.DuckDB("duckdb://memory") - # Register data + # Create data df = pl.DataFrame( { "date": ["2024-01-01", "2024-01-02", "2024-01-03"], @@ -356,12 +338,11 @@ def test_end_to_end_workflow(self): "region": ["North", "South", "North"], } ) - reader.register("sales", df) - # Prepare visualization - prepared = ggsql.prepare( + # Execute visualization + prepared = reader.execute( "SELECT * FROM sales VISUALISE date AS x, value AS y, region AS color DRAW line", - reader, + {"sales": df}, ) # Verify metadata @@ -369,8 +350,8 @@ def test_end_to_end_workflow(self): assert prepared.layer_count() == 1 # Render to Vega-Lite - writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) + writer = ggsql.writers.VegaLite() + result = writer.render_json(prepared) # Verify output spec = json.loads(result) @@ -379,10 +360,8 @@ def test_end_to_end_workflow(self): def test_can_introspect_prepared(self): """Test all introspection methods on Prepared.""" - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) + reader = 
ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") # All these should work without error assert prepared.sql() is not None @@ -398,120 +377,189 @@ def test_can_introspect_prepared(self): _ = prepared.layer_sql(0) _ = prepared.stat_sql(0) + def test_visualise_from_shorthand(self): + """Test VISUALISE FROM syntax.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) -class TestCustomReader: - """Tests for custom Python reader support.""" + prepared = reader.execute( + "VISUALISE FROM data DRAW point MAPPING x AS x, y AS y", {"data": df} + ) + assert prepared.metadata()["rows"] == 3 - def test_simple_custom_reader(self): - """Custom reader with execute() method works.""" + def test_render_chart_workflow(self): + """Test workflow using render_chart().""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - class SimpleReader: - def execute(self, sql: str) -> pl.DataFrame: - return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + spec = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + writer = ggsql.writers.VegaLite() + chart = writer.render_chart(spec) + + # Should be able to convert to dict + spec_dict = chart.to_dict() + assert "layer" in spec_dict + + +class TestVersionInfo: + """Tests for version information.""" + + def test_version_string(self): + """__version__ should be a string.""" + assert isinstance(ggsql.__version__, str) + assert ggsql.__version__ == "0.1.0" + + def test_version_info_tuple(self): + """version_info should be a tuple.""" + assert hasattr(ggsql, "version_info") + assert isinstance(ggsql.version_info, tuple) + assert ggsql.version_info == (0, 1, 0) + + +class TestReprMethods: + """Tests for __repr__ methods.""" + + def test_duckdb_repr(self): + """DuckDB should have a useful repr.""" + reader = 
ggsql.readers.DuckDB("duckdb://memory") + repr_str = repr(reader) + assert "DuckDB" in repr_str + assert "duckdb://memory" in repr_str + + def test_vegalite_repr(self): + """VegaLite should have a useful repr.""" + writer = ggsql.writers.VegaLite() + repr_str = repr(writer) + assert "VegaLite" in repr_str + + def test_validated_repr(self): + """Validated should have a useful repr.""" + validated = ggsql.validate("SELECT 1 AS x VISUALISE x DRAW point") + repr_str = repr(validated) + assert "Validated" in repr_str + assert "valid=" in repr_str + + def test_prepared_repr(self): + """Prepared should have a useful repr.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + repr_str = repr(prepared) + assert "Prepared" in repr_str + assert "rows=" in repr_str + assert "layers=" in repr_str + + +class TestNarwhalsSupport: + """Tests for narwhals DataFrame support.""" + + def test_execute_with_pandas_dataframe(self): + """execute() should accept pandas DataFrames.""" + import pandas as pd + + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + assert prepared.metadata()["rows"] == 3 + + def test_register_with_pandas_dataframe(self): + """register() should accept pandas DataFrames.""" + import pandas as pd + + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + + reader.register("my_data", df) + result = reader.execute_sql("SELECT * FROM my_data") + assert result.shape == (3, 2) + + def test_execute_with_polars_dataframe(self): + """execute() should still work with polars DataFrames.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - reader = SimpleReader() - prepared = ggsql.prepare("SELECT * FROM data VISUALISE x, y DRAW 
point", reader) + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) assert prepared.metadata()["rows"] == 3 - def test_custom_reader_with_register(self): - """Custom reader with register() support.""" - class RegisterReader: - def __init__(self): - self.tables = {} +class TestRenderJsonMethod: + """Tests for render_json() method.""" - def execute(self, sql: str) -> pl.DataFrame: - # Simple: just return the first registered table - if self.tables: - return next(iter(self.tables.values())) - return pl.DataFrame({"x": [1], "y": [2]}) + def test_render_json_returns_json(self): + """render_json() should return a valid JSON string.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + prepared = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + writer = ggsql.writers.VegaLite() - def supports_register(self) -> bool: - return True + result = writer.render_json(prepared) + assert isinstance(result, str) - def register(self, name: str, df: pl.DataFrame) -> None: - self.tables[name] = df + spec = json.loads(result) + assert "$schema" in spec - reader = RegisterReader() - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) - assert prepared is not None - def test_custom_reader_error_handling(self): - """Custom reader errors are propagated.""" +class TestContextManager: + """Tests for context manager protocol.""" - class ErrorReader: - def execute(self, sql: str) -> pl.DataFrame: - raise ValueError("Custom reader error") + def test_context_manager_basic(self): + """DuckDB should work as context manager.""" + with ggsql.readers.DuckDB("duckdb://memory") as reader: + df = reader.execute_sql("SELECT 1 AS x, 2 AS y") + assert df.shape == (1, 2) - reader = ErrorReader() - with pytest.raises(ValueError, match="Custom reader error"): - ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) + def test_context_manager_with_execute(self): + """execute() should work inside context 
manager.""" + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - def test_custom_reader_wrong_return_type(self): - """Custom reader returning wrong type raises TypeError.""" + with ggsql.readers.DuckDB("duckdb://memory") as reader: + prepared = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) + assert prepared.metadata()["rows"] == 3 - class WrongTypeReader: - def execute(self, sql: str): - return {"x": [1, 2, 3]} # dict, not DataFrame - reader = WrongTypeReader() - with pytest.raises((ValueError, TypeError)): - ggsql.prepare("SELECT 1 VISUALISE x, y DRAW point", reader) +class TestExceptionHierarchy: + """Tests for exception type hierarchy.""" - def test_native_reader_fast_path(self): - """Native DuckDBReader still works (fast path).""" - reader = ggsql.DuckDBReader("duckdb://memory") - prepared = ggsql.prepare( - "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader - ) - assert prepared.metadata()["rows"] == 1 - - def test_custom_reader_can_render(self): - """Custom reader result can be rendered to Vega-Lite.""" - - class StaticReader: - def execute(self, sql: str) -> pl.DataFrame: - return pl.DataFrame( - { - "x": [1, 2, 3, 4, 5], - "y": [10, 40, 20, 50, 30], - "category": ["A", "B", "A", "B", "A"], - } - ) - - reader = StaticReader() - prepared = ggsql.prepare( - "SELECT * FROM data VISUALISE x, y, category AS color DRAW point", - reader, - ) + def test_ggsql_error_is_base(self): + """All exceptions should inherit from GgsqlError.""" + assert issubclass(ggsql.types.ParseError, ggsql.types.GgsqlError) + assert issubclass(ggsql.types.ValidationError, ggsql.types.GgsqlError) + assert issubclass(ggsql.types.ReaderError, ggsql.types.GgsqlError) + assert issubclass(ggsql.types.WriterError, ggsql.types.GgsqlError) + assert issubclass(ggsql.types.NoVisualiseError, ggsql.types.GgsqlError) - writer = ggsql.VegaLiteWriter() - result = prepared.render(writer) + def test_ggsql_error_is_exception(self): + """GgsqlError should be a 
proper exception type.""" + assert issubclass(ggsql.types.GgsqlError, Exception) - spec = json.loads(result) - assert "$schema" in spec - assert "vega-lite" in spec["$schema"] + def test_catch_all_ggsql_errors(self): + """Should be able to catch all errors with GgsqlError.""" + reader = ggsql.readers.DuckDB("duckdb://memory") - def test_custom_reader_execute_called(self): - """Verify execute() is called on the custom reader.""" + # This should raise ReaderError (missing table) + with pytest.raises(ggsql.types.GgsqlError): + reader.execute_sql("SELECT * FROM nonexistent_table") - class RecordingReader: - def __init__(self): - self.execute_calls = [] + def test_reader_error_for_sql_failure(self): + """ReaderError should be raised for SQL execution failures.""" + reader = ggsql.readers.DuckDB("duckdb://memory") - def execute(self, sql: str) -> pl.DataFrame: - self.execute_calls.append(sql) - return pl.DataFrame({"x": [1], "y": [2]}) + with pytest.raises(ggsql.types.ReaderError): + reader.execute_sql("SELECT * FROM nonexistent_table") - reader = RecordingReader() - ggsql.prepare( - "SELECT * FROM data VISUALISE x, y DRAW point", - reader, - ) + def test_validation_error_for_missing_column(self): + """ValidationError should be raised for missing column references.""" + reader = ggsql.readers.DuckDB("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3]}) # Missing 'y' column - # execute() should have been called at least once - assert len(reader.execute_calls) > 0 - # All calls should be valid SQL strings - assert all(isinstance(sql, str) for sql in reader.execute_calls) + with pytest.raises(ggsql.types.ValidationError): + reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", {"data": df} + ) diff --git a/src/api.rs b/src/api.rs index ecfbdeaf..0cf18448 100644 --- a/src/api.rs +++ b/src/api.rs @@ -13,9 +13,6 @@ use crate::execute::prepare_data_with_executor; #[cfg(feature = "duckdb")] use crate::reader::Reader; -#[cfg(feature = "vegalite")] -use 
crate::writer::Writer; - // ============================================================================ // Core Types // ============================================================================ @@ -89,12 +86,6 @@ impl Prepared { } } - /// Render to output format (e.g., Vega-Lite JSON). - #[cfg(feature = "vegalite")] - pub fn render(&self, writer: &dyn Writer) -> Result { - writer.write(&self.plot, &self.data) - } - /// Get the resolved plot specification. pub fn plot(&self) -> &Plot { &self.plot @@ -245,7 +236,7 @@ pub fn prepare(query: &str, reader: &dyn Reader) -> Result { let warnings: Vec = validated.warnings().to_vec(); // Prepare data (this also validates, but we want the warnings from above) - let prepared_data = prepare_data_with_executor(query, |sql| reader.execute(sql))?; + let prepared_data = prepare_data_with_executor(query, |sql| reader.execute_sql(sql))?; Ok(Prepared::new( prepared_data.spec, @@ -429,7 +420,7 @@ mod tests { #[test] fn test_prepare_and_render() { use crate::reader::DuckDBReader; - use crate::writer::VegaLiteWriter; + use crate::writer::{VegaLiteWriter, Writer}; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let prepared = prepare("SELECT 1 as x, 2 as y VISUALISE x, y DRAW point", &reader).unwrap(); @@ -439,7 +430,7 @@ mod tests { assert!(prepared.data().is_some()); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = writer.render(&prepared).unwrap(); assert!(result.contains("point")); } @@ -489,7 +480,7 @@ mod tests { #[test] fn test_render_multi_layer() { use crate::reader::DuckDBReader; - use crate::writer::VegaLiteWriter; + use crate::writer::{VegaLiteWriter, Writer}; let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let query = r#" @@ -501,7 +492,7 @@ mod tests { let prepared = prepare(query, &reader).unwrap(); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = 
writer.render(&prepared).unwrap(); assert!(result.contains("layer")); } @@ -510,7 +501,7 @@ mod tests { #[test] fn test_register_and_query() { use crate::reader::{DuckDBReader, Reader}; - use crate::writer::VegaLiteWriter; + use crate::writer::{VegaLiteWriter, Writer}; use polars::prelude::*; let mut reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); @@ -530,7 +521,7 @@ mod tests { assert!(prepared.metadata().columns.contains(&"x".to_string())); let writer = VegaLiteWriter::new(); - let result = prepared.render(&writer).unwrap(); + let result = writer.render(&prepared).unwrap(); assert!(result.contains("point")); } diff --git a/src/cli.rs b/src/cli.rs index 1844ff01..73f14789 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -15,7 +15,7 @@ use ggsql::reader::{DuckDBReader, Reader}; use ggsql::{prepare, validate}; #[cfg(feature = "vegalite")] -use ggsql::writer::VegaLiteWriter; +use ggsql::writer::{VegaLiteWriter, Writer}; #[derive(Parser)] #[command(name = "ggsql")] @@ -222,7 +222,7 @@ fn cmd_exec(query: String, reader: String, writer: String, output: Option r, Err(e) => { eprintln!("Failed to generate Vega-Lite output: {}", e); @@ -335,7 +335,7 @@ fn print_table_fallback(query: &str, reader: &DuckDBReader, max_rows: usize) { } let parsed = parsed.unwrap(); - let data = reader.execute(&parsed); + let data = reader.execute_sql(&parsed); if let Err(e) = data { eprintln!("Failed to execute SQL query: {}", e); std::process::exit(1) diff --git a/src/doc/API.md b/src/doc/API.md index a0f97ab6..5dd50b20 100644 --- a/src/doc/API.md +++ b/src/doc/API.md @@ -5,14 +5,14 @@ This document provides a comprehensive reference for the ggsql public API. ## Overview - **Stage 1: `prepare()`** - Parse query, execute SQL, resolve mappings, prepare data -- **Stage 2: `render()`** - Generate output (Vega-Lite JSON, etc.) +- **Stage 2: `writer.render()`** - Generate output (Vega-Lite JSON, etc.) 
### API Functions | Function | Use Case | | ------------ | ---------------------------------------------------- | | `prepare()` | Main entry point - full visualization pipeline | -| `render()` | Generate output from prepared data | +| `writer.render()` | Generate output from prepared data | | `validate()` | Validate syntax + semantics, inspect query structure | --- @@ -50,7 +50,7 @@ Prepare a ggsql query for visualization. This is the main entry point for the tw **Example:** ```rust -use ggsql::{prepare, reader::DuckDBReader, writer::VegaLiteWriter}; +use ggsql::{prepare, reader::DuckDBReader, writer::{VegaLiteWriter, Writer}}; let reader = DuckDBReader::from_connection_string("duckdb://memory")?; let prepared = prepare( @@ -64,7 +64,7 @@ println!("Columns: {:?}", prepared.metadata().columns); // Render to Vega-Lite let writer = VegaLiteWriter::new(); -let result = prepared.render(&writer)?; +let result = writer.render(&prepared)?; ``` **Error Conditions:** @@ -188,20 +188,6 @@ if let Some(tree) = validated.tree() { Result of preparing a visualization, ready for rendering. 
-#### Rendering Methods - -| Method | Signature | Description | -| -------- | --------------------------------------------------------- | ----------------------- | -| `render` | `fn render(&self, writer: &dyn Writer) -> Result` | Render to output format | - -**Example:** - -```rust -let writer = VegaLiteWriter::new(); -let json = prepared.render(&writer)?; -println!("{}", json); -``` - #### Plot Access Methods | Method | Signature | Description | @@ -309,7 +295,8 @@ if !prepared.warnings().is_empty() { } // Continue with rendering -let json = prepared.render(&writer)?; +let writer = VegaLiteWriter::new(); +let json = writer.render(&prepared)?; ``` --- @@ -374,11 +361,14 @@ pub struct Location { ```rust pub trait Reader { /// Execute a SQL query and return a DataFrame - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; /// Register a DataFrame as a queryable table fn register(&mut self, name: &str, df: DataFrame) -> Result<()>; + /// Unregister a table (fails silently if not found) + fn unregister(&mut self, name: &str); + /// Check if this reader supports DataFrame registration fn supports_register(&self) -> bool; } @@ -392,24 +382,45 @@ pub trait Reader { ```rust pub trait Writer { - /// Render a plot specification to output format + /// Render a prepared visualization to output format + fn render(&self, prepared: &Prepared) -> Result; + + /// Lower-level: render from plot specification and data map fn write(&self, spec: &Plot, data: &HashMap) -> Result; - /// Get the file extension for this writer's output - fn file_extension(&self) -> &str; + /// Validate that a spec is compatible with this writer + fn validate(&self, spec: &Plot) -> Result<()>; } ``` +**Example:** + +```rust +use ggsql::writer::{VegaLiteWriter, Writer}; + +let writer = VegaLiteWriter::new(); +let json = writer.render(&prepared)?; +println!("{}", json); +``` + +--- + ## Python Bindings The Python bindings provide the same two-stage API with Pythonic 
conventions. +### Module Structure + +- `ggsql.readers` - Reader classes (`DuckDB`) +- `ggsql.writers` - Writer classes (`VegaLite`) +- `ggsql` - Types (`Validated`, `Prepared`), exceptions (`NoVisualiseError`), and functions (`validate`) + ### Classes -#### `DuckDBReader` +#### `ggsql.readers.DuckDB` ```python -class DuckDBReader: +class DuckDB: def __init__(self, connection: str) -> None: """Create a DuckDB reader. @@ -417,27 +428,58 @@ class DuckDBReader: connection: Connection string (e.g., "duckdb://memory") """ - def register(self, name: str, df: Any) -> None: - """Register a DataFrame as a queryable table. + def execute( + self, + query: str, + data: dict[str, polars.DataFrame] | None = None + ) -> Prepared: + """Execute a ggsql query with optional DataFrame registration. + + DataFrames are registered before execution and automatically + unregistered afterward (even on error). Args: - name: Table name - df: Polars DataFrame or narwhals-compatible DataFrame + query: The ggsql query (must contain VISUALISE clause) + data: DataFrames to register as tables (keys are table names) + + Returns: + Prepared visualization ready for rendering + + Raises: + NoVisualiseError: If query has no VISUALISE clause + ValueError: If parsing or execution fails """ - def execute(self, sql: str) -> polars.DataFrame: - """Execute SQL and return a Polars DataFrame.""" + def execute_sql(self, sql: str) -> polars.DataFrame: + """Execute plain SQL and return a Polars DataFrame.""" - def supports_register(self) -> bool: - """Check if registration is supported.""" + def register(self, name: str, df: polars.DataFrame) -> None: + """Manually register a DataFrame as a queryable table.""" + + def unregister(self, name: str) -> None: + """Unregister a table (fails silently if not found).""" ``` -#### `VegaLiteWriter` +#### `ggsql.writers.VegaLite` ```python -class VegaLiteWriter: +class VegaLite: def __init__(self) -> None: """Create a Vega-Lite writer.""" + + def render(self, spec: Prepared) 
-> str: + """Render to Vega-Lite JSON string.""" + + def render_chart(self, spec: Prepared, **kwargs) -> AltairChart: + """Render to Altair chart object. + + Args: + spec: Prepared visualization from reader.execute() + **kwargs: Additional args for altair.Chart.from_json() + + Returns: + Altair chart (Chart, LayerChart, FacetChart, etc.) + """ ``` #### `Validated` @@ -469,9 +511,6 @@ class Validated: ```python class Prepared: - def render(self, writer: VegaLiteWriter) -> str: - """Render to output format.""" - def metadata(self) -> dict: """Get metadata as dict with keys: rows, columns, layer_count.""" @@ -503,6 +542,13 @@ class Prepared: """Get stat transform query.""" ``` +### Exceptions + +```python +class NoVisualiseError(Exception): + """Raised when execute() is called on a query without VISUALISE clause.""" +``` + ### Functions ```python @@ -511,7 +557,26 @@ def validate(query: str) -> Validated: Returns Validated object with query inspection and validation methods. """ +``` -def prepare(query: str, reader: DuckDBReader) -> Prepared: - """Prepare a query for visualization.""" +### Usage Example + +```python +import polars as pl +from ggsql.readers import DuckDB +from ggsql.writers import VegaLite + +# Create data +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + +# Execute with inline data registration +reader = DuckDB("duckdb://memory") +spec = reader.execute( + "SELECT * FROM data VISUALISE x, y DRAW point", + {"data": df} +) + +# Render to Altair chart +writer = VegaLite() +chart = writer.render_chart(spec) ``` diff --git a/src/execute.rs b/src/execute.rs index b7683f56..3bf2be33 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -1185,7 +1185,7 @@ where /// Convenience wrapper around `prepare_data_with_executor` for direct DuckDB reader usage. 
#[cfg(feature = "duckdb")] pub fn prepare_data(query: &str, reader: &DuckDBReader) -> Result { - prepare_data_with_executor(query, |sql| reader.execute(sql)) + prepare_data_with_executor(query, |sql| reader.execute_sql(sql)) } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index cf13aaa5..8edad501 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,6 +78,9 @@ pub enum GgsqlError { #[error("Internal error: {0}")] InternalError(String), + + #[error("Query has no VISUALISE clause")] + NoVisualise, } pub type Result = std::result::Result; @@ -116,7 +119,7 @@ mod integration_tests { FROM generate_series(0, 4) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has temporal type (DuckDB returns Datetime for DATE + INTERVAL) assert_eq!(df.get_column_names(), vec!["date", "revenue"]); @@ -176,7 +179,7 @@ mod integration_tests { FROM generate_series(0, 3) as t(n) "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify DataFrame has Datetime type let timestamp_col = df.column("timestamp").unwrap(); @@ -224,7 +227,7 @@ mod integration_tests { // Real SQL that users would write let sql = "SELECT 1 as int_col, 2.5 as float_col, true as bool_col"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types are preserved // DuckDB treats numeric literals as DECIMAL, which we convert to Float64 @@ -279,7 +282,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES (1, 2.5, 'a'), (2, NULL, 'b'), (NULL, 3.5, NULL)) AS t(int_col, float_col, str_col)"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -329,7 +332,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT * FROM (VALUES ('A', 10), ('B', 20), ('A', 
15), ('C', 30)) AS t(category, value)"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); let mut spec = Plot::new(); let layer = Layer::new(Geom::bar()) @@ -375,7 +378,7 @@ mod integration_tests { GROUP BY day "#; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify temporal type is preserved through aggregation // DATE_TRUNC returns Date type (not Datetime) @@ -413,7 +416,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT 0.1 as small, 123.456 as medium, 999999.999999 as large"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // All should be Float64 assert!(matches!( @@ -465,7 +468,7 @@ mod integration_tests { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let sql = "SELECT CAST(1 AS TINYINT) as tiny, CAST(1000 AS SMALLINT) as small, CAST(1000000 AS INTEGER) as int, CAST(1000000000000 AS BIGINT) as big"; - let df = reader.execute(sql).unwrap(); + let df = reader.execute_sql(sql).unwrap(); // Verify types assert!(matches!( @@ -533,7 +536,7 @@ mod integration_tests { // Prepare data - this parses, injects constants into global data, and replaces literals with columns let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // Verify constants were injected into global data (not layer-specific data) // Both layers share __global__ data for faceting compatibility @@ -641,7 +644,7 @@ mod integration_tests { "#; let prepared = - execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // All layers should use global data for faceting to work assert!( @@ -729,7 +732,7 @@ mod integration_tests { "#; let prepared = - 
execute::prepare_data_with_executor(query, |sql| reader.execute(sql)).unwrap(); + execute::prepare_data_with_executor(query, |sql| reader.execute_sql(sql)).unwrap(); // Should have global data with the constant injected assert!( diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index b3cf46d7..a8d35cfd 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -24,11 +24,11 @@ use std::io::Cursor; /// /// // In-memory database /// let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -/// let df = reader.execute("SELECT 1 as x, 2 as y")?; +/// let df = reader.execute_sql("SELECT 1 as x, 2 as y")?; /// /// // File-based database /// let reader = DuckDBReader::from_connection_string("duckdb://data.db")?; -/// let df = reader.execute("SELECT * FROM sales")?; +/// let df = reader.execute_sql("SELECT * FROM sales")?; /// ``` pub struct DuckDBReader { conn: Connection, @@ -380,7 +380,7 @@ impl ColumnBuilder { } impl Reader for DuckDBReader { - fn execute(&self, sql: &str) -> Result { + fn execute_sql(&self, sql: &str) -> Result { use polars::prelude::*; // Check if this is a DDL statement (CREATE, DROP, INSERT, UPDATE, DELETE, ALTER) @@ -526,6 +526,12 @@ impl Reader for DuckDBReader { Ok(()) } + fn unregister(&mut self, name: &str) { + // Fail silently if table doesn't exist (DROP TABLE IF EXISTS) + let sql = format!("DROP TABLE IF EXISTS \"{}\"", name); + let _ = self.conn.execute(&sql, duckdb::params![]); + } + fn supports_register(&self) -> bool { true } @@ -544,7 +550,7 @@ mod tests { #[test] fn test_simple_query() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let df = reader.execute("SELECT 1 as x, 2 as y").unwrap(); + let df = reader.execute_sql("SELECT 1 as x, 2 as y").unwrap(); assert_eq!(df.shape(), (1, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); @@ -567,7 +573,7 @@ mod tests { .unwrap(); // Query data - let df = reader.execute("SELECT * FROM test").unwrap(); + let df = 
reader.execute_sql("SELECT * FROM test").unwrap(); assert_eq!(df.shape(), (2, 2)); assert_eq!(df.get_column_names(), vec!["x", "y"]); @@ -576,7 +582,7 @@ mod tests { #[test] fn test_invalid_sql() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); - let result = reader.execute("INVALID SQL SYNTAX"); + let result = reader.execute_sql("INVALID SQL SYNTAX"); assert!(result.is_err()); } @@ -598,7 +604,7 @@ mod tests { .unwrap(); let df = reader - .execute("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") + .execute_sql("SELECT region, SUM(revenue) as total FROM sales GROUP BY region") .unwrap(); assert_eq!(df.shape(), (2, 2)); @@ -620,7 +626,9 @@ mod tests { reader.register("my_table", df).unwrap(); // Query the registered table - let result = reader.execute("SELECT * FROM my_table ORDER BY x").unwrap(); + let result = reader + .execute_sql("SELECT * FROM my_table ORDER BY x") + .unwrap(); assert_eq!(result.shape(), (3, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } @@ -698,7 +706,7 @@ mod tests { reader.register("empty_table", df).unwrap(); // Query should return empty result with correct schema - let result = reader.execute("SELECT * FROM empty_table").unwrap(); + let result = reader.execute_sql("SELECT * FROM empty_table").unwrap(); assert_eq!(result.shape(), (0, 2)); assert_eq!(result.get_column_names(), vec!["x", "y"]); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 762c0319..519d1581 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -17,12 +17,12 @@ //! //! // Basic usage //! let reader = DuckDBReader::from_connection_string("duckdb://memory")?; -//! let df = reader.execute("SELECT * FROM table")?; +//! let df = reader.execute_sql("SELECT * FROM table")?; //! //! // With DataFrame registration //! let mut reader = DuckDBReader::from_connection_string("duckdb://memory")?; //! reader.register("my_table", some_dataframe)?; -//! 
let result = reader.execute("SELECT * FROM my_table")?; +//! let result = reader.execute_sql("SELECT * FROM my_table")?; //! ``` use crate::{DataFrame, GgsqlError, Result}; @@ -53,7 +53,10 @@ pub use duckdb::DuckDBReader; /// reader.register("sales", sales_df)?; /// /// // Now you can query it -/// let result = reader.execute("SELECT * FROM sales WHERE amount > 100")?; +/// let result = reader.execute_sql("SELECT * FROM sales WHERE amount > 100")?; +/// +/// // Unregister when done (fails silently if table doesn't exist) +/// reader.unregister("sales"); /// ``` pub trait Reader { /// Execute a SQL query and return the result as a DataFrame @@ -72,7 +75,7 @@ pub trait Reader { /// - The SQL is invalid /// - The connection fails /// - The table or columns don't exist - fn execute(&self, sql: &str) -> Result; + fn execute_sql(&self, sql: &str) -> Result; /// Register a DataFrame as a queryable table (takes ownership) /// @@ -100,6 +103,22 @@ pub trait Reader { ))) } + /// Unregister a table by name. + /// + /// This removes a previously registered DataFrame from the reader. + /// Fails silently if the table doesn't exist. + /// + /// # Arguments + /// + /// * `name` - The table name to unregister + /// + /// # Default Implementation + /// + /// Does nothing by default. Override for readers that support registration. 
+ fn unregister(&mut self, _name: &str) { + // Default: fail silently + } + /// Check if this reader supports DataFrame registration /// /// # Returns diff --git a/src/rest.rs b/src/rest.rs index e87a14f9..82c46542 100644 --- a/src/rest.rs +++ b/src/rest.rs @@ -39,7 +39,7 @@ use ggsql::prepare; use ggsql::reader::DuckDBReader; #[cfg(feature = "vegalite")] -use ggsql::writer::VegaLiteWriter; +use ggsql::writer::{VegaLiteWriter, Writer}; /// CLI arguments for the REST API server #[derive(Parser)] @@ -196,6 +196,7 @@ impl From for ApiErrorResponse { GgsqlError::ReaderError(_) => (StatusCode::BAD_REQUEST, "ReaderError"), GgsqlError::WriterError(_) => (StatusCode::INTERNAL_SERVER_ERROR, "WriterError"), GgsqlError::InternalError(_) => (StatusCode::INTERNAL_SERVER_ERROR, "InternalError"), + GgsqlError::NoVisualise => (StatusCode::BAD_REQUEST, "NoVisualise"), }; ApiErrorResponse { @@ -258,8 +259,7 @@ fn load_data_files(reader: &DuckDBReader, files: &[String]) -> Result<(), GgsqlE .file_stem() .and_then(|s| s.to_str()) .unwrap_or("data") - .replace('-', "_") - .replace(' ', "_"); + .replace(['-', ' '], "_"); info!("Loading {} into table '{}'", file_path, table_name); @@ -461,7 +461,7 @@ async fn query_handler( #[cfg(feature = "vegalite")] if request.writer == "vegalite" { let writer = VegaLiteWriter::new(); - let json_output = prepared.render(&writer)?; + let json_output = writer.render(&prepared)?; let spec_value: serde_json::Value = serde_json::from_str(&json_output) .map_err(|e| GgsqlError::WriterError(format!("Failed to parse JSON: {}", e)))?; @@ -572,6 +572,7 @@ async fn health_handler() -> Json { } /// GET /api/v1/version - Version information +#[allow(clippy::vec_init_then_push)] // Feature-flag dependent pushes async fn version_handler() -> Json { let mut features = Vec::new(); diff --git a/src/writer/mod.rs b/src/writer/mod.rs index 7f026e6b..e1928e60 100644 --- a/src/writer/mod.rs +++ b/src/writer/mod.rs @@ -1,13 +1,13 @@ //! 
Output writer abstraction layer for ggsql //! //! The writer module provides a pluggable interface for generating visualization -//! outputs from Plot + DataFrame combinations. +//! outputs from Prepared specifications. //! //! # Architecture //! //! All writers implement the `Writer` trait, which provides: -//! - Spec + Data → Output conversion -//! - Validation for writer compatibility +//! - Prepared → Output conversion via `render()` +//! - Low-level Plot + Data → Output via `write()` //! - Format-specific rendering logic //! //! # Example @@ -16,10 +16,11 @@ //! use ggsql::writer::{Writer, VegaLiteWriter}; //! //! let writer = VegaLiteWriter::new(); -//! let json = writer.write(&spec, &dataframe)?; +//! let json = writer.render(&prepared)?; //! println!("{}", json); //! ``` +use crate::api::Prepared; use crate::{DataFrame, Plot, Result}; use std::collections::HashMap; @@ -31,11 +32,34 @@ pub use vegalite::VegaLiteWriter; /// Trait for visualization output writers /// -/// Writers take a Plot and data sources and produce formatted output +/// Writers take a Prepared specification and produce formatted output /// (JSON, R code, PNG bytes, etc.). pub trait Writer { + /// Render a prepared visualization to output format + /// + /// This is the primary rendering method. It extracts the plot and data + /// from the Prepared object and generates the output. + /// + /// # Arguments + /// + /// * `prepared` - The prepared visualization (from `reader.execute()`) + /// + /// # Returns + /// + /// A string containing the formatted output (JSON, code, etc.) + /// + /// # Errors + /// + /// Returns `GgsqlError::WriterError` if rendering fails + fn render(&self, prepared: &Prepared) -> Result { + self.write(prepared.plot(), prepared.data_map()) + } + /// Generate output from a visualization specification and data sources /// + /// This is a lower-level method that takes the plot and data separately. + /// Most callers should use `render()` instead. 
+ /// /// # Arguments /// /// * `spec` - The parsed ggsql specification