diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 6b720744..d8e7e6de 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -20,6 +20,14 @@ jobs: sudo docker image prune --all --force sudo docker builder prune -a + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install tree-sitter-cli + run: npm install -g tree-sitter-cli + - name: Install Rust uses: dtolnay/rust-toolchain@stable diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index e01e41a2..465b1672 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -21,6 +21,14 @@ jobs: sudo docker image prune --all --force sudo docker builder prune -a + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install tree-sitter-cli + run: npm install -g tree-sitter-cli + - name: Install Rust uses: dtolnay/rust-toolchain@stable @@ -39,7 +47,7 @@ jobs: - name: Run Rust tests if: success() - run: cargo test + run: cargo test --lib --bins - name: Run Rust formatting check if: success() diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index a36f64fc..64d4d76e 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -22,6 +22,14 @@ jobs: sudo docker image prune --all --force sudo docker builder prune -a + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install tree-sitter-cli + run: npm install -g tree-sitter-cli + - name: Install Rust uses: dtolnay/rust-toolchain@stable diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 00000000..761994b8 --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,72 @@ +name: Python + +on: + push: + paths: ['ggsql-python/**', '.github/workflows/python.yml'] + pull_request: + paths: ['ggsql-python/**', '.github/workflows/python.yml'] + +jobs: 
+ test: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python: ['3.10', '3.11', '3.12', '3.13'] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + + - uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install tree-sitter-cli + run: npm install -g tree-sitter-cli + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Rust cache + uses: Swatinem/rust-cache@v2 + with: + workspaces: ggsql-python + shared-key: ${{ matrix.os }}-python + + - name: Build wheel + uses: PyO3/maturin-action@v1 + with: + working-directory: ggsql-python + command: build + args: --release + sccache: true + + - name: Install wheel and test dependencies + shell: bash + run: pip install --find-links target/wheels/ ggsql[test] + + - name: Run tests + shell: bash + run: pytest ggsql-python/tests/ -v + + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install tree-sitter-cli + run: npm install -g tree-sitter-cli + + - name: Check Rust formatting + run: cargo fmt --package ggsql-python -- --check + + - name: Clippy + run: cargo clippy --package ggsql-python -- -D warnings diff --git a/CLAUDE.md b/CLAUDE.md index 2499bee4..bdc93293 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -790,6 +790,63 @@ When running in Positron IDE, the extension provides enhanced functionality: --- +### 8. Python Bindings (`ggsql-python/`) + +**Responsibility**: Python bindings for ggsql, enabling Python users to render Altair charts using ggsql's VISUALISE syntax. + +**Features**: + +- PyO3-based Rust bindings compiled to a native Python extension +- Works with any narwhals-compatible DataFrame (polars, pandas, etc.) 
+- LazyFrames are collected automatically +- Returns native `altair.Chart` objects for easy display and customization +- Query splitting to separate SQL from VISUALISE portions + +**Installation**: + +```bash +# From source (requires Rust toolchain and maturin) +cd ggsql-python +pip install maturin +maturin develop +``` + +**API**: + +```python +import ggsql +import polars as pl + +# Split a ggSQL query into SQL and VISUALISE portions +sql, viz = ggsql.split_query(""" + SELECT date, revenue FROM sales + VISUALISE date AS x, revenue AS y + DRAW line +""") + +# Execute SQL and render to Altair chart +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) +chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + +# Display or save +chart.display() # In Jupyter +chart.save("chart.html") +``` + +**Functions**: + +- `split_query(query: str) -> tuple[str, str]` - Split ggSQL query into SQL and VISUALISE portions +- `render_altair(df, viz, **kwargs) -> altair.Chart` - Render DataFrame with VISUALISE spec to Altair chart + +**Dependencies**: + +- Python >= 3.10 +- altair >= 5.0 +- narwhals >= 2.15 +- polars >= 1.0 + +--- + ## Feature Flags and Build Configuration ggsql uses Cargo feature flags to enable optional functionality and reduce binary size. 
@@ -822,9 +879,9 @@ ggsql uses Cargo feature flags to enable optional functionality and reduce binar - Includes: `axum`, `tokio`, `tower-http`, `tracing`, `duckdb`, `vegalite` - Required for building `ggsql-rest` server -**Future features**: +**Python bindings**: -- `python` - Python bindings via PyO3 (planned) +- `ggsql-python` - Python bindings via PyO3 (separate crate, not a feature flag) ### Building with Custom Features @@ -850,7 +907,7 @@ cargo build --all-features - `postgres` → `postgres` crate (future) - `sqlite` → `rusqlite` crate (future) - `rest-api` → `axum`, `tokio`, `tower-http`, `tracing`, `tracing-subscriber` -- `python` → `pyo3` crate (future) +- `ggsql-python` → `pyo3`, `narwhals`, `altair` (separate workspace crate) --- diff --git a/Cargo.toml b/Cargo.toml index 54b426fb..cd5b672c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,13 @@ [workspace] members = [ + "tree-sitter-ggsql", + "src", + "ggsql-jupyter", + "ggsql-python" +] +# ggsql-python is excluded from default builds because it's a PyO3 extension +# that requires Python dev headers. Build it separately with maturin. 
+default-members = [ "tree-sitter-ggsql", "src", "ggsql-jupyter" @@ -21,7 +29,7 @@ tree-sitter = "0.25" csscolorparser = "0.8.1" # Data processing -polars = { version = "0.52", features = ["lazy", "sql"] } +polars = { version = "0.52", features = ["lazy", "sql", "ipc"] } # Readers duckdb = { version = "1.1", features = ["bundled"] } diff --git a/ggsql-python/.gitignore b/ggsql-python/.gitignore new file mode 100644 index 00000000..84e69a9d --- /dev/null +++ b/ggsql-python/.gitignore @@ -0,0 +1,3 @@ +.vscode/ + +uv.lock diff --git a/ggsql-python/Cargo.toml b/ggsql-python/Cargo.toml new file mode 100644 index 00000000..62229afd --- /dev/null +++ b/ggsql-python/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "ggsql-python" +version = "0.1.0" +edition = "2021" +license = "MIT" +description = "Python bindings for ggsql" + +[lib] +name = "_ggsql" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.26", features = ["extension-module"] } +polars = { workspace = true, features = ["ipc"] } +ggsql = { path = "../src", default-features = false, features = ["vegalite"] } + +[features] +default = [] diff --git a/ggsql-python/README.md b/ggsql-python/README.md new file mode 100644 index 00000000..0d97bbee --- /dev/null +++ b/ggsql-python/README.md @@ -0,0 +1,154 @@ +# ggsql + +Python bindings for [ggsql](https://github.com/georgestagg/ggsql), a SQL extension for declarative data visualization. + +This package provides a thin wrapper around the Rust `ggsql` crate, enabling Python users to render Altair charts from DataFrames using ggsql's VISUALISE syntax. 
+ +## Installation + +### From PyPI (when published) + +```bash +pip install ggsql +``` + +### From source + +Building from source requires: +- Rust toolchain (install via [rustup](https://rustup.rs/)) +- Python 3.10+ +- [maturin](https://github.com/PyO3/maturin) + +```bash +# Clone the monorepo +git clone https://github.com/georgestagg/ggsql.git +cd ggsql/ggsql-python + +# Create a virtual environment +python -m venv .venv +source .venv/bin/activate # or `.venv\Scripts\activate` on Windows + +# Install build dependencies +pip install maturin + +# Build and install in development mode +maturin develop + +# Or build a wheel +maturin build --release +pip install target/wheels/ggsql-*.whl +``` + +## Usage + +```python +import ggsql +import duckdb + +# Split a ggSQL query into SQL and VISUALISE portions +sql, viz = ggsql.split_query(""" + SELECT date, revenue, region FROM sales + WHERE year = 2024 + VISUALISE date AS x, revenue AS y, region AS color + DRAW line + LABEL title => 'Sales Trends' +""") + +# Execute SQL with DuckDB +df = duckdb.sql(sql).pl() + +# Render DataFrame + VISUALISE spec to Altair chart +chart = ggsql.render_altair(df, viz) + +# Display or save the chart +chart.display() # In Jupyter +chart.save("chart.html") # Save to file +``` + +### Mapping styles + +The `render_altair()` function supports various mapping styles: + +```python +df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30], "category": ["A", "B", "A"]}) + +# Explicit mapping +ggsql.render_altair(df, "VISUALISE x AS x, y AS y DRAW point") + +# Implicit mapping (column name = aesthetic name) +ggsql.render_altair(df, "VISUALISE x, y DRAW point") + +# Wildcard mapping (map all matching columns) +ggsql.render_altair(df, "VISUALISE * DRAW point") + +# With color encoding +ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") +``` + +## API + +### `split_query(query: str) -> tuple[str, str]` + +Split a ggSQL query into SQL and VISUALISE portions. 
+ +**Parameters:** +- `query`: The full ggSQL query string + +**Returns:** +- Tuple of `(sql_portion, visualise_portion)` + +**Raises:** +- `ValueError`: If the query cannot be parsed + +### `render_altair(df, viz, **kwargs) -> altair.Chart` + +Render a DataFrame with a VISUALISE specification to an Altair chart. + +**Parameters:** +- `df`: Any narwhals-compatible DataFrame (polars, pandas, etc.). LazyFrames are collected automatically. +- `viz`: The VISUALISE specification string +- `**kwargs`: Additional keyword arguments passed to `altair.Chart.from_json()`. Common options include `validate=False` to skip schema validation. + +**Returns:** +- An `altair.Chart` object that can be displayed, saved, or further customized + +**Raises:** +- `ValueError`: If the spec cannot be parsed or rendered + +## Development + +### Keeping in sync with the monorepo + +The `ggsql-python` package is part of the [ggsql monorepo](https://github.com/georgestagg/ggsql) and depends on the Rust `ggsql` crate via a path dependency. When the Rust crate is updated, you may need to rebuild: + +```bash +cd ggsql-python + +# Rebuild after Rust changes +maturin develop + +# If tree-sitter grammar changed, clean and rebuild +cd .. 
&& cargo clean -p tree-sitter-ggsql && cd ggsql-python +maturin develop +``` + +### Running tests + +```bash +# Install test dependencies +pip install pytest + +# Run all tests +pytest tests/ -v +``` + +## Requirements + +- Python >= 3.10 +- altair >= 5.0 +- narwhals >= 2.15 +- polars >= 1.0 + +## License + +MIT diff --git a/ggsql-python/pyproject.toml b/ggsql-python/pyproject.toml new file mode 100644 index 00000000..1a0ff8ef --- /dev/null +++ b/ggsql-python/pyproject.toml @@ -0,0 +1,36 @@ +[build-system] +requires = ["maturin>=1.4"] +build-backend = "maturin" + +[project] +name = "ggsql" +version = "0.1.0" +description = "SQL extension for declarative data visualization" +readme = "README.md" +requires-python = ">=3.10" +license = { text = "MIT" } +keywords = ["sql", "visualization", "vega-lite", "grammar-of-graphics"] +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", +] +dependencies = [ + "altair>=5.0", + "narwhals>=2.15.0", + "polars>=1.0", +] + +[project.optional-dependencies] +test = ["pytest>=7.0"] +dev = ["maturin>=1.4"] + +[tool.maturin] +features = ["pyo3/extension-module"] +python-source = "python" +module-name = "ggsql._ggsql" + +[dependency-groups] +dev = [ + "maturin>=1.11.5", + "pytest>=9.0.2", +] diff --git a/ggsql-python/python/ggsql/__init__.py b/ggsql-python/python/ggsql/__init__.py new file mode 100644 index 00000000..dbbb5afb --- /dev/null +++ b/ggsql-python/python/ggsql/__init__.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +import io +import json +from typing import Any, Union + +import altair +import narwhals as nw +from narwhals.typing import IntoFrame + +from ggsql._ggsql import split_query, render as _render + +__all__ = ["split_query", "render_altair"] +__version__ = "0.1.0" + +# Type alias for any Altair chart type +AltairChart = Union[ + altair.Chart, + altair.LayerChart, + altair.FacetChart, + altair.ConcatChart, + altair.HConcatChart, + 
altair.VConcatChart, + altair.RepeatChart, +] + + +def render_altair( + df: IntoFrame, + viz: str, + **kwargs: Any, +) -> AltairChart: + """Render a DataFrame with a VISUALISE spec to an Altair chart. + + Parameters + ---------- + df + Data to visualize. Accepts polars, pandas, or any narwhals-compatible + DataFrame. LazyFrames are collected automatically. + viz + VISUALISE spec string (e.g., "VISUALISE x, y DRAW point") + **kwargs + Additional keyword arguments passed to `from_json()`. + Common options include `validate=False` to skip schema validation. + + Returns + ------- + AltairChart + An Altair chart object (Chart, LayerChart, FacetChart, etc.). + """ + df = nw.from_native(df, pass_through=True) + + if isinstance(df, nw.LazyFrame): + df = df.collect() + + if not isinstance(df, nw.DataFrame): + raise TypeError("df must be a narwhals DataFrame or compatible type") + + # Convert to polars and serialize to IPC bytes + pl_df = df.to_polars() + buffer = io.BytesIO() + pl_df.write_ipc(buffer) + ipc_bytes = buffer.getvalue() + + vegalite_json = _render(ipc_bytes, viz, writer="vegalite") + + # Parse to determine the correct Altair class + spec = json.loads(vegalite_json) + + # Determine the correct Altair class based on spec structure + if "layer" in spec: + return altair.LayerChart.from_json(vegalite_json, **kwargs) + elif "facet" in spec or "spec" in spec: + return altair.FacetChart.from_json(vegalite_json, **kwargs) + elif "concat" in spec: + return altair.ConcatChart.from_json(vegalite_json, **kwargs) + elif "hconcat" in spec: + return altair.HConcatChart.from_json(vegalite_json, **kwargs) + elif "vconcat" in spec: + return altair.VConcatChart.from_json(vegalite_json, **kwargs) + elif "repeat" in spec: + return altair.RepeatChart.from_json(vegalite_json, **kwargs) + else: + return altair.Chart.from_json(vegalite_json, **kwargs) diff --git a/ggsql-python/python/ggsql/py.typed b/ggsql-python/python/ggsql/py.typed new file mode 100644 index 00000000..93766668 --- 
/dev/null +++ b/ggsql-python/python/ggsql/py.typed @@ -0,0 +1 @@ +# PEP 561 marker file diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs new file mode 100644 index 00000000..7c472c35 --- /dev/null +++ b/ggsql-python/src/lib.rs @@ -0,0 +1,102 @@ +// Allow useless_conversion due to false positive from pyo3 macro expansion +// See: https://github.com/PyO3/pyo3/issues/4327 +#![allow(clippy::useless_conversion)] + +use pyo3::prelude::*; +use pyo3::types::PyBytes; +use std::collections::{HashMap, HashSet}; +use std::io::Cursor; + +use ggsql::naming::GLOBAL_DATA_KEY; +use ggsql::parser::parse_query; +use ggsql::writer::{VegaLiteWriter, Writer}; +use ggsql::AestheticValue; + +use polars::prelude::{DataFrame, IpcReader, SerReader}; + +#[pyfunction] +fn split_query(query: &str) -> PyResult<(String, String)> { + ggsql::parser::split_query(query) + .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string())) +} + +#[pyfunction] +#[pyo3(signature = (ipc_bytes, viz, *, writer = "vegalite"))] +fn render(ipc_bytes: &Bound<'_, PyBytes>, viz: &str, writer: &str) -> PyResult<String> { + // Read DataFrame from IPC bytes + let bytes = ipc_bytes.as_bytes(); + let cursor = Cursor::new(bytes); + let df: DataFrame = IpcReader::new(cursor).finish().map_err(|e| { + PyErr::new::<pyo3::exceptions::PyValueError, _>(format!("Failed to read IPC data: {}", e)) + })?; + + // Parse the visualization spec + // The viz string should be a complete VISUALISE statement + let specs = parse_query(viz) + .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string()))?; + + let mut spec = specs.into_iter().next().ok_or_else(|| { + PyErr::new::<pyo3::exceptions::PyValueError, _>("No visualization spec found") + })?; + + // Get column names for resolving global mappings + let column_names: HashSet<&str> = df.get_column_names().iter().map(|s| s.as_str()).collect(); + + // Merge global mappings into layers and handle wildcards + for layer in &mut spec.layers { + let supported_aesthetics = layer.geom.aesthetics().supported; + + // 1. 
Merge explicit global aesthetics into layer (layer takes precedence) + for (aesthetic, value) in &spec.global_mappings.aesthetics { + if supported_aesthetics.contains(&aesthetic.as_str()) { + layer + .mappings + .aesthetics + .entry(aesthetic.clone()) + .or_insert_with(|| value.clone()); + } + } + + // 2. Handle wildcard expansion: map columns to aesthetics with matching names + let has_wildcard = layer.mappings.wildcard || spec.global_mappings.wildcard; + if has_wildcard { + for &aes in supported_aesthetics { + // Only create mapping if column exists in the dataframe + if column_names.contains(aes) { + layer + .mappings + .aesthetics + .entry(aes.to_string()) + .or_insert_with(|| AestheticValue::standard_column(aes)); + } + } + } + } + + // Compute aesthetic labels from column names + spec.compute_aesthetic_labels(); + + // Create data map with the DataFrame as global data + let mut data_map: HashMap<String, DataFrame> = HashMap::new(); + data_map.insert(GLOBAL_DATA_KEY.to_string(), df); + + // Write using the specified writer + match writer { + "vegalite" => { + let w = VegaLiteWriter::new(); + w.write(&spec, &data_map) + .map_err(|e| PyErr::new::<pyo3::exceptions::PyValueError, _>(e.to_string())) + } + _ => Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(format!( + "Unknown writer: {}", + writer + ))), + } +} + +#[pymodule] +fn _ggsql(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(split_query, m)?)?; + m.add_function(wrap_pyfunction!(render, m)?)?; + Ok(()) +} diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py new file mode 100644 index 00000000..9df03779 --- /dev/null +++ b/ggsql-python/tests/test_ggsql.py @@ -0,0 +1,159 @@ +"""Tests for ggsql Python bindings. + +These tests focus on Python-specific logic: +- DataFrame conversion via narwhals +- Return type handling + +Rust logic (parsing, Vega-Lite generation) is tested in the Rust test suite. 
+""" + +import pytest +import polars as pl +import altair + +import ggsql + + +class TestSplitQuery: + """Tests for split_query() function.""" + + def test_splits_sql_and_visualise(self): + sql, viz = ggsql.split_query( + "SELECT x, y FROM data VISUALISE x, y DRAW point" + ) + assert "SELECT" in sql + assert "VISUALISE" in viz + + def test_no_visualise_returns_empty_viz(self): + sql, viz = ggsql.split_query("SELECT * FROM data") + assert sql == "SELECT * FROM data" + assert viz == "" + + +class TestRenderAltairDataFrameConversion: + """Tests for DataFrame handling in render_altair().""" + + def test_accepts_polars_dataframe(self): + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + assert isinstance(chart, altair.TopLevelMixin) + + def test_accepts_polars_lazyframe(self): + lf = pl.LazyFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + chart = ggsql.render_altair(lf, "VISUALISE x, y DRAW point") + assert isinstance(chart, altair.TopLevelMixin) + + def test_accepts_narwhals_dataframe(self): + import narwhals as nw + + pl_df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + nw_df = nw.from_native(pl_df) + + chart = ggsql.render_altair(nw_df, "VISUALISE x, y DRAW point") + assert isinstance(chart, altair.TopLevelMixin) + + def test_accepts_pandas_dataframe(self): + pd = pytest.importorskip("pandas") + + pd_df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + chart = ggsql.render_altair(pd_df, "VISUALISE x, y DRAW point") + assert isinstance(chart, altair.TopLevelMixin) + + def test_rejects_invalid_dataframe_type(self): + with pytest.raises(TypeError, match="must be a narwhals DataFrame"): + ggsql.render_altair({"x": [1, 2, 3]}, "VISUALISE x, y DRAW point") + + +class TestRenderAltairReturnType: + """Tests for render_altair() return type.""" + + def test_returns_altair_chart(self): + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + chart = ggsql.render_altair(df, "VISUALISE x, y DRAW 
point") + assert isinstance(chart, altair.TopLevelMixin) + + def test_chart_has_data(self): + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + spec = chart.to_dict() + # Data should be embedded in datasets + assert "datasets" in spec + + def test_chart_can_be_serialized(self): + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + # Should not raise + json_str = chart.to_json() + assert len(json_str) > 0 + + +class TestRenderAltairChartTypeDetection: + """Tests for correct Altair chart type detection based on spec structure.""" + + def test_simple_chart_returns_layer_chart(self): + """Simple DRAW specs produce LayerChart (ggsql always wraps in layer).""" + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + # ggsql wraps all charts in a layer + assert isinstance(chart, altair.LayerChart) + + def test_layered_chart_can_round_trip(self): + """LayerChart can be converted to dict and back.""" + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + chart = ggsql.render_altair(df, "VISUALISE x, y DRAW point") + + # Convert to dict and back + spec = chart.to_dict() + assert "layer" in spec + + # Should be able to recreate from dict + recreated = altair.LayerChart.from_dict(spec) + assert isinstance(recreated, altair.LayerChart) + + def test_faceted_chart_returns_facet_chart(self): + """FACET WRAP specs produce FacetChart.""" + df = pl.DataFrame({ + "x": [1, 2, 3, 4, 5, 6], + "y": [10, 20, 30, 40, 50, 60], + "group": ["A", "A", "A", "B", "B", "B"], + }) + # Need validate=False because ggsql produces v6 specs + chart = ggsql.render_altair(df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False) + assert isinstance(chart, altair.FacetChart) + + def test_faceted_chart_can_round_trip(self): + """FacetChart can be converted to dict and back.""" + df = 
pl.DataFrame({ + "x": [1, 2, 3, 4, 5, 6], + "y": [10, 20, 30, 40, 50, 60], + "group": ["A", "A", "A", "B", "B", "B"], + }) + chart = ggsql.render_altair(df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False) + + # Convert to dict (skip validation for ggsql specs) + spec = chart.to_dict(validate=False) + assert "facet" in spec or "spec" in spec + + # Should be able to recreate from dict (with validation disabled) + recreated = altair.FacetChart.from_dict(spec, validate=False) + assert isinstance(recreated, altair.FacetChart) + + def test_chart_with_color_encoding(self): + """Charts with color encoding still return correct type.""" + df = pl.DataFrame({ + "x": [1, 2, 3, 4], + "y": [10, 20, 30, 40], + "category": ["A", "B", "A", "B"], + }) + chart = ggsql.render_altair(df, "VISUALISE x, y, category AS color DRAW point") + # Should still be a LayerChart (ggsql wraps in layer) + assert isinstance(chart, altair.LayerChart) + + +class TestRenderAltairErrorHandling: + """Tests for error handling in render_altair().""" + + def test_invalid_viz_raises(self): + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + with pytest.raises(ValueError): + ggsql.render_altair(df, "NOT VALID SYNTAX") diff --git a/tree-sitter-ggsql/bindings/rust/build.rs b/tree-sitter-ggsql/bindings/rust/build.rs index abc2f3a5..6072306f 100644 --- a/tree-sitter-ggsql/bindings/rust/build.rs +++ b/tree-sitter-ggsql/bindings/rust/build.rs @@ -1,38 +1,109 @@ use std::path::PathBuf; use std::process::Command; -fn main() { - let check = Command::new("tree-sitter").arg("--version").output(); - match check { - Ok(output) if output.status.success() => {} - _ => { - println!("tree-sitter-cli not found. 
Attempting to install..."); - let installation = Command::new("npm") - .args(["install", "-g", "tree-sitter-cli"]) - .status(); - - match installation { - Ok(installation) if installation.success() => {} - _ => { - eprintln!("Failed to install tree-sitter-cli.") +/// Find tree-sitter executable, checking PATH and common npm global locations +fn find_tree_sitter() -> Option<PathBuf> { + // First, check if tree-sitter is in PATH + if Command::new("tree-sitter") + .arg("--version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false) + { + return Some(PathBuf::from("tree-sitter")); + } + + // On Windows, check common npm global install locations + #[cfg(windows)] + { + // Check C:\npm\prefix (GitHub Actions setup-node location) + for ext in &["cmd", "ps1", "exe"] { + let npm_path = PathBuf::from(r"C:\npm\prefix").join(format!("tree-sitter.{}", ext)); + if npm_path.exists() { + return Some(npm_path); + } + } + + if let Some(appdata) = std::env::var_os("APPDATA") { + for ext in &["cmd", "ps1", "exe"] { + let npm_path = PathBuf::from(&appdata) + .join("npm") + .join(format!("tree-sitter.{}", ext)); + if npm_path.exists() { + return Some(npm_path); + } + } + } + + // Also check USERPROFILE\AppData\Roaming\npm + if let Some(userprofile) = std::env::var_os("USERPROFILE") { + for ext in &["cmd", "ps1", "exe"] { + let npm_path = PathBuf::from(&userprofile) + .join("AppData") + .join("Roaming") + .join("npm") + .join(format!("tree-sitter.{}", ext)); + if npm_path.exists() { + return Some(npm_path); } } } } - - let regenerate = Command::new("tree-sitter").arg("generate").status(); + None +} - match regenerate { - Ok(regenerate) if regenerate.success() => {} - _ => { - eprintln!("Failed to regenerate tree sitter grammar."); - } +fn run_tree_sitter( + tree_sitter: &PathBuf, + grammar_dir: &PathBuf, +) -> std::io::Result<std::process::ExitStatus> { + // Check if this is a PowerShell script + let ext = tree_sitter + .extension() + .and_then(|e| e.to_str()) + .unwrap_or(""); + + if ext == "ps1" { + // Run 
PowerShell scripts through powershell.exe + Command::new("powershell") + .args(["-ExecutionPolicy", "Bypass", "-File"]) + .arg(tree_sitter) + .arg("generate") + .current_dir(grammar_dir) + .status() + } else { + // Run cmd/exe directly + Command::new(tree_sitter) + .arg("generate") + .current_dir(grammar_dir) + .status() } +} + +fn main() { + // CARGO_MANIFEST_DIR points to tree-sitter-ggsql/ where Cargo.toml and grammar.js live + let grammar_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let src_dir = grammar_dir.join("src"); - let dir: PathBuf = ["src"].iter().collect(); + let tree_sitter = find_tree_sitter().unwrap_or_else(|| { + panic!("tree-sitter-cli not found. Please install it: npm install -g tree-sitter-cli"); + }); + + let generate_result = run_tree_sitter(&tree_sitter, &grammar_dir); + + match generate_result { + Ok(status) if status.success() => {} + Ok(status) => { + panic!("tree-sitter generate failed with status: {}", status); + } + Err(e) => { + panic!("Failed to run tree-sitter generate: {}", e); + } + } + // The generated files are in the grammar_dir/src directory cc::Build::new() - .include(&dir) - .file(dir.join("parser.c")) + .include(&src_dir) + .file(src_dir.join("parser.c")) .compile("tree-sitter-ggsql"); }