From e94b8bd177f670a69ee9bc45bee2ebd0bb17f27e Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 2 Mar 2026 16:13:24 -0600 Subject: [PATCH 1/4] feat(python): add type stubs for native _ggsql module Co-Authored-By: Claude Opus 4.6 --- ggsql-python/python/ggsql/_ggsql.pyi | 413 +++++++++++++++++++++++++++ ggsql-python/tests/test_ggsql.py | 37 +++ 2 files changed, 450 insertions(+) create mode 100644 ggsql-python/python/ggsql/_ggsql.pyi diff --git a/ggsql-python/python/ggsql/_ggsql.pyi b/ggsql-python/python/ggsql/_ggsql.pyi new file mode 100644 index 0000000..31b602d --- /dev/null +++ b/ggsql-python/python/ggsql/_ggsql.pyi @@ -0,0 +1,413 @@ +"""Type stubs for the ggsql native module (_ggsql).""" + +from __future__ import annotations + +import polars as pl + +# --------------------------------------------------------------------------- +# Exceptions (all subclass ValueError for backwards compatibility) +# --------------------------------------------------------------------------- + +class ParseError(ValueError): ... +class ValidationError(ValueError): ... +class ReaderError(ValueError): ... +class WriterError(ValueError): ... + +# --------------------------------------------------------------------------- +# DuckDBReader +# --------------------------------------------------------------------------- + +class DuckDBReader: + """DuckDB database reader for executing SQL queries. + + Creates an in-memory or file-based DuckDB connection that can execute + SQL queries and register DataFrames as queryable tables. + + Parameters + ---------- + connection + DuckDB connection string. Use ``"duckdb://memory"`` for in-memory + database or ``"duckdb://path/to/file.db"`` for file-based database. + + Raises + ------ + ValueError + If the connection string is invalid or the database cannot be opened. + """ + + def __init__(self, connection: str) -> None: ... + def execute_sql(self, sql: str) -> pl.DataFrame: + """Execute a SQL query and return results as a polars DataFrame. + + Parameters + ---------- + sql + The SQL query to execute. + + Returns + ------- + polars.DataFrame + The query result as a polars DataFrame. + + Raises + ------ + ValueError + If the SQL is invalid or execution fails. + """ + ... + + def register( + self, name: str, df: pl.DataFrame, replace: bool = False + ) -> None: + """Register a polars DataFrame as a named table. + + After registration the DataFrame can be queried by name in SQL. + + Parameters + ---------- + name + The table name to register under. + df + The DataFrame to register. Must be a polars DataFrame. + replace + Whether to replace an existing table with the same name. + + Raises + ------ + ValueError + If registration fails or the table name is invalid. + """ + ... + + def unregister(self, name: str) -> None: + """Unregister a previously registered table. + + Parameters + ---------- + name + The table name to unregister. + + Raises + ------ + ValueError + If the table was not registered or unregistration fails. + """ + ... + + def execute(self, query: str) -> Spec: + """Execute a ggsql query and return the visualization specification. + + This is the main entry point for creating visualizations. It parses + the query, executes the SQL portion, and returns a ``Spec`` ready + for rendering. + + Parameters + ---------- + query + The ggsql query (SQL + VISUALISE clause). + + Returns + ------- + Spec + The resolved visualization specification ready for rendering. + + Raises + ------ + ValueError + If the query syntax is invalid, has no VISUALISE clause, or + SQL execution fails. + """ + ... + +# --------------------------------------------------------------------------- +# VegaLiteWriter +# --------------------------------------------------------------------------- + +class VegaLiteWriter: + """Vega-Lite v6 JSON output writer. + + Converts visualization specifications to Vega-Lite v6 JSON. + """ + + def __init__(self) -> None: ... + def render(self, spec: Spec) -> str: + """Render a Spec to a Vega-Lite JSON string. + + Parameters + ---------- + spec + The visualization specification from ``reader.execute()``. + + Returns + ------- + str + The Vega-Lite JSON string. + + Raises + ------ + ValueError + If rendering fails. + """ + ... + +# --------------------------------------------------------------------------- +# Validated +# --------------------------------------------------------------------------- + +class Validated: + """Result of ``validate()`` — query inspection without SQL execution. + + Contains information about query structure and any validation + errors/warnings. + """ + + def has_visual(self) -> bool: + """Whether the query contains a VISUALISE clause. + + Returns + ------- + bool + ``True`` if the query has a VISUALISE clause. + """ + ... + + def sql(self) -> str: + """The SQL portion (before VISUALISE). + + Returns + ------- + str + The SQL part of the query. + """ + ... + + def visual(self) -> str: + """The VISUALISE portion (raw text). + + Returns + ------- + str + The VISUALISE part of the query. + """ + ... + + def valid(self) -> bool: + """Whether the query is valid (no errors). + + Returns + ------- + bool + ``True`` if the query is syntactically and semantically valid. + """ + ... + + def errors(self) -> list[dict[str, object]]: + """Validation errors (fatal issues). + + Returns + ------- + list[dict] + List of error dictionaries with ``"message"`` (str) and + ``"location"`` (``{"line": int, "column": int}`` or ``None``) + keys. + """ + ... + + def warnings(self) -> list[dict[str, object]]: + """Validation warnings (non-fatal issues). + + Returns + ------- + list[dict] + List of warning dictionaries with ``"message"`` (str) and + ``"location"`` (``{"line": int, "column": int}`` or ``None``) + keys. + """ + ... + +# --------------------------------------------------------------------------- +# Spec +# --------------------------------------------------------------------------- + +class Spec: + """Result of ``reader.execute()`` — resolved visualization spec. + + Contains the resolved plot specification, data, and metadata. + Use ``writer.render(spec)`` to generate output. + """ + + def metadata(self) -> dict[str, object]: + """Get visualization metadata. + + Returns + ------- + dict + Dictionary with ``"rows"`` (int), ``"columns"`` (list[str]), + and ``"layer_count"`` (int) keys. + """ + ... + + def sql(self) -> str: + """The main SQL query that was executed. + + Returns + ------- + str + The SQL query string. + """ + ... + + def visual(self) -> str: + """The VISUALISE portion (raw text). + + Returns + ------- + str + The VISUALISE clause text. + """ + ... + + def layer_count(self) -> int: + """Number of DRAW layers. + + Returns + ------- + int + The number of DRAW clauses in the visualization. + """ + ... + + def data(self) -> pl.DataFrame | None: + """Main query result DataFrame. + + Returns + ------- + polars.DataFrame or None + The main query result DataFrame, or ``None`` if not available. + """ + ... + + def layer_data(self, index: int) -> pl.DataFrame | None: + """Layer-specific DataFrame (from FILTER or FROM clause). + + Parameters + ---------- + index + The layer index (0-based). + + Returns + ------- + polars.DataFrame or None + The layer-specific DataFrame, or ``None`` if the layer uses + global data. + """ + ... + + def stat_data(self, index: int) -> pl.DataFrame | None: + """Statistical transform DataFrame. + + Parameters + ---------- + index + The layer index (0-based). + + Returns + ------- + polars.DataFrame or None + The stat transform DataFrame, or ``None`` if no stat transform. + """ + ... + + def layer_sql(self, index: int) -> str | None: + """Layer filter/source query. + + Parameters + ---------- + index + The layer index (0-based). + + Returns + ------- + str or None + The filter SQL query, or ``None`` if the layer uses global data. + """ + ... + + def stat_sql(self, index: int) -> str | None: + """Stat transform query. + + Parameters + ---------- + index + The layer index (0-based). + + Returns + ------- + str or None + The stat transform SQL query, or ``None`` if no stat transform. + """ + ... + + def warnings(self) -> list[dict[str, object]]: + """Validation warnings from preparation. + + Returns + ------- + list[dict] + List of warning dictionaries with ``"message"`` (str) and + ``"location"`` (``{"line": int, "column": int}`` or ``None``) + keys. + """ + ... + +# --------------------------------------------------------------------------- +# Module-level functions +# --------------------------------------------------------------------------- + +def validate(query: str) -> Validated: + """Validate query syntax and semantics without executing SQL. + + Parameters + ---------- + query + The ggsql query to validate. + + Returns + ------- + Validated + Validation result with query inspection methods. + + Raises + ------ + ValueError + If validation fails unexpectedly (syntax errors are captured in + the returned ``Validated`` object, not raised). + """ + ... + +def execute(query: str, reader: object) -> Spec: + """Execute a ggsql query with a reader (native or custom Python object). + + This is a convenience function for custom readers. For native readers, + prefer using ``reader.execute()`` directly. + + Parameters + ---------- + query + The ggsql query to execute. + reader + The database reader to execute SQL against. Can be a native + ``DuckDBReader`` for optimal performance, or any Python object with + an ``execute_sql(sql: str) -> polars.DataFrame`` method. + + Returns + ------- + Spec + The resolved visualization specification ready for rendering. + + Raises + ------ + ValueError + If parsing, validation, or SQL execution fails. + """ + ... diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 962a628..41ac270 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -654,3 +654,40 @@ def test_render_chart_facet(self): writer = ggsql.VegaLiteWriter() chart = writer.render_chart(spec, validate=False) assert isinstance(chart, altair.FacetChart) + + +class TestTypeStubs: + """Tests for type stub presence and correctness.""" + + def test_stub_file_exists(self): + """Type stub file exists for the native module.""" + import pathlib + + assert ggsql.__file__ is not None + ggsql_dir = pathlib.Path(ggsql.__file__).parent + stub_path = ggsql_dir / "_ggsql.pyi" + assert stub_path.exists(), f"Type stub not found at {stub_path}" + + def test_stub_exports_match_module(self): + """All public names from _ggsql are in the stub.""" + import pathlib + + assert ggsql.__file__ is not None + ggsql_dir = pathlib.Path(ggsql.__file__).parent + stub_path = ggsql_dir / "_ggsql.pyi" + stub_text = stub_path.read_text() + + # Core classes and functions should be in the stub + for name in [ + "DuckDBReader", + "VegaLiteWriter", + "Validated", + "Spec", + "validate", + "execute", + "ParseError", + "ValidationError", + "ReaderError", + "WriterError", + ]: + assert name in stub_text, f"{name} not found in type stub" From 7a90164e9ab67d0fcf9db0afaa61ef2b15a0f948 Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 2 Mar 2026 16:20:32 -0600 Subject: [PATCH 2/4] feat(python): support data= dict parameter on execute() for inline DataFrames Allows passing a dict of DataFrames to reader.execute() and the module-level execute() function, which are registered before query execution and unregistered afterward (cleanup happens even on error). Co-Authored-By: Claude Opus 4.6 --- ggsql-python/python/ggsql/_ggsql.pyi | 22 ++++- ggsql-python/src/lib.rs | 122 +++++++++++++++++++++++++-- ggsql-python/tests/test_ggsql.py | 77 +++++++++++++++++ 3 files changed, 210 insertions(+), 11 deletions(-) diff --git a/ggsql-python/python/ggsql/_ggsql.pyi b/ggsql-python/python/ggsql/_ggsql.pyi index 31b602d..ba53009 100644 --- a/ggsql-python/python/ggsql/_ggsql.pyi +++ b/ggsql-python/python/ggsql/_ggsql.pyi @@ -94,7 +94,12 @@ class DuckDBReader: """ ... - def execute(self, query: str) -> Spec: + def execute( + self, + query: str, + *, + data: dict[str, pl.DataFrame] | None = None, + ) -> Spec: """Execute a ggsql query and return the visualization specification. This is the main entry point for creating visualizations. It parses @@ -105,6 +110,10 @@ class DuckDBReader: ---------- query The ggsql query (SQL + VISUALISE clause). + data + Optional dictionary mapping table names to DataFrames. Tables are + registered before execution and unregistered afterward (even on + error). Returns ------- @@ -385,7 +394,12 @@ def validate(query: str) -> Validated: """ ... -def execute(query: str, reader: object) -> Spec: +def execute( + query: str, + reader: object, + *, + data: dict[str, pl.DataFrame] | None = None, +) -> Spec: """Execute a ggsql query with a reader (native or custom Python object). This is a convenience function for custom readers. For native readers, @@ -399,6 +413,10 @@ def execute(query: str, reader: object) -> Spec: The database reader to execute SQL against. Can be a native ``DuckDBReader`` for optimal performance, or any Python object with an ``execute_sql(sql: str) -> polars.DataFrame`` method. + data + Optional dictionary mapping table names to DataFrames. Tables are + registered before execution and unregistered afterward (even on + error). Returns ------- diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 91710c2..68b333c 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -352,6 +352,9 @@ impl PyDuckDBReader { /// ---------- /// query : str /// The ggsql query (SQL + VISUALISE clause). + /// data : dict[str, polars.DataFrame] | None + /// Optional dictionary mapping table names to DataFrames. Tables are + /// registered before execution and unregistered afterward (even on error). /// /// Returns /// ------- @@ -369,11 +372,48 @@ impl PyDuckDBReader { /// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") /// >>> writer = VegaLiteWriter() /// >>> json_output = writer.render(spec) - fn execute(&self, query: &str) -> PyResult { - self.inner + #[pyo3(signature = (query, *, data=None))] + fn execute(&self, py: Python<'_>, query: &str, data: Option<&Bound<'_, PyDict>>) -> PyResult { + // Register DataFrames from data dict + let registered_names = if let Some(data_dict) = data { + self.register_data_dict(py, data_dict)? + } else { + vec![] + }; + + // Execute query (capture result, don't return early) + let result = self.inner .execute(query) .map(|s| PySpec { inner: s }) - .map_err(ggsql_err_to_py) + .map_err(ggsql_err_to_py); + + // Cleanup: unregister temporary tables (even on error) + for name in ®istered_names { + let _ = self.inner.unregister(name); + } + + result + } +} + +impl PyDuckDBReader { + /// Register DataFrames from a Python dict. Returns list of registered names for cleanup. + /// This is a private Rust helper, not exposed to Python. + fn register_data_dict( + &self, + py: Python<'_>, + data: &Bound<'_, PyDict>, + ) -> PyResult> { + let mut names = Vec::new(); + for (key, value) in data.iter() { + let name: String = key.extract()?; + let df = py_to_polars(py, &value)?; + self.inner + .register(&name, df, true) + .map_err(ggsql_err_to_py)?; + names.push(name); + } + Ok(names) } } @@ -741,6 +781,9 @@ fn validate(query: &str) -> PyResult { /// The database reader to execute SQL against. Can be a native Reader /// for optimal performance, or any Python object with an /// `execute_sql(sql: str) -> polars.DataFrame` method. +/// data : dict[str, polars.DataFrame] | None +/// Optional dictionary mapping table names to DataFrames. Tables are +/// registered before execution and unregistered afterward (even on error). /// /// Returns /// ------- @@ -767,19 +810,80 @@ fn validate(query: &str) -> PyResult { /// >>> reader = MyReader() /// >>> spec = execute("SELECT * FROM data VISUALISE x, y DRAW point", reader) #[pyfunction] -fn execute(query: &str, reader: &Bound<'_, PyAny>) -> PyResult { - // Fast path: try all known native reader types - // Add new native readers to this list as they're implemented - try_native_readers!(query, reader, PyDuckDBReader); +#[pyo3(signature = (query, reader, *, data=None))] +fn execute(py: Python<'_>, query: &str, reader: &Bound<'_, PyAny>, data: Option<&Bound<'_, PyDict>>) -> PyResult { + // Native reader fast path: DuckDBReader + // Note: we can't use the try_native_readers! macro here because it uses `return` + // which would skip cleanup of registered tables. + if let Ok(native) = reader.downcast::() { + // Register DataFrames if provided + let registered_names = if let Some(data_dict) = data { + native.borrow().register_data_dict(py, data_dict)? + } else { + vec![] + }; + + // Execute (capture result for cleanup) + let result = native.borrow().inner.execute(query) + .map(|s| PySpec { inner: s }) + .map_err(ggsql_err_to_py); + + // Cleanup: unregister temporary tables (even on error) + for name in ®istered_names { + let _ = native.borrow().inner.unregister(name); + } + + return result; + } // Bridge path: wrap Python object as Reader + // Register DataFrames if provided + let registered_names = if let Some(data_dict) = data { + register_data_on_reader(py, reader, data_dict)? + } else { + vec![] + }; + let bridge = PyReaderBridge { obj: reader.clone().unbind(), }; - bridge + let result = bridge .execute(query) .map(|s| PySpec { inner: s }) - .map_err(ggsql_err_to_py) + .map_err(ggsql_err_to_py); + + // Cleanup for bridge path + for name in ®istered_names { + let _ = call_unregister(py, reader, name); + } + + result +} + +/// Register DataFrames from a Python dict onto a Python reader object. +/// Returns list of registered names for cleanup. +fn register_data_on_reader( + py: Python<'_>, + reader: &Bound<'_, PyAny>, + data: &Bound<'_, PyDict>, +) -> PyResult> { + let mut names = Vec::new(); + for (key, value) in data.iter() { + let name: String = key.extract()?; + let df = py_to_polars(py, &value)?; + let py_df = polars_to_py(py, &df)?; + reader.call_method("register", (&name, py_df, true), None)?; + names.push(name); + } + Ok(names) +} + +/// Call unregister on a reader if the method exists. +fn call_unregister(_py: Python<'_>, reader: &Bound<'_, PyAny>, name: &str) -> PyResult<()> { + if reader.hasattr("unregister")? { + reader.call_method1("unregister", (name,))?; + } + Ok(()) } // ============================================================================ diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 41ac270..1f2089e 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -656,6 +656,83 @@ def test_render_chart_facet(self): assert isinstance(chart, altair.FacetChart) +class TestExecuteWithData: + """Tests for reader.execute() with data= parameter.""" + + def test_execute_with_single_dataframe(self): + """Can pass a single DataFrame via data dict.""" + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + spec = reader.execute( + "SELECT * FROM mydata VISUALISE x, y DRAW point", + data={"mydata": df}, + ) + assert spec.metadata()["rows"] == 3 + + def test_execute_with_multiple_dataframes(self): + """Can pass multiple DataFrames via data dict.""" + reader = ggsql.DuckDBReader("duckdb://memory") + df1 = pl.DataFrame({"id": [1, 2, 3], "y": [10, 20, 30]}) + df2 = pl.DataFrame({"id": [2, 3], "category": ["A", "B"]}) + spec = reader.execute( + "SELECT t1.id AS x, t1.y FROM t1 JOIN t2 ON t1.id = t2.id " + "VISUALISE x, y DRAW point", + data={"t1": df1, "t2": df2}, + ) + assert spec.metadata()["rows"] == 2 + + def test_execute_with_data_cleans_up(self): + """DataFrames passed via data= are unregistered after execution.""" + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + reader.execute( + "SELECT * FROM temp VISUALISE x, y DRAW point", + data={"temp": df}, + ) + # Table should be cleaned up — querying it should fail + with pytest.raises((ggsql.ReaderError, ValueError)): + reader.execute_sql("SELECT * FROM temp") + + def test_execute_with_data_cleans_up_on_error(self): + """DataFrames are unregistered even if execution fails.""" + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + with pytest.raises((ggsql.ParseError, ggsql.ValidationError, ValueError)): + reader.execute( + "SELECT * FROM temp VISUALISE DRAW not_a_geom", + data={"temp": df}, + ) + # Table should still be cleaned up + with pytest.raises((ggsql.ReaderError, ValueError)): + reader.execute_sql("SELECT * FROM temp") + + def test_execute_without_data_still_works(self): + """Calling execute() without data= still works as before.""" + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point") + assert spec.metadata()["rows"] == 1 + + def test_execute_with_empty_data(self): + """Passing empty data= dict works fine.""" + reader = ggsql.DuckDBReader("duckdb://memory") + spec = reader.execute( + "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", + data={}, + ) + assert spec.metadata()["rows"] == 1 + + def test_module_execute_with_data(self): + """Module-level execute() also supports data= parameter.""" + reader = ggsql.DuckDBReader("duckdb://memory") + df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) + spec = ggsql.execute( + "SELECT * FROM mydata VISUALISE x, y DRAW point", + reader, + data={"mydata": df}, + ) + assert spec.metadata()["rows"] == 3 + + class TestTypeStubs: """Tests for type stub presence and correctness.""" From 16ef14b684471e29beb3d3583e3615db4e93401e Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 9 Mar 2026 15:40:31 -0500 Subject: [PATCH 3/4] fix(python): improve type stubs, data= cleanup safety, and exception specificity - Update type stubs to document typed exceptions (ParseError, ReaderError, etc.) instead of generic ValueError - Guard data= cleanup against destroying pre-existing tables by checking table existence before registration - Pass replace as keyword arg in bridge register_data_on_reader for compatibility with custom readers that omit the parameter - Tighten test assertions to expect specific exception types - Remove unused try_native_readers! macro Co-Authored-By: Claude Opus 4.6 --- ggsql-python/python/ggsql/_ggsql.pyi | 29 +++++++++------ ggsql-python/src/lib.rs | 53 ++++++++++++++++------------ ggsql-python/tests/test_ggsql.py | 24 +++++++++++-- 3 files changed, 69 insertions(+), 37 deletions(-) diff --git a/ggsql-python/python/ggsql/_ggsql.pyi b/ggsql-python/python/ggsql/_ggsql.pyi index ba53009..756a287 100644 --- a/ggsql-python/python/ggsql/_ggsql.pyi +++ b/ggsql-python/python/ggsql/_ggsql.pyi @@ -31,7 +31,7 @@ class DuckDBReader: Raises ------ - ValueError + ReaderError If the connection string is invalid or the database cannot be opened. """ @@ -51,7 +51,7 @@ class DuckDBReader: Raises ------ - ValueError + ReaderError If the SQL is invalid or execution fails. """ ... @@ -74,7 +74,7 @@ class DuckDBReader: Raises ------ - ValueError + ReaderError If registration fails or the table name is invalid. """ ... @@ -89,7 +89,7 @@ class DuckDBReader: Raises ------ - ValueError + ReaderError If the table was not registered or unregistration fails. """ ... @@ -122,9 +122,12 @@ class DuckDBReader: Raises ------ - ValueError - If the query syntax is invalid, has no VISUALISE clause, or - SQL execution fails. + ParseError + If the query syntax is invalid. + ValidationError + If the query has no VISUALISE clause or fails semantic checks. + ReaderError + If SQL execution fails. """ ... @@ -154,7 +157,7 @@ class VegaLiteWriter: Raises ------ - ValueError + WriterError If rendering fails. """ ... @@ -388,7 +391,7 @@ def validate(query: str) -> Validated: Raises ------ - ValueError + ParseError If validation fails unexpectedly (syntax errors are captured in the returned ``Validated`` object, not raised). """ @@ -425,7 +428,11 @@ def execute( Raises ------ - ValueError - If parsing, validation, or SQL execution fails. + ParseError + If the query syntax is invalid. + ValidationError + If semantic validation fails. + ReaderError + If SQL execution fails. """ ... diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 68b333c..21f6892 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -211,24 +211,6 @@ impl Reader for PyReaderBridge { } } -// ============================================================================ -// Native Reader Detection Macro -// ============================================================================ - -/// Macro to try native readers and fall back to bridge. -/// Adding new native readers = add to the macro invocation list. -macro_rules! try_native_readers { - ($query:expr, $reader:expr, $($native_type:ty),*) => {{ - $( - if let Ok(native) = $reader.downcast::<$native_type>() { - return native.borrow().inner.execute($query) - .map(|s| PySpec { inner: s }) - .map_err(ggsql_err_to_py); - } - )* - }}; -} - // ============================================================================ // PyDuckDBReader // ============================================================================ @@ -397,8 +379,16 @@ impl PyDuckDBReader { } impl PyDuckDBReader { - /// Register DataFrames from a Python dict. Returns list of registered names for cleanup. - /// This is a private Rust helper, not exposed to Python. + /// Check whether a table already exists in the reader. + fn table_exists(&self, name: &str) -> bool { + // A lightweight probe: try to select zero rows from the table. + self.inner + .execute_sql(&format!("SELECT 1 FROM {name} LIMIT 0")) + .is_ok() + } + + /// Register DataFrames from a Python dict. Returns list of *newly created* + /// table names for cleanup (pre-existing tables are not tracked). fn register_data_dict( &self, py: Python<'_>, @@ -407,11 +397,14 @@ impl PyDuckDBReader { let mut names = Vec::new(); for (key, value) in data.iter() { let name: String = key.extract()?; + let existed = self.table_exists(&name); let df = py_to_polars(py, &value)?; self.inner .register(&name, df, true) .map_err(ggsql_err_to_py)?; - names.push(name); + if !existed { + names.push(name); + } } Ok(names) } @@ -862,6 +855,15 @@ fn execute(py: Python<'_>, query: &str, reader: &Bound<'_, PyAny>, data: Option< /// Register DataFrames from a Python dict onto a Python reader object. /// Returns list of registered names for cleanup. +/// Check whether a table exists via a Python reader's execute_sql method. +fn reader_table_exists(reader: &Bound<'_, PyAny>, name: &str) -> bool { + reader + .call_method1("execute_sql", (format!("SELECT 1 FROM {name} LIMIT 0"),)) + .is_ok() +} + +/// Register DataFrames from a Python dict onto a Python reader object. +/// Returns list of *newly created* table names for cleanup. fn register_data_on_reader( py: Python<'_>, reader: &Bound<'_, PyAny>, @@ -870,10 +872,15 @@ fn register_data_on_reader( let mut names = Vec::new(); for (key, value) in data.iter() { let name: String = key.extract()?; + let existed = reader_table_exists(reader, &name); let df = py_to_polars(py, &value)?; let py_df = polars_to_py(py, &df)?; - reader.call_method("register", (&name, py_df, true), None)?; - names.push(name); + let kwargs = PyDict::new(py); + kwargs.set_item("replace", true)?; + reader.call_method("register", (&name, py_df), Some(&kwargs))?; + if !existed { + names.push(name); + } } Ok(names) } diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 1f2089e..eaf6e2f 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -690,20 +690,20 @@ def test_execute_with_data_cleans_up(self): data={"temp": df}, ) # Table should be cleaned up — querying it should fail - with pytest.raises((ggsql.ReaderError, ValueError)): + with pytest.raises(ggsql.ReaderError): reader.execute_sql("SELECT * FROM temp") def test_execute_with_data_cleans_up_on_error(self): """DataFrames are unregistered even if execution fails.""" reader = ggsql.DuckDBReader("duckdb://memory") df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) - with pytest.raises((ggsql.ParseError, ggsql.ValidationError, ValueError)): + with pytest.raises(ggsql.ParseError): reader.execute( "SELECT * FROM temp VISUALISE DRAW not_a_geom", data={"temp": df}, ) # Table should still be cleaned up - with pytest.raises((ggsql.ReaderError, ValueError)): + with pytest.raises(ggsql.ReaderError): reader.execute_sql("SELECT * FROM temp") def test_execute_without_data_still_works(self): @@ -721,6 +721,24 @@ def test_execute_with_empty_data(self): ) assert spec.metadata()["rows"] == 1 + def test_execute_with_data_preserves_preexisting_table(self): + """data= does not unregister a table that existed before the call.""" + reader = ggsql.DuckDBReader("duckdb://memory") + existing = pl.DataFrame({"x": [1, 2], "y": [10, 20]}) + reader.register("mytable", existing) + + # Pass same name via data= — should replace temporarily, then NOT unregister + override = pl.DataFrame({"x": [3, 4, 5], "y": [30, 40, 50]}) + spec = reader.execute( + "SELECT * FROM mytable VISUALISE x, y DRAW point", + data={"mytable": override}, + ) + assert spec.metadata()["rows"] == 3 + + # The pre-existing table should still be queryable (not unregistered) + result = reader.execute_sql("SELECT * FROM mytable") + assert result.shape[0] > 0 + def test_module_execute_with_data(self): """Module-level execute() also supports data= parameter.""" reader = ggsql.DuckDBReader("duckdb://memory") From b3ec22b6d86399f59d3ad4717078fb8111f4a4cc Mon Sep 17 00:00:00 2001 From: Carson Date: Mon, 9 Mar 2026 16:52:48 -0500 Subject: [PATCH 4/4] style: format Rust code Co-Authored-By: Claude Opus 4.6 --- ggsql-python/src/lib.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/ggsql-python/src/lib.rs b/ggsql-python/src/lib.rs index 21f6892..f8f35fd 100644 --- a/ggsql-python/src/lib.rs +++ b/ggsql-python/src/lib.rs @@ -355,7 +355,12 @@ impl PyDuckDBReader { /// >>> writer = VegaLiteWriter() /// >>> json_output = writer.render(spec) #[pyo3(signature = (query, *, data=None))] - fn execute(&self, py: Python<'_>, query: &str, data: Option<&Bound<'_, PyDict>>) -> PyResult { + fn execute( + &self, + py: Python<'_>, + query: &str, + data: Option<&Bound<'_, PyDict>>, + ) -> PyResult { // Register DataFrames from data dict let registered_names = if let Some(data_dict) = data { self.register_data_dict(py, data_dict)? @@ -364,7 +369,8 @@ impl PyDuckDBReader { }; // Execute query (capture result, don't return early) - let result = self.inner + let result = self + .inner .execute(query) .map(|s| PySpec { inner: s }) .map_err(ggsql_err_to_py); @@ -804,7 +810,12 @@ fn validate(query: &str) -> PyResult { /// >>> spec = execute("SELECT * FROM data VISUALISE x, y DRAW point", reader) #[pyfunction] #[pyo3(signature = (query, reader, *, data=None))] -fn execute(py: Python<'_>, query: &str, reader: &Bound<'_, PyAny>, data: Option<&Bound<'_, PyDict>>) -> PyResult { +fn execute( + py: Python<'_>, + query: &str, + reader: &Bound<'_, PyAny>, + data: Option<&Bound<'_, PyDict>>, +) -> PyResult { // Native reader fast path: DuckDBReader // Note: we can't use the try_native_readers! macro here because it uses `return` // which would skip cleanup of registered tables. @@ -817,7 +828,10 @@ fn execute(py: Python<'_>, query: &str, reader: &Bound<'_, PyAny>, data: Option< }; // Execute (capture result for cleanup) - let result = native.borrow().inner.execute(query) + let result = native + .borrow() + .inner + .execute(query) .map(|s| PySpec { inner: s }) .map_err(ggsql_err_to_py);