From a9cababcfc1dafe7e39d9187a6ee7c108df7730e Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Mar 2026 16:51:45 +0100 Subject: [PATCH 01/10] Add PLACE clause for annotation layers Introduces PLACE as a new top-level clause for creating annotation layers with literal values, similar to ggplot2's annotate() function. Changes: - Grammar: Add place_clause accepting geom and optional SETTING - DataSource: Add Annotation variant to mark annotation layers - Builder: Handle place_clause and set DataSource::Annotation - Execution: Add stub for annotation data source generation - Tests: Add parser test for PLACE clause PLACE layers use DataSource::Annotation as a marker and don't inherit global mappings. All aesthetic values come from SETTING parameters. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 5 +++++ src/parser/builder.rs | 6 +++++ src/parser/mod.rs | 43 ++++++++++++++++++++++++++++++++++++ src/plot/types.rs | 8 +++++++ tree-sitter-ggsql/grammar.js | 9 ++++++++ 5 files changed, 71 insertions(+) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index de370d86..c3f35851 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -223,6 +223,11 @@ pub fn determine_layer_source( Some(DataSource::FilePath(path)) => { format!("'{}'", path) } + Some(DataSource::Annotation) => { + // TODO: Implement annotation layer data source generation + // For now, generate a dummy single-row table + "(SELECT 1 AS __ggsql_dummy__)".to_string() + } None => { // Layer uses global data - caller must ensure has_global is true debug_assert!(has_global, "Layer has no source and no global data"); diff --git a/src/parser/builder.rs b/src/parser/builder.rs index f7bc9cb1..a4760bd1 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -305,6 +305,12 @@ fn process_viz_clause(node: &Node, source: &SourceTree, spec: &mut Plot) -> Resu let layer = build_layer(&child, source)?; spec.layers.push(layer); } + "place_clause" => { + let mut layer = build_layer(&child, source)?; + // Mark as annotation layer with dummy data source + layer.source = Some(DataSource::Annotation); + spec.layers.push(layer); + } "scale_clause" => { let scale = build_scale(&child, source)?; spec.scales.push(scale); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6c80d63a..7efa5c35 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -413,4 +413,47 @@ mod tests { Some(DataSource::Identifier("cte".to_string())) ); } + + #[test] + fn test_place_clause() { + let query = r#" + SELECT x, y FROM data + VISUALISE x, y + DRAW point + PLACE text SETTING x => 5, y => 10, label => 'Hello' + "#; + + let result = parse_query(query); + assert!(result.is_ok(), "Failed to parse PLACE clause: {:?}", result); + + let specs = result.unwrap(); + assert_eq!(specs.len(), 1); + assert_eq!(specs[0].layers.len(), 2, "Expected 2 layers (DRAW + PLACE)"); + + // First layer: regular DRAW point + assert_eq!(specs[0].layers[0].geom, Geom::point()); + assert!(specs[0].layers[0].source.is_none(), "DRAW layer should have no explicit source"); + + // Second layer: PLACE text with annotation source + assert_eq!(specs[0].layers[1].geom, Geom::text()); + assert_eq!( + specs[0].layers[1].source, + Some(DataSource::Annotation), + "PLACE layer should have Annotation source" + ); + + // Verify SETTING parameters are preserved + assert_eq!( + specs[0].layers[1].parameters.get("x"), + Some(&ParameterValue::Number(5.0)) + ); + assert_eq!( + specs[0].layers[1].parameters.get("y"), + Some(&ParameterValue::Number(10.0)) + ); + assert_eq!( + specs[0].layers[1].parameters.get("label"), + Some(&ParameterValue::String("Hello".to_string())) + ); + } } diff --git a/src/plot/types.rs b/src/plot/types.rs index 95bfc2d6..4414ff7e 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -140,6 +140,8 @@ pub enum DataSource { Identifier(String), /// File path (quoted string like 'data.csv') FilePath(String), + /// Annotation layer (PLACE clause) - generates dummy single-row source + Annotation, } impl DataSource { @@ -148,6 +150,7 @@ impl DataSource { match self { DataSource::Identifier(s) => s, DataSource::FilePath(s) => s, + DataSource::Annotation => "__annotation__", } } @@ -155,6 +158,11 @@ impl DataSource { pub fn is_file(&self) -> bool { matches!(self, DataSource::FilePath(_)) } + + /// Returns true if this is an annotation layer source + pub fn is_annotation(&self) -> bool { + matches!(self, DataSource::Annotation) + } } // ============================================================================= diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js index 9302f195..dc1a8487 100644 --- a/tree-sitter-ggsql/grammar.js +++ b/tree-sitter-ggsql/grammar.js @@ -442,6 +442,7 @@ module.exports = grammar({ // All the visualization clauses (same as current grammar) viz_clause: $ => choice( $.draw_clause, + $.place_clause, $.scale_clause, $.facet_clause, $.project_clause, @@ -461,6 +462,14 @@ module.exports = grammar({ optional($.order_clause) ), + // PLACE clause - syntax: PLACE geom [SETTING ...] + // For annotation layers with literal values only (no data mappings) + place_clause: $ => seq( + caseInsensitive('PLACE'), + $.geom_type, + optional($.setting_clause) + ), + // REMAPPING clause: maps stat-computed columns to aesthetics // Syntax: REMAPPING count AS y, sum AS size // Reuses mapping_list for parsing - stat names are treated as column references From 214b53a065e11775f7df5c4fc72ae729a063905e Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Mar 2026 17:12:24 +0100 Subject: [PATCH 02/10] Implement execution for PLACE annotation layers Completes the execution pipeline for PLACE layers: Changes: - Skip annotation layers in merge_global_mappings_into_layers() to prevent inheritance of global mappings from VISUALISE - Clarify annotation data source comment: 1-row dummy satisfies execution pipeline requirements (schema detection, type resolution) even though SETTING literals render as Vega-Lite datum values Annotation layers now properly isolated from global mappings while still working with the existing execution infrastructure. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 7 +++++-- src/execute/mod.rs | 7 ++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index c3f35851..f2639203 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -224,8 +224,11 @@ pub fn determine_layer_source( format!("'{}'", path) } Some(DataSource::Annotation) => { - // TODO: Implement annotation layer data source generation - // For now, generate a dummy single-row table + // Annotation layer - generate a single-row dummy table. + // The execution pipeline expects all layers to have a DataFrame, even though + // SETTING literals eventually render as Vega-Lite datum values ({"value": ...}) + // that don't reference the data. The 1-row dummy satisfies schema detection, + // type resolution, and other intermediate steps that expect data to exist. "(SELECT 1 AS __ggsql_dummy__)".to_string() } None => { diff --git a/src/execute/mod.rs b/src/execute/mod.rs index a8f17e07..ac74982c 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -26,7 +26,7 @@ use crate::parser; use crate::plot::aesthetic::{is_positional_aesthetic, AestheticContext}; use crate::plot::facet::{resolve_properties as resolve_facet_properties, FacetDataContext}; use crate::plot::{AestheticValue, Layer, Scale, ScaleTypeKind, Schema}; -use crate::{DataFrame, GgsqlError, Plot, Result}; +use crate::{DataFrame, DataSource, GgsqlError, Plot, Result}; use std::collections::{HashMap, HashSet}; use crate::reader::Reader; @@ -153,6 +153,11 @@ fn validate(layers: &[Layer], layer_schemas: &[Schema]) -> Result<()> { fn merge_global_mappings_into_layers(specs: &mut [Plot], layer_schemas: &[Schema]) { for spec in specs { for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) { + // Skip annotation layers - they don't inherit global mappings + if matches!(layer.source, Some(DataSource::Annotation)) { + continue; + } + let supported = layer.geom.aesthetics().supported(); let schema_columns: HashSet<&str> = schema.iter().map(|c| c.name.as_str()).collect(); From cada5bdc3a6427436cbc9a14c92ec5e75de23c14 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 11:21:08 +0100 Subject: [PATCH 03/10] Transform position aesthetics for PLACE annotation layers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enables annotation layers to participate in scale training and coordinate transformation pipeline by moving positional/required aesthetics from SETTING parameters to mappings. Changes: - Transform parameter keys to internal space (x → pos1, y → pos2) - Move positional and required aesthetics from parameters to mappings for annotation layers in transform_aesthetics_to_internal() - Refactor place_clause handling in builder.rs into build_place_layer() - Add comprehensive test coverage: parser, execution, validation, and rendering tests verifying field vs value encoding Annotation position aesthetics now materialize as columns, enabling them to affect scale ranges and be properly encoded in Vega-Lite output as field encodings (not datum values). Co-Authored-By: Claude Sonnet 4.5 --- src/execute/mod.rs | 207 ++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 65 +++++++++++++ src/parser/builder.rs | 24 ++++- src/parser/mod.rs | 20 ++-- src/plot/main.rs | 55 ++++++++++- 5 files changed, 359 insertions(+), 12 deletions(-) diff --git a/src/execute/mod.rs b/src/execute/mod.rs index ac74982c..cad3ffe8 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -1230,6 +1230,7 @@ pub fn prepare_data_with_reader(query: &str, reader: &R) -> Result 2, y => 25, label => 'Annotation', size => 14 + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + assert_eq!(result.specs.len(), 1); + assert_eq!(result.specs[0].layers.len(), 2, "Should have DRAW + PLACE layers"); + + // First layer: regular DRAW point + let point_layer = &result.specs[0].layers[0]; + assert_eq!(point_layer.geom, crate::Geom::point()); + assert!(point_layer.source.is_none(), "DRAW layer should have no explicit source"); + + // Second layer: PLACE text annotation + let annotation_layer = &result.specs[0].layers[1]; + assert_eq!(annotation_layer.geom, crate::Geom::text()); + assert_eq!( + annotation_layer.source, + Some(DataSource::Annotation), + "PLACE layer should have Annotation source" + ); + + // Verify annotation layer has 1-row data + let annotation_key = annotation_layer.data_key.as_ref().unwrap(); + let annotation_df = result.data.get(annotation_key).unwrap(); + assert_eq!( + annotation_df.height(), + 1, + "Annotation layer should have exactly 1 row" + ); + + // Verify positional aesthetics are moved from SETTING to mappings with transformed names + // They become Column references (not Literals) so they can participate in scale training + assert!( + matches!( + annotation_layer.mappings.get("pos1"), + Some(AestheticValue::Column { name, .. }) if name == "__ggsql_aes_pos1__" + ), + "x should be transformed to pos1, moved to mappings, and materialized as column" + ); + assert!( + matches!( + annotation_layer.mappings.get("pos2"), + Some(AestheticValue::Column { name, .. }) if name == "__ggsql_aes_pos2__" + ), + "y should be transformed to pos2, moved to mappings, and materialized as column" + ); + + // Verify non-positional aesthetics are in mappings as Literals (via resolve_aesthetics) + assert!( + matches!( + annotation_layer.mappings.get("label"), + Some(AestheticValue::Literal(ParameterValue::String(s))) if s == "Annotation" + ), + "label should be in mappings as Literal" + ); + assert!( + matches!( + annotation_layer.mappings.get("size"), + Some(AestheticValue::Literal(ParameterValue::Number(n))) if *n == 14.0 + ), + "size should be in mappings as Literal" + ); + + // Parameters should be empty after resolve_aesthetics moves them to mappings + assert!( + annotation_layer.parameters.is_empty(), + "All SETTING parameters should be moved to mappings" + ); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_missing_required_aesthetic() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 2 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => 5, label => 'Missing y!' + "#; + + let result = prepare_data_with_reader(query, &reader); + assert!(result.is_err(), "Should fail validation"); + + match result { + Err(GgsqlError::ValidationError(msg)) => { + assert!( + msg.contains("pos2"), + "Error should mention missing pos2 aesthetic: {}", + msg + ); + } + Err(e) => panic!("Expected ValidationError, got: {}", e), + Ok(_) => panic!("Expected error, got success"), + } + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_affects_scale_ranges() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Data has x from 1-3, but PLACE annotation at x=10 should extend the range + let query = r#" + SELECT 1 AS x, 10 AS y UNION ALL + SELECT 2 AS x, 20 AS y UNION ALL + SELECT 3 AS x, 30 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => 10, y => 50, label => 'Extended' + "#; + + let result = prepare_data_with_reader(query, &reader); + assert!(result.is_ok(), "Query should execute: {:?}", result.err()); + + let prep = result.unwrap(); + + // Check that x scale input_range includes both data range (1-3) and annotation point (10) + let x_scale = prep.specs[0].find_scale("pos1"); + assert!(x_scale.is_some(), "Should have x scale"); + + let scale = x_scale.unwrap(); + if let Some(input_range) = &scale.input_range { + // Input range should include the annotation x value (10) + // The scale input_range is resolved from the combined data + annotation DataFrames + assert!( + input_range.len() >= 2, + "Scale input_range should have min/max values" + ); + // Check that the range max is at least 10 (the annotation x value) + if let Some(max_val) = input_range.last() { + match max_val { + crate::plot::types::ArrayElement::Number(n) => { + assert!( + *n >= 10.0, + "Scale input_range max should include annotation point at x=10, got: {}", + n + ); + } + _ => panic!("Expected numeric input_range value"), + } + } + } else { + panic!("Scale should have an input_range"); + } + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_no_global_mapping_inheritance() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 2 AS y, 'red' AS color + VISUALISE x, y, color + DRAW point + PLACE text SETTING x => 5, y => 10, label => 'Test' + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // DRAW layer should have inherited global color mapping + let point_layer = &result.specs[0].layers[0]; + assert!( + point_layer.mappings.contains_key("color") || + point_layer.mappings.contains_key("fill") || + point_layer.mappings.contains_key("stroke"), + "DRAW layer should inherit color from global mappings" + ); + + // PLACE layer should NOT have inherited global mappings + let annotation_layer = &result.specs[0].layers[1]; + assert!( + !annotation_layer.mappings.contains_key("color"), + "PLACE layer should not inherit color from global mappings" + ); + assert!( + !annotation_layer.mappings.contains_key("fill"), + "PLACE layer should not inherit fill from global mappings" + ); + assert!( + !annotation_layer.mappings.contains_key("stroke"), + "PLACE layer should not inherit stroke from global mappings" + ); + } } diff --git a/src/lib.rs b/src/lib.rs index 6e3b8074..0363aa59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -758,6 +758,71 @@ mod integration_tests { ); } + #[test] + fn test_end_to_end_place_field_vs_value_encoding() { + // Test that PLACE annotation layers render correctly: + // - Positional aesthetics (x, y) as field encodings (reference columns) + // - Non-positional aesthetics (size, stroke) as value encodings (datum values) + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 10 AS y UNION ALL SELECT 2 AS x, 20 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => 5, y => 30, label => 'Annotation', size => 16, stroke => 'red' + "#; + + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + + // Render to Vega-Lite + let writer = VegaLiteWriter::new(); + let json_str = writer.write(&prepared.specs[0], &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Find the text layer (should be second layer) + let text_layer = &vl_spec["layer"][1]; + + // Mark can be either a string or an object with type + let mark_type = if text_layer["mark"].is_string() { + text_layer["mark"].as_str().unwrap() + } else { + text_layer["mark"]["type"].as_str().unwrap() + }; + assert_eq!(mark_type, "text", "Second layer should be text"); + + let encoding = &text_layer["encoding"]; + + // Positional aesthetics should be field encodings (have "field" key) + assert!( + encoding["x"]["field"].is_string(), + "x should be a field encoding: {:?}", + encoding["x"] + ); + assert!( + encoding["y"]["field"].is_string(), + "y should be a field encoding: {:?}", + encoding["y"] + ); + + // Non-positional aesthetics should be value encodings (have "value" key) + // Note: size may be scaled/transformed, so just check it's present as a value + assert!( + encoding["size"]["value"].is_number(), + "size should be a value encoding with numeric value: {:?}", + encoding["size"] + ); + + // Note: stroke color goes through resolve_aesthetics and may be in different location + // Just verify it's present somewhere as a literal + let text_mark = &text_layer["mark"]; + let has_stroke_value = encoding.get("stroke") + .and_then(|s| s.get("value")) + .is_some() + || text_mark.get("stroke").is_some(); + assert!(has_stroke_value, "stroke should be present as a value"); + } + #[test] fn test_end_to_end_global_constant_in_visualise() { // Test that global constants in VISUALISE clause work correctly diff --git a/src/parser/builder.rs b/src/parser/builder.rs index a4760bd1..5fe74b5c 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -293,6 +293,10 @@ fn build_visualise_statement(node: &Node, source: &SourceTree) -> Result { // since geom definitions use internal names for their supported/required aesthetics spec.transform_aesthetics_to_internal(); + // Process annotation layers: move positional/required parameters to mappings + // This must happen AFTER transform_aesthetics_to_internal() so parameter keys are in internal space + spec.process_annotation_layers(); + Ok(spec) } @@ -306,9 +310,7 @@ fn process_viz_clause(node: &Node, source: &SourceTree, spec: &mut Plot) -> Resu spec.layers.push(layer); } "place_clause" => { - let mut layer = build_layer(&child, source)?; - // Mark as annotation layer with dummy data source - layer.source = Some(DataSource::Annotation); + let layer = build_place_layer(&child, source)?; spec.layers.push(layer); } "scale_clause" => { @@ -495,6 +497,22 @@ fn build_layer(node: &Node, source: &SourceTree) -> Result { Ok(layer) } +/// Build an annotation Layer from a place_clause node +/// This is similar to build_layer but marks it as an annotation layer. +/// The transformation of positional/required aesthetics from SETTING to mappings +/// happens later in Plot::transform_aesthetics_to_internal(). +/// Syntax: PLACE geom [MAPPING col AS x, ...] [SETTING param => val, ...] [FILTER condition] +fn build_place_layer(node: &Node, source: &SourceTree) -> Result { + // Build the layer using standard logic + let mut layer = build_layer(node, source)?; + + // Mark as annotation layer with dummy data source + // This triggers special handling in transform_aesthetics_to_internal() + layer.source = Some(DataSource::Annotation); + + Ok(layer) +} + /// Parse a setting_clause: SETTING param => value, ... fn parse_setting_clause( node: &Node, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7efa5c35..385ff4b9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -442,18 +442,22 @@ mod tests { "PLACE layer should have Annotation source" ); - // Verify SETTING parameters are preserved - assert_eq!( - specs[0].layers[1].parameters.get("x"), - Some(&ParameterValue::Number(5.0)) + // Verify positional aesthetics moved to mappings with internal names + // (transform_aesthetics_to_internal runs as part of parse_query) + assert!( + specs[0].layers[1].mappings.contains_key("pos1"), + "x should be transformed to pos1 and moved to mappings" ); - assert_eq!( - specs[0].layers[1].parameters.get("y"), - Some(&ParameterValue::Number(10.0)) + assert!( + specs[0].layers[1].mappings.contains_key("pos2"), + "y should be transformed to pos2 and moved to mappings" ); + + // Verify non-positional parameter (label) stays in parameters (with internal name = same) assert_eq!( specs[0].layers[1].parameters.get("label"), - Some(&ParameterValue::String("Hello".to_string())) + Some(&ParameterValue::String("Hello".to_string())), + "Non-positional parameters remain in parameters" ); } } diff --git a/src/plot/main.rs b/src/plot/main.rs index a211ac34..9cf9bc65 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -168,6 +168,7 @@ impl Plot { /// - Global mappings /// - Layer aesthetics /// - Layer remappings + /// - Layer parameters (SETTING aesthetics) /// - Scale aesthetics pub fn transform_aesthetics_to_internal(&mut self) { let ctx = self.get_aesthetic_context(); @@ -175,10 +176,21 @@ impl Plot { // Transform global mappings self.global_mappings.transform_to_internal(&ctx); - // Transform layer aesthetics and remappings + // Transform layer aesthetics, remappings, and parameters for layer in &mut self.layers { layer.mappings.transform_to_internal(&ctx); layer.remappings.transform_to_internal(&ctx); + + // Transform parameter keys from user-facing to internal names + // This ensures position aesthetics in SETTING (e.g., x => 5) become internal (pos1 => 5) + let original_parameters = std::mem::take(&mut layer.parameters); + for (param_name, value) in original_parameters { + let internal_name = ctx + .map_user_to_internal(¶m_name) + .map(|s| s.to_string()) + .unwrap_or(param_name); + layer.parameters.insert(internal_name, value); + } } // Transform scale aesthetics @@ -189,6 +201,47 @@ impl Plot { } } + /// Process annotation layers by moving positional and required aesthetics + /// from parameters to mappings. + /// + /// This must be called AFTER transform_aesthetics_to_internal() so that + /// parameter keys are already in internal space (pos1, pos2, etc.). + /// + /// Positional aesthetics need to be in mappings so they go through the same + /// transformation pipeline (internal→user space) as data-mapped aesthetics, + /// enabling them to participate in scale training and coordinate transformations. + pub fn process_annotation_layers(&mut self) { + for layer in &mut self.layers { + if !matches!(layer.source, Some(crate::DataSource::Annotation)) { + continue; + } + + let required_aesthetics = layer.geom.aesthetics().required(); + // Collect parameter keys first to avoid borrow checker issues + let param_keys: Vec = layer.parameters.keys().cloned().collect(); + + for param_name in param_keys { + // Check if this is a positional aesthetic OR a required aesthetic + let is_positional = crate::plot::aesthetic::is_positional_aesthetic(¶m_name); + let is_required = required_aesthetics.contains(¶m_name.as_str()); + + if is_positional || is_required { + // Skip if already in mappings + if layer.mappings.contains_key(¶m_name) { + continue; + } + + // Move from parameters to mappings (both now use internal names) + if let Some(value) = layer.parameters.remove(¶m_name) { + layer + .mappings + .insert(¶m_name, crate::AestheticValue::Literal(value)); + } + } + } + } + } + /// Check if the spec has any layers pub fn has_layers(&self) -> bool { !self.layers.is_empty() From d541c34f53124d38d5c86f77f62c5c39eb54f029 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 12:26:45 +0100 Subject: [PATCH 04/10] Add length parameter to DataSource::Annotation Change DataSource::Annotation from a unit variant to a tuple variant containing the number of rows to generate (usize). This prepares for array expansion support in PLACE annotation layers. Changes: - DataSource::Annotation becomes DataSource::Annotation(usize) - Initialize annotation layers with length 1 in build_place_layer() - Generate multi-row VALUES clause in determine_layer_source() when n > 1 - Update all matches!() checks to use DataSource::Annotation(_) - Update test assertions to use pattern matching The length will be updated in process_annotation_layers() when arrays are present (to be implemented next). Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 14 ++++++++++---- src/execute/mod.rs | 7 +++---- src/parser/builder.rs | 6 +++--- src/parser/mod.rs | 5 ++--- src/plot/main.rs | 2 +- src/plot/types.rs | 9 +++++---- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index f2639203..3f6ec86c 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -223,13 +223,19 @@ pub fn determine_layer_source( Some(DataSource::FilePath(path)) => { format!("'{}'", path) } - Some(DataSource::Annotation) => { - // Annotation layer - generate a single-row dummy table. + Some(DataSource::Annotation(n)) => { + // Annotation layer - generate a dummy table with n rows. // The execution pipeline expects all layers to have a DataFrame, even though // SETTING literals eventually render as Vega-Lite datum values ({"value": ...}) - // that don't reference the data. The 1-row dummy satisfies schema detection, + // that don't reference the data. The n-row dummy satisfies schema detection, // type resolution, and other intermediate steps that expect data to exist. - "(SELECT 1 AS __ggsql_dummy__)".to_string() + if *n == 1 { + "(SELECT 1 AS __ggsql_dummy__)".to_string() + } else { + // Generate VALUES clause with n rows: VALUES (1), (2), ..., (n) + let rows: Vec = (1..=*n).map(|i| format!("({})", i)).collect(); + format!("(VALUES {} AS t(__ggsql_dummy__))", rows.join(", ")) + } } None => { // Layer uses global data - caller must ensure has_global is true diff --git a/src/execute/mod.rs b/src/execute/mod.rs index cad3ffe8..5399eab7 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -154,7 +154,7 @@ fn merge_global_mappings_into_layers(specs: &mut [Plot], layer_schemas: &[Schema for spec in specs { for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) { // Skip annotation layers - they don't inherit global mappings - if matches!(layer.source, Some(DataSource::Annotation)) { + if matches!(layer.source, Some(DataSource::Annotation(_))) { continue; } @@ -2237,9 +2237,8 @@ mod tests { // Second layer: PLACE text annotation let annotation_layer = &result.specs[0].layers[1]; assert_eq!(annotation_layer.geom, crate::Geom::text()); - assert_eq!( - annotation_layer.source, - Some(DataSource::Annotation), + assert!( + matches!(annotation_layer.source, Some(DataSource::Annotation(_))), "PLACE layer should have Annotation source" ); diff --git a/src/parser/builder.rs b/src/parser/builder.rs index 5fe74b5c..37baa911 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -506,9 +506,9 @@ fn build_place_layer(node: &Node, source: &SourceTree) -> Result { // Build the layer using standard logic let mut layer = build_layer(node, source)?; - // Mark as annotation layer with dummy data source - // This triggers special handling in transform_aesthetics_to_internal() - layer.source = Some(DataSource::Annotation); + // Mark as annotation layer with initial length of 1 + // The length may be updated in process_annotation_layers() if arrays are present + layer.source = Some(DataSource::Annotation(1)); Ok(layer) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 385ff4b9..0d9983f3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -436,9 +436,8 @@ mod tests { // Second layer: PLACE text with annotation source assert_eq!(specs[0].layers[1].geom, Geom::text()); - assert_eq!( - specs[0].layers[1].source, - Some(DataSource::Annotation), + assert!( + matches!(specs[0].layers[1].source, Some(DataSource::Annotation(_))), "PLACE layer should have Annotation source" ); diff --git a/src/plot/main.rs b/src/plot/main.rs index 9cf9bc65..752fe6c6 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -212,7 +212,7 @@ impl Plot { /// enabling them to participate in scale training and coordinate transformations. pub fn process_annotation_layers(&mut self) { for layer in &mut self.layers { - if !matches!(layer.source, Some(crate::DataSource::Annotation)) { + if !matches!(layer.source, Some(crate::DataSource::Annotation(_))) { continue; } diff --git a/src/plot/types.rs b/src/plot/types.rs index 4414ff7e..8b5e4edb 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -140,8 +140,9 @@ pub enum DataSource { Identifier(String), /// File path (quoted string like 'data.csv') FilePath(String), - /// Annotation layer (PLACE clause) - generates dummy single-row source - Annotation, + /// Annotation layer (PLACE clause) with number of annotation rows + /// The usize represents how many rows to generate (from array expansion) + Annotation(usize), } impl DataSource { @@ -150,7 +151,7 @@ impl DataSource { match self { DataSource::Identifier(s) => s, DataSource::FilePath(s) => s, - DataSource::Annotation => "__annotation__", + DataSource::Annotation(_) => "__annotation__", } } @@ -161,7 +162,7 @@ impl DataSource { /// Returns true if this is an annotation layer source pub fn is_annotation(&self) -> bool { - matches!(self, DataSource::Annotation) + matches!(self, DataSource::Annotation(_)) } } From a6ab787cc57e49f2acc83f5865b5622d894ee23b Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 14:15:11 +0100 Subject: [PATCH 05/10] Add array recycling support for PLACE annotation layers This commit implements array parameter recycling for annotation layers, allowing scalars and length-1 arrays to be automatically recycled to match the length of the longest array parameter. Changes: - Updated DataSource::Annotation to store row count: Annotation(usize) - Added recycle_value_to_length() helper in plot/main.rs - Implemented process_annotation_layers() to handle array recycling - Updated schema inference to handle arrays in literal aesthetic mappings - Modified literal_to_sql() to generate CASE WHEN statements for arrays - Fixed SQL generation for multi-row VALUES clause in annotation layers - Added tests for array recycling and mismatched length validation Recycling Rules: - Scalars recycle to max array length - Length-1 arrays recycle to max array length - All non-1 arrays must have same length (error on mismatch) - Only required/positional aesthetics get recycled when moved to mappings Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 45 +++++++++++++++-- src/execute/mod.rs | 54 +++++++++++++++++++++ src/execute/schema.rs | 85 +++++++++++++++++++++++--------- src/parser/builder.rs | 3 +- src/plot/main.rs | 108 ++++++++++++++++++++++++++++++++++++++--- 5 files changed, 260 insertions(+), 35 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index 3f6ec86c..43dbf018 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -4,7 +4,7 @@ //! scale requirements and updating type info accordingly. use crate::plot::scale::coerce_dtypes; -use crate::plot::{CastTargetType, Layer, ParameterValue, Plot, SqlTypeNames}; +use crate::plot::{ArrayElement, CastTargetType, Layer, ParameterValue, Plot, SqlTypeNames}; use crate::{naming, DataSource}; use polars::prelude::{DataType, TimeUnit}; use std::collections::{HashMap, HashSet}; @@ -34,9 +34,44 @@ pub fn literal_to_sql(lit: &ParameterValue) -> String { "FALSE".to_string() } } - ParameterValue::Array(_) | ParameterValue::Null => { - unreachable!("Grammar prevents arrays and null in literal aesthetic mappings") + ParameterValue::Array(arr) => { + // Generate CASE WHEN statement to select array element by row number + // The annotation layer dummy table has __ggsql_dummy__ column with values 1, 2, 3, ... + let mut case_stmt = String::from("CASE __ggsql_dummy__"); + for (i, elem) in arr.iter().enumerate() { + let row_num = i + 1; // Row numbers start at 1 + let value_sql = match elem { + ArrayElement::String(s) => format!("'{}'", s.replace('\'', "''")), + ArrayElement::Number(n) => n.to_string(), + ArrayElement::Boolean(b) => { + if *b { + "TRUE".to_string() + } else { + "FALSE".to_string() + } + } + ArrayElement::Date(d) => { + // Convert days since epoch to DATE + format!("DATE '1970-01-01' + INTERVAL {} DAY", d) + } + ArrayElement::DateTime(dt) => { + // Convert microseconds since epoch to TIMESTAMP + format!("TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", dt) + } + ArrayElement::Time(t) => { + // Convert nanoseconds since midnight to TIME + let seconds = t / 1_000_000_000; + let nanos = t % 1_000_000_000; + format!("TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", seconds, nanos) + } + ArrayElement::Null => "NULL".to_string(), + }; + case_stmt.push_str(&format!(" WHEN {} THEN {}", row_num, value_sql)); + } + case_stmt.push_str(" END"); + case_stmt } + ParameterValue::Null => "NULL".to_string(), } } @@ -232,9 +267,9 @@ pub fn determine_layer_source( if *n == 1 { "(SELECT 1 AS __ggsql_dummy__)".to_string() } else { - // Generate VALUES clause with n rows: VALUES (1), (2), ..., (n) + // Generate VALUES clause with n rows: (VALUES (1), (2), ..., (n)) AS t(col) let rows: Vec = (1..=*n).map(|i| format!("({})", i)).collect(); - format!("(VALUES {} AS t(__ggsql_dummy__))", rows.join(", ")) + format!("(SELECT * FROM (VALUES {}) AS t(__ggsql_dummy__))", rows.join(", ")) } } None => { diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 5399eab7..7ec4d3db 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -2369,6 +2369,60 @@ mod tests { } } + #[cfg(feature = "duckdb")] + #[test] + fn test_place_array_recycling_scalar() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 10 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => [1, 2, 3], y => 10, label => 'Same' + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + let annotation_layer = &result.specs[0].layers[1]; + assert_eq!( + annotation_layer.source, + Some(DataSource::Annotation(3)), + "Should recycle scalar y to length 3" + ); + + let annotation_key = annotation_layer.data_key.as_ref().unwrap(); + let annotation_df = result.data.get(annotation_key).unwrap(); + assert_eq!(annotation_df.height(), 3, "Should have 3 rows"); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_place_array_mismatched_lengths_error() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT 1 AS x, 10 AS y + VISUALISE x, y + DRAW point + PLACE text SETTING x => [1, 2, 3], y => [10, 20], label => 'Test' + "#; + + let result = prepare_data_with_reader(query, &reader); + assert!(result.is_err(), "Should error on mismatched array lengths"); + + match result { + Err(GgsqlError::ValidationError(msg)) => { + assert!( + msg.contains("mismatched array lengths"), + "Error should mention mismatched lengths: {}", + msg + ); + } + Err(e) => panic!("Expected ValidationError, got: {}", e), + Ok(_) => panic!("Expected error, got success"), + } + } + #[cfg(feature = "duckdb")] #[test] fn test_place_no_global_mapping_inheritance() { diff --git a/src/execute/schema.rs b/src/execute/schema.rs index b5b7cad8..c59b390a 100644 --- a/src/execute/schema.rs +++ b/src/execute/schema.rs @@ -6,9 +6,9 @@ //! 2. Apply casting to queries //! 3. complete_schema_ranges() - get min/max from cast queries -use crate::plot::{AestheticValue, ColumnInfo, Layer, ParameterValue, Schema}; +use crate::plot::{AestheticValue, ArrayElement, ColumnInfo, Layer, ParameterValue, Schema}; use crate::{naming, DataFrame, Result}; -use polars::prelude::DataType; +use polars::prelude::{DataType, TimeUnit}; /// Simple type info tuple: (name, dtype, is_discrete) pub type TypeInfo = (String, DataType, bool); @@ -246,16 +246,37 @@ pub fn add_literal_columns_to_type_info(layers: &[Layer], layer_type_info: &mut for (layer, type_info) in layers.iter().zip(layer_type_info.iter_mut()) { for (aesthetic, value) in &layer.mappings.aesthetics { if let AestheticValue::Literal(lit) = value { - let dtype = match lit { - ParameterValue::String(_) => DataType::String, - ParameterValue::Number(_) => DataType::Float64, - ParameterValue::Boolean(_) => DataType::Boolean, - ParameterValue::Array(_) | ParameterValue::Null => unreachable!( - "Grammar prevents arrays and null in literal aesthetic mappings" - ), + let (dtype, is_discrete) = match lit { + ParameterValue::String(_) => (DataType::String, true), + ParameterValue::Number(_) => (DataType::Float64, false), + ParameterValue::Boolean(_) => (DataType::Boolean, true), + ParameterValue::Array(arr) => { + // Infer dtype from first element (arrays are homogeneous) + if let Some(first) = arr.first() { + match first { + ArrayElement::String(_) => (DataType::String, true), + ArrayElement::Number(_) => (DataType::Float64, false), + ArrayElement::Boolean(_) => (DataType::Boolean, true), + ArrayElement::Date(_) => (DataType::Date, false), + ArrayElement::DateTime(_) => { + (DataType::Datetime(TimeUnit::Microseconds, None), false) + } + ArrayElement::Time(_) => (DataType::Time, false), + ArrayElement::Null => { + // Null element: default to Float64 + (DataType::Float64, false) + } + } + } else { + // Empty array: default to Float64 + (DataType::Float64, false) + } + } + ParameterValue::Null => { + // Skip null literals - they don't create columns + continue; + } }; - let is_discrete = - matches!(lit, ParameterValue::String(_) | ParameterValue::Boolean(_)); let col_name = naming::aesthetic_column(aesthetic); // Only add if not already present @@ -310,21 +331,41 @@ pub fn build_aesthetic_schema(layer: &Layer, schema: &Schema) -> Schema { } AestheticValue::Literal(lit) => { // Literals become columns with appropriate type - let dtype = match lit { - ParameterValue::String(_) => DataType::String, - ParameterValue::Number(_) => DataType::Float64, - ParameterValue::Boolean(_) => DataType::Boolean, - ParameterValue::Array(_) | ParameterValue::Null => unreachable!( - "Grammar prevents arrays and null in literal aesthetic mappings" - ), + let (dtype, is_discrete) = match lit { + ParameterValue::String(_) => (DataType::String, true), + ParameterValue::Number(_) => (DataType::Float64, false), + ParameterValue::Boolean(_) => (DataType::Boolean, true), + ParameterValue::Array(arr) => { + // Infer dtype from first element (arrays are homogeneous) + if let Some(first) = arr.first() { + match first { + ArrayElement::String(_) => (DataType::String, true), + ArrayElement::Number(_) => (DataType::Float64, false), + ArrayElement::Boolean(_) => (DataType::Boolean, true), + ArrayElement::Date(_) => (DataType::Date, false), + ArrayElement::DateTime(_) => { + (DataType::Datetime(TimeUnit::Microseconds, None), false) + } + ArrayElement::Time(_) => (DataType::Time, false), + ArrayElement::Null => { + // Null element: default to Float64 + (DataType::Float64, false) + } + } + } else { + // Empty array: default to Float64 + (DataType::Float64, false) + } + } + ParameterValue::Null => { + // Null: default to Float64 + (DataType::Float64, false) + } }; aesthetic_schema.push(ColumnInfo { name: aes_col_name, dtype, - is_discrete: matches!( - lit, - ParameterValue::String(_) | ParameterValue::Boolean(_) - ), + is_discrete, min: None, max: None, }); diff --git a/src/parser/builder.rs b/src/parser/builder.rs index 37baa911..6f82f1c7 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -295,7 +295,8 @@ fn build_visualise_statement(node: &Node, source: &SourceTree) -> Result { // Process annotation layers: move positional/required parameters to mappings // This must happen AFTER transform_aesthetics_to_internal() so parameter keys are in internal space - spec.process_annotation_layers(); + // Returns error if arrays have mismatched lengths + spec.process_annotation_layers()?; Ok(spec) } diff --git a/src/plot/main.rs b/src/plot/main.rs index 752fe6c6..ec3d0ffd 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -47,6 +47,59 @@ pub use super::projection::{Coord, Projection}; // Re-export Facet types from the facet module pub use super::facet::{Facet, FacetLayout}; +// ============================================================================= +// Helper Functions +// ============================================================================= + +/// Recycle a scalar or length-1 array to a target length. +/// +/// Recycling rules: +/// - Scalar values (Number, String, Boolean) → Array with n copies as ArrayElements +/// - Length-1 arrays → Expand to n copies of the single element +/// - Length-n arrays → Return unchanged if n matches target, error otherwise +fn recycle_value_to_length( + value: ParameterValue, + target_length: usize, +) -> Result { + match value { + // Scalars: convert to array with n copies as ArrayElements + ParameterValue::Number(n) => Ok(ParameterValue::Array( + vec![ArrayElement::Number(n); target_length], + )), + ParameterValue::String(s) => Ok(ParameterValue::Array( + vec![ArrayElement::String(s); target_length], + )), + ParameterValue::Boolean(b) => Ok(ParameterValue::Array( + vec![ArrayElement::Boolean(b); target_length], + )), + ParameterValue::Null => Ok(ParameterValue::Array( + vec![ArrayElement::Null; target_length], + )), + // Arrays: recycle if length 1, return unchanged if matches target, error otherwise + ParameterValue::Array(arr) => { + if arr.len() == 1 { + // Recycle the single element + let element = arr[0].clone(); + Ok(ParameterValue::Array(vec![element; target_length])) + } else if arr.len() == target_length { + // Already correct length + Ok(ParameterValue::Array(arr)) + } else { + // Mismatched length - shouldn't happen if validation passed + Err(crate::GgsqlError::InternalError(format!( + "Attempted to recycle array of length {} to length {} (should have been caught earlier)", + arr.len(), + target_length + ))) + } + } + } +} + +// ============================================================================= +// Plot Type +// ============================================================================= + /// Complete ggsql visualization specification #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Plot { @@ -202,22 +255,50 @@ impl Plot { } /// Process annotation layers by moving positional and required aesthetics - /// from parameters to mappings. + /// from parameters to mappings, with array recycling support. /// /// This must be called AFTER transform_aesthetics_to_internal() so that /// parameter keys are already in internal space (pos1, pos2, etc.). /// + /// Array recycling rules: + /// - Scalars and length-1 arrays are recycled to match the max array length + /// - All non-1 arrays must have the same length (error on mismatch) + /// - Only required/positional aesthetics that are moved to mappings get recycled + /// /// Positional aesthetics need to be in mappings so they go through the same /// transformation pipeline (internal→user space) as data-mapped aesthetics, /// enabling them to participate in scale training and coordinate transformations. - pub fn process_annotation_layers(&mut self) { + pub fn process_annotation_layers(&mut self) -> Result<(), crate::GgsqlError> { for layer in &mut self.layers { - if !matches!(layer.source, Some(crate::DataSource::Annotation(_))) { - continue; + // Get initial length from DataSource + let mut max_length = match &layer.source { + Some(DataSource::Annotation(n)) => *n, + _ => continue, + }; + + // Step 1: Determine max array length from all parameters + let mut array_lengths: std::collections::HashMap = + std::collections::HashMap::new(); + + for (param_name, value) in &layer.parameters { + if let ParameterValue::Array(arr) = value { + let len = arr.len(); + array_lengths.insert(param_name.clone(), len); + if len > 1 && len != max_length { + if max_length > 1 { + // Multiple different non-1 lengths - error + return Err(crate::GgsqlError::ValidationError(format!( + "PLACE annotation layer has mismatched array lengths: '{}' has length {}, but another has length {}", + param_name, len, max_length + ))); + } + max_length = len; + } + } } + // Step 2: Move required/positional aesthetics to mappings with recycling let required_aesthetics = layer.geom.aesthetics().required(); - // Collect parameter keys first to avoid borrow checker issues let param_keys: Vec = layer.parameters.keys().cloned().collect(); for param_name in param_keys { @@ -231,15 +312,28 @@ impl Plot { continue; } - // Move from parameters to mappings (both now use internal names) + // Move from parameters to mappings with recycling if let Some(value) = layer.parameters.remove(¶m_name) { + let recycled_value = if max_length > 1 { + recycle_value_to_length(value, max_length)? + } else { + value + }; + layer .mappings - .insert(¶m_name, crate::AestheticValue::Literal(value)); + .insert(¶m_name, AestheticValue::Literal(recycled_value)); } } } + + // Step 3: Update DataSource with the correct length + if max_length > 1 { + layer.source = Some(DataSource::Annotation(max_length)); + } } + + Ok(()) } /// Check if the spec has any layers From 17d0de2e408b15d227b35f468a6922726ab1a9df Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 16:01:59 +0100 Subject: [PATCH 06/10] Fix PLACE annotation layers with mixed-type array parameters This commit fixes a bug where PLACE text annotation layers with array parameters (e.g., label => [1, 'foo']) displayed incorrectly. Changes: - Add ArrayElement::homogenize() to coerce mixed-type arrays to a common type - Process annotation layers to move array parameters to mappings - Skip non-positional aesthetics for annotation layers in scale training - Remove string-to-number parsing in JSON serialization to preserve types The fix ensures that arrays like [1, 'foo'] are homogenized to ["1", "foo"] and rendered correctly as separate labels in the visualization. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 1 + src/execute/scale.rs | 14 ++++++++++- src/plot/main.rs | 16 +++++++++--- src/plot/types.rs | 50 +++++++++++++++++++++++++++++++++++++ src/writer/vegalite/data.rs | 12 ++------- 5 files changed, 78 insertions(+), 15 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index 43dbf018..443eb3d7 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -37,6 +37,7 @@ pub fn literal_to_sql(lit: &ParameterValue) -> String { ParameterValue::Array(arr) => { // Generate CASE WHEN statement to select array element by row number // The annotation layer dummy table has __ggsql_dummy__ column with values 1, 2, 3, ... + // Arrays are homogeneous (homogenized in recycle_value_to_length), so no type mixing let mut case_stmt = String::from("CASE __ggsql_dummy__"); for (i, elem) in arr.iter().enumerate() { let row_num = i + 1; // Row numbers start at 1 diff --git a/src/execute/scale.rs b/src/execute/scale.rs index 3ad4cf2b..a9e52d7d 100644 --- a/src/execute/scale.rs +++ b/src/execute/scale.rs @@ -14,7 +14,7 @@ use crate::plot::{ AestheticValue, ArrayElement, ArrayElementType, ColumnInfo, Layer, ParameterValue, Plot, Scale, ScaleType, ScaleTypeKind, Schema, }; -use crate::{DataFrame, GgsqlError, Result}; +use crate::{DataFrame, DataSource, GgsqlError, Result}; use polars::prelude::Column; use std::collections::{HashMap, HashSet}; @@ -995,6 +995,8 @@ pub fn find_columns_for_aesthetic<'a>( data_map: &'a HashMap, aesthetic_ctx: &AestheticContext, ) -> Vec<&'a Column> { + use crate::plot::aesthetic::is_positional_aesthetic; + let mut column_refs = Vec::new(); let aesthetics_to_check = aesthetic_ctx .internal_positional_family(aesthetic) @@ -1003,6 +1005,16 @@ pub fn find_columns_for_aesthetic<'a>( // Check each layer's mapping - every layer has its own data for (i, layer) in layers.iter().enumerate() { + // For annotation layers, only train scales for positional aesthetics (pos1, pos2) + // Other aesthetics (label, color, size, etc.) should use literal values + let is_annotation = matches!(layer.source, Some(DataSource::Annotation(_))); + let is_positional_family = is_positional_aesthetic(aesthetic); + + if is_annotation && !is_positional_family { + // Skip non-positional aesthetics for annotation layers + continue; + } + if let Some(df) = data_map.get(&naming::layer_key(i)) { for aes_name in &aesthetics_to_check { if let Some(AestheticValue::Column { name, .. }) = layer.mappings.get(aes_name) { diff --git a/src/plot/main.rs b/src/plot/main.rs index ec3d0ffd..00c95bd7 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -75,8 +75,12 @@ fn recycle_value_to_length( ParameterValue::Null => Ok(ParameterValue::Array( vec![ArrayElement::Null; target_length], )), - // Arrays: recycle if length 1, return unchanged if matches target, error otherwise + // Arrays: homogenize types if mixed, then recycle if needed ParameterValue::Array(arr) => { + // Homogenize array if it has mixed incompatible types + let arr = ArrayElement::homogenize(&arr); + + // Now handle recycling if arr.len() == 1 { // Recycle the single element let element = arr[0].clone(); @@ -297,16 +301,20 @@ impl Plot { } } - // Step 2: Move required/positional aesthetics to mappings with recycling + // Step 2: Move required/positional/array aesthetics to mappings with recycling let required_aesthetics = layer.geom.aesthetics().required(); let param_keys: Vec = layer.parameters.keys().cloned().collect(); for param_name in param_keys { - // Check if this is a positional aesthetic OR a required aesthetic + // Check if this is a positional aesthetic OR a required aesthetic OR an array let is_positional = crate::plot::aesthetic::is_positional_aesthetic(¶m_name); let is_required = required_aesthetics.contains(¶m_name.as_str()); + let is_array = matches!( + layer.parameters.get(¶m_name), + Some(ParameterValue::Array(_)) + ); - if is_positional || is_required { + if is_positional || is_required || is_array { // Skip if already in mappings if layer.mappings.contains_key(¶m_name) { continue; diff --git a/src/plot/types.rs b/src/plot/types.rs index 8b5e4edb..a9db97db 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -618,6 +618,34 @@ impl ArrayElement { } } + /// Homogenize a slice of array elements to a common type. + /// + /// Infers the target type from all elements, then attempts to coerce all elements + /// to that type. If coercion fails (e.g., string + number), falls back to String type. + /// + /// Returns a new vector with homogenized elements. + pub fn homogenize(values: &[Self]) -> Vec { + // Infer target type from all elements + let Some(target_type) = Self::infer_type(values) else { + // All nulls or empty array - return cloned as-is + return values.to_vec(); + }; + + // Try to coerce all elements to the inferred type + let coerced: Result, _> = values.iter().map(|elem| elem.coerce_to(target_type)).collect(); + + match coerced { + Ok(coerced_arr) => coerced_arr, + Err(_) => { + // Coercion failed - fall back to String type + values + .iter() + .map(|elem| elem.coerce_to(ArrayElementType::String).unwrap_or(Self::Null)) + .collect() + } + } + } + /// Get the type name for error messages. fn type_name(&self) -> &'static str { match self { @@ -1375,4 +1403,26 @@ mod tests { let result = elem.coerce_to(ArrayElementType::Date); assert!(result.is_err()); } + + #[test] + fn test_homogenize_mixed_number_string() { + let arr = vec![ + ArrayElement::Number(1.0), + ArrayElement::String("foo".to_string()), + ]; + + let homogenized = ArrayElement::homogenize(&arr); + + // Should fall back to String type since "foo" can't be coerced to Number + assert_eq!(homogenized.len(), 2); + assert!(matches!(homogenized[0], ArrayElement::String(_))); + assert!(matches!(homogenized[1], ArrayElement::String(_))); + + if let ArrayElement::String(s) = &homogenized[0] { + assert_eq!(s, "1"); + } + if let ArrayElement::String(s) = &homogenized[1] { + assert_eq!(s, "foo"); + } + } } diff --git a/src/writer/vegalite/data.rs b/src/writer/vegalite/data.rs index 4b1c89e3..a19b7e36 100644 --- a/src/writer/vegalite/data.rs +++ b/src/writer/vegalite/data.rs @@ -97,16 +97,8 @@ pub(super) fn series_value_at(series: &Series, idx: usize) -> Result { let ca = series .str() .map_err(|e| GgsqlError::WriterError(format!("Failed to cast to string: {}", e)))?; - // Try to parse as number if it looks numeric - if let Some(val) = ca.get(idx) { - if let Ok(num) = val.parse::() { - Ok(json!(num)) - } else { - Ok(json!(val)) - } - } else { - Ok(Value::Null) - } + // Keep strings as strings (don't parse to numbers) + Ok(ca.get(idx).map(|v| json!(v)).unwrap_or(Value::Null)) } Date => { // Convert days since epoch to ISO date string: "YYYY-MM-DD" From 16abba9e391b6c8b4a153ddf7b3cde78b3193230 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 16:36:47 +0100 Subject: [PATCH 07/10] Refactor SQL generation and array recycling to methods on types Extract standalone functions into methods on their respective types for better encapsulation and API design: - Add ArrayElement::to_sql() for SQL literal conversion - Add ParameterValue::to_sql() for SQL literal conversion (including CASE statements for arrays) - Add ParameterValue::rep(n) for array recycling (replaces recycle_value_to_length) - Add ParameterValue::to_array_element() helper for scalar conversion Key improvements: - Simplified rep() to only homogenize arrays when arr.len() == n - Unified scalar handling through to_array_element() catch-all - Removed literal_to_sql() standalone function - Removed recycle_value_to_length() standalone function Co-Authored-By: Claude Sonnet 4.5 --- src/execute/casting.rs | 56 +-------------------- src/execute/layer.rs | 4 +- src/plot/main.rs | 45 +---------------- src/plot/types.rs | 109 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 101 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index 443eb3d7..513b171f 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -4,7 +4,7 @@ //! scale requirements and updating type info accordingly. use crate::plot::scale::coerce_dtypes; -use crate::plot::{ArrayElement, CastTargetType, Layer, ParameterValue, Plot, SqlTypeNames}; +use crate::plot::{CastTargetType, Layer, Plot, SqlTypeNames}; use crate::{naming, DataSource}; use polars::prelude::{DataType, TimeUnit}; use std::collections::{HashMap, HashSet}; @@ -22,60 +22,6 @@ pub struct TypeRequirement { pub sql_type_name: String, } -/// Format a literal value as SQL -pub fn literal_to_sql(lit: &ParameterValue) -> String { - match lit { - ParameterValue::String(s) => format!("'{}'", s.replace('\'', "''")), - ParameterValue::Number(n) => n.to_string(), - ParameterValue::Boolean(b) => { - if *b { - "TRUE".to_string() - } else { - "FALSE".to_string() - } - } - ParameterValue::Array(arr) => { - // Generate CASE WHEN statement to select array element by row number - // The annotation layer dummy table has __ggsql_dummy__ column with values 1, 2, 3, ... - // Arrays are homogeneous (homogenized in recycle_value_to_length), so no type mixing - let mut case_stmt = String::from("CASE __ggsql_dummy__"); - for (i, elem) in arr.iter().enumerate() { - let row_num = i + 1; // Row numbers start at 1 - let value_sql = match elem { - ArrayElement::String(s) => format!("'{}'", s.replace('\'', "''")), - ArrayElement::Number(n) => n.to_string(), - ArrayElement::Boolean(b) => { - if *b { - "TRUE".to_string() - } else { - "FALSE".to_string() - } - } - ArrayElement::Date(d) => { - // Convert days since epoch to DATE - format!("DATE '1970-01-01' + INTERVAL {} DAY", d) - } - ArrayElement::DateTime(dt) => { - // Convert microseconds since epoch to TIMESTAMP - format!("TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", dt) - } - ArrayElement::Time(t) => { - // Convert nanoseconds since midnight to TIME - let seconds = t / 1_000_000_000; - let nanos = t % 1_000_000_000; - format!("TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", seconds, nanos) - } - ArrayElement::Null => "NULL".to_string(), - }; - case_stmt.push_str(&format!(" WHEN {} THEN {}", row_num, value_sql)); - } - case_stmt.push_str(" END"); - case_stmt - } - ParameterValue::Null => "NULL".to_string(), - } -} - /// Determine which columns need casting based on scale requirements. /// /// For each layer, collects columns that need casting to match the scale's diff --git a/src/execute/layer.rs b/src/execute/layer.rs index d4d911fb..42684b90 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -11,7 +11,7 @@ use crate::{naming, DataFrame, GgsqlError, Result}; use polars::prelude::DataType; use std::collections::{HashMap, HashSet}; -use super::casting::{literal_to_sql, TypeRequirement}; +use super::casting::TypeRequirement; use super::schema::build_aesthetic_schema; /// Build the source query for a layer. @@ -90,7 +90,7 @@ pub fn build_layer_select_list( } AestheticValue::Literal(lit) => { // Literals become columns with prefixed aesthetic name - format!("{} AS \"{}\"", literal_to_sql(lit), aes_col_name) + format!("{} AS \"{}\"", lit.to_sql(), aes_col_name) } }; diff --git a/src/plot/main.rs b/src/plot/main.rs index 00c95bd7..1b423095 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -57,49 +57,6 @@ pub use super::facet::{Facet, FacetLayout}; /// - Scalar values (Number, String, Boolean) → Array with n copies as ArrayElements /// - Length-1 arrays → Expand to n copies of the single element /// - Length-n arrays → Return unchanged if n matches target, error otherwise -fn recycle_value_to_length( - value: ParameterValue, - target_length: usize, -) -> Result { - match value { - // Scalars: convert to array with n copies as ArrayElements - ParameterValue::Number(n) => Ok(ParameterValue::Array( - vec![ArrayElement::Number(n); target_length], - )), - ParameterValue::String(s) => Ok(ParameterValue::Array( - vec![ArrayElement::String(s); target_length], - )), - ParameterValue::Boolean(b) => Ok(ParameterValue::Array( - vec![ArrayElement::Boolean(b); target_length], - )), - ParameterValue::Null => Ok(ParameterValue::Array( - vec![ArrayElement::Null; target_length], - )), - // Arrays: homogenize types if mixed, then recycle if needed - ParameterValue::Array(arr) => { - // Homogenize array if it has mixed incompatible types - let arr = ArrayElement::homogenize(&arr); - - // Now handle recycling - if arr.len() == 1 { - // Recycle the single element - let element = arr[0].clone(); - Ok(ParameterValue::Array(vec![element; target_length])) - } else if arr.len() == target_length { - // Already correct length - Ok(ParameterValue::Array(arr)) - } else { - // Mismatched length - shouldn't happen if validation passed - Err(crate::GgsqlError::InternalError(format!( - "Attempted to recycle array of length {} to length {} (should have been caught earlier)", - arr.len(), - target_length - ))) - } - } - } -} - // ============================================================================= // Plot Type // ============================================================================= @@ -323,7 +280,7 @@ impl Plot { // Move from parameters to mappings with recycling if let Some(value) = layer.parameters.remove(¶m_name) { let recycled_value = if max_length > 1 { - recycle_value_to_length(value, max_length)? + value.rep(max_length)? } else { value }; diff --git a/src/plot/types.rs b/src/plot/types.rs index a9db97db..8c663d8e 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -646,6 +646,38 @@ impl ArrayElement { } } + /// Convert this element to a SQL literal string. + /// + /// Used for generating SQL expressions from literal values. + pub fn to_sql(&self) -> String { + match self { + Self::String(s) => format!("'{}'", s.replace('\'', "''")), + Self::Number(n) => n.to_string(), + Self::Boolean(b) => { + if *b { + "TRUE".to_string() + } else { + "FALSE".to_string() + } + } + Self::Date(d) => { + // Convert days since epoch to DATE + format!("DATE '1970-01-01' + INTERVAL {} DAY", d) + } + Self::DateTime(dt) => { + // Convert microseconds since epoch to TIMESTAMP + format!("TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", dt) + } + Self::Time(t) => { + // Convert nanoseconds since midnight to TIME + let seconds = t / 1_000_000_000; + let nanos = t % 1_000_000_000; + format!("TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", seconds, nanos) + } + Self::Null => "NULL".to_string(), + } + } + /// Get the type name for error messages. fn type_name(&self) -> &'static str { match self { @@ -825,6 +857,83 @@ impl ParameterValue { _ => None, } } + + /// Convert this parameter value to a SQL literal string. + /// + /// For arrays, generates a CASE WHEN statement that selects array elements by row number + /// using the __ggsql_dummy__ column. + pub fn to_sql(&self) -> String { + match self { + ParameterValue::String(s) => format!("'{}'", s.replace('\'', "''")), + ParameterValue::Number(n) => n.to_string(), + ParameterValue::Boolean(b) => { + if *b { + "TRUE".to_string() + } else { + "FALSE".to_string() + } + } + ParameterValue::Array(arr) => { + let mut case_stmt = String::from("CASE __ggsql_dummy__"); + for (i, elem) in arr.iter().enumerate() { + let row_num = i + 1; + let value_sql = elem.to_sql(); + case_stmt.push_str(&format!(" WHEN {} THEN {}", row_num, value_sql)); + } + case_stmt.push_str(" END"); + case_stmt + } + ParameterValue::Null => "NULL".to_string(), + } + } + + /// Convert a scalar ParameterValue to an ArrayElement. + /// + /// Panics if called on an Array variant. + fn to_array_element(self) -> ArrayElement { + match self { + ParameterValue::Number(num) => ArrayElement::Number(num), + ParameterValue::String(s) => ArrayElement::String(s), + ParameterValue::Boolean(b) => ArrayElement::Boolean(b), + ParameterValue::Null => ArrayElement::Null, + ParameterValue::Array(_) => panic!("Cannot convert Array to single ArrayElement"), + } + } + + /// Recycle this value to a target array length. + /// + /// - Scalars (String, Number, Boolean, Null) are converted to arrays with n copies + /// - Arrays of length 1 are recycled to n copies of that element + /// - Arrays of target length are returned as-is (after homogenization) + /// - Arrays of other lengths produce an error + pub fn rep(self, n: usize) -> Result { + match self { + // Arrays: homogenize types if mixed, then recycle if needed + ParameterValue::Array(arr) => { + if arr.len() == 1 { + // Recycle the single element + let element = arr[0].clone(); + Ok(ParameterValue::Array(vec![element; n])) + } else if arr.len() == n { + // Already correct length - homogenize for type consistency + let arr = ArrayElement::homogenize(&arr); + Ok(ParameterValue::Array(arr)) + } else { + // Mismatched length - shouldn't happen if validation passed + Err(crate::GgsqlError::InternalError(format!( + "Attempted to recycle array of length {} to length {} (should have been caught earlier)", + arr.len(), + n + ))) + } + } + // Scalars: convert to ArrayElement and replicate n times + scalar => { + let elem = scalar.to_array_element(); + Ok(ParameterValue::Array(vec![elem; n])) + } + } + } } // ============================================================================= From 6b41815bf72a3d77a75ccf9e5fc887092e8c71f2 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Thu, 5 Mar 2026 17:32:29 +0100 Subject: [PATCH 08/10] Add is_scaled field to prevent annotation literals from using scales Fixes issue where PLACE layer literals (e.g., stroke => ['red', 'blue']) would incorrectly use scales from other layers. Changes: - Add is_scaled field to AestheticValue::Column - Add identity_column() helper for creating unscaled columns - Mark annotation layer non-positional literals with is_scaled: false - Use identity_scale when is_scaled is false in encoding Example: PLACE text with stroke => ['red', 'blue'] now renders with literal colors instead of being transformed by scales from other layers. Co-Authored-By: Claude Sonnet 4.5 --- src/execute/layer.rs | 1 + src/execute/mod.rs | 1 + src/execute/scale.rs | 24 ++++++++---------------- src/plot/layer/mod.rs | 17 ++++++++++++++--- src/plot/types.rs | 17 +++++++++++++++++ src/writer/vegalite/encoding.rs | 6 ++++-- 6 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/execute/layer.rs b/src/execute/layer.rs index 42684b90..fcbe46c1 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -498,6 +498,7 @@ where name: prefixed_name, original_name, is_dummy, + is_scaled: true, }; layer.mappings.insert(aesthetic.clone(), value); } diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 7ec4d3db..78520661 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -308,6 +308,7 @@ fn add_facet_mappings_to_layers( name: var.to_string(), original_name: Some(var.to_string()), is_dummy: false, + is_scaled: true, }, ); } diff --git a/src/execute/scale.rs b/src/execute/scale.rs index a9e52d7d..28094526 100644 --- a/src/execute/scale.rs +++ b/src/execute/scale.rs @@ -14,7 +14,7 @@ use crate::plot::{ AestheticValue, ArrayElement, ArrayElementType, ColumnInfo, Layer, ParameterValue, Plot, Scale, ScaleType, ScaleTypeKind, Schema, }; -use crate::{DataFrame, DataSource, GgsqlError, Result}; +use crate::{DataFrame, GgsqlError, Result}; use polars::prelude::Column; use std::collections::{HashMap, HashSet}; @@ -995,8 +995,6 @@ pub fn find_columns_for_aesthetic<'a>( data_map: &'a HashMap, aesthetic_ctx: &AestheticContext, ) -> Vec<&'a Column> { - use crate::plot::aesthetic::is_positional_aesthetic; - let mut column_refs = Vec::new(); let aesthetics_to_check = aesthetic_ctx .internal_positional_family(aesthetic) @@ -1005,21 +1003,15 @@ pub fn find_columns_for_aesthetic<'a>( // Check each layer's mapping - every layer has its own data for (i, layer) in layers.iter().enumerate() { - // For annotation layers, only train scales for positional aesthetics (pos1, pos2) - // Other aesthetics (label, color, size, etc.) should use literal values - let is_annotation = matches!(layer.source, Some(DataSource::Annotation(_))); - let is_positional_family = is_positional_aesthetic(aesthetic); - - if is_annotation && !is_positional_family { - // Skip non-positional aesthetics for annotation layers - continue; - } - if let Some(df) = data_map.get(&naming::layer_key(i)) { for aes_name in &aesthetics_to_check { - if let Some(AestheticValue::Column { name, .. }) = layer.mappings.get(aes_name) { - if let Ok(column) = df.column(name) { - column_refs.push(column); + if let Some(AestheticValue::Column { name, is_scaled, .. }) = layer.mappings.get(aes_name) { + // Only train scales for columns that should be scaled + // Annotation layer literals have is_scaled: false + if *is_scaled { + if let Ok(column) = df.column(name) { + column_refs.push(column); + } } } } diff --git a/src/plot/layer/mod.rs b/src/plot/layer/mod.rs index 84076071..5c7e80ab 100644 --- a/src/plot/layer/mod.rs +++ b/src/plot/layer/mod.rs @@ -239,6 +239,9 @@ impl Layer { /// happens in Polars after query execution, before the data goes to the writer. pub fn update_mappings_for_aesthetic_columns(&mut self) { use crate::naming; + use crate::plot::aesthetic::is_positional_aesthetic; + + let is_annotation = matches!(self.source, Some(crate::DataSource::Annotation(_))); for (aesthetic, value) in self.mappings.aesthetics.iter_mut() { let aes_col_name = naming::aesthetic_column(aesthetic); @@ -256,9 +259,14 @@ impl Layer { *name = aes_col_name; } AestheticValue::Literal(_) => { - // Literals are also columns with prefixed aesthetic name - // Note: literals don't have an original_name to preserve - *value = AestheticValue::standard_column(aes_col_name); + // Literals become columns with prefixed aesthetic name + // For annotation layers, non-positional aesthetics should use identity scales + let is_positional = is_positional_aesthetic(aesthetic); + *value = if is_annotation && !is_positional { + AestheticValue::identity_column(aes_col_name) + } else { + AestheticValue::standard_column(aes_col_name) + }; } } } @@ -285,6 +293,7 @@ impl Layer { AestheticValue::Column { original_name, is_dummy, + is_scaled, .. } => { // Use the stat name from remappings as the original_name for labels @@ -293,6 +302,7 @@ impl Layer { name: prefixed_name, original_name: original_name.clone(), is_dummy: *is_dummy, + is_scaled: *is_scaled, } } AestheticValue::Literal(_) => { @@ -302,6 +312,7 @@ impl Layer { name: prefixed_name, original_name: None, is_dummy: false, + is_scaled: true, } } }; diff --git a/src/plot/types.rs b/src/plot/types.rs index 8c663d8e..e1821779 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -182,6 +182,10 @@ pub enum AestheticValue { original_name: Option, /// Whether this is a dummy/placeholder column (e.g., for bar charts without x mapped) is_dummy: bool, + /// Whether scales should be applied to this column + /// Set to false for annotation layer literals (e.g., stroke => ['red', 'blue']) + /// that are converted to columns but should use identity scales + is_scaled: bool, }, /// Literal value (quoted string, number, or boolean) Literal(ParameterValue), @@ -194,6 +198,17 @@ impl AestheticValue { name: name.into(), original_name: None, is_dummy: false, + is_scaled: true, + } + } + + /// Create a column mapping with identity scale (no scale transformation) + pub fn identity_column(name: impl Into) -> Self { + Self::Column { + name: name.into(), + original_name: None, + is_dummy: false, + is_scaled: false, } } @@ -203,6 +218,7 @@ impl AestheticValue { name: name.into(), original_name: None, is_dummy: true, + is_scaled: true, } } @@ -215,6 +231,7 @@ impl AestheticValue { name: name.into(), original_name: Some(original_name.into()), is_dummy: false, + is_scaled: true, } } diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index f91ace3a..b5411a90 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -802,7 +802,8 @@ pub(super) fn build_encoding_channel( name: col, original_name, is_dummy, - } => build_column_encoding(aesthetic, col, original_name, *is_dummy, ctx), + is_scaled, + } => build_column_encoding(aesthetic, col, original_name, *is_dummy, *is_scaled, ctx), AestheticValue::Literal(lit) => build_literal_encoding(aesthetic, lit), } } @@ -813,13 +814,14 @@ fn build_column_encoding( col: &str, original_name: &Option, is_dummy: bool, + is_scaled: bool, ctx: &mut EncodingContext, ) -> Result { let aesthetic_ctx = ctx.spec.get_aesthetic_context(); let primary = aesthetic_ctx .primary_internal_positional(aesthetic) .unwrap_or(aesthetic); - let mut identity_scale = false; + let mut identity_scale = !is_scaled; // Determine field type from scale or infer from data let field_type = determine_field_type_for_aesthetic( From 8bc43e8f4550b84b8b27f833644733635e73e655 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 6 Mar 2026 11:00:35 +0100 Subject: [PATCH 09/10] error when writing array value encoding --- src/execute/casting.rs | 5 ++++- src/execute/mod.rs | 17 ++++++++++++----- src/execute/scale.rs | 5 ++++- src/lib.rs | 3 ++- src/parser/mod.rs | 5 ++++- src/plot/main.rs | 10 ---------- src/plot/types.rs | 28 ++++++++++++++++++++-------- src/writer/vegalite/encoding.rs | 6 ++++++ 8 files changed, 52 insertions(+), 27 deletions(-) diff --git a/src/execute/casting.rs b/src/execute/casting.rs index 513b171f..403ff606 100644 --- a/src/execute/casting.rs +++ b/src/execute/casting.rs @@ -216,7 +216,10 @@ pub fn determine_layer_source( } else { // Generate VALUES clause with n rows: (VALUES (1), (2), ..., (n)) AS t(col) let rows: Vec = (1..=*n).map(|i| format!("({})", i)).collect(); - format!("(SELECT * FROM (VALUES {}) AS t(__ggsql_dummy__))", rows.join(", ")) + format!( + "(SELECT * FROM (VALUES {}) AS t(__ggsql_dummy__))", + rows.join(", ") + ) } } None => { diff --git a/src/execute/mod.rs b/src/execute/mod.rs index 78520661..6de7bda1 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -2228,12 +2228,19 @@ mod tests { let result = prepare_data_with_reader(query, &reader).unwrap(); assert_eq!(result.specs.len(), 1); - assert_eq!(result.specs[0].layers.len(), 2, "Should have DRAW + PLACE layers"); + assert_eq!( + result.specs[0].layers.len(), + 2, + "Should have DRAW + PLACE layers" + ); // First layer: regular DRAW point let point_layer = &result.specs[0].layers[0]; assert_eq!(point_layer.geom, crate::Geom::point()); - assert!(point_layer.source.is_none(), "DRAW layer should have no explicit source"); + assert!( + point_layer.source.is_none(), + "DRAW layer should have no explicit source" + ); // Second layer: PLACE text annotation let annotation_layer = &result.specs[0].layers[1]; @@ -2441,9 +2448,9 @@ mod tests { // DRAW layer should have inherited global color mapping let point_layer = &result.specs[0].layers[0]; assert!( - point_layer.mappings.contains_key("color") || - point_layer.mappings.contains_key("fill") || - point_layer.mappings.contains_key("stroke"), + point_layer.mappings.contains_key("color") + || point_layer.mappings.contains_key("fill") + || point_layer.mappings.contains_key("stroke"), "DRAW layer should inherit color from global mappings" ); diff --git a/src/execute/scale.rs b/src/execute/scale.rs index 28094526..b8127e90 100644 --- a/src/execute/scale.rs +++ b/src/execute/scale.rs @@ -1005,7 +1005,10 @@ pub fn find_columns_for_aesthetic<'a>( for (i, layer) in layers.iter().enumerate() { if let Some(df) = data_map.get(&naming::layer_key(i)) { for aes_name in &aesthetics_to_check { - if let Some(AestheticValue::Column { name, is_scaled, .. }) = layer.mappings.get(aes_name) { + if let Some(AestheticValue::Column { + name, is_scaled, .. + }) = layer.mappings.get(aes_name) + { // Only train scales for columns that should be scaled // Annotation layer literals have is_scaled: false if *is_scaled { diff --git a/src/lib.rs b/src/lib.rs index 0363aa59..f968a673 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -816,7 +816,8 @@ mod integration_tests { // Note: stroke color goes through resolve_aesthetics and may be in different location // Just verify it's present somewhere as a literal let text_mark = &text_layer["mark"]; - let has_stroke_value = encoding.get("stroke") + let has_stroke_value = encoding + .get("stroke") .and_then(|s| s.get("value")) .is_some() || text_mark.get("stroke").is_some(); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0d9983f3..2979a51e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -432,7 +432,10 @@ mod tests { // First layer: regular DRAW point assert_eq!(specs[0].layers[0].geom, Geom::point()); - assert!(specs[0].layers[0].source.is_none(), "DRAW layer should have no explicit source"); + assert!( + specs[0].layers[0].source.is_none(), + "DRAW layer should have no explicit source" + ); // Second layer: PLACE text with annotation source assert_eq!(specs[0].layers[1].geom, Geom::text()); diff --git a/src/plot/main.rs b/src/plot/main.rs index 1b423095..390b37e2 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -47,16 +47,6 @@ pub use super::projection::{Coord, Projection}; // Re-export Facet types from the facet module pub use super::facet::{Facet, FacetLayout}; -// ============================================================================= -// Helper Functions -// ============================================================================= - -/// Recycle a scalar or length-1 array to a target length. -/// -/// Recycling rules: -/// - Scalar values (Number, String, Boolean) → Array with n copies as ArrayElements -/// - Length-1 arrays → Expand to n copies of the single element -/// - Length-n arrays → Return unchanged if n matches target, error otherwise // ============================================================================= // Plot Type // ============================================================================= diff --git a/src/plot/types.rs b/src/plot/types.rs index e1821779..d5955b68 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -649,7 +649,10 @@ impl ArrayElement { }; // Try to coerce all elements to the inferred type - let coerced: Result, _> = values.iter().map(|elem| elem.coerce_to(target_type)).collect(); + let coerced: Result, _> = values + .iter() + .map(|elem| elem.coerce_to(target_type)) + .collect(); match coerced { Ok(coerced_arr) => coerced_arr, @@ -657,7 +660,10 @@ impl ArrayElement { // Coercion failed - fall back to String type values .iter() - .map(|elem| elem.coerce_to(ArrayElementType::String).unwrap_or(Self::Null)) + .map(|elem| { + elem.coerce_to(ArrayElementType::String) + .unwrap_or(Self::Null) + }) .collect() } } @@ -683,13 +689,19 @@ impl ArrayElement { } Self::DateTime(dt) => { // Convert microseconds since epoch to TIMESTAMP - format!("TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", dt) + format!( + "TIMESTAMP '1970-01-01 00:00:00' + INTERVAL {} MICROSECOND", + dt + ) } Self::Time(t) => { // Convert nanoseconds since midnight to TIME let seconds = t / 1_000_000_000; let nanos = t % 1_000_000_000; - format!("TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", seconds, nanos) + format!( + "TIME '00:00:00' + INTERVAL {} SECOND + INTERVAL {} NANOSECOND", + seconds, nanos + ) } Self::Null => "NULL".to_string(), } @@ -907,11 +919,11 @@ impl ParameterValue { /// Convert a scalar ParameterValue to an ArrayElement. /// /// Panics if called on an Array variant. - fn to_array_element(self) -> ArrayElement { + fn to_array_element(&self) -> ArrayElement { match self { - ParameterValue::Number(num) => ArrayElement::Number(num), - ParameterValue::String(s) => ArrayElement::String(s), - ParameterValue::Boolean(b) => ArrayElement::Boolean(b), + ParameterValue::Number(num) => ArrayElement::Number(*num), + ParameterValue::String(s) => ArrayElement::String(s.clone()), + ParameterValue::Boolean(b) => ArrayElement::Boolean(*b), ParameterValue::Null => ArrayElement::Null, ParameterValue::Array(_) => panic!("Cannot convert Array to single ArrayElement"), } diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index b5411a90..d6f53d2f 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -939,6 +939,12 @@ fn build_literal_encoding(aesthetic: &str, lit: &ParameterValue) -> Result json!(n), } } + ParameterValue::Array(_) => { + return Err(crate::GgsqlError::WriterError(format!( + "The `{aes}` SETTING must be scalar, not an array.", + aes = aesthetic + ))) + } _ => lit.to_json(), }; Ok(json!({"value": val})) From 3c63669c614acf82dfc5c3e3dc01e69cce6a62ea Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 6 Mar 2026 11:35:38 +0100 Subject: [PATCH 10/10] add docs --- doc/_quarto.yml | 4 ++++ doc/syntax/clause/place.qmd | 37 +++++++++++++++++++++++++++++++++++++ doc/syntax/index.qmd | 1 + 3 files changed, 42 insertions(+) create mode 100644 doc/syntax/clause/place.qmd diff --git a/doc/_quarto.yml b/doc/_quarto.yml index a5801133..f5684647 100644 --- a/doc/_quarto.yml +++ b/doc/_quarto.yml @@ -39,6 +39,8 @@ website: href: syntax/clause/visualise.qmd - text: "`DRAW`" href: syntax/clause/draw.qmd + - text: "`PLACE`" + href: syntax/clause/place.qmd - text: "`SCALE`" href: syntax/clause/scale.qmd - text: "`FACET`" @@ -69,6 +71,8 @@ website: href: syntax/clause/visualise.qmd - text: "`DRAW`" href: syntax/clause/draw.qmd + - text: "`PLACE`" + href: syntax/clause/place.qmd - text: "`SCALE`" href: syntax/clause/scale.qmd - text: "`FACET`" diff --git a/doc/syntax/clause/place.qmd b/doc/syntax/clause/place.qmd new file mode 100644 index 00000000..6321bbed --- /dev/null +++ b/doc/syntax/clause/place.qmd @@ -0,0 +1,37 @@ +--- +title: "Create annotation layers with `PLACE`" +--- + +The `PLACE` clause is the little sibling of the [`DRAW` clause](../clause/draw.qmd) that also creates layers. +A layer created with `PLACE` has no mappings to data, all aesthetics are set using literal values instead. + +## Clause syntax +The `PLACE` clause takes just a type and a setting clause, all of them required. + +```ggsql +PLACE + SETTING => , ... +``` + +Like `DRAW`, the layer type is required and specifies the type of layer to draw, like `point` or `text`. +It defines how the remaining settings are interpreted. +The [main syntax page](../index.qmd#layers) has a list of all available layer types + +Unlike the `DRAW` clause, the `PLACE` clause does not support `FILTER`, `PARTITION BY`, and `ORDER BY` clauses since +everything is declared inline. + +### `SETTING` +```ggsql +SETTING => , ... +``` + +The `SETTING` clause can be used for to different things: + +* *Setting aesthetics*: All aesthetics in `PLACE` layers are specified using literal value, e.g. 'red' (as in the color red). +Aesthetics that are set will not go through a scale but will use the provided value as-is. +You cannot set an aesthetic to a column, only to a literal values. +Contrary to `DRAW` layers, `PLACE` layers can take multiple literal values in an array. +* *Setting parameters*: Some layers take additional arguments that control how they behave. +Often, but not always, these modify the statistical transformation in some way. +An example would be the binwidth parameter in histogram which controls the width of each bin during histogram calculation. +This is not a statistical property since it is not related to each record, but to the calculation as a whole. diff --git a/doc/syntax/index.qmd b/doc/syntax/index.qmd index 1400280f..c58fa89e 100644 --- a/doc/syntax/index.qmd +++ b/doc/syntax/index.qmd @@ -7,6 +7,7 @@ ggsql augments the standard SQL syntax with a number of new clauses to describe - [`VISUALISE`](clause/visualise.qmd) initiates the visualisation part of the query - [`DRAW`](clause/draw.qmd) adds a new layer to the visualisation + - [`PLACE`](clause/place.qmd) adds an annotation layer - [`SCALE`](clause/scale.qmd) specify how an aesthetic should be scaled - [`FACET`](clause/facet.qmd) describes how data should be split into small multiples - [`PROJECT`](clause/project.qmd) is used for selecting the coordinate system to use