From 33af223b8d853930e68a8281a0b2a1f2483aeca0 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Fri, 20 Feb 2026 14:53:56 +0100 Subject: [PATCH 01/29] Add fontsize aesthetic for linear text sizing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a separate 'fontsize' aesthetic as an alternative to 'size' for text/label geoms. Unlike 'size' (which uses area-based scaling with radius² conversion for point marks), 'fontsize' uses linear scaling for font sizes. Changes: - Grammar: Add 'fontsize' to aesthetic names - Geoms: Add 'fontsize' to Text and Label supported aesthetics - Aesthetics: Register 'fontsize' in NON_POSITIONAL list - Writer: Map 'fontsize' → 'size' channel in Vega-Lite output - Scale: Add default range [8.0, 20.0] for fontsize aesthetic - Tests: Add test_fontsize_linear_scaling integration test Usage: DRAW text MAPPING x AS x, y AS y, value AS fontsize SCALE fontsize TO [10, 20] -- Linear: 10pt to 20pt (not area-converted) Co-Authored-By: Claude Sonnet 4.5 --- src/plot/aesthetic.rs | 1 + src/plot/layer/geom/label.rs | 2 +- src/plot/layer/geom/text.rs | 2 +- src/plot/scale/scale_type/continuous.rs | 4 ++ src/writer/vegalite/encoding.rs | 1 + src/writer/vegalite/mod.rs | 63 +++++++++++++++++++++++++ tree-sitter-ggsql/grammar.js | 2 +- 7 files changed, 72 insertions(+), 3 deletions(-) diff --git a/src/plot/aesthetic.rs b/src/plot/aesthetic.rs index e4f3471a..636782fb 100644 --- a/src/plot/aesthetic.rs +++ b/src/plot/aesthetic.rs @@ -58,6 +58,7 @@ pub const NON_POSITIONAL: &[&str] = &[ "label", "family", "fontface", + "fontsize", "hjust", "vjust", ]; diff --git a/src/plot/layer/geom/label.rs b/src/plot/layer/geom/label.rs index 8481898a..4933e28f 100644 --- a/src/plot/layer/geom/label.rs +++ b/src/plot/layer/geom/label.rs @@ -14,7 +14,7 @@ impl GeomTrait for Label { fn aesthetics(&self) -> GeomAesthetics { GeomAesthetics { supported: &[ - "x", "y", "label", "fill", "stroke", "size", "opacity", 
"family", "fontface", + "x", "y", "label", "fill", "stroke", "fontsize", "opacity", "family", "fontface", "hjust", "vjust", ], required: &["x", "y"], diff --git a/src/plot/layer/geom/text.rs b/src/plot/layer/geom/text.rs index 7107f5c5..b37bb79b 100644 --- a/src/plot/layer/geom/text.rs +++ b/src/plot/layer/geom/text.rs @@ -14,7 +14,7 @@ impl GeomTrait for Text { fn aesthetics(&self) -> GeomAesthetics { GeomAesthetics { supported: &[ - "x", "y", "label", "stroke", "size", "opacity", "family", "fontface", "hjust", + "x", "y", "label", "stroke", "fontsize", "opacity", "family", "fontface", "hjust", "vjust", ], required: &["x", "y"], diff --git a/src/plot/scale/scale_type/continuous.rs b/src/plot/scale/scale_type/continuous.rs index d06b125c..1aa0505c 100644 --- a/src/plot/scale/scale_type/continuous.rs +++ b/src/plot/scale/scale_type/continuous.rs @@ -138,6 +138,10 @@ impl ScaleTypeTrait for Continuous { ArrayElement::Number(1.0), ArrayElement::Number(6.0), ])), + "fontsize" => Ok(Some(vec![ + ArrayElement::Number(8.0), + ArrayElement::Number(20.0), + ])), "opacity" => Ok(Some(vec![ ArrayElement::Number(0.1), ArrayElement::Number(1.0), diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index f489d2d7..81232d09 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -887,6 +887,7 @@ pub(super) fn map_aesthetic_name(aesthetic: &str) -> String { "linewidth" => "strokeWidth", // Text aesthetics "label" => "text", + "fontsize" => "size", // All other aesthetics pass through directly // (fill and stroke map to Vega-Lite's separate fill/stroke channels) _ => aesthetic, diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs index e7d72da3..366500e0 100644 --- a/src/writer/vegalite/mod.rs +++ b/src/writer/vegalite/mod.rs @@ -605,6 +605,7 @@ mod tests { assert_eq!(map_aesthetic_name("linetype"), "strokeDash"); assert_eq!(map_aesthetic_name("linewidth"), "strokeWidth"); 
assert_eq!(map_aesthetic_name("label"), "text"); + assert_eq!(map_aesthetic_name("fontsize"), "size"); } #[test] @@ -691,6 +692,68 @@ mod tests { assert_eq!(vl_spec["layer"][0]["mark"]["clip"], true); } + #[test] + fn test_fontsize_linear_scaling() { + use crate::plot::{ArrayElement, OutputRange, Scale, ScaleType}; + + let writer = VegaLiteWriter::new(); + + // Create spec with text geom using fontsize aesthetic + let mut spec = Plot::new(); + let layer = Layer::new(Geom::text()) + .with_aesthetic( + "x".to_string(), + AestheticValue::standard_column("x".to_string()), + ) + .with_aesthetic( + "y".to_string(), + AestheticValue::standard_column("y".to_string()), + ) + .with_aesthetic( + "fontsize".to_string(), + AestheticValue::standard_column("value".to_string()), + ); + spec.layers.push(layer); + + // Add fontsize scale with explicit range + let mut scale = Scale::new("fontsize"); + scale.scale_type = Some(ScaleType::continuous()); + scale.output_range = Some(OutputRange::Array(vec![ + ArrayElement::Number(10.0), + ArrayElement::Number(20.0), + ])); + spec.scales.push(scale); + + // Create DataFrame + let df = df! 
{ + "x" => &[1, 2, 3], + "y" => &[1, 2, 3], + "value" => &[1.0, 2.0, 3.0], + } + .unwrap(); + + // Generate Vega-Lite JSON + let json_str = writer.write(&spec, &wrap_data(df)).unwrap(); + let vl_spec: Value = serde_json::from_str(&json_str).unwrap(); + + // Verify fontsize maps to size channel + let encoding = &vl_spec["layer"][0]["encoding"]; + assert!(encoding["size"].is_object(), "Should have size encoding"); + assert!( + encoding["fontsize"].is_null(), + "Should not have fontsize encoding" + ); + + // Verify scale range is linear (no area conversion) + let scale_range = &encoding["size"]["scale"]["range"]; + assert!(scale_range.is_array(), "Scale should have range array"); + let range = scale_range.as_array().unwrap(); + assert_eq!(range.len(), 2); + // Should be 10 and 20, NOT ~31 and ~126 (which would be area-converted) + assert_eq!(range[0].as_f64().unwrap(), 10.0); + assert_eq!(range[1].as_f64().unwrap(), 20.0); + } + #[test] fn test_literal_color() { let writer = VegaLiteWriter::new(); diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js index dfb18c53..5d5d13b0 100644 --- a/tree-sitter-ggsql/grammar.js +++ b/tree-sitter-ggsql/grammar.js @@ -647,7 +647,7 @@ module.exports = grammar({ // Size and shape 'size', 'shape', 'linetype', 'linewidth', 'width', 'height', // Text aesthetics - 'label', 'family', 'fontface', 'hjust', 'vjust', + 'label', 'family', 'fontface', 'fontsize', 'hjust', 'vjust', // Computed variables 'offset' ), From e8822bee72a99f2fdc09a840a16be44d64ea6d1c Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 11:49:59 +0100 Subject: [PATCH 02/29] Implement TextRenderer with data-splitting for font properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TextRenderer implementation that handles font aesthetics (family, fontface, hjust, vjust) by splitting data into multiple Vega-Lite layers when font properties vary across rows. 
Key features: - Single-layer optimization: When all fonts are constant, generates one layer with mark properties set directly - Multi-layer splitting: When fonts vary, creates one layer per unique font combination while preserving ORDER BY - Proper SOURCE_COLUMN filtering: Uses empty string for single-layer and suffix keys for multi-layer to match BoxplotRenderer pattern - Font property mapping: - family → mark.font - fontface → mark.fontWeight/fontStyle - hjust → mark.align - vjust → mark.baseline Tests included for both constant and varying font cases. Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 483 +++++++++++++++++++++++++++++++++++ 1 file changed, 483 insertions(+) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 00fe0ae4..37d7300f 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -222,6 +222,428 @@ impl GeomRenderer for PathRenderer { } } +// ============================================================================= +// Text Renderer +// ============================================================================= + +/// Metadata for text rendering +struct TextMetadata { + strategy: FontStrategy, +} + +/// Strategy for handling font properties in text layers +enum FontStrategy { + /// All font properties are constant - use single layer with mark properties + SingleLayer { + mark_properties: HashMap, + }, + /// Font properties vary - split into multiple layers, one per unique combination + MultiLayer { + groups: Vec, + common_properties: HashMap, + }, +} + +/// A group of rows with identical font property values +struct FontGroup { + /// Unique signature for this combination (for data key suffix) + #[allow(dead_code)] + signature: String, + /// Mark properties for this group + properties: HashMap, + /// Row indices belonging to this group + row_indices: Vec, +} + +/// Renderer for text geom - handles font properties via data splitting +pub struct TextRenderer; + +impl 
TextRenderer { + /// Analyze DataFrame columns to find font aesthetics + fn analyze_font_columns(df: &DataFrame) -> Result { + + let mut varying_columns: Vec<(String, String)> = Vec::new(); // (aesthetic, column_name) + let mut constant_values: HashMap = HashMap::new(); + + // Check for font aesthetic columns in DataFrame + for &aesthetic in &["family", "fontface", "hjust", "vjust"] { + let col_name = naming::aesthetic_column(aesthetic); + + if let Ok(col) = df.column(&col_name) { + let unique_count = col + .n_unique() + .map_err(|e| GgsqlError::WriterError(e.to_string()))?; + + if unique_count == 1 { + // All same → treat as constant + let value_str = col + .str() + .map_err(|e| GgsqlError::WriterError(e.to_string()))? + .get(0) + .unwrap_or(""); + let converted = Self::convert_to_mark_property(aesthetic, value_str); + constant_values.insert(aesthetic.to_string(), converted); + } else if unique_count > 1 { + // Multiple values → needs splitting + varying_columns.push((aesthetic.to_string(), col_name)); + } + } + } + + if varying_columns.is_empty() { + // All constant or not present → single layer + Ok(FontStrategy::SingleLayer { + mark_properties: constant_values, + }) + } else { + // Some varying → multi-layer + let groups = Self::build_font_groups_from_df(df, &varying_columns, &constant_values)?; + Ok(FontStrategy::MultiLayer { + groups, + common_properties: constant_values, + }) + } + } + + /// Build groups from DataFrame columns (used in prepare_data) + fn build_font_groups_from_df( + data: &DataFrame, + varying: &[(String, String)], // (aesthetic, column_name) + constant: &HashMap, + ) -> Result> { + use polars::prelude::{ChunkedArray, StringType}; + + let nrows = data.height(); + let mut groups: Vec = Vec::new(); + let mut signature_to_idx: HashMap = HashMap::new(); + + // Pre-fetch all varying font columns + let font_columns: Vec<(String, ChunkedArray)> = varying + .iter() + .map(|(aes, col)| { + let series = data + .column(col) + .map_err(|e| 
GgsqlError::WriterError(e.to_string()))?; + let ca = series + .str() + .map_err(|e| GgsqlError::WriterError(e.to_string()))? + .clone(); + Ok((aes.clone(), ca)) + }) + .collect::>>()?; + + // Iterate rows and assign to groups + for row_idx in 0..nrows { + // Build signature for this row + let mut sig_parts: Vec = Vec::new(); + let mut properties = constant.clone(); + + for (aesthetic, ca) in &font_columns { + let value = ca.get(row_idx).unwrap_or(""); + sig_parts.push(format!("{}:{}", aesthetic, value)); + + // Convert to mark property + let prop_value = Self::convert_to_mark_property(aesthetic, value); + properties.insert(aesthetic.to_string(), prop_value); + } + + let signature = sig_parts.join("|"); + + // Add to existing group or create new group + if let Some(&group_idx) = signature_to_idx.get(&signature) { + groups[group_idx].row_indices.push(row_idx); + } else { + let group_idx = groups.len(); + signature_to_idx.insert(signature.clone(), group_idx); + groups.push(FontGroup { + signature, + properties, + row_indices: vec![row_idx], + }); + } + } + + Ok(groups) + } + + /// Convert ggsql font aesthetic value to Vega-Lite mark property + fn convert_to_mark_property(aesthetic: &str, value: &str) -> Value { + match aesthetic { + "family" => json!(value), + "fontface" => { + // Map ggplot2 fontface to fontWeight/fontStyle + match value { + "bold" => json!({"fontWeight": "bold"}), + "italic" => json!({"fontStyle": "italic"}), + "bold.italic" | "bolditalic" => json!({ + "fontWeight": "bold", + "fontStyle": "italic" + }), + _ => json!({"fontWeight": "normal"}), + } + } + "hjust" => { + // Map 0/0.5/1 or string to left/center/right + let align = match value.parse::() { + Ok(v) if v <= 0.25 => "left", + Ok(v) if v >= 0.75 => "right", + _ => match value { + "left" => "left", + "right" => "right", + _ => "center", + }, + }; + json!(align) + } + "vjust" => { + // Map 0/0.5/1 or string to bottom/middle/top + let baseline = match value.parse::() { + Ok(v) if v <= 0.25 => 
"bottom", + Ok(v) if v >= 0.75 => "top", + _ => match value { + "top" => "top", + "bottom" => "bottom", + _ => "middle", + }, + }; + json!(baseline) + } + _ => json!(value), + } + } + + /// Filter DataFrame to specific row indices + fn filter_by_indices(data: &DataFrame, indices: &[usize]) -> Result { + use polars::prelude::{BooleanChunked, NamedFrom}; + + let nrows = data.height(); + let mut mask_data = vec![false; nrows]; + for &idx in indices { + if idx < nrows { + mask_data[idx] = true; + } + } + + let mask = BooleanChunked::new("".into(), mask_data); + + data.filter(&mask) + .map_err(|e| GgsqlError::WriterError(e.to_string())) + } + + /// Map aesthetic name to Vega-Lite mark property name + fn map_aesthetic_to_mark_property(aesthetic: &str) -> &str { + match aesthetic { + "family" => "font", + "hjust" => "align", + "vjust" => "baseline", + _ => aesthetic, + } + } + + /// Apply mark property, handling special cases like fontface + fn apply_mark_property(mark_obj: &mut Map, key: &str, value: &Value) { + if key == "fontface" || value.is_object() { + // fontface may contain multiple properties (fontWeight + fontStyle) + if let Some(obj) = value.as_object() { + for (k, v) in obj { + mark_obj.insert(k.clone(), v.clone()); + } + return; + } + } + mark_obj.insert(key.to_string(), value.clone()); + } + + /// Finalize single layer case + fn finalize_single_layer( + &self, + mut prototype: Value, + data_key: &str, + mark_properties: &HashMap, + ) -> Result> { + // Apply mark properties + if let Some(mark) = prototype.get_mut("mark") { + if let Some(mark_obj) = mark.as_object_mut() { + for (aesthetic, value) in mark_properties { + let vl_key = Self::map_aesthetic_to_mark_property(aesthetic); + Self::apply_mark_property(mark_obj, vl_key, value); + } + } + } + + // Add source filter (matching BoxplotRenderer pattern) + let source_filter = json!({ + "filter": { + "field": naming::SOURCE_COLUMN, + "equal": data_key + } + }); + + // Prepend source filter to any existing 
transforms + let existing_transforms = prototype + .get("transform") + .and_then(|t| t.as_array()) + .cloned() + .unwrap_or_default(); + + let mut new_transforms = vec![source_filter]; + new_transforms.extend(existing_transforms); + prototype["transform"] = json!(new_transforms); + + Ok(vec![prototype]) + } + + /// Finalize multi-layer case + fn finalize_multi_layer( + &self, + prototype: Value, + data_key: &str, + groups: &[FontGroup], + common_properties: &HashMap, + ) -> Result> { + let mut layers = Vec::new(); + + for (group_idx, group) in groups.iter().enumerate() { + let mut layer_spec = prototype.clone(); + let suffix = format!("_font_{}", group_idx); + let source_key = format!("{}{}", data_key, suffix); + + // Apply mark properties (common + group-specific) + if let Some(mark) = layer_spec.get_mut("mark") { + if let Some(mark_obj) = mark.as_object_mut() { + // Apply common properties first + for (aesthetic, value) in common_properties { + let vl_key = Self::map_aesthetic_to_mark_property(aesthetic); + Self::apply_mark_property(mark_obj, vl_key, value); + } + + // Apply group-specific properties (override common if needed) + for (aesthetic, value) in &group.properties { + let vl_key = Self::map_aesthetic_to_mark_property(aesthetic); + Self::apply_mark_property(mark_obj, vl_key, value); + } + } + } + + // Add source filter for this group + let source_filter = json!({ + "filter": { + "field": naming::SOURCE_COLUMN, + "equal": source_key + } + }); + + let existing_transforms = layer_spec + .get("transform") + .and_then(|t| t.as_array()) + .cloned() + .unwrap_or_default(); + + let mut new_transforms = vec![source_filter]; + new_transforms.extend(existing_transforms); + layer_spec["transform"] = json!(new_transforms); + + layers.push(layer_spec); + } + + Ok(layers) + } +} + +impl GeomRenderer for TextRenderer { + fn prepare_data( + &self, + df: &DataFrame, + data_key: &str, + binned_columns: &HashMap>, + ) -> Result { + // Analyze font columns to determine 
strategy + let strategy = Self::analyze_font_columns(df)?; + + match strategy { + FontStrategy::SingleLayer { .. } => { + // Single layer - use empty string as component key + // The writer will prepend data_key, so empty string results in just data_key + let values = if binned_columns.is_empty() { + dataframe_to_values(df)? + } else { + dataframe_to_values_with_bins(df, binned_columns)? + }; + + Ok(PreparedData::Composite { + components: HashMap::from([(String::new(), values)]), + metadata: Box::new(TextMetadata { strategy }), + }) + } + FontStrategy::MultiLayer { ref groups, .. } => { + // Multi-layer - split data by groups + let mut components: HashMap> = HashMap::new(); + + for (group_idx, group) in groups.iter().enumerate() { + let suffix = format!("_font_{}", group_idx); + // Use just the suffix as component key - writer will prepend data_key + + let filtered = Self::filter_by_indices(df, &group.row_indices)?; + let values = if binned_columns.is_empty() { + dataframe_to_values(&filtered)? + } else { + dataframe_to_values_with_bins(&filtered, binned_columns)? + }; + + components.insert(suffix, values); + } + + Ok(PreparedData::Composite { + components, + metadata: Box::new(TextMetadata { strategy }), + }) + } + } + } + + fn modify_encoding(&self, encoding: &mut Map, _layer: &Layer) -> Result<()> { + // Remove font aesthetics from encoding - they only work as mark properties + for &aesthetic in &["family", "fontface", "hjust", "vjust"] { + encoding.remove(aesthetic); + } + Ok(()) + } + + fn needs_source_filter(&self) -> bool { + // TextRenderer handles source filtering in finalize() + false + } + + fn finalize( + &self, + prototype: Value, + _layer: &Layer, + data_key: &str, + prepared: &PreparedData, + ) -> Result> { + let PreparedData::Composite { metadata, .. 
} = prepared else { + return Err(GgsqlError::InternalError( + "TextRenderer::finalize called with non-composite data".to_string(), + )); + }; + + // Downcast metadata to TextMetadata + let info = metadata.downcast_ref::().ok_or_else(|| { + GgsqlError::InternalError("Failed to downcast text metadata".to_string()) + })?; + + match &info.strategy { + FontStrategy::SingleLayer { mark_properties } => { + self.finalize_single_layer(prototype, data_key, mark_properties) + } + FontStrategy::MultiLayer { + groups, + common_properties, + } => self.finalize_multi_layer(prototype, data_key, groups, common_properties), + } + } +} + // ============================================================================= // Ribbon Renderer // ============================================================================= @@ -809,6 +1231,7 @@ pub fn get_renderer(geom: &Geom) -> Box { GeomType::Boxplot => Box::new(BoxplotRenderer), GeomType::Density => Box::new(AreaRenderer), GeomType::Violin => Box::new(ViolinRenderer), + GeomType::Text => Box::new(TextRenderer), // All other geoms (Point, Line, Tile, etc.) use the default renderer _ => Box::new(DefaultRenderer), } @@ -911,4 +1334,64 @@ mod tests { ])) ); } + + #[test] + fn test_text_constant_font() { + use crate::naming; + use polars::prelude::*; + + let renderer = TextRenderer; + + // Create DataFrame where all rows have the same font + let df = df! { + naming::aesthetic_column("x").as_str() => &[1.0, 2.0, 3.0], + naming::aesthetic_column("y").as_str() => &[10.0, 20.0, 30.0], + naming::aesthetic_column("label").as_str() => &["A", "B", "C"], + naming::aesthetic_column("family").as_str() => &["Arial", "Arial", "Arial"], + } + .unwrap(); + + // Prepare data - should result in single layer with empty component key + let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); + + match prepared { + PreparedData::Composite { components, .. 
} => { + // Should have single component with empty key + assert_eq!(components.len(), 1); + assert!(components.contains_key("")); + } + _ => panic!("Expected Composite"), + } + } + + #[test] + fn test_text_varying_font() { + use crate::naming; + use polars::prelude::*; + + let renderer = TextRenderer; + + // Create DataFrame with different fonts per row + let df = df! { + naming::aesthetic_column("x").as_str() => &[1.0, 2.0, 3.0], + naming::aesthetic_column("y").as_str() => &[10.0, 20.0, 30.0], + naming::aesthetic_column("label").as_str() => &["A", "B", "C"], + naming::aesthetic_column("family").as_str() => &["Arial", "Courier", "Times"], + } + .unwrap(); + + // Prepare data - should result in multiple layers + let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); + + match prepared { + PreparedData::Composite { components, .. } => { + // Should have 3 components (one per unique font) with suffix keys + assert_eq!(components.len(), 3); + assert!(components.contains_key("_font_0")); + assert!(components.contains_key("_font_1")); + assert!(components.contains_key("_font_2")); + } + _ => panic!("Expected Composite"), + } + } } From b19bc758d064d7fc49acac0fc408bf2696a0839a Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 12:01:42 +0100 Subject: [PATCH 03/29] Simplify FontStrategy by unifying single and multi-layer cases Remove the FontStrategy enum variants and use a single struct with a groups vector. The single-layer case now has 1 group containing all rows, while the multi-layer case has N groups. 
Benefits: - Eliminates redundant code paths (no more match statements) - Simpler prepare_data() - just iterate over groups - Simpler finalize() - unified layer generation logic - Fewer lines of code overall Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 150 ++++++++++++----------------------- 1 file changed, 49 insertions(+), 101 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 37d7300f..1b8b106b 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -232,16 +232,9 @@ struct TextMetadata { } /// Strategy for handling font properties in text layers -enum FontStrategy { - /// All font properties are constant - use single layer with mark properties - SingleLayer { - mark_properties: HashMap, - }, - /// Font properties vary - split into multiple layers, one per unique combination - MultiLayer { - groups: Vec, - common_properties: HashMap, - }, +struct FontStrategy { + groups: Vec, + common_properties: HashMap, } /// A group of rows with identical font property values @@ -291,14 +284,21 @@ impl TextRenderer { } if varying_columns.is_empty() { - // All constant or not present → single layer - Ok(FontStrategy::SingleLayer { - mark_properties: constant_values, + // All constant or not present → single group with all rows + let all_indices: Vec = (0..df.height()).collect(); + let groups = vec![FontGroup { + signature: String::new(), + properties: constant_values.clone(), + row_indices: all_indices, + }]; + Ok(FontStrategy { + groups, + common_properties: HashMap::new(), // All properties in the group }) } else { // Some varying → multi-layer let groups = Self::build_font_groups_from_df(df, &varying_columns, &constant_values)?; - Ok(FontStrategy::MultiLayer { + Ok(FontStrategy { groups, common_properties: constant_values, }) @@ -455,46 +455,8 @@ impl TextRenderer { } /// Finalize single layer case - fn finalize_single_layer( - &self, - mut prototype: Value, - data_key: &str, - 
mark_properties: &HashMap, - ) -> Result> { - // Apply mark properties - if let Some(mark) = prototype.get_mut("mark") { - if let Some(mark_obj) = mark.as_object_mut() { - for (aesthetic, value) in mark_properties { - let vl_key = Self::map_aesthetic_to_mark_property(aesthetic); - Self::apply_mark_property(mark_obj, vl_key, value); - } - } - } - - // Add source filter (matching BoxplotRenderer pattern) - let source_filter = json!({ - "filter": { - "field": naming::SOURCE_COLUMN, - "equal": data_key - } - }); - - // Prepend source filter to any existing transforms - let existing_transforms = prototype - .get("transform") - .and_then(|t| t.as_array()) - .cloned() - .unwrap_or_default(); - - let mut new_transforms = vec![source_filter]; - new_transforms.extend(existing_transforms); - prototype["transform"] = json!(new_transforms); - - Ok(vec![prototype]) - } - - /// Finalize multi-layer case - fn finalize_multi_layer( + /// Finalize layers from font groups (handles both single and multi-group cases) + fn finalize_layers( &self, prototype: Value, data_key: &str, @@ -505,7 +467,13 @@ impl TextRenderer { for (group_idx, group) in groups.iter().enumerate() { let mut layer_spec = prototype.clone(); - let suffix = format!("_font_{}", group_idx); + // For single-group case (all constant), use empty suffix + // For multi-group case, use _font_N suffix + let suffix = if groups.len() == 1 { + String::new() + } else { + format!("_font_{}", group_idx) + }; let source_key = format!("{}{}", data_key, suffix); // Apply mark properties (common + group-specific) @@ -554,51 +522,38 @@ impl GeomRenderer for TextRenderer { fn prepare_data( &self, df: &DataFrame, - data_key: &str, + _data_key: &str, binned_columns: &HashMap>, ) -> Result { // Analyze font columns to determine strategy let strategy = Self::analyze_font_columns(df)?; - match strategy { - FontStrategy::SingleLayer { .. 
} => { - // Single layer - use empty string as component key - // The writer will prepend data_key, so empty string results in just data_key - let values = if binned_columns.is_empty() { - dataframe_to_values(df)? - } else { - dataframe_to_values_with_bins(df, binned_columns)? - }; + // Split data by groups (even if just 1 group for constant fonts) + let mut components: HashMap> = HashMap::new(); - Ok(PreparedData::Composite { - components: HashMap::from([(String::new(), values)]), - metadata: Box::new(TextMetadata { strategy }), - }) - } - FontStrategy::MultiLayer { ref groups, .. } => { - // Multi-layer - split data by groups - let mut components: HashMap> = HashMap::new(); - - for (group_idx, group) in groups.iter().enumerate() { - let suffix = format!("_font_{}", group_idx); - // Use just the suffix as component key - writer will prepend data_key - - let filtered = Self::filter_by_indices(df, &group.row_indices)?; - let values = if binned_columns.is_empty() { - dataframe_to_values(&filtered)? - } else { - dataframe_to_values_with_bins(&filtered, binned_columns)? - }; - - components.insert(suffix, values); - } + for (group_idx, group) in strategy.groups.iter().enumerate() { + // For single-group case (all constant), use empty suffix + // For multi-group case, use _font_N suffix + let suffix = if strategy.groups.len() == 1 { + String::new() + } else { + format!("_font_{}", group_idx) + }; - Ok(PreparedData::Composite { - components, - metadata: Box::new(TextMetadata { strategy }), - }) - } + let filtered = Self::filter_by_indices(df, &group.row_indices)?; + let values = if binned_columns.is_empty() { + dataframe_to_values(&filtered)? + } else { + dataframe_to_values_with_bins(&filtered, binned_columns)? 
+ }; + + components.insert(suffix, values); } + + Ok(PreparedData::Composite { + components, + metadata: Box::new(TextMetadata { strategy }), + }) } fn modify_encoding(&self, encoding: &mut Map, _layer: &Layer) -> Result<()> { @@ -632,15 +587,8 @@ impl GeomRenderer for TextRenderer { GgsqlError::InternalError("Failed to downcast text metadata".to_string()) })?; - match &info.strategy { - FontStrategy::SingleLayer { mark_properties } => { - self.finalize_single_layer(prototype, data_key, mark_properties) - } - FontStrategy::MultiLayer { - groups, - common_properties, - } => self.finalize_multi_layer(prototype, data_key, groups, common_properties), - } + // Generate layers from groups (1 group = single layer, N groups = N layers) + self.finalize_layers(prototype, data_key, &info.strategy.groups, &info.strategy.common_properties) } } From 0fe62e23c39385ac84b6edac7dab3aac742d8fc4 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 12:07:26 +0100 Subject: [PATCH 04/29] Remove TextMetadata wrapper, use FontStrategy directly TextMetadata was simply wrapping FontStrategy with no additional value. Store FontStrategy directly in PreparedData metadata instead. This eliminates 4 lines and one level of indirection. 
Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 1b8b106b..234afe6d 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -226,11 +226,6 @@ impl GeomRenderer for PathRenderer { // Text Renderer // ============================================================================= -/// Metadata for text rendering -struct TextMetadata { - strategy: FontStrategy, -} - /// Strategy for handling font properties in text layers struct FontStrategy { groups: Vec, @@ -552,7 +547,7 @@ impl GeomRenderer for TextRenderer { Ok(PreparedData::Composite { components, - metadata: Box::new(TextMetadata { strategy }), + metadata: Box::new(strategy), }) } @@ -582,13 +577,13 @@ impl GeomRenderer for TextRenderer { )); }; - // Downcast metadata to TextMetadata - let info = metadata.downcast_ref::().ok_or_else(|| { - GgsqlError::InternalError("Failed to downcast text metadata".to_string()) + // Downcast metadata to FontStrategy + let strategy = metadata.downcast_ref::().ok_or_else(|| { + GgsqlError::InternalError("Failed to downcast font strategy".to_string()) })?; // Generate layers from groups (1 group = single layer, N groups = N layers) - self.finalize_layers(prototype, data_key, &info.strategy.groups, &info.strategy.common_properties) + self.finalize_layers(prototype, data_key, &strategy.groups, &strategy.common_properties) } } From a4d256338393b3b2288e6aa7382847187e0eeaaf Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 12:12:26 +0100 Subject: [PATCH 05/29] Remove unused signature field from FontGroup The signature field was only used during group construction as a HashMap key to track row assignments. After groups are built, the field was never accessed (marked with #[allow(dead_code)]). 
Removed the field and its assignments, keeping the local signature variable for grouping logic. Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 234afe6d..e6813e67 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -234,9 +234,6 @@ struct FontStrategy { /// A group of rows with identical font property values struct FontGroup { - /// Unique signature for this combination (for data key suffix) - #[allow(dead_code)] - signature: String, /// Mark properties for this group properties: HashMap, /// Row indices belonging to this group @@ -282,7 +279,6 @@ impl TextRenderer { // All constant or not present → single group with all rows let all_indices: Vec = (0..df.height()).collect(); let groups = vec![FontGroup { - signature: String::new(), properties: constant_values.clone(), row_indices: all_indices, }]; @@ -351,7 +347,6 @@ impl TextRenderer { let group_idx = groups.len(); signature_to_idx.insert(signature.clone(), group_idx); groups.push(FontGroup { - signature, properties, row_indices: vec![row_idx], }); From 800b161c21421915fbf311e4c94cc3a1ee02734d Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 12:40:28 +0100 Subject: [PATCH 06/29] Simplify TextRenderer by using HashMap for grouping Eliminated FontGroup struct and common_properties field by: - Using HashMap for grouping during construction, then converting to sorted Vec - Storing all properties (constant + varying) in each group's HashMap - Using plain tuples (HashMap, Vec) instead of a dedicated struct This reduces code by 24 net lines while maintaining the same functionality. Properties are now the HashMap keys (via signature) and row indices are values, making the data structure more direct. 
Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 80 +++++++++++++----------------------- 1 file changed, 28 insertions(+), 52 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index e6813e67..74b95016 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -228,16 +228,8 @@ impl GeomRenderer for PathRenderer { /// Strategy for handling font properties in text layers struct FontStrategy { - groups: Vec, - common_properties: HashMap, -} - -/// A group of rows with identical font property values -struct FontGroup { - /// Mark properties for this group - properties: HashMap, - /// Row indices belonging to this group - row_indices: Vec, + /// Each group is (properties, row_indices). Properties include both constant and varying values. + groups: Vec<(HashMap, Vec)>, } /// Renderer for text geom - handles font properties via data splitting @@ -278,21 +270,12 @@ impl TextRenderer { if varying_columns.is_empty() { // All constant or not present → single group with all rows let all_indices: Vec = (0..df.height()).collect(); - let groups = vec![FontGroup { - properties: constant_values.clone(), - row_indices: all_indices, - }]; - Ok(FontStrategy { - groups, - common_properties: HashMap::new(), // All properties in the group - }) + let groups = vec![(constant_values.clone(), all_indices)]; + Ok(FontStrategy { groups }) } else { // Some varying → multi-layer let groups = Self::build_font_groups_from_df(df, &varying_columns, &constant_values)?; - Ok(FontStrategy { - groups, - common_properties: constant_values, - }) + Ok(FontStrategy { groups }) } } @@ -301,12 +284,11 @@ impl TextRenderer { data: &DataFrame, varying: &[(String, String)], // (aesthetic, column_name) constant: &HashMap, - ) -> Result> { + ) -> Result, Vec)>> { use polars::prelude::{ChunkedArray, StringType}; let nrows = data.height(); - let mut groups: Vec = Vec::new(); - let mut signature_to_idx: HashMap = HashMap::new(); + let mut 
groups_map: HashMap, Vec)> = HashMap::new(); // Pre-fetch all varying font columns let font_columns: Vec<(String, ChunkedArray)> = varying @@ -323,7 +305,7 @@ impl TextRenderer { }) .collect::>>()?; - // Iterate rows and assign to groups + // Iterate rows and build groups for row_idx in 0..nrows { // Build signature for this row let mut sig_parts: Vec = Vec::new(); @@ -340,19 +322,21 @@ impl TextRenderer { let signature = sig_parts.join("|"); - // Add to existing group or create new group - if let Some(&group_idx) = signature_to_idx.get(&signature) { - groups[group_idx].row_indices.push(row_idx); - } else { - let group_idx = groups.len(); - signature_to_idx.insert(signature.clone(), group_idx); - groups.push(FontGroup { - properties, - row_indices: vec![row_idx], - }); - } + // Add to existing group or create new entry + groups_map + .entry(signature) + .or_insert_with(|| (properties.clone(), Vec::new())) + .1 + .push(row_idx); } + // Convert to Vec and sort by first occurrence + let mut groups: Vec<(HashMap, Vec)> = groups_map + .into_values() + .collect(); + + groups.sort_by_key(|(_, indices)| indices[0]); + Ok(groups) } @@ -450,12 +434,11 @@ impl TextRenderer { &self, prototype: Value, data_key: &str, - groups: &[FontGroup], - common_properties: &HashMap, + groups: &[(HashMap, Vec)], ) -> Result> { let mut layers = Vec::new(); - for (group_idx, group) in groups.iter().enumerate() { + for (group_idx, (properties, _indices)) in groups.iter().enumerate() { let mut layer_spec = prototype.clone(); // For single-group case (all constant), use empty suffix // For multi-group case, use _font_N suffix @@ -466,17 +449,10 @@ impl TextRenderer { }; let source_key = format!("{}{}", data_key, suffix); - // Apply mark properties (common + group-specific) + // Apply mark properties if let Some(mark) = layer_spec.get_mut("mark") { if let Some(mark_obj) = mark.as_object_mut() { - // Apply common properties first - for (aesthetic, value) in common_properties { - let vl_key = 
Self::map_aesthetic_to_mark_property(aesthetic); - Self::apply_mark_property(mark_obj, vl_key, value); - } - - // Apply group-specific properties (override common if needed) - for (aesthetic, value) in &group.properties { + for (aesthetic, value) in properties { let vl_key = Self::map_aesthetic_to_mark_property(aesthetic); Self::apply_mark_property(mark_obj, vl_key, value); } @@ -521,7 +497,7 @@ impl GeomRenderer for TextRenderer { // Split data by groups (even if just 1 group for constant fonts) let mut components: HashMap> = HashMap::new(); - for (group_idx, group) in strategy.groups.iter().enumerate() { + for (group_idx, (_properties, row_indices)) in strategy.groups.iter().enumerate() { // For single-group case (all constant), use empty suffix // For multi-group case, use _font_N suffix let suffix = if strategy.groups.len() == 1 { @@ -530,7 +506,7 @@ impl GeomRenderer for TextRenderer { format!("_font_{}", group_idx) }; - let filtered = Self::filter_by_indices(df, &group.row_indices)?; + let filtered = Self::filter_by_indices(df, row_indices)?; let values = if binned_columns.is_empty() { dataframe_to_values(&filtered)? } else { @@ -578,7 +554,7 @@ impl GeomRenderer for TextRenderer { })?; // Generate layers from groups (1 group = single layer, N groups = N layers) - self.finalize_layers(prototype, data_key, &strategy.groups, &strategy.common_properties) + self.finalize_layers(prototype, data_key, &strategy.groups) } } From 3181fb04e32d7d47471d7f91391463bfbac7c7cc Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 12:42:53 +0100 Subject: [PATCH 07/29] Remove FontStrategy wrapper struct FontStrategy was just wrapping a single Vec. Eliminated it by: - Returning Vec<(HashMap, Vec)> directly from analyze_font_columns() - Storing the Vec directly as metadata in PreparedData::Composite - Downcasting to Vec type directly in finalize() This removes 7 net lines while maintaining identical functionality. 
Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 74b95016..92552678 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -226,18 +226,13 @@ impl GeomRenderer for PathRenderer { // Text Renderer // ============================================================================= -/// Strategy for handling font properties in text layers -struct FontStrategy { - /// Each group is (properties, row_indices). Properties include both constant and varying values. - groups: Vec<(HashMap, Vec)>, -} - /// Renderer for text geom - handles font properties via data splitting pub struct TextRenderer; impl TextRenderer { - /// Analyze DataFrame columns to find font aesthetics - fn analyze_font_columns(df: &DataFrame) -> Result { + /// Analyze DataFrame columns to find font aesthetics. + /// Returns groups: Vec<(properties, row_indices)> where each group has identical font values. 
+ fn analyze_font_columns(df: &DataFrame) -> Result, Vec)>> { let mut varying_columns: Vec<(String, String)> = Vec::new(); // (aesthetic, column_name) let mut constant_values: HashMap = HashMap::new(); @@ -270,12 +265,10 @@ impl TextRenderer { if varying_columns.is_empty() { // All constant or not present → single group with all rows let all_indices: Vec = (0..df.height()).collect(); - let groups = vec![(constant_values.clone(), all_indices)]; - Ok(FontStrategy { groups }) + Ok(vec![(constant_values, all_indices)]) } else { // Some varying → multi-layer - let groups = Self::build_font_groups_from_df(df, &varying_columns, &constant_values)?; - Ok(FontStrategy { groups }) + Self::build_font_groups_from_df(df, &varying_columns, &constant_values) } } @@ -491,16 +484,16 @@ impl GeomRenderer for TextRenderer { _data_key: &str, binned_columns: &HashMap>, ) -> Result { - // Analyze font columns to determine strategy - let strategy = Self::analyze_font_columns(df)?; + // Analyze font columns to get groups + let groups = Self::analyze_font_columns(df)?; // Split data by groups (even if just 1 group for constant fonts) let mut components: HashMap> = HashMap::new(); - for (group_idx, (_properties, row_indices)) in strategy.groups.iter().enumerate() { + for (group_idx, (_properties, row_indices)) in groups.iter().enumerate() { // For single-group case (all constant), use empty suffix // For multi-group case, use _font_N suffix - let suffix = if strategy.groups.len() == 1 { + let suffix = if groups.len() == 1 { String::new() } else { format!("_font_{}", group_idx) @@ -518,7 +511,7 @@ impl GeomRenderer for TextRenderer { Ok(PreparedData::Composite { components, - metadata: Box::new(strategy), + metadata: Box::new(groups), }) } @@ -548,13 +541,13 @@ impl GeomRenderer for TextRenderer { )); }; - // Downcast metadata to FontStrategy - let strategy = metadata.downcast_ref::().ok_or_else(|| { - GgsqlError::InternalError("Failed to downcast font strategy".to_string()) + // Downcast 
metadata to groups + let groups = metadata.downcast_ref::<Vec<(HashMap<String, Value>, Vec<usize>)>>().ok_or_else(|| { + GgsqlError::InternalError("Failed to downcast font groups".to_string()) })?; // Generate layers from groups (1 group = single layer, N groups = N layers) - self.finalize_layers(prototype, data_key, &strategy.groups) + self.finalize_layers(prototype, data_key, groups) } } From 283f9418bda84345331a6e5e31a40cbd164c8c15 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 13:30:14 +0100 Subject: [PATCH 08/29] Use HashMap<FontKey, Vec<usize>> with direct property conversion Refactored TextRenderer to use FontKey tuple containing converted Vega-Lite Values instead of intermediate structures: - FontKey = (family, fontWeight, fontStyle, align, baseline) as Values - convert_fontface returns (fontWeight, fontStyle) tuple - Properties converted once during grouping (in analyze_font_columns) - finalize_layers directly inserts Values into mark object - Eliminated build_font_groups_from_df, convert_to_mark_property, apply_mark_property, and map_aesthetic_to_mark_property helpers Benefits: - No string signatures or intermediate HashMaps - Each unique property combination is stored once as converted Values (the conversion itself still runs per row while building the group key) - Simpler finalize_layers with direct value insertion - No special-case spreading logic for fontface This removes 70 net lines while maintaining identical functionality.
Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 300 ++++++++++++++--------------------- 1 file changed, 115 insertions(+), 185 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 92552678..203628b8 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -226,157 +226,94 @@ impl GeomRenderer for PathRenderer { // Text Renderer // ============================================================================= +/// Font property tuple: (family, fontWeight, fontStyle, align, baseline) as converted Vega-Lite Values +type FontKey = (Value, Value, Value, Value, Value); + /// Renderer for text geom - handles font properties via data splitting pub struct TextRenderer; impl TextRenderer { - /// Analyze DataFrame columns to find font aesthetics. - /// Returns groups: Vec<(properties, row_indices)> where each group has identical font values. - fn analyze_font_columns(df: &DataFrame) -> Result, Vec)>> { - - let mut varying_columns: Vec<(String, String)> = Vec::new(); // (aesthetic, column_name) - let mut constant_values: HashMap = HashMap::new(); - - // Check for font aesthetic columns in DataFrame - for &aesthetic in &["family", "fontface", "hjust", "vjust"] { - let col_name = naming::aesthetic_column(aesthetic); - - if let Ok(col) = df.column(&col_name) { - let unique_count = col - .n_unique() - .map_err(|e| GgsqlError::WriterError(e.to_string()))?; - - if unique_count == 1 { - // All same → treat as constant - let value_str = col - .str() - .map_err(|e| GgsqlError::WriterError(e.to_string()))? - .get(0) - .unwrap_or(""); - let converted = Self::convert_to_mark_property(aesthetic, value_str); - constant_values.insert(aesthetic.to_string(), converted); - } else if unique_count > 1 { - // Multiple values → needs splitting - varying_columns.push((aesthetic.to_string(), col_name)); - } - } + /// Analyze DataFrame columns to build font property groups. 
+ /// Returns HashMap mapping converted font property tuples to row indices. + fn analyze_font_columns(df: &DataFrame) -> Result>> { + let nrows = df.height(); + let mut groups: HashMap> = HashMap::new(); + + // Extract all font columns (or use defaults if missing) + let family_col = df.column(&naming::aesthetic_column("family")) + .ok() + .and_then(|s| s.str().ok()); + let fontface_col = df.column(&naming::aesthetic_column("fontface")) + .ok() + .and_then(|s| s.str().ok()); + let hjust_col = df.column(&naming::aesthetic_column("hjust")) + .ok() + .and_then(|s| s.str().ok()); + let vjust_col = df.column(&naming::aesthetic_column("vjust")) + .ok() + .and_then(|s| s.str().ok()); + + // Group rows by converted font property tuple + for row_idx in 0..nrows { + let family_str = family_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); + let fontface_str = fontface_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); + let hjust_str = hjust_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); + let vjust_str = vjust_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); + + // Convert to Vega-Lite property values immediately + let family_val = Self::convert_family(family_str); + let (font_weight_val, font_style_val) = Self::convert_fontface(fontface_str); + let hjust_val = Self::convert_hjust(hjust_str); + let vjust_val = Self::convert_vjust(vjust_str); + + let key = (family_val, font_weight_val, font_style_val, hjust_val, vjust_val); + groups.entry(key).or_insert_with(Vec::new).push(row_idx); } - if varying_columns.is_empty() { - // All constant or not present → single group with all rows - let all_indices: Vec = (0..df.height()).collect(); - Ok(vec![(constant_values, all_indices)]) - } else { - // Some varying → multi-layer - Self::build_font_groups_from_df(df, &varying_columns, &constant_values) - } + Ok(groups) } - /// Build groups from DataFrame columns (used in prepare_data) - fn build_font_groups_from_df( - data: &DataFrame, - varying: &[(String, String)], // (aesthetic, 
column_name) - constant: &HashMap, - ) -> Result, Vec)>> { - use polars::prelude::{ChunkedArray, StringType}; - - let nrows = data.height(); - let mut groups_map: HashMap, Vec)> = HashMap::new(); - - // Pre-fetch all varying font columns - let font_columns: Vec<(String, ChunkedArray)> = varying - .iter() - .map(|(aes, col)| { - let series = data - .column(col) - .map_err(|e| GgsqlError::WriterError(e.to_string()))?; - let ca = series - .str() - .map_err(|e| GgsqlError::WriterError(e.to_string()))? - .clone(); - Ok((aes.clone(), ca)) - }) - .collect::>>()?; - - // Iterate rows and build groups - for row_idx in 0..nrows { - // Build signature for this row - let mut sig_parts: Vec = Vec::new(); - let mut properties = constant.clone(); - - for (aesthetic, ca) in &font_columns { - let value = ca.get(row_idx).unwrap_or(""); - sig_parts.push(format!("{}:{}", aesthetic, value)); - - // Convert to mark property - let prop_value = Self::convert_to_mark_property(aesthetic, value); - properties.insert(aesthetic.to_string(), prop_value); - } - - let signature = sig_parts.join("|"); + /// Convert family string to Vega-Lite font value + fn convert_family(value: &str) -> Value { + json!(value) + } - // Add to existing group or create new entry - groups_map - .entry(signature) - .or_insert_with(|| (properties.clone(), Vec::new())) - .1 - .push(row_idx); + /// Convert fontface string to Vega-Lite fontWeight and fontStyle values + fn convert_fontface(value: &str) -> (Value, Value) { + match value { + "bold" => (json!("bold"), json!("normal")), + "italic" => (json!("normal"), json!("italic")), + "bold.italic" | "bolditalic" => (json!("bold"), json!("italic")), + _ => (json!("normal"), json!("normal")), } + } - // Convert to Vec and sort by first occurrence - let mut groups: Vec<(HashMap, Vec)> = groups_map - .into_values() - .collect(); - - groups.sort_by_key(|(_, indices)| indices[0]); - - Ok(groups) + /// Convert hjust string to Vega-Lite align value + fn convert_hjust(value: &str) 
-> Value { + let align = match value.parse::() { + Ok(v) if v <= 0.25 => "left", + Ok(v) if v >= 0.75 => "right", + _ => match value { + "left" => "left", + "right" => "right", + _ => "center", + }, + }; + json!(align) } - /// Convert ggsql font aesthetic value to Vega-Lite mark property - fn convert_to_mark_property(aesthetic: &str, value: &str) -> Value { - match aesthetic { - "family" => json!(value), - "fontface" => { - // Map ggplot2 fontface to fontWeight/fontStyle - match value { - "bold" => json!({"fontWeight": "bold"}), - "italic" => json!({"fontStyle": "italic"}), - "bold.italic" | "bolditalic" => json!({ - "fontWeight": "bold", - "fontStyle": "italic" - }), - _ => json!({"fontWeight": "normal"}), - } - } - "hjust" => { - // Map 0/0.5/1 or string to left/center/right - let align = match value.parse::() { - Ok(v) if v <= 0.25 => "left", - Ok(v) if v >= 0.75 => "right", - _ => match value { - "left" => "left", - "right" => "right", - _ => "center", - }, - }; - json!(align) - } - "vjust" => { - // Map 0/0.5/1 or string to bottom/middle/top - let baseline = match value.parse::() { - Ok(v) if v <= 0.25 => "bottom", - Ok(v) if v >= 0.75 => "top", - _ => match value { - "top" => "top", - "bottom" => "bottom", - _ => "middle", - }, - }; - json!(baseline) - } - _ => json!(value), - } + /// Convert vjust string to Vega-Lite baseline value + fn convert_vjust(value: &str) -> Value { + let baseline = match value.parse::() { + Ok(v) if v <= 0.25 => "bottom", + Ok(v) if v >= 0.75 => "top", + _ => match value { + "top" => "top", + "bottom" => "bottom", + _ => "middle", + }, + }; + json!(baseline) } /// Filter DataFrame to specific row indices @@ -397,62 +334,47 @@ impl TextRenderer { .map_err(|e| GgsqlError::WriterError(e.to_string())) } - /// Map aesthetic name to Vega-Lite mark property name - fn map_aesthetic_to_mark_property(aesthetic: &str) -> &str { - match aesthetic { - "family" => "font", - "hjust" => "align", - "vjust" => "baseline", - _ => aesthetic, - } - } - 
- /// Apply mark property, handling special cases like fontface - fn apply_mark_property(mark_obj: &mut Map, key: &str, value: &Value) { - if key == "fontface" || value.is_object() { - // fontface may contain multiple properties (fontWeight + fontStyle) - if let Some(obj) = value.as_object() { - for (k, v) in obj { - mark_obj.insert(k.clone(), v.clone()); - } - return; - } - } - mark_obj.insert(key.to_string(), value.clone()); - } - - /// Finalize single layer case /// Finalize layers from font groups (handles both single and multi-group cases) fn finalize_layers( &self, prototype: Value, data_key: &str, - groups: &[(HashMap, Vec)], + font_groups: &HashMap>, ) -> Result> { - let mut layers = Vec::new(); + // Sort groups by first index to match component key assignment order + let mut sorted_entries: Vec<_> = font_groups.iter().collect(); + sorted_entries.sort_by_key(|(_, indices)| indices[0]); - for (group_idx, (properties, _indices)) in groups.iter().enumerate() { - let mut layer_spec = prototype.clone(); - // For single-group case (all constant), use empty suffix - // For multi-group case, use _font_N suffix - let suffix = if groups.len() == 1 { + // Build layers + let mut layer_tuples: Vec<(usize, Value)> = Vec::new(); // (first_index, layer_spec) + + for (group_idx, (font_key, indices)) in sorted_entries.iter().enumerate() { + let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val) = font_key; + + // Component key suffix (matches prepare_data assignment) + let suffix = if font_groups.len() == 1 { String::new() } else { format!("_font_{}", group_idx) }; let source_key = format!("{}{}", data_key, suffix); - // Apply mark properties + // Create layer spec with font properties + let mut layer_spec = prototype.clone(); if let Some(mark) = layer_spec.get_mut("mark") { if let Some(mark_obj) = mark.as_object_mut() { - for (aesthetic, value) in properties { - let vl_key = Self::map_aesthetic_to_mark_property(aesthetic); - 
Self::apply_mark_property(mark_obj, vl_key, value); + // Apply font properties + if family_val.as_str().map_or(true, |s| !s.is_empty()) { + mark_obj.insert("font".to_string(), family_val.clone()); } + mark_obj.insert("fontWeight".to_string(), font_weight_val.clone()); + mark_obj.insert("fontStyle".to_string(), font_style_val.clone()); + mark_obj.insert("align".to_string(), hjust_val.clone()); + mark_obj.insert("baseline".to_string(), vjust_val.clone()); } } - // Add source filter for this group + // Add source filter let source_filter = json!({ "filter": { "field": naming::SOURCE_COLUMN, @@ -470,9 +392,13 @@ impl TextRenderer { new_transforms.extend(existing_transforms); layer_spec["transform"] = json!(new_transforms); - layers.push(layer_spec); + layer_tuples.push((indices[0], layer_spec)); } + // Sort by first index (already sorted, but explicit for clarity) + layer_tuples.sort_by_key(|(idx, _)| *idx); + let layers = layer_tuples.into_iter().map(|(_, spec)| spec).collect(); + Ok(layers) } } @@ -485,15 +411,19 @@ impl GeomRenderer for TextRenderer { binned_columns: &HashMap>, ) -> Result { // Analyze font columns to get groups - let groups = Self::analyze_font_columns(df)?; + let font_groups = Self::analyze_font_columns(df)?; - // Split data by groups (even if just 1 group for constant fonts) + // Split data by font groups let mut components: HashMap> = HashMap::new(); - for (group_idx, (_properties, row_indices)) in groups.iter().enumerate() { + // Sort groups by first index to assign component keys in order + let mut sorted_entries: Vec<_> = font_groups.iter().collect(); + sorted_entries.sort_by_key(|(_, indices)| indices[0]); + + for (group_idx, (_font_key, row_indices)) in sorted_entries.iter().enumerate() { // For single-group case (all constant), use empty suffix // For multi-group case, use _font_N suffix - let suffix = if groups.len() == 1 { + let suffix = if font_groups.len() == 1 { String::new() } else { format!("_font_{}", group_idx) @@ -511,7 +441,7 @@ 
impl GeomRenderer for TextRenderer { Ok(PreparedData::Composite { components, - metadata: Box::new(groups), + metadata: Box::new(font_groups), }) } @@ -541,13 +471,13 @@ impl GeomRenderer for TextRenderer { )); }; - // Downcast metadata to groups - let groups = metadata.downcast_ref::, Vec)>>().ok_or_else(|| { + // Downcast metadata to font groups + let font_groups = metadata.downcast_ref::>>().ok_or_else(|| { GgsqlError::InternalError("Failed to downcast font groups".to_string()) })?; - // Generate layers from groups (1 group = single layer, N groups = N layers) - self.finalize_layers(prototype, data_key, groups) + // Generate layers from font groups + self.finalize_layers(prototype, data_key, font_groups) } } From 814bbed9acff87b9ebd8e90fce530e3c632900ee Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 13:43:12 +0100 Subject: [PATCH 09/29] Sort font groups once in analyze_font_columns Changed analyze_font_columns to return Vec<(FontKey, Vec)> instead of HashMap, with sorting done once at the end of grouping. Before: HashMap was sorted twice - once in prepare_data() and again in finalize_layers() to maintain consistent ordering. After: Groups are sorted once after HashMap construction in analyze_font_columns(), then both prepare_data() and finalize_layers() iterate the pre-sorted Vec directly. This preserves HashMap's O(1) insertion benefit during construction while eliminating redundant sort operations. Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 54 +++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 203628b8..18f66728 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -234,22 +234,26 @@ pub struct TextRenderer; impl TextRenderer { /// Analyze DataFrame columns to build font property groups. - /// Returns HashMap mapping converted font property tuples to row indices. 
- fn analyze_font_columns(df: &DataFrame) -> Result>> { + /// Returns sorted Vec of (font_key, row_indices) tuples, ordered by first row index. + fn analyze_font_columns(df: &DataFrame) -> Result)>> { let nrows = df.height(); let mut groups: HashMap> = HashMap::new(); // Extract all font columns (or use defaults if missing) - let family_col = df.column(&naming::aesthetic_column("family")) + let family_col = df + .column(&naming::aesthetic_column("family")) .ok() .and_then(|s| s.str().ok()); - let fontface_col = df.column(&naming::aesthetic_column("fontface")) + let fontface_col = df + .column(&naming::aesthetic_column("fontface")) .ok() .and_then(|s| s.str().ok()); - let hjust_col = df.column(&naming::aesthetic_column("hjust")) + let hjust_col = df + .column(&naming::aesthetic_column("hjust")) .ok() .and_then(|s| s.str().ok()); - let vjust_col = df.column(&naming::aesthetic_column("vjust")) + let vjust_col = df + .column(&naming::aesthetic_column("vjust")) .ok() .and_then(|s| s.str().ok()); @@ -266,11 +270,21 @@ impl TextRenderer { let hjust_val = Self::convert_hjust(hjust_str); let vjust_val = Self::convert_vjust(vjust_str); - let key = (family_val, font_weight_val, font_style_val, hjust_val, vjust_val); + let key = ( + family_val, + font_weight_val, + font_style_val, + hjust_val, + vjust_val, + ); groups.entry(key).or_insert_with(Vec::new).push(row_idx); } - Ok(groups) + // Convert to Vec and sort by first occurrence (for ORDER BY preservation) + let mut sorted_groups: Vec<(FontKey, Vec)> = groups.into_iter().collect(); + sorted_groups.sort_by_key(|(_, indices)| indices[0]); + + Ok(sorted_groups) } /// Convert family string to Vega-Lite font value @@ -339,16 +353,12 @@ impl TextRenderer { &self, prototype: Value, data_key: &str, - font_groups: &HashMap>, + font_groups: &[(FontKey, Vec)], ) -> Result> { - // Sort groups by first index to match component key assignment order - let mut sorted_entries: Vec<_> = font_groups.iter().collect(); - 
sorted_entries.sort_by_key(|(_, indices)| indices[0]); - // Build layers let mut layer_tuples: Vec<(usize, Value)> = Vec::new(); // (first_index, layer_spec) - for (group_idx, (font_key, indices)) in sorted_entries.iter().enumerate() { + for (group_idx, (font_key, indices)) in font_groups.iter().enumerate() { let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val) = font_key; // Component key suffix (matches prepare_data assignment) @@ -410,17 +420,13 @@ impl GeomRenderer for TextRenderer { _data_key: &str, binned_columns: &HashMap>, ) -> Result { - // Analyze font columns to get groups + // Analyze font columns to get sorted groups let font_groups = Self::analyze_font_columns(df)?; // Split data by font groups let mut components: HashMap> = HashMap::new(); - // Sort groups by first index to assign component keys in order - let mut sorted_entries: Vec<_> = font_groups.iter().collect(); - sorted_entries.sort_by_key(|(_, indices)| indices[0]); - - for (group_idx, (_font_key, row_indices)) in sorted_entries.iter().enumerate() { + for (group_idx, (_font_key, row_indices)) in font_groups.iter().enumerate() { // For single-group case (all constant), use empty suffix // For multi-group case, use _font_N suffix let suffix = if font_groups.len() == 1 { @@ -472,9 +478,11 @@ impl GeomRenderer for TextRenderer { }; // Downcast metadata to font groups - let font_groups = metadata.downcast_ref::>>().ok_or_else(|| { - GgsqlError::InternalError("Failed to downcast font groups".to_string()) - })?; + let font_groups = metadata + .downcast_ref::)>>() + .ok_or_else(|| { + GgsqlError::InternalError("Failed to downcast font groups".to_string()) + })?; // Generate layers from font groups self.finalize_layers(prototype, data_key, font_groups) From f74170f899af072ba620f3b5f92359e80dbdbd80 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 13:59:22 +0100 Subject: [PATCH 10/29] Use Option for family and apply clippy suggestions Changes: - 
convert_family() returns Option<Value> instead of Value - Returns None for empty family strings - Simplifies finalize_layers to use if let Some(family_val) - Apply clippy suggestion: use or_default() instead of or_insert_with(Vec::new) This eliminates the map_or(true, |s| !s.is_empty()) check and makes the intent clearer: family is optional and should be omitted from the mark object when not specified. Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 18f66728..9bd1d333 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -227,7 +227,7 @@ impl GeomRenderer for PathRenderer { // ============================================================================= /// Font property tuple: (family, fontWeight, fontStyle, align, baseline) as converted Vega-Lite Values -type FontKey = (Value, Value, Value, Value, Value); +type FontKey = (Option<Value>, Value, Value, Value, Value); /// Renderer for text geom - handles font properties via data splitting pub struct TextRenderer; @@ -277,7 +277,7 @@ impl TextRenderer { hjust_val, vjust_val, ); - groups.entry(key).or_insert_with(Vec::new).push(row_idx); + groups.entry(key).or_default().push(row_idx); } // Convert to Vec and sort by first occurrence (for ORDER BY preservation) @@ -288,8 +288,12 @@ impl TextRenderer { } /// Convert family string to Vega-Lite font value - fn convert_family(value: &str) -> Value { - json!(value) + fn convert_family(value: &str) -> Option<Value> { + if value.is_empty() { + None + } else { + Some(json!(value)) + } } /// Convert fontface string to Vega-Lite fontWeight and fontStyle values @@ -374,7 +378,7 @@ impl TextRenderer { if let Some(mark) = layer_spec.get_mut("mark") { if let Some(mark_obj) = mark.as_object_mut() { // Apply font properties - if family_val.as_str().map_or(true, |s| !s.is_empty()) { + if let Some(family_val) = family_val {
mark_obj.insert("font".to_string(), family_val.clone()); } mark_obj.insert("fontWeight".to_string(), font_weight_val.clone()); From 4a34cc2e1c75e3674ad56dc1367d6fe10e9a80dc Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 14:09:08 +0100 Subject: [PATCH 11/29] Split non-contiguous indices to preserve z-order When font groups have non-contiguous row indices (e.g., [0, 2, 5, 6]), split them into separate contiguous ranges ([0], [2], [5, 6]) to preserve rendering order. Example: - Row 0: Arial "A" - Row 1: Courier "B" - Row 2: Arial "C" Before: Arial layer renders A and C together, then B on top After: Three layers render in order: A, then B, then C This ensures that the DRAW clause ORDER BY is respected for z-order stacking, even when rows with the same font properties are interleaved with rows having different properties. Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 9bd1d333..d97bfd37 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -284,7 +284,40 @@ impl TextRenderer { let mut sorted_groups: Vec<(FontKey, Vec)> = groups.into_iter().collect(); sorted_groups.sort_by_key(|(_, indices)| indices[0]); - Ok(sorted_groups) + // Split non-contiguous indices into separate ranges to preserve z-order + let mut split_groups = Vec::new(); + for (font_key, indices) in sorted_groups { + let ranges = Self::split_contiguous(&indices); + for range in ranges { + split_groups.push((font_key.clone(), range)); + } + } + + Ok(split_groups) + } + + /// Split indices into contiguous ranges + fn split_contiguous(indices: &[usize]) -> Vec> { + if indices.is_empty() { + return vec![]; + } + + let mut sorted = indices.to_vec(); + sorted.sort_unstable(); + + let mut ranges = Vec::new(); + let mut current = vec![sorted[0]]; + + for &idx in &sorted[1..] 
{ + if idx == current.last().unwrap() + 1 { + current.push(idx); + } else { + ranges.push(current); + current = vec![idx]; + } + } + ranges.push(current); + ranges } /// Convert family string to Vega-Lite font value From 221ecd5fe0a7b1d5689a40c4d8f766af79eeae2d Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 14:14:21 +0100 Subject: [PATCH 12/29] Suppress legend and scale for text encoding The label aesthetic (mapped to Vega-Lite 'text' encoding) should not generate a legend or scale, as text values are literal display strings rather than data values that need scaling or legend representation. Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index d97bfd37..7f732b2f 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -493,6 +493,15 @@ impl GeomRenderer for TextRenderer { for &aesthetic in &["family", "fontface", "hjust", "vjust"] { encoding.remove(aesthetic); } + + // Suppress legend and scale for text encoding + if let Some(text_encoding) = encoding.get_mut("text") { + if let Some(text_obj) = text_encoding.as_object_mut() { + text_obj.insert("legend".to_string(), Value::Null); + text_obj.insert("scale".to_string(), Value::Null); + } + } + Ok(()) } From 676a229ba2d72d4a5e04abfabdc7f590432c1d98 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 14:34:08 +0100 Subject: [PATCH 13/29] Refactor TextRenderer to use nested layers with shared encoding Changes: - Use nested layer structure for multi-group text rendering - Single group: returns one layer with full encoding - Multiple groups: returns parent layer with shared encoding, child layers only have mark + transform - Extract helper functions for code reuse: - apply_font_properties: applies font properties to mark object - build_transform_with_filter: creates transform with source filter - Both 
finalize_single_layer and finalize_nested_layers now use helpers to avoid duplication This approach eliminates duplicate encoding specifications in multi-layer output while preserving z-order through contiguous range splitting. Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 142 ++++++++++++++++++++++++----------- 1 file changed, 99 insertions(+), 43 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 7f732b2f..da597652 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -392,61 +392,117 @@ impl TextRenderer { data_key: &str, font_groups: &[(FontKey, Vec)], ) -> Result> { - // Build layers - let mut layer_tuples: Vec<(usize, Value)> = Vec::new(); // (first_index, layer_spec) + // Single group: return as-is with full encoding + if font_groups.len() == 1 { + return self.finalize_single_layer(prototype, data_key, &font_groups[0]); + } - for (group_idx, (font_key, indices)) in font_groups.iter().enumerate() { - let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val) = font_key; + // Multiple groups: wrap in nested layer with shared encoding + self.finalize_nested_layers(prototype, data_key, font_groups) + } - // Component key suffix (matches prepare_data assignment) - let suffix = if font_groups.len() == 1 { - String::new() - } else { - format!("_font_{}", group_idx) - }; - let source_key = format!("{}{}", data_key, suffix); + /// Apply font properties to mark object + fn apply_font_properties(mark_obj: &mut Map, font_key: &FontKey) { + let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val) = font_key; - // Create layer spec with font properties - let mut layer_spec = prototype.clone(); - if let Some(mark) = layer_spec.get_mut("mark") { - if let Some(mark_obj) = mark.as_object_mut() { - // Apply font properties - if let Some(family_val) = family_val { - mark_obj.insert("font".to_string(), family_val.clone()); - } - 
mark_obj.insert("fontWeight".to_string(), font_weight_val.clone()); - mark_obj.insert("fontStyle".to_string(), font_style_val.clone()); - mark_obj.insert("align".to_string(), hjust_val.clone()); - mark_obj.insert("baseline".to_string(), vjust_val.clone()); - } + if let Some(family_val) = family_val { + mark_obj.insert("font".to_string(), family_val.clone()); + } + mark_obj.insert("fontWeight".to_string(), font_weight_val.clone()); + mark_obj.insert("fontStyle".to_string(), font_style_val.clone()); + mark_obj.insert("align".to_string(), hjust_val.clone()); + mark_obj.insert("baseline".to_string(), vjust_val.clone()); + } + + /// Build transform with source filter + fn build_transform_with_filter( + prototype: &Value, + source_key: &str, + ) -> Vec { + let source_filter = json!({ + "filter": { + "field": naming::SOURCE_COLUMN, + "equal": source_key } + }); - // Add source filter - let source_filter = json!({ - "filter": { - "field": naming::SOURCE_COLUMN, - "equal": source_key - } - }); + let existing_transforms = prototype + .get("transform") + .and_then(|t| t.as_array()) + .cloned() + .unwrap_or_default(); - let existing_transforms = layer_spec - .get("transform") - .and_then(|t| t.as_array()) - .cloned() - .unwrap_or_default(); + let mut new_transforms = vec![source_filter]; + new_transforms.extend(existing_transforms); + new_transforms + } - let mut new_transforms = vec![source_filter]; - new_transforms.extend(existing_transforms); - layer_spec["transform"] = json!(new_transforms); + /// Finalize a single layer (no nesting needed) + fn finalize_single_layer( + &self, + prototype: Value, + data_key: &str, + (font_key, _indices): &(FontKey, Vec), + ) -> Result> { + let mut layer_spec = prototype.clone(); + + // Apply font properties to mark + if let Some(mark) = layer_spec.get_mut("mark") { + if let Some(mark_obj) = mark.as_object_mut() { + Self::apply_font_properties(mark_obj, font_key); + } + } + + // Add source filter + layer_spec["transform"] = 
json!(Self::build_transform_with_filter(&prototype, data_key)); + + Ok(vec![layer_spec]) + } + + /// Finalize multiple layers as nested layer with shared encoding + fn finalize_nested_layers( + &self, + prototype: Value, + data_key: &str, + font_groups: &[(FontKey, Vec)], + ) -> Result> { + // Extract shared encoding from prototype + let shared_encoding = prototype.get("encoding").cloned(); + + // Build individual layers without encoding (mark + transform only) + let mut layer_tuples: Vec<(usize, Value)> = Vec::new(); - layer_tuples.push((indices[0], layer_spec)); + for (group_idx, (font_key, indices)) in font_groups.iter().enumerate() { + let suffix = format!("_font_{}", group_idx); + let source_key = format!("{}{}", data_key, suffix); + + // Create mark object with font properties + let mut mark_obj = json!({"type": "text"}); + if let Some(mark_map) = mark_obj.as_object_mut() { + Self::apply_font_properties(mark_map, font_key); + } + + // Create layer with mark and transform (no encoding) + let layer = json!({ + "mark": mark_obj, + "transform": Self::build_transform_with_filter(&prototype, &source_key) + }); + + layer_tuples.push((indices[0], layer)); } - // Sort by first index (already sorted, but explicit for clarity) + // Sort by first index layer_tuples.sort_by_key(|(idx, _)| *idx); - let layers = layer_tuples.into_iter().map(|(_, spec)| spec).collect(); + let nested_layers: Vec = layer_tuples.into_iter().map(|(_, spec)| spec).collect(); - Ok(layers) + // Wrap in parent spec with shared encoding + let mut parent_spec = json!({"layer": nested_layers}); + + if let Some(encoding) = shared_encoding { + parent_spec["encoding"] = encoding; + } + + Ok(vec![parent_spec]) } } From 35a7d16d753bf5a57393b53f6f2141e5d82b040a Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 14:38:34 +0100 Subject: [PATCH 14/29] Add test for text renderer nested layers structure - Verifies nested layer structure is correct for multiple font groups - Tests that parent 
spec has shared encoding - Tests that child layers only have mark + transform - Tests that font properties are applied to mark objects Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 74 ++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index da597652..daac6510 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -1341,4 +1341,78 @@ mod tests { _ => panic!("Expected Composite"), } } + + #[test] + fn test_text_nested_layers_structure() { + use crate::naming; + use polars::prelude::*; + + let renderer = TextRenderer; + + // Create DataFrame with different fonts + let df = df! { + naming::aesthetic_column("x").as_str() => &[1.0, 2.0, 3.0], + naming::aesthetic_column("y").as_str() => &[10.0, 20.0, 30.0], + naming::aesthetic_column("label").as_str() => &["A", "B", "C"], + naming::aesthetic_column("family").as_str() => &["Arial", "Courier", "Arial"], + naming::aesthetic_column("fontface").as_str() => &["bold", "italic", "bold"], + } + .unwrap(); + + // Prepare data + let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); + + // Get the components + let components = match &prepared { + PreparedData::Composite { components, .. 
} => components, + _ => panic!("Expected Composite"), + }; + + // Should have 3 components due to non-contiguous indices + // (Arial+bold at index 0, Courier+italic at index 1, Arial+bold at index 2) + assert_eq!(components.len(), 3); + + // Build prototype spec + let prototype = json!({ + "mark": {"type": "text"}, + "encoding": { + "x": {"field": naming::aesthetic_column("x"), "type": "quantitative"}, + "y": {"field": naming::aesthetic_column("y"), "type": "quantitative"}, + "text": {"field": naming::aesthetic_column("label"), "type": "nominal"} + } + }); + + // Create a dummy layer + let layer = crate::plot::Layer::new(crate::plot::Geom::text()); + + // Call finalize to get layers + let layers = renderer.finalize(prototype.clone(), &layer, "test", &prepared).unwrap(); + + // For multiple font groups, should return single parent spec with nested layers + assert_eq!(layers.len(), 1); + + let parent_spec = &layers[0]; + + // Parent should have "layer" array + assert!(parent_spec.get("layer").is_some()); + let nested_layers = parent_spec["layer"].as_array().unwrap(); + + // Should have 3 nested layers (one per component) + assert_eq!(nested_layers.len(), 3); + + // Parent should have shared encoding + assert!(parent_spec.get("encoding").is_some()); + + // Each nested layer should have mark and transform, but not encoding + for nested_layer in nested_layers { + assert!(nested_layer.get("mark").is_some()); + assert!(nested_layer.get("transform").is_some()); + assert!(nested_layer.get("encoding").is_none()); + + // Mark should have font properties + let mark = nested_layer["mark"].as_object().unwrap(); + assert!(mark.contains_key("fontWeight")); + assert!(mark.contains_key("fontStyle")); + } + } } From 0bd4405778c925617175e76f819a0673ff93e8e3 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 14:46:17 +0100 Subject: [PATCH 15/29] Unify single and nested layer logic in TextRenderer Changes: - Remove finalize_single_layer function - Always use nested 
layer structure (works for 1 or N groups) - Simplify prepare_data to always use _font_N suffix - Update test expectations This eliminates code duplication and special-case handling for single-group scenarios, reducing implementation by ~24 lines. Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 101 ++++++++++------------------------- 1 file changed, 27 insertions(+), 74 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index daac6510..598b63bb 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -385,22 +385,6 @@ impl TextRenderer { .map_err(|e| GgsqlError::WriterError(e.to_string())) } - /// Finalize layers from font groups (handles both single and multi-group cases) - fn finalize_layers( - &self, - prototype: Value, - data_key: &str, - font_groups: &[(FontKey, Vec)], - ) -> Result> { - // Single group: return as-is with full encoding - if font_groups.len() == 1 { - return self.finalize_single_layer(prototype, data_key, &font_groups[0]); - } - - // Multiple groups: wrap in nested layer with shared encoding - self.finalize_nested_layers(prototype, data_key, font_groups) - } - /// Apply font properties to mark object fn apply_font_properties(mark_obj: &mut Map, font_key: &FontKey) { let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val) = font_key; @@ -437,29 +421,7 @@ impl TextRenderer { new_transforms } - /// Finalize a single layer (no nesting needed) - fn finalize_single_layer( - &self, - prototype: Value, - data_key: &str, - (font_key, _indices): &(FontKey, Vec), - ) -> Result> { - let mut layer_spec = prototype.clone(); - - // Apply font properties to mark - if let Some(mark) = layer_spec.get_mut("mark") { - if let Some(mark_obj) = mark.as_object_mut() { - Self::apply_font_properties(mark_obj, font_key); - } - } - - // Add source filter - layer_spec["transform"] = json!(Self::build_transform_with_filter(&prototype, data_key)); - - Ok(vec![layer_spec]) - } - - 
/// Finalize multiple layers as nested layer with shared encoding + /// Finalize layers as nested layer with shared encoding (works for single or multiple groups) fn finalize_nested_layers( &self, prototype: Value, @@ -470,30 +432,27 @@ impl TextRenderer { let shared_encoding = prototype.get("encoding").cloned(); // Build individual layers without encoding (mark + transform only) - let mut layer_tuples: Vec<(usize, Value)> = Vec::new(); - - for (group_idx, (font_key, indices)) in font_groups.iter().enumerate() { - let suffix = format!("_font_{}", group_idx); - let source_key = format!("{}{}", data_key, suffix); - - // Create mark object with font properties - let mut mark_obj = json!({"type": "text"}); - if let Some(mark_map) = mark_obj.as_object_mut() { - Self::apply_font_properties(mark_map, font_key); - } - - // Create layer with mark and transform (no encoding) - let layer = json!({ - "mark": mark_obj, - "transform": Self::build_transform_with_filter(&prototype, &source_key) - }); - - layer_tuples.push((indices[0], layer)); - } + // font_groups is already sorted by first occurrence, so no need to re-sort + let nested_layers: Vec = font_groups + .iter() + .enumerate() + .map(|(group_idx, (font_key, _indices))| { + let suffix = format!("_font_{}", group_idx); + let source_key = format!("{}{}", data_key, suffix); + + // Create mark object with font properties + let mut mark_obj = json!({"type": "text"}); + if let Some(mark_map) = mark_obj.as_object_mut() { + Self::apply_font_properties(mark_map, font_key); + } - // Sort by first index - layer_tuples.sort_by_key(|(idx, _)| *idx); - let nested_layers: Vec = layer_tuples.into_iter().map(|(_, spec)| spec).collect(); + // Create layer with mark and transform (no encoding) + json!({ + "mark": mark_obj, + "transform": Self::build_transform_with_filter(&prototype, &source_key) + }) + }) + .collect(); // Wrap in parent spec with shared encoding let mut parent_spec = json!({"layer": nested_layers}); @@ -520,13 +479,7 @@ 
impl GeomRenderer for TextRenderer { let mut components: HashMap> = HashMap::new(); for (group_idx, (_font_key, row_indices)) in font_groups.iter().enumerate() { - // For single-group case (all constant), use empty suffix - // For multi-group case, use _font_N suffix - let suffix = if font_groups.len() == 1 { - String::new() - } else { - format!("_font_{}", group_idx) - }; + let suffix = format!("_font_{}", group_idx); let filtered = Self::filter_by_indices(df, row_indices)?; let values = if binned_columns.is_empty() { @@ -586,8 +539,8 @@ impl GeomRenderer for TextRenderer { GgsqlError::InternalError("Failed to downcast font groups".to_string()) })?; - // Generate layers from font groups - self.finalize_layers(prototype, data_key, font_groups) + // Generate nested layers from font groups (works for single or multiple groups) + self.finalize_nested_layers(prototype, data_key, font_groups) } } @@ -1298,14 +1251,14 @@ mod tests { } .unwrap(); - // Prepare data - should result in single layer with empty component key + // Prepare data - should result in single layer with _font_0 component key let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); match prepared { PreparedData::Composite { components, .. 
} => { - // Should have single component with empty key + // Should have single component with _font_0 key assert_eq!(components.len(), 1); - assert!(components.contains_key("")); + assert!(components.contains_key("_font_0")); } _ => panic!("Expected Composite"), } From 3b821f3f502971b0e7257f4932905a21d3fad1c7 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Mon, 23 Feb 2026 14:54:14 +0100 Subject: [PATCH 16/29] Add angle aesthetic to text geom Changes: - Add 'angle' to supported aesthetics in Text geom - Update FontKey tuple to include angle (6th element) - Extract angle column in analyze_font_columns - Add convert_angle function (parses numeric angle in degrees) - Apply angle property in apply_font_properties - Remove angle from encoding in modify_encoding The angle aesthetic is now handled the same way as other font properties (family, fontface, hjust, vjust) via data-splitting, since Vega-Lite requires it as a mark property. Co-Authored-By: Claude Sonnet 4.5 --- src/plot/layer/geom/text.rs | 2 +- src/writer/vegalite/layer.rs | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/plot/layer/geom/text.rs b/src/plot/layer/geom/text.rs index b37bb79b..1913485c 100644 --- a/src/plot/layer/geom/text.rs +++ b/src/plot/layer/geom/text.rs @@ -15,7 +15,7 @@ impl GeomTrait for Text { GeomAesthetics { supported: &[ "x", "y", "label", "stroke", "fontsize", "opacity", "family", "fontface", "hjust", - "vjust", + "vjust", "angle", ], required: &["x", "y"], hidden: &[], diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 598b63bb..e283ce0f 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -226,8 +226,8 @@ impl GeomRenderer for PathRenderer { // Text Renderer // ============================================================================= -/// Font property tuple: (family, fontWeight, fontStyle, align, baseline) as converted Vega-Lite Values -type FontKey = (Option, Value, 
Value, Value, Value); +/// Font property tuple: (family, fontWeight, fontStyle, align, baseline, angle) as converted Vega-Lite Values +type FontKey = (Option, Value, Value, Value, Value, Value); /// Renderer for text geom - handles font properties via data splitting pub struct TextRenderer; @@ -256,6 +256,10 @@ impl TextRenderer { .column(&naming::aesthetic_column("vjust")) .ok() .and_then(|s| s.str().ok()); + let angle_col = df + .column(&naming::aesthetic_column("angle")) + .ok() + .and_then(|s| s.str().ok()); // Group rows by converted font property tuple for row_idx in 0..nrows { @@ -263,12 +267,14 @@ impl TextRenderer { let fontface_str = fontface_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); let hjust_str = hjust_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); let vjust_str = vjust_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); + let angle_str = angle_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); // Convert to Vega-Lite property values immediately let family_val = Self::convert_family(family_str); let (font_weight_val, font_style_val) = Self::convert_fontface(fontface_str); let hjust_val = Self::convert_hjust(hjust_str); let vjust_val = Self::convert_vjust(vjust_str); + let angle_val = Self::convert_angle(angle_str); let key = ( family_val, @@ -276,6 +282,7 @@ impl TextRenderer { font_style_val, hjust_val, vjust_val, + angle_val, ); groups.entry(key).or_default().push(row_idx); } @@ -367,6 +374,14 @@ impl TextRenderer { json!(baseline) } + /// Convert angle string to Vega-Lite angle value (degrees) + fn convert_angle(value: &str) -> Value { + match value.parse::() { + Ok(angle) => json!(angle), + Err(_) => json!(0.0), + } + } + /// Filter DataFrame to specific row indices fn filter_by_indices(data: &DataFrame, indices: &[usize]) -> Result { use polars::prelude::{BooleanChunked, NamedFrom}; @@ -387,7 +402,7 @@ impl TextRenderer { /// Apply font properties to mark object fn apply_font_properties(mark_obj: &mut Map, font_key: &FontKey) { - let 
(family_val, font_weight_val, font_style_val, hjust_val, vjust_val) = font_key; + let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val, angle_val) = font_key; if let Some(family_val) = family_val { mark_obj.insert("font".to_string(), family_val.clone()); @@ -396,6 +411,7 @@ impl TextRenderer { mark_obj.insert("fontStyle".to_string(), font_style_val.clone()); mark_obj.insert("align".to_string(), hjust_val.clone()); mark_obj.insert("baseline".to_string(), vjust_val.clone()); + mark_obj.insert("angle".to_string(), angle_val.clone()); } /// Build transform with source filter @@ -499,7 +515,7 @@ impl GeomRenderer for TextRenderer { fn modify_encoding(&self, encoding: &mut Map, _layer: &Layer) -> Result<()> { // Remove font aesthetics from encoding - they only work as mark properties - for &aesthetic in &["family", "fontface", "hjust", "vjust"] { + for &aesthetic in &["family", "fontface", "hjust", "vjust", "angle"] { encoding.remove(aesthetic); } From 76d039415472a34a382e45b8bd2437c58e9d31b1 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 24 Feb 2026 09:05:43 +0100 Subject: [PATCH 17/29] Complete angle aesthetic implementation with integration test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit completes the angle aesthetic implementation: Grammar changes: - Add 'angle' to aesthetic keywords in tree-sitter grammar Label geom consistency: - Add 'angle' to supported aesthetics in Label geom - Brings label geom in line with text geom support TextRenderer improvements: - Fix convert_angle to handle both numeric and string columns - Add angle normalization to [0, 360) range - Handle integer, float, and string angle values Integration test: - Add test_text_angle_integration for full SQL → Vega-Lite pipeline - Verifies nested layer structure with angle mark properties - Tests angle normalization and data splitting - Validates non-contiguous index handling The angle aesthetic now works 
end-to-end: SQL query with angle column → TextRenderer splits data by unique angles → Vega-Lite generates nested layers with angle mark properties. Co-Authored-By: Claude Sonnet 4.5 --- src/plot/layer/geom/label.rs | 3 +- src/writer/vegalite/layer.rs | 275 ++++++++++++++++++++++++++++++++--- tree-sitter-ggsql/grammar.js | 2 +- 3 files changed, 260 insertions(+), 20 deletions(-) diff --git a/src/plot/layer/geom/label.rs b/src/plot/layer/geom/label.rs index 4933e28f..0045dfd8 100644 --- a/src/plot/layer/geom/label.rs +++ b/src/plot/layer/geom/label.rs @@ -1,5 +1,4 @@ //! Label geom implementation - use super::{GeomAesthetics, GeomTrait, GeomType}; /// Label geom - text labels with background @@ -15,7 +14,7 @@ impl GeomTrait for Label { GeomAesthetics { supported: &[ "x", "y", "label", "fill", "stroke", "fontsize", "opacity", "family", "fontface", - "hjust", "vjust", + "hjust", "vjust", "angle", ], required: &["x", "y"], hidden: &[], diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index e283ce0f..a86f7562 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -256,10 +256,9 @@ impl TextRenderer { .column(&naming::aesthetic_column("vjust")) .ok() .and_then(|s| s.str().ok()); - let angle_col = df - .column(&naming::aesthetic_column("angle")) - .ok() - .and_then(|s| s.str().ok()); + + // Angle can be numeric or string, so get the raw column + let angle_col = df.column(&naming::aesthetic_column("angle")).ok(); // Group rows by converted font property tuple for row_idx in 0..nrows { @@ -267,14 +266,13 @@ impl TextRenderer { let fontface_str = fontface_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); let hjust_str = hjust_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); let vjust_str = vjust_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); - let angle_str = angle_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); // Convert to Vega-Lite property values immediately let family_val = Self::convert_family(family_str); let 
(font_weight_val, font_style_val) = Self::convert_fontface(fontface_str); let hjust_val = Self::convert_hjust(hjust_str); let vjust_val = Self::convert_vjust(vjust_str); - let angle_val = Self::convert_angle(angle_str); + let angle_val = Self::convert_angle(angle_col, row_idx); let key = ( family_val, @@ -374,12 +372,39 @@ impl TextRenderer { json!(baseline) } - /// Convert angle string to Vega-Lite angle value (degrees) - fn convert_angle(value: &str) -> Value { - match value.parse::() { - Ok(angle) => json!(angle), - Err(_) => json!(0.0), + /// Convert angle column value to Vega-Lite angle value (degrees) + /// Handles both numeric and string columns + /// Normalizes angles to [0, 360) range + fn convert_angle(angle_col: Option<&polars::prelude::Column>, row_idx: usize) -> Value { + use polars::prelude::*; + + let normalize_angle = |angle: f64| { + let normalized = angle % 360.0; + if normalized < 0.0 { + normalized + 360.0 + } else { + normalized + } + }; + + if let Some(col) = angle_col { + // Try as numeric first (int or float) + if let Ok(num_series) = col.cast(&DataType::Float64) { + if let Some(val) = num_series.f64().ok().and_then(|ca| ca.get(row_idx)) { + return json!(normalize_angle(val)); + } + } + // Try as string + if let Ok(str_ca) = col.str() { + if let Some(s) = str_ca.get(row_idx) { + if let Ok(angle) = s.parse::() { + return json!(normalize_angle(angle)); + } + } + } } + // Default to 0.0 if column missing or value unparseable + json!(0.0) } /// Filter DataFrame to specific row indices @@ -402,7 +427,8 @@ impl TextRenderer { /// Apply font properties to mark object fn apply_font_properties(mark_obj: &mut Map, font_key: &FontKey) { - let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val, angle_val) = font_key; + let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val, angle_val) = + font_key; if let Some(family_val) = family_val { mark_obj.insert("font".to_string(), family_val.clone()); @@ -415,10 +441,7 @@ impl 
TextRenderer { } /// Build transform with source filter - fn build_transform_with_filter( - prototype: &Value, - source_key: &str, - ) -> Vec { + fn build_transform_with_filter(prototype: &Value, source_key: &str) -> Vec { let source_filter = json!({ "filter": { "field": naming::SOURCE_COLUMN, @@ -1355,7 +1378,9 @@ mod tests { let layer = crate::plot::Layer::new(crate::plot::Geom::text()); // Call finalize to get layers - let layers = renderer.finalize(prototype.clone(), &layer, "test", &prepared).unwrap(); + let layers = renderer + .finalize(prototype.clone(), &layer, "test", &prepared) + .unwrap(); // For multiple font groups, should return single parent spec with nested layers assert_eq!(layers.len(), 1); @@ -1384,4 +1409,220 @@ mod tests { assert!(mark.contains_key("fontStyle")); } } + + #[test] + fn test_text_varying_angle() { + use crate::naming; + use polars::prelude::*; + + let renderer = TextRenderer; + + // Create DataFrame with different angles + let df = df! { + naming::aesthetic_column("x").as_str() => &[1.0, 2.0, 3.0], + naming::aesthetic_column("y").as_str() => &[10.0, 20.0, 30.0], + naming::aesthetic_column("label").as_str() => &["A", "B", "C"], + naming::aesthetic_column("angle").as_str() => &["0", "45", "90"], + } + .unwrap(); + + // Prepare data - should result in multiple layers (one per unique angle) + let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); + + match &prepared { + PreparedData::Composite { components, .. 
} => { + // Should have 3 components (one per unique angle) + assert_eq!(components.len(), 3); + assert!(components.contains_key("_font_0")); + assert!(components.contains_key("_font_1")); + assert!(components.contains_key("_font_2")); + } + _ => panic!("Expected Composite"), + } + + // Build prototype spec + let prototype = json!({ + "mark": {"type": "text"}, + "encoding": { + "x": {"field": naming::aesthetic_column("x"), "type": "quantitative"}, + "y": {"field": naming::aesthetic_column("y"), "type": "quantitative"}, + "text": {"field": naming::aesthetic_column("label"), "type": "nominal"} + } + }); + + // Create a dummy layer + let layer = crate::plot::Layer::new(crate::plot::Geom::text()); + + // Call finalize to get layers + let layers = renderer + .finalize(prototype.clone(), &layer, "test", &prepared) + .unwrap(); + + // Should return single parent spec with nested layers + assert_eq!(layers.len(), 1); + + let parent_spec = &layers[0]; + let nested_layers = parent_spec["layer"].as_array().unwrap(); + + // Should have 3 nested layers (one per unique angle) + assert_eq!(nested_layers.len(), 3); + + // Each layer should have angle property in mark + for nested_layer in nested_layers { + let mark = nested_layer["mark"].as_object().unwrap(); + assert!(mark.contains_key("angle")); + } + } + + #[test] + fn test_text_varying_angle_numeric() { + use crate::naming; + use polars::prelude::*; + + let renderer = TextRenderer; + + // Create DataFrame with numeric angle column (matching actual query) + let df = df! 
{ + naming::aesthetic_column("x").as_str() => &[1, 2, 3], + naming::aesthetic_column("y").as_str() => &[1, 2, 3], + naming::aesthetic_column("label").as_str() => &["A", "B", "C"], + naming::aesthetic_column("angle").as_str() => &[0i32, 180i32, 0i32], // integer column + } + .unwrap(); + + // Prepare data - should result in multiple layers (one per unique angle) + let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); + + match &prepared { + PreparedData::Composite { components, .. } => { + // Should have 3 components: angle 0 at row 0, angle 180 at row 1, angle 0 at row 2 + // Due to non-contiguous indices, rows 0 and 2 should be in separate components + eprintln!("Number of components: {}", components.len()); + eprintln!( + "Component keys: {:?}", + components.keys().collect::>() + ); + assert_eq!(components.len(), 3); + } + _ => panic!("Expected Composite"), + } + } + + #[test] + fn test_text_angle_integration() { + use crate::execute; + use crate::naming; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Integration test: Full pipeline from SQL query to Vega-Lite with angle aesthetic + // This tests that angle values properly create separate layers with angle mark properties + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Query with text geom and varying angles + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + chr(65 + n::INTEGER) as label, + CASE + WHEN n = 0 THEN 0 + WHEN n = 1 THEN 45 + WHEN n = 2 THEN 90 + ELSE 0 + END as rotation + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, label, rotation AS angle + DRAW text + "#; + + // Execute and prepare data + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + assert_eq!(prepared.specs.len(), 1); + + let spec = &prepared.specs[0]; + assert_eq!(spec.layers.len(), 1); + + // Generate Vega-Lite JSON + let writer = VegaLiteWriter::new(); + 
let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Text renderer should create nested layers structure + assert!( + vl_spec["layer"].is_array(), + "Should have top-level layer array" + ); + let top_layers = vl_spec["layer"].as_array().unwrap(); + assert_eq!(top_layers.len(), 1, "Should have one parent text layer"); + + // Parent layer should have shared encoding and nested layers + let parent_layer = &top_layers[0]; + assert!( + parent_layer["encoding"].is_object(), + "Parent layer should have shared encoding" + ); + assert!( + parent_layer["layer"].is_array(), + "Parent layer should have nested layers" + ); + + let nested_layers = parent_layer["layer"].as_array().unwrap(); + + // Should have multiple nested layers (one per unique angle value) + // We have angles: 0, 45, 90, 0 -> but non-contiguous 0s split into separate layers + assert!( + nested_layers.len() >= 3, + "Should have at least 3 nested layers for different angles, got {}", + nested_layers.len() + ); + + // Each nested layer should have mark with angle property + for (idx, nested_layer) in nested_layers.iter().enumerate() { + let mark = nested_layer["mark"].as_object().unwrap(); + assert!( + mark.contains_key("angle"), + "Nested layer {} mark should have angle property", + idx + ); + assert_eq!(mark["type"], "text"); + + // Should have source filter transform + assert!(nested_layer["transform"].is_array()); + + // Should NOT have encoding (inherited from parent) + assert!(nested_layer.get("encoding").is_none()); + } + + // Verify angles are present and normalized [0, 360) + let angles: Vec = nested_layers + .iter() + .filter_map(|layer| { + layer["mark"] + .as_object() + .and_then(|m| m.get("angle")) + .and_then(|a| a.as_f64()) + }) + .collect(); + + // Should have the three distinct angles: 0, 45, 90 + assert!(angles.contains(&0.0), "Should have 0° angle"); + assert!(angles.contains(&45.0), "Should have 45° 
angle"); + assert!(angles.contains(&90.0), "Should have 90° angle"); + + // Verify data has angle column + let data_values = vl_spec["data"]["values"].as_array().unwrap(); + assert!(!data_values.is_empty()); + + let angle_col = naming::aesthetic_column("angle"); + for row in data_values { + assert!( + row[&angle_col].is_number(), + "Data row should have numeric angle: {:?}", + row + ); + } + } } diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js index 5d5d13b0..79b187f2 100644 --- a/tree-sitter-ggsql/grammar.js +++ b/tree-sitter-ggsql/grammar.js @@ -647,7 +647,7 @@ module.exports = grammar({ // Size and shape 'size', 'shape', 'linetype', 'linewidth', 'width', 'height', // Text aesthetics - 'label', 'family', 'fontface', 'fontsize', 'hjust', 'vjust', + 'label', 'family', 'fontface', 'fontsize', 'hjust', 'vjust', 'angle', // Computed variables 'offset' ), From 4b3ab9bdfc31b2c8b2822e7f242bcd4124ed8d3a Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 24 Feb 2026 09:21:56 +0100 Subject: [PATCH 18/29] Refactor TextRenderer to use pure run-length encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the group-sort-split approach with elegant run-length encoding for handling font property variations in text layers. 
Changes: Algorithm improvement: - Replace HashMap grouping + sorting + contiguous splitting with single-pass RLE scan - Complexity: O(n log n) → O(n) - Memory: 8n bytes per run → 16 bytes per run Type simplification: - Before: Vec<(FontKey, Vec)> - explicit row indices - After: Vec<(FontKey, usize)> - run lengths with implicit positions - Start positions derived from cumulative run lengths DataFrame operations: - Replace boolean masking (filter_by_indices) with direct slicing - Use df.slice(position, length) - O(1) pointer arithmetic - Remove filter_by_indices helper function entirely Function rename: - analyze_font_columns() → build_font_rle() - Clearer name indicating RLE technique and output type Benefits: - 28 net lines removed (52 insertions, 80 deletions) - Simpler single-pass algorithm - More efficient memory usage - Faster DataFrame operations - All tests pass unchanged The refactoring maintains identical behavior while using the canonical run-length encoding pattern for grouping consecutive rows. Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 132 ++++++++++++++--------------------- 1 file changed, 52 insertions(+), 80 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index a86f7562..5812aa00 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -233,11 +233,15 @@ type FontKey = (Option, Value, Value, Value, Value, Value); pub struct TextRenderer; impl TextRenderer { - /// Analyze DataFrame columns to build font property groups. - /// Returns sorted Vec of (font_key, row_indices) tuples, ordered by first row index. - fn analyze_font_columns(df: &DataFrame) -> Result)>> { + /// Analyze DataFrame columns to build font property runs using run-length encoding. + /// Returns Vec of (font_key, length) tuples representing consecutive rows with identical font properties. + /// Start positions are implicit (derived from cumulative lengths). 
+ fn build_font_rle(df: &DataFrame) -> Result> { let nrows = df.height(); - let mut groups: HashMap> = HashMap::new(); + + if nrows == 0 { + return Ok(Vec::new()); + } // Extract all font columns (or use defaults if missing) let family_col = df @@ -260,7 +264,11 @@ impl TextRenderer { // Angle can be numeric or string, so get the raw column let angle_col = df.column(&naming::aesthetic_column("angle")).ok(); - // Group rows by converted font property tuple + // Run-length encoding: group consecutive rows with same font properties + let mut runs = Vec::new(); + let mut current_key: Option = None; + let mut run_length = 0; + for row_idx in 0..nrows { let family_str = family_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); let fontface_str = fontface_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); @@ -272,7 +280,7 @@ impl TextRenderer { let (font_weight_val, font_style_val) = Self::convert_fontface(fontface_str); let hjust_val = Self::convert_hjust(hjust_str); let vjust_val = Self::convert_vjust(vjust_str); - let angle_val = Self::convert_angle(angle_col, row_idx); + let angle_val = Self::convert_angle(angle_col.as_deref(), row_idx); let key = ( family_val, @@ -282,47 +290,25 @@ impl TextRenderer { vjust_val, angle_val, ); - groups.entry(key).or_default().push(row_idx); - } - - // Convert to Vec and sort by first occurrence (for ORDER BY preservation) - let mut sorted_groups: Vec<(FontKey, Vec)> = groups.into_iter().collect(); - sorted_groups.sort_by_key(|(_, indices)| indices[0]); - // Split non-contiguous indices into separate ranges to preserve z-order - let mut split_groups = Vec::new(); - for (font_key, indices) in sorted_groups { - let ranges = Self::split_contiguous(&indices); - for range in ranges { - split_groups.push((font_key.clone(), range)); + // If font properties changed, emit previous run and start new one + if Some(&key) != current_key.as_ref() { + if let Some(prev_key) = current_key { + runs.push((prev_key, run_length)); + run_length = 0; + } + 
current_key = Some(key); } - } - - Ok(split_groups) - } - /// Split indices into contiguous ranges - fn split_contiguous(indices: &[usize]) -> Vec> { - if indices.is_empty() { - return vec![]; + run_length += 1; } - let mut sorted = indices.to_vec(); - sorted.sort_unstable(); - - let mut ranges = Vec::new(); - let mut current = vec![sorted[0]]; - - for &idx in &sorted[1..] { - if idx == current.last().unwrap() + 1 { - current.push(idx); - } else { - ranges.push(current); - current = vec![idx]; - } + // Don't forget the last run + if let Some(key) = current_key { + runs.push((key, run_length)); } - ranges.push(current); - ranges + + Ok(runs) } /// Convert family string to Vega-Lite font value @@ -407,24 +393,6 @@ impl TextRenderer { json!(0.0) } - /// Filter DataFrame to specific row indices - fn filter_by_indices(data: &DataFrame, indices: &[usize]) -> Result { - use polars::prelude::{BooleanChunked, NamedFrom}; - - let nrows = data.height(); - let mut mask_data = vec![false; nrows]; - for &idx in indices { - if idx < nrows { - mask_data[idx] = true; - } - } - - let mask = BooleanChunked::new("".into(), mask_data); - - data.filter(&mask) - .map_err(|e| GgsqlError::WriterError(e.to_string())) - } - /// Apply font properties to mark object fn apply_font_properties(mark_obj: &mut Map, font_key: &FontKey) { let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val, angle_val) = @@ -460,23 +428,23 @@ impl TextRenderer { new_transforms } - /// Finalize layers as nested layer with shared encoding (works for single or multiple groups) + /// Finalize layers as nested layer with shared encoding (works for single or multiple runs) fn finalize_nested_layers( &self, prototype: Value, data_key: &str, - font_groups: &[(FontKey, Vec)], + font_runs: &[(FontKey, usize)], ) -> Result> { // Extract shared encoding from prototype let shared_encoding = prototype.get("encoding").cloned(); // Build individual layers without encoding (mark + transform only) - // font_groups is 
already sorted by first occurrence, so no need to re-sort - let nested_layers: Vec = font_groups + // font_runs preserves natural row order through RLE + let nested_layers: Vec = font_runs .iter() .enumerate() - .map(|(group_idx, (font_key, _indices))| { - let suffix = format!("_font_{}", group_idx); + .map(|(run_idx, (font_key, _length))| { + let suffix = format!("_font_{}", run_idx); let source_key = format!("{}{}", data_key, suffix); // Create mark object with font properties @@ -511,28 +479,32 @@ impl GeomRenderer for TextRenderer { _data_key: &str, binned_columns: &HashMap>, ) -> Result { - // Analyze font columns to get sorted groups - let font_groups = Self::analyze_font_columns(df)?; + // Analyze font columns to get RLE runs + let font_runs = Self::build_font_rle(df)?; - // Split data by font groups + // Split data by font runs, tracking cumulative position let mut components: HashMap> = HashMap::new(); + let mut position = 0; - for (group_idx, (_font_key, row_indices)) in font_groups.iter().enumerate() { - let suffix = format!("_font_{}", group_idx); + for (run_idx, (_font_key, length)) in font_runs.iter().enumerate() { + let suffix = format!("_font_{}", run_idx); + + // Slice the contiguous run from the DataFrame (more efficient than boolean masking) + let sliced = df.slice(position as i64, *length); - let filtered = Self::filter_by_indices(df, row_indices)?; let values = if binned_columns.is_empty() { - dataframe_to_values(&filtered)? + dataframe_to_values(&sliced)? } else { - dataframe_to_values_with_bins(&filtered, binned_columns)? + dataframe_to_values_with_bins(&sliced, binned_columns)? 
}; components.insert(suffix, values); + position += length; } Ok(PreparedData::Composite { components, - metadata: Box::new(font_groups), + metadata: Box::new(font_runs), }) } @@ -571,15 +543,15 @@ impl GeomRenderer for TextRenderer { )); }; - // Downcast metadata to font groups - let font_groups = metadata - .downcast_ref::)>>() + // Downcast metadata to font runs + let font_runs = metadata + .downcast_ref::>() .ok_or_else(|| { - GgsqlError::InternalError("Failed to downcast font groups".to_string()) + GgsqlError::InternalError("Failed to downcast font runs".to_string()) })?; - // Generate nested layers from font groups (works for single or multiple groups) - self.finalize_nested_layers(prototype, data_key, font_groups) + // Generate nested layers from font runs (works for single or multiple runs) + self.finalize_nested_layers(prototype, data_key, font_runs) } } From d94c20a9f883e2aa9aaf11e9f1350919e021f794 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 24 Feb 2026 09:49:06 +0100 Subject: [PATCH 19/29] Add nudge_x and nudge_y parameters to text/label geoms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add nudge parameters that map to Vega-Lite's xOffset/yOffset mark properties, allowing fine-grained positioning adjustments for text labels. Changes: Text and Label geoms: - Add nudge_x and nudge_y to default_params - Default to Null (not applied unless explicitly set) TextRenderer: - Build base mark prototype with nudge offsets (if specified) - Clone and extend with font properties for each run - Pass layer to finalize_nested_layers for parameter access Integration test: - Verify nudge_x → xOffset and nudge_y → yOffset mapping - Confirm parameters apply to all nested text layers Usage: DRAW text SETTING nudge_x => 5, nudge_y => -10 This enables fine-tuning text label positions without modifying the underlying x/y data, useful for avoiding overlaps or improving label placement in dense visualizations. 
Co-Authored-By: Claude Sonnet 4.5 --- src/plot/layer/geom/label.rs | 15 ++++++ src/plot/layer/geom/text.rs | 15 ++++++ src/writer/vegalite/layer.rs | 88 ++++++++++++++++++++++++++++++++++-- 3 files changed, 114 insertions(+), 4 deletions(-) diff --git a/src/plot/layer/geom/label.rs b/src/plot/layer/geom/label.rs index 0045dfd8..4fcaccda 100644 --- a/src/plot/layer/geom/label.rs +++ b/src/plot/layer/geom/label.rs @@ -1,4 +1,6 @@ //! Label geom implementation +use crate::plot::{DefaultParam, DefaultParamValue}; + use super::{GeomAesthetics, GeomTrait, GeomType}; /// Label geom - text labels with background @@ -20,6 +22,19 @@ impl GeomTrait for Label { hidden: &[], } } + + fn default_params(&self) -> &'static [DefaultParam] { + &[ + DefaultParam { + name: "nudge_x", + default: DefaultParamValue::Null, + }, + DefaultParam { + name: "nudge_y", + default: DefaultParamValue::Null, + }, + ] + } } impl std::fmt::Display for Label { diff --git a/src/plot/layer/geom/text.rs b/src/plot/layer/geom/text.rs index 1913485c..359d42ba 100644 --- a/src/plot/layer/geom/text.rs +++ b/src/plot/layer/geom/text.rs @@ -1,5 +1,7 @@ //! 
Text geom implementation +use crate::plot::{DefaultParam, DefaultParamValue}; + use super::{GeomAesthetics, GeomTrait, GeomType}; /// Text geom - text labels at positions @@ -21,6 +23,19 @@ impl GeomTrait for Text { hidden: &[], } } + + fn default_params(&self) -> &'static [DefaultParam] { + &[ + DefaultParam { + name: "nudge_x", + default: DefaultParamValue::Null, + }, + DefaultParam { + name: "nudge_y", + default: DefaultParamValue::Null, + }, + ] + } } impl std::fmt::Display for Text { diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 5812aa00..ddb669b8 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -434,10 +434,23 @@ impl TextRenderer { prototype: Value, data_key: &str, font_runs: &[(FontKey, usize)], + layer: &Layer, ) -> Result> { // Extract shared encoding from prototype let shared_encoding = prototype.get("encoding").cloned(); + // Build base mark object with nudge parameters (prototype for all runs) + let mut base_mark = json!({"type": "text"}); + if let Some(mark_map) = base_mark.as_object_mut() { + // Extract nudge parameters (nudge_x → xOffset, nudge_y → yOffset) + if let Some(ParameterValue::Number(x_offset)) = layer.parameters.get("nudge_x") { + mark_map.insert("xOffset".to_string(), json!(x_offset)); + } + if let Some(ParameterValue::Number(y_offset)) = layer.parameters.get("nudge_y") { + mark_map.insert("yOffset".to_string(), json!(y_offset)); + } + } + // Build individual layers without encoding (mark + transform only) // font_runs preserves natural row order through RLE let nested_layers: Vec = font_runs @@ -447,8 +460,8 @@ impl TextRenderer { let suffix = format!("_font_{}", run_idx); let source_key = format!("{}{}", data_key, suffix); - // Create mark object with font properties - let mut mark_obj = json!({"type": "text"}); + // Clone base mark and add font-specific properties + let mut mark_obj = base_mark.clone(); if let Some(mark_map) = mark_obj.as_object_mut() { 
Self::apply_font_properties(mark_map, font_key); } @@ -533,7 +546,7 @@ impl GeomRenderer for TextRenderer { fn finalize( &self, prototype: Value, - _layer: &Layer, + layer: &Layer, data_key: &str, prepared: &PreparedData, ) -> Result> { @@ -551,7 +564,7 @@ impl GeomRenderer for TextRenderer { })?; // Generate nested layers from font runs (works for single or multiple runs) - self.finalize_nested_layers(prototype, data_key, font_runs) + self.finalize_nested_layers(prototype, data_key, font_runs, layer) } } @@ -1597,4 +1610,71 @@ mod tests { ); } } + + #[test] + fn test_text_nudge_parameters() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Integration test: nudge_x and nudge_y parameters should map to xOffset/yOffset + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Query with nudge parameters + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + chr(65 + n::INTEGER) as label + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, label + DRAW text SETTING nudge_x => 5, nudge_y => -10 + "#; + + // Execute and prepare data + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + assert_eq!(prepared.specs.len(), 1); + + let spec = &prepared.specs[0]; + assert_eq!(spec.layers.len(), 1); + + // Generate Vega-Lite JSON + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Text renderer creates nested layers structure + let top_layers = vl_spec["layer"].as_array().unwrap(); + assert_eq!(top_layers.len(), 1); + + let parent_layer = &top_layers[0]; + let nested_layers = parent_layer["layer"].as_array().unwrap(); + + // All nested layers should have xOffset and yOffset in mark + for nested_layer in nested_layers { + let mark = nested_layer["mark"].as_object().unwrap(); + + assert!( 
+ mark.contains_key("xOffset"), + "Mark should have xOffset from nudge_x" + ); + assert_eq!( + mark["xOffset"].as_f64().unwrap(), + 5.0, + "xOffset should be 5" + ); + + assert!( + mark.contains_key("yOffset"), + "Mark should have yOffset from nudge_y" + ); + assert_eq!( + mark["yOffset"].as_f64().unwrap(), + -10.0, + "yOffset should be -10" + ); + } + } } From d970e1433d8853694adbee7716d0c26f9507d028 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 24 Feb 2026 11:08:33 +0100 Subject: [PATCH 20/29] Add format parameter for text label formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add template-based label formatting to text/label geoms, reusing the existing format.rs infrastructure from SCALE RENAMING. Changes: format.rs improvements: - Add format_dataframe_column() - clean API for DataFrame column formatting - Refactor to convert columns to strings first, then apply formatting - Add format_value() helper shared by both APIs - Improved error message showing actual datatype for unsupported types - Two-step process: column→string, then template application Text/Label geoms: - Add 'format' parameter (defaults to Null) - Works with both geoms for consistency TextRenderer: - Add apply_label_formatting() helper - Apply formatting in prepare_data() before font analysis - Pass layer parameter through prepare_data() trait method - Update all GeomRenderer implementations Integration tests: - test_text_label_formatting - Title case transformation - test_text_label_formatting_numeric - Printf-style number formatting Supported placeholder syntax: - {} - Plain insertion - {:UPPER} - Uppercase - {:lower} - Lowercase - {:Title} - Title Case - {:time %fmt} - DateTime strftime format - {:num %fmt} - Number printf format Usage: DRAW text SETTING format => 'Region: {:Title}' DRAW text SETTING format => '${:num %.2f}' DRAW text SETTING format => '{:time %b %Y}' The format parameter transforms label values before 
rendering, enabling clean label presentation without modifying source data. Co-Authored-By: Claude Sonnet 4.5 --- src/format.rs | 108 ++++++++++++++++++++++---- src/plot/layer/geom/label.rs | 4 + src/plot/layer/geom/text.rs | 4 + src/plot/types.rs | 2 +- src/writer/vegalite/layer.rs | 142 +++++++++++++++++++++++++++++++++-- src/writer/vegalite/mod.rs | 2 +- 6 files changed, 238 insertions(+), 24 deletions(-) diff --git a/src/format.rs b/src/format.rs index 224ace2a..32a1d7ed 100644 --- a/src/format.rs +++ b/src/format.rs @@ -179,29 +179,105 @@ pub fn apply_label_template( } let key = elem.to_key_string(); - let break_val = key.clone(); // Only apply template if no explicit mapping exists - result.entry(key).or_insert_with(|| { - let label = if placeholders.is_empty() { - // No placeholders - use template as literal string - template.to_string() - } else { - // Replace each placeholder with its transformed value - // Process in reverse order to preserve string indices - let mut label = template.to_string(); - for parsed in placeholders.iter().rev() { - let transformed = apply_transformation(&break_val, &parsed.placeholder); - label = label.replace(&parsed.match_text, &transformed); - } - label - }; - Some(label) + result.entry(key.clone()).or_insert_with(|| { + // Use shared format_value helper + Some(format_value(&key, template, &placeholders)) }); } result } +/// Apply label formatting template to a DataFrame column. +/// +/// Returns a new DataFrame with the specified column formatted according to the template. 
+/// +/// # Arguments +/// * `df` - DataFrame containing the column to format +/// * `column_name` - Name of the column to format +/// * `template` - Template string with placeholders (e.g., "{:Title}", "{:num %.2f}") +/// +/// # Returns +/// New DataFrame with formatted column +/// +/// # Example +/// ```ignore +/// let formatted_df = format_dataframe_column(&df, "_aesthetic_label", "Region: {:Title}")?; +/// ``` +pub fn format_dataframe_column( + df: &polars::prelude::DataFrame, + column_name: &str, + template: &str, +) -> Result { + use polars::prelude::*; + + // Get the column + let column = df + .column(column_name) + .map_err(|e| format!("Column '{}' not found: {}", column_name, e))?; + + // Step 1: Convert entire column to strings + let string_values: Vec> = if let Ok(str_col) = column.str() { + // String column (includes temporal data auto-converted to ISO format) + str_col + .into_iter() + .map(|opt| opt.map(|s| s.to_string())) + .collect() + } else if let Ok(num_col) = column.cast(&DataType::Float64) { + // Numeric column - use shared format_number helper for clean integer formatting + use crate::plot::format_number; + + let f64_col = num_col + .f64() + .map_err(|e| format!("Failed to cast column to f64: {}", e))?; + + f64_col + .into_iter() + .map(|opt| opt.map(format_number)) + .collect() + } else { + return Err(format!( + "Formatting doesn't support type {:?} in column '{}'. 
Try string or numeric types instead.", + column.dtype(), + column_name + )); + }; + + // Step 2: Apply formatting template to all string values + let placeholders = parse_placeholders(template); + let formatted_values: Vec> = string_values + .into_iter() + .map(|opt| opt.map(|s| format_value(&s, template, &placeholders))) + .collect(); + + let formatted_col = Series::new(column_name.into(), formatted_values); + + // Replace column in DataFrame + let mut new_df = df.clone(); + new_df + .replace(column_name, formatted_col) + .map_err(|e| format!("Failed to replace column: {}", e))?; + + Ok(new_df) +} + +/// Format a single value using template and parsed placeholders +fn format_value(value: &str, template: &str, placeholders: &[ParsedPlaceholder]) -> String { + if placeholders.is_empty() { + // No placeholders - use template as literal string + template.to_string() + } else { + // Replace each placeholder with its transformed value + let mut result = template.to_string(); + for parsed in placeholders.iter().rev() { + let transformed = apply_transformation(value, &parsed.placeholder); + result = result.replace(&parsed.match_text, &transformed); + } + result + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/plot/layer/geom/label.rs b/src/plot/layer/geom/label.rs index 4fcaccda..d4c1e689 100644 --- a/src/plot/layer/geom/label.rs +++ b/src/plot/layer/geom/label.rs @@ -33,6 +33,10 @@ impl GeomTrait for Label { name: "nudge_y", default: DefaultParamValue::Null, }, + DefaultParam { + name: "format", + default: DefaultParamValue::Null, + }, ] } } diff --git a/src/plot/layer/geom/text.rs b/src/plot/layer/geom/text.rs index 359d42ba..46bd212f 100644 --- a/src/plot/layer/geom/text.rs +++ b/src/plot/layer/geom/text.rs @@ -34,6 +34,10 @@ impl GeomTrait for Text { name: "nudge_y", default: DefaultParamValue::Null, }, + DefaultParam { + name: "format", + default: DefaultParamValue::Null, + }, ] } } diff --git a/src/plot/types.rs b/src/plot/types.rs index 
ec1ce054..12e9f307 100644 --- a/src/plot/types.rs +++ b/src/plot/types.rs @@ -352,7 +352,7 @@ fn time_to_iso_string(nanos: i64) -> String { } /// Format number for display (remove trailing zeros for integers) -fn format_number(n: f64) -> String { +pub fn format_number(n: f64) -> String { if n.fract() == 0.0 { format!("{:.0}", n) } else { diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index ddb669b8..05b46914 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -129,6 +129,7 @@ pub trait GeomRenderer: Send + Sync { fn prepare_data( &self, df: &DataFrame, + _layer: &Layer, _data_key: &str, binned_columns: &HashMap>, ) -> Result { @@ -233,6 +234,25 @@ type FontKey = (Option, Value, Value, Value, Value, Value); pub struct TextRenderer; impl TextRenderer { + /// Apply label formatting if format parameter is specified. + /// Returns a new DataFrame with the label column formatted, or the original if no formatting. + fn apply_label_formatting(df: &DataFrame, layer: &Layer) -> Result { + use crate::format; + use crate::naming; + use crate::plot::ParameterValue; + + // Check if format parameter is specified + let format_template = match layer.parameters.get("format") { + Some(ParameterValue::String(template)) => template, + _ => return Ok(df.clone()), // No formatting, return original + }; + + // Use format.rs helper to do the formatting + let label_col_name = naming::aesthetic_column("label"); + format::format_dataframe_column(df, &label_col_name, format_template) + .map_err(|e| GgsqlError::WriterError(e)) + } + /// Analyze DataFrame columns to build font property runs using run-length encoding. /// Returns Vec of (font_key, length) tuples representing consecutive rows with identical font properties. /// Start positions are implicit (derived from cumulative lengths). 
@@ -489,11 +509,15 @@ impl GeomRenderer for TextRenderer { fn prepare_data( &self, df: &DataFrame, + layer: &Layer, _data_key: &str, binned_columns: &HashMap>, ) -> Result { + // Apply label formatting if specified + let df = Self::apply_label_formatting(df, layer)?; + // Analyze font columns to get RLE runs - let font_runs = Self::build_font_rle(df)?; + let font_runs = Self::build_font_rle(&df)?; // Split data by font runs, tracking cumulative position let mut components: HashMap> = HashMap::new(); @@ -1093,6 +1117,7 @@ impl GeomRenderer for BoxplotRenderer { fn prepare_data( &self, df: &DataFrame, + _layer: &Layer, _data_key: &str, binned_columns: &HashMap>, ) -> Result { @@ -1265,6 +1290,7 @@ mod tests { use polars::prelude::*; let renderer = TextRenderer; + let layer = Layer::new(crate::plot::Geom::text()); // Create DataFrame where all rows have the same font let df = df! { @@ -1276,7 +1302,7 @@ mod tests { .unwrap(); // Prepare data - should result in single layer with _font_0 component key - let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); + let prepared = renderer.prepare_data(&df, &layer, "test", &HashMap::new()).unwrap(); match prepared { PreparedData::Composite { components, .. } => { @@ -1294,6 +1320,7 @@ mod tests { use polars::prelude::*; let renderer = TextRenderer; + let layer = Layer::new(crate::plot::Geom::text()); // Create DataFrame with different fonts per row let df = df! { @@ -1305,7 +1332,7 @@ mod tests { .unwrap(); // Prepare data - should result in multiple layers - let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); + let prepared = renderer.prepare_data(&df, &layer, "test", &HashMap::new()).unwrap(); match prepared { PreparedData::Composite { components, .. } => { @@ -1325,6 +1352,7 @@ mod tests { use polars::prelude::*; let renderer = TextRenderer; + let layer = Layer::new(crate::plot::Geom::text()); // Create DataFrame with different fonts let df = df! 
{ @@ -1337,7 +1365,7 @@ mod tests { .unwrap(); // Prepare data - let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); + let prepared = renderer.prepare_data(&df, &layer, "test", &HashMap::new()).unwrap(); // Get the components let components = match &prepared { @@ -1401,6 +1429,7 @@ mod tests { use polars::prelude::*; let renderer = TextRenderer; + let layer = Layer::new(crate::plot::Geom::text()); // Create DataFrame with different angles let df = df! { @@ -1412,7 +1441,7 @@ mod tests { .unwrap(); // Prepare data - should result in multiple layers (one per unique angle) - let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); + let prepared = renderer.prepare_data(&df, &layer, "test", &HashMap::new()).unwrap(); match &prepared { PreparedData::Composite { components, .. } => { @@ -1465,6 +1494,7 @@ mod tests { use polars::prelude::*; let renderer = TextRenderer; + let layer = Layer::new(crate::plot::Geom::text()); // Create DataFrame with numeric angle column (matching actual query) let df = df! { @@ -1476,7 +1506,7 @@ mod tests { .unwrap(); // Prepare data - should result in multiple layers (one per unique angle) - let prepared = renderer.prepare_data(&df, "test", &HashMap::new()).unwrap(); + let prepared = renderer.prepare_data(&df, &layer, "test", &HashMap::new()).unwrap(); match &prepared { PreparedData::Composite { components, .. 
} => { @@ -1677,4 +1707,104 @@ mod tests { ); } } + + #[test] + fn test_text_label_formatting() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Integration test: format parameter should transform label values + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Query with format parameter using Title case transformation + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + CASE + WHEN n = 0 THEN 'north region' + WHEN n = 1 THEN 'south region' + ELSE 'east region' + END as region + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, region AS label + DRAW text SETTING format => 'Region: {:Title}' + "#; + + // Execute and prepare data + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + assert_eq!(prepared.specs.len(), 1); + + let spec = &prepared.specs[0]; + assert_eq!(spec.layers.len(), 1); + + // Generate Vega-Lite JSON + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Check that data has formatted labels + let data_values = vl_spec["data"]["values"].as_array().unwrap(); + assert!(!data_values.is_empty()); + + // Verify formatted labels in the data + let label_col = crate::naming::aesthetic_column("label"); + + // Check each row has properly formatted labels + let labels: Vec<&str> = data_values + .iter() + .filter_map(|row| row[&label_col].as_str()) + .collect(); + + assert_eq!(labels.len(), 3); + assert!(labels.contains(&"Region: North Region")); + assert!(labels.contains(&"Region: South Region")); + assert!(labels.contains(&"Region: East Region")); + } + + #[test] + fn test_text_label_formatting_numeric() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + use crate::writer::Writer; + + // Test numeric 
formatting with printf-style format + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + n::FLOAT * 10.5 as value + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, value AS label + DRAW text SETTING format => '${:num %.2f}' + "#; + + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + let spec = &prepared.specs[0]; + + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + let data_values = vl_spec["data"]["values"].as_array().unwrap(); + let label_col = crate::naming::aesthetic_column("label"); + + let labels: Vec<&str> = data_values + .iter() + .filter_map(|row| row[&label_col].as_str()) + .collect(); + + // Should have formatted currency values + assert_eq!(labels.len(), 3); + assert!(labels.contains(&"$0.00")); + assert!(labels.contains(&"$10.50")); + assert!(labels.contains(&"$21.00")); + } } diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs index 366500e0..8926369d 100644 --- a/src/writer/vegalite/mod.rs +++ b/src/writer/vegalite/mod.rs @@ -102,7 +102,7 @@ fn prepare_layer_data( let renderer = get_renderer(&layer.geom); // Prepare data using the renderer (handles both standard and composite cases) - let prepared = renderer.prepare_data(df, data_key, binned_columns)?; + let prepared = renderer.prepare_data(df, layer, data_key, binned_columns)?; // Add data to individual datasets based on prepared type match &prepared { From 6990790cba306c45f3abbb6e743cf9646569ddcd Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 24 Feb 2026 16:55:03 +0100 Subject: [PATCH 21/29] soothe compiler --- src/writer/vegalite/layer.rs | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 
5256be76..8335911e 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -250,7 +250,7 @@ impl TextRenderer { // Use format.rs helper to do the formatting let label_col_name = naming::aesthetic_column("label"); format::format_dataframe_column(df, &label_col_name, format_template) - .map_err(|e| GgsqlError::WriterError(e)) + .map_err(GgsqlError::WriterError) } /// Analyze DataFrame columns to build font property runs using run-length encoding. @@ -300,7 +300,7 @@ impl TextRenderer { let (font_weight_val, font_style_val) = Self::convert_fontface(fontface_str); let hjust_val = Self::convert_hjust(hjust_str); let vjust_val = Self::convert_vjust(vjust_str); - let angle_val = Self::convert_angle(angle_col.as_deref(), row_idx); + let angle_val = Self::convert_angle(angle_col, row_idx); let key = ( family_val, @@ -583,9 +583,7 @@ impl GeomRenderer for TextRenderer { // Downcast metadata to font runs let font_runs = metadata .downcast_ref::>() - .ok_or_else(|| { - GgsqlError::InternalError("Failed to downcast font runs".to_string()) - })?; + .ok_or_else(|| GgsqlError::InternalError("Failed to downcast font runs".to_string()))?; // Generate nested layers from font runs (works for single or multiple runs) self.finalize_nested_layers(prototype, data_key, font_runs, layer) @@ -1284,7 +1282,9 @@ mod tests { .unwrap(); // Prepare data - should result in single layer with _font_0 component key - let prepared = renderer.prepare_data(&df, &layer, "test", &HashMap::new()).unwrap(); + let prepared = renderer + .prepare_data(&df, &layer, "test", &HashMap::new()) + .unwrap(); match prepared { PreparedData::Composite { components, .. 
} => { @@ -1314,7 +1314,9 @@ mod tests { .unwrap(); // Prepare data - should result in multiple layers - let prepared = renderer.prepare_data(&df, &layer, "test", &HashMap::new()).unwrap(); + let prepared = renderer + .prepare_data(&df, &layer, "test", &HashMap::new()) + .unwrap(); match prepared { PreparedData::Composite { components, .. } => { @@ -1347,7 +1349,9 @@ mod tests { .unwrap(); // Prepare data - let prepared = renderer.prepare_data(&df, &layer, "test", &HashMap::new()).unwrap(); + let prepared = renderer + .prepare_data(&df, &layer, "test", &HashMap::new()) + .unwrap(); // Get the components let components = match &prepared { @@ -1423,7 +1427,9 @@ mod tests { .unwrap(); // Prepare data - should result in multiple layers (one per unique angle) - let prepared = renderer.prepare_data(&df, &layer, "test", &HashMap::new()).unwrap(); + let prepared = renderer + .prepare_data(&df, &layer, "test", &HashMap::new()) + .unwrap(); match &prepared { PreparedData::Composite { components, .. } => { @@ -1488,7 +1494,9 @@ mod tests { .unwrap(); // Prepare data - should result in multiple layers (one per unique angle) - let prepared = renderer.prepare_data(&df, &layer, "test", &HashMap::new()).unwrap(); + let prepared = renderer + .prepare_data(&df, &layer, "test", &HashMap::new()) + .unwrap(); match &prepared { PreparedData::Composite { components, .. 
} => { From 39e4550b649d172288b188423bd676c3ee66ca3b Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 25 Feb 2026 09:54:47 +0100 Subject: [PATCH 22/29] Handle font properties from parameters --- src/writer/vegalite/layer.rs | 124 ++++++++++++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 8 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 8335911e..02ce442a 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -413,19 +413,32 @@ impl TextRenderer { json!(0.0) } - /// Apply font properties to mark object + /// Apply font properties to mark object (only if not already set by Literals) fn apply_font_properties(mark_obj: &mut Map, font_key: &FontKey) { let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val, angle_val) = font_key; + // Only apply font properties if not already set by Literal aesthetics if let Some(family_val) = family_val { - mark_obj.insert("font".to_string(), family_val.clone()); + mark_obj + .entry("font".to_string()) + .or_insert(family_val.clone()); } - mark_obj.insert("fontWeight".to_string(), font_weight_val.clone()); - mark_obj.insert("fontStyle".to_string(), font_style_val.clone()); - mark_obj.insert("align".to_string(), hjust_val.clone()); - mark_obj.insert("baseline".to_string(), vjust_val.clone()); - mark_obj.insert("angle".to_string(), angle_val.clone()); + mark_obj + .entry("fontWeight".to_string()) + .or_insert(font_weight_val.clone()); + mark_obj + .entry("fontStyle".to_string()) + .or_insert(font_style_val.clone()); + mark_obj + .entry("align".to_string()) + .or_insert(hjust_val.clone()); + mark_obj + .entry("baseline".to_string()) + .or_insert(vjust_val.clone()); + mark_obj + .entry("angle".to_string()) + .or_insert(angle_val.clone()); } /// Build transform with source filter @@ -459,7 +472,7 @@ impl TextRenderer { // Extract shared encoding from prototype let shared_encoding = prototype.get("encoding").cloned(); - // Build base 
mark object with nudge parameters (prototype for all runs) + // Build base mark object with fixed parameters let mut base_mark = json!({"type": "text"}); if let Some(mark_map) = base_mark.as_object_mut() { // Extract nudge parameters (nudge_x → xOffset, nudge_y → yOffset) @@ -469,6 +482,44 @@ impl TextRenderer { if let Some(ParameterValue::Number(y_offset)) = layer.parameters.get("nudge_y") { mark_map.insert("yOffset".to_string(), json!(y_offset)); } + + // Apply Literal font aesthetics from SETTING (uniform across all rows) + if let Some(ParameterValue::String(s)) = layer.get_literal("family") { + if !s.is_empty() { + mark_map.insert("font".to_string(), json!(s)); + } + } + if let Some(ParameterValue::String(s)) = layer.get_literal("fontface") { + let (font_weight, font_style) = Self::convert_fontface(s); + mark_map.insert("fontWeight".to_string(), font_weight); + mark_map.insert("fontStyle".to_string(), font_style); + } + if let Some(lit) = layer.get_literal("hjust") { + match lit { + ParameterValue::String(s) => { + mark_map.insert("align".to_string(), Self::convert_hjust(s)); + } + ParameterValue::Number(n) => { + mark_map.insert("align".to_string(), Self::convert_hjust(&n.to_string())); + } + _ => {} + } + } + if let Some(lit) = layer.get_literal("vjust") { + match lit { + ParameterValue::String(s) => { + mark_map.insert("baseline".to_string(), Self::convert_vjust(s)); + } + ParameterValue::Number(n) => { + mark_map + .insert("baseline".to_string(), Self::convert_vjust(&n.to_string())); + } + _ => {} + } + } + if let Some(ParameterValue::Number(n)) = layer.get_literal("angle") { + mark_map.insert("angle".to_string(), json!(n)); + } } // Build individual layers without encoding (mark + transform only) @@ -1797,4 +1848,61 @@ mod tests { assert!(labels.contains(&"$10.50")); assert!(labels.contains(&"$21.00")); } + + #[test] + fn test_text_setting_fontface() { + use crate::execute; + use crate::reader::DuckDBReader; + use crate::writer::vegalite::VegaLiteWriter; + 
use crate::writer::Writer; + + // Integration test: SETTING fontface => 'bold' should add fontWeight to base mark + + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Query with fontface in SETTING + let query = r#" + SELECT + n::INTEGER as x, + n::INTEGER as y, + chr(65 + n::INTEGER) as label + FROM generate_series(0, 2) as t(n) + VISUALISE x, y, label + DRAW text SETTING fontface => 'bold' + "#; + + // Execute and prepare data + let prepared = execute::prepare_data_with_reader(query, &reader).unwrap(); + assert_eq!(prepared.specs.len(), 1); + + let spec = &prepared.specs[0]; + assert_eq!(spec.layers.len(), 1); + + // Generate Vega-Lite JSON + let writer = VegaLiteWriter::new(); + let json_str = writer.write(spec, &prepared.data).unwrap(); + let vl_spec: serde_json::Value = serde_json::from_str(&json_str).unwrap(); + + // Text renderer creates nested layers structure + let top_layers = vl_spec["layer"].as_array().unwrap(); + assert_eq!(top_layers.len(), 1); + + let parent_layer = &top_layers[0]; + let nested_layers = parent_layer["layer"].as_array().unwrap(); + + // All nested layers should have fontWeight: "bold" in mark (from SETTING) + for nested_layer in nested_layers { + let mark = nested_layer["mark"].as_object().unwrap(); + + assert!( + mark.contains_key("fontWeight"), + "Mark should have fontWeight from SETTING fontface" + ); + assert_eq!( + mark["fontWeight"].as_str().unwrap(), + "bold", + "fontWeight should be bold" + ); + } + } } From d765bf401dbc686cddbae3615a22f27d3294cab3 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 25 Feb 2026 12:07:21 +0100 Subject: [PATCH 23/29] Refactor text geom font property handling - Separate value selection from conversion in all convert functions - Use early returns with ? 
operator for cleaner control flow - Inline convert function calls to eliminate intermediate variables - Change property insertion to use if let Some with .insert() - Fix column lookup to use naming::aesthetic_column() - Optimize angle extraction to handle numeric columns without cast->parse - Remove unused FontKey type alias - Fix test_fontsize_linear_scaling to include required label aesthetic All text rendering tests passing (11/11). Co-Authored-By: Claude Sonnet 4.5 --- src/writer/vegalite/layer.rs | 452 +++++++++++++++++++---------------- src/writer/vegalite/mod.rs | 5 + 2 files changed, 257 insertions(+), 200 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 02ce442a..f4a9a26b 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -227,9 +227,6 @@ impl GeomRenderer for PathRenderer { // Text Renderer // ============================================================================= -/// Font property tuple: (family, fontWeight, fontStyle, align, baseline, angle) as converted Vega-Lite Values -type FontKey = (Option, Value, Value, Value, Value, Value); - /// Renderer for text geom - handles font properties via data splitting pub struct TextRenderer; @@ -254,191 +251,283 @@ impl TextRenderer { } /// Analyze DataFrame columns to build font property runs using run-length encoding. - /// Returns Vec of (font_key, length) tuples representing consecutive rows with identical font properties. - /// Start positions are implicit (derived from cumulative lengths). 
- fn build_font_rle(df: &DataFrame) -> Result> { + /// Returns: + /// - DataFrame where each row represents a run's font properties (family, fontface, hjust, vjust, angle) + /// - Vec of run lengths corresponding to each row + fn build_font_rle(df: &DataFrame) -> Result<(DataFrame, Vec)> { + use polars::prelude::*; + let nrows = df.height(); if nrows == 0 { - return Ok(Vec::new()); + // Return empty DataFrame and empty run lengths + return Ok((DataFrame::default(), Vec::new())); } - // Extract all font columns (or use defaults if missing) - let family_col = df - .column(&naming::aesthetic_column("family")) - .ok() - .and_then(|s| s.str().ok()); - let fontface_col = df - .column(&naming::aesthetic_column("fontface")) - .ok() - .and_then(|s| s.str().ok()); - let hjust_col = df - .column(&naming::aesthetic_column("hjust")) - .ok() - .and_then(|s| s.str().ok()); - let vjust_col = df - .column(&naming::aesthetic_column("vjust")) - .ok() - .and_then(|s| s.str().ok()); - - // Angle can be numeric or string, so get the raw column - let angle_col = df.column(&naming::aesthetic_column("angle")).ok(); - - // Run-length encoding: group consecutive rows with same font properties - let mut runs = Vec::new(); - let mut current_key: Option = None; - let mut run_length = 0; - - for row_idx in 0..nrows { - let family_str = family_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); - let fontface_str = fontface_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); - let hjust_str = hjust_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); - let vjust_str = vjust_col.and_then(|ca| ca.get(row_idx)).unwrap_or(""); - - // Convert to Vega-Lite property values immediately - let family_val = Self::convert_family(family_str); - let (font_weight_val, font_style_val) = Self::convert_fontface(fontface_str); - let hjust_val = Self::convert_hjust(hjust_str); - let vjust_val = Self::convert_vjust(vjust_str); - let angle_val = Self::convert_angle(angle_col, row_idx); - - let key = ( - family_val, - 
font_weight_val, - font_style_val, - hjust_val, - vjust_val, - angle_val, - ); + // Build boolean mask showing where any font property changes + let mut changed = BooleanChunked::full("changed".into(), false, nrows); + let mut font_columns: HashMap<&str, &polars::prelude::Column> = HashMap::new(); + + for aesthetic in ["family", "fontface", "hjust", "vjust", "angle"] { + if let Ok(col) = df.column(&naming::aesthetic_column(aesthetic)) { + let col_changed = col.not_equal(&col.shift(1)).map_err(|e| { + GgsqlError::InternalError(format!("Failed to compare column: {}", e)) + })?; + changed = &changed | &col_changed; + font_columns.insert(aesthetic, col); + } + } - // If font properties changed, emit previous run and start new one - if Some(&key) != current_key.as_ref() { - if let Some(prev_key) = current_key { - runs.push((prev_key, run_length)); - run_length = 0; - } - current_key = Some(key); + // Extract change indices (where mask is true) + // shift() creates nulls at position 0, which we treat as a change point + let mut change_indices: Vec = Vec::new(); + for (i, val) in changed.iter().enumerate() { + if val == Some(true) || val == None { + // Treat null (from shift) or true as change point + change_indices.push(i); } + } - run_length += 1; + // First row is always a change point (shift comparison is null) + if !change_indices.is_empty() && change_indices[0] != 0 { + change_indices.insert(0, 0); + } else if change_indices.is_empty() { + change_indices.push(0); } - // Don't forget the last run - if let Some(key) = current_key { - runs.push((key, run_length)); + // Calculate run lengths + let run_lengths: Vec = change_indices.iter().enumerate().map(|(i, &start)| { + let end = change_indices.get(i + 1).copied().unwrap_or(nrows); + end - start + }).collect(); + + // Extract rows at change indices (only font columns) + let indices_ca = UInt32Chunked::from_vec("indices".into(), change_indices.iter().map(|&i| i as u32).collect()); + let font_aesthetics = ["family", 
"fontface", "hjust", "vjust", "angle"]; + + let mut result_cols = Vec::new(); + for aesthetic in font_aesthetics { + if let Some(col) = font_columns.get(aesthetic) { + let taken = col.take(&indices_ca).map_err(|e| { + GgsqlError::InternalError(format!("Failed to take indices from {}: {}", aesthetic, e)) + })?; + result_cols.push(taken); + } } - Ok(runs) + // Create result DataFrame (only font properties, no run_length column) + let result_df = DataFrame::new(result_cols).map_err(|e| { + GgsqlError::InternalError(format!("Failed to create run DataFrame: {}", e)) + })?; + + Ok((result_df, run_lengths)) } - /// Convert family string to Vega-Lite font value - fn convert_family(value: &str) -> Option { - if value.is_empty() { - None + /// Convert family to Vega-Lite font value + /// Prefers literal over column value + fn convert_family( + literal: Option<&ParameterValue>, + column_value: Option<&str>, + ) -> Option { + // First select which value to use (prefer literal) + let value = if let Some(ParameterValue::String(s)) = literal { + s.as_str() } else { + column_value? 
+ }; + + // Then apply conversion + if !value.is_empty() { Some(json!(value)) + } else { + None } } - /// Convert fontface string to Vega-Lite fontWeight and fontStyle values - fn convert_fontface(value: &str) -> (Value, Value) { - match value { + /// Convert fontface to Vega-Lite fontWeight and fontStyle values + /// Prefers literal over column value + fn convert_fontface( + literal: Option<&ParameterValue>, + column_value: Option<&str>, + ) -> (Option, Option) { + // First select which value to use (prefer literal) + let value = if let Some(ParameterValue::String(s)) = literal { + s.as_str() + } else if let Some(s) = column_value { + s + } else { + return (None, None); + }; + + // Then apply conversion + let (weight, style) = match value { "bold" => (json!("bold"), json!("normal")), "italic" => (json!("normal"), json!("italic")), "bold.italic" | "bolditalic" => (json!("bold"), json!("italic")), _ => (json!("normal"), json!("normal")), - } + }; + (Some(weight), Some(style)) } - /// Convert hjust string to Vega-Lite align value - fn convert_hjust(value: &str) -> Value { - let align = match value.parse::() { + /// Convert hjust to Vega-Lite align value + /// Prefers literal over column value + fn convert_hjust( + literal: Option<&ParameterValue>, + column_value: Option<&str>, + ) -> Option { + // First extract which value to use (prefer literal) + let value_str = match literal { + Some(ParameterValue::String(s)) => s.to_string(), + Some(ParameterValue::Number(n)) => n.to_string(), + _ => column_value?.to_string(), + }; + + // Then apply conversion inline + let align = match value_str.parse::() { Ok(v) if v <= 0.25 => "left", Ok(v) if v >= 0.75 => "right", - _ => match value { + _ => match value_str.as_str() { "left" => "left", "right" => "right", _ => "center", }, }; - json!(align) + + Some(json!(align)) } - /// Convert vjust string to Vega-Lite baseline value - fn convert_vjust(value: &str) -> Value { - let baseline = match value.parse::() { + /// Convert vjust to 
Vega-Lite baseline value + /// Prefers literal over column value + fn convert_vjust( + literal: Option<&ParameterValue>, + column_value: Option<&str>, + ) -> Option { + // First extract which value to use (prefer literal) + let value_str = match literal { + Some(ParameterValue::String(s)) => s.to_string(), + Some(ParameterValue::Number(n)) => n.to_string(), + _ => column_value?.to_string(), + }; + + // Then apply conversion inline + let baseline = match value_str.parse::() { Ok(v) if v <= 0.25 => "bottom", Ok(v) if v >= 0.75 => "top", - _ => match value { + _ => match value_str.as_str() { "top" => "top", "bottom" => "bottom", _ => "middle", }, }; - json!(baseline) + + Some(json!(baseline)) } - /// Convert angle column value to Vega-Lite angle value (degrees) - /// Handles both numeric and string columns + /// Convert angle to Vega-Lite angle value (degrees) + /// Prefers literal over column value /// Normalizes angles to [0, 360) range - fn convert_angle(angle_col: Option<&polars::prelude::Column>, row_idx: usize) -> Value { - use polars::prelude::*; + fn convert_angle( + literal: Option<&ParameterValue>, + column_value: Option, + ) -> Option { + // First select which value to use (prefer literal) + let value = if let Some(ParameterValue::Number(n)) = literal { + *n + } else { + column_value? 
+ }; - let normalize_angle = |angle: f64| { - let normalized = angle % 360.0; - if normalized < 0.0 { - normalized + 360.0 - } else { - normalized - } + // Then apply conversion inline + let normalized = value % 360.0; + let angle = if normalized < 0.0 { + normalized + 360.0 + } else { + normalized }; - if let Some(col) = angle_col { - // Try as numeric first (int or float) - if let Ok(num_series) = col.cast(&DataType::Float64) { - if let Some(val) = num_series.f64().ok().and_then(|ca| ca.get(row_idx)) { - return json!(normalize_angle(val)); - } + Some(json!(angle)) + } + + /// Apply font properties to mark object from DataFrame row and layer literals + /// Uses literals from layer parameters if present, otherwise uses DataFrame column values + fn apply_font_properties( + mark_obj: &mut Map, + df: &DataFrame, + row_idx: usize, + layer: &Layer, + ) -> Result<()> { + // Helper to extract string column values using aesthetic column naming + let get_str = |aesthetic: &str| -> Option { + let col_name = naming::aesthetic_column(aesthetic); + df.column(&col_name) + .ok() + .and_then(|col| col.str().ok()) + .and_then(|ca| ca.get(row_idx)) + .map(|s| s.to_string()) + }; + + // Helper to extract numeric column values (for angle) + let get_f64 = |aesthetic: &str| -> Option { + use polars::prelude::*; + let col_name = naming::aesthetic_column(aesthetic); + let col = df.column(&col_name).ok()?; + + // Try as string first (for string-encoded numbers) + if let Ok(ca) = col.str() { + return ca.get(row_idx).and_then(|s| s.parse::().ok()); } - // Try as string - if let Ok(str_ca) = col.str() { - if let Some(s) = str_ca.get(row_idx) { - if let Ok(angle) = s.parse::() { - return json!(normalize_angle(angle)); - } + + // Try as numeric types directly + if let Ok(casted) = col.cast(&DataType::Float64) { + if let Ok(ca) = casted.f64() { + return ca.get(row_idx); } } + + None + }; + + // Convert and apply font properties + if let Some(family_val) = Self::convert_family( + 
layer.get_literal("family"), + get_str("family").as_deref(), + ) { + mark_obj.insert("font".to_string(), family_val); + } + + let (font_weight_val, font_style_val) = Self::convert_fontface( + layer.get_literal("fontface"), + get_str("fontface").as_deref(), + ); + if let Some(weight) = font_weight_val { + mark_obj.insert("fontWeight".to_string(), weight); + } + if let Some(style) = font_style_val { + mark_obj.insert("fontStyle".to_string(), style); } - // Default to 0.0 if column missing or value unparseable - json!(0.0) - } - /// Apply font properties to mark object (only if not already set by Literals) - fn apply_font_properties(mark_obj: &mut Map, font_key: &FontKey) { - let (family_val, font_weight_val, font_style_val, hjust_val, vjust_val, angle_val) = - font_key; + if let Some(hjust_val) = Self::convert_hjust( + layer.get_literal("hjust"), + get_str("hjust").as_deref(), + ) { + mark_obj.insert("align".to_string(), hjust_val); + } - // Only apply font properties if not already set by Literal aesthetics - if let Some(family_val) = family_val { - mark_obj - .entry("font".to_string()) - .or_insert(family_val.clone()); + if let Some(vjust_val) = Self::convert_vjust( + layer.get_literal("vjust"), + get_str("vjust").as_deref(), + ) { + mark_obj.insert("baseline".to_string(), vjust_val); } - mark_obj - .entry("fontWeight".to_string()) - .or_insert(font_weight_val.clone()); - mark_obj - .entry("fontStyle".to_string()) - .or_insert(font_style_val.clone()); - mark_obj - .entry("align".to_string()) - .or_insert(hjust_val.clone()); - mark_obj - .entry("baseline".to_string()) - .or_insert(vjust_val.clone()); - mark_obj - .entry("angle".to_string()) - .or_insert(angle_val.clone()); + + if let Some(angle_val) = Self::convert_angle( + layer.get_literal("angle"), + get_f64("angle"), + ) { + mark_obj.insert("angle".to_string(), angle_val); + } + + Ok(()) } /// Build transform with source filter @@ -466,7 +555,8 @@ impl TextRenderer { &self, prototype: Value, data_key: &str, - 
font_runs: &[(FontKey, usize)], + font_runs_df: &DataFrame, + run_lengths: &[usize], layer: &Layer, ) -> Result> { // Extract shared encoding from prototype @@ -482,68 +572,29 @@ impl TextRenderer { if let Some(ParameterValue::Number(y_offset)) = layer.parameters.get("nudge_y") { mark_map.insert("yOffset".to_string(), json!(y_offset)); } - - // Apply Literal font aesthetics from SETTING (uniform across all rows) - if let Some(ParameterValue::String(s)) = layer.get_literal("family") { - if !s.is_empty() { - mark_map.insert("font".to_string(), json!(s)); - } - } - if let Some(ParameterValue::String(s)) = layer.get_literal("fontface") { - let (font_weight, font_style) = Self::convert_fontface(s); - mark_map.insert("fontWeight".to_string(), font_weight); - mark_map.insert("fontStyle".to_string(), font_style); - } - if let Some(lit) = layer.get_literal("hjust") { - match lit { - ParameterValue::String(s) => { - mark_map.insert("align".to_string(), Self::convert_hjust(s)); - } - ParameterValue::Number(n) => { - mark_map.insert("align".to_string(), Self::convert_hjust(&n.to_string())); - } - _ => {} - } - } - if let Some(lit) = layer.get_literal("vjust") { - match lit { - ParameterValue::String(s) => { - mark_map.insert("baseline".to_string(), Self::convert_vjust(s)); - } - ParameterValue::Number(n) => { - mark_map - .insert("baseline".to_string(), Self::convert_vjust(&n.to_string())); - } - _ => {} - } - } - if let Some(ParameterValue::Number(n)) = layer.get_literal("angle") { - mark_map.insert("angle".to_string(), json!(n)); - } } // Build individual layers without encoding (mark + transform only) - // font_runs preserves natural row order through RLE - let nested_layers: Vec = font_runs - .iter() - .enumerate() - .map(|(run_idx, (font_key, _length))| { - let suffix = format!("_font_{}", run_idx); - let source_key = format!("{}{}", data_key, suffix); - - // Clone base mark and add font-specific properties - let mut mark_obj = base_mark.clone(); - if let Some(mark_map) = 
mark_obj.as_object_mut() { - Self::apply_font_properties(mark_map, font_key); - } + // Use run_lengths to get number of runs (works even when no font columns exist) + let nruns = run_lengths.len(); + let mut nested_layers: Vec = Vec::with_capacity(nruns); - // Create layer with mark and transform (no encoding) - json!({ - "mark": mark_obj, - "transform": Self::build_transform_with_filter(&prototype, &source_key) - }) - }) - .collect(); + for run_idx in 0..nruns { + let suffix = format!("_font_{}", run_idx); + let source_key = format!("{}{}", data_key, suffix); + + // Clone base mark and apply font-specific properties + let mut mark_obj = base_mark.clone(); + if let Some(mark_map) = mark_obj.as_object_mut() { + Self::apply_font_properties(mark_map, font_runs_df, run_idx, layer)?; + } + + // Create layer with mark and transform (no encoding) + nested_layers.push(json!({ + "mark": mark_obj, + "transform": Self::build_transform_with_filter(&prototype, &source_key) + })); + } // Wrap in parent spec with shared encoding let mut parent_spec = json!({"layer": nested_layers}); @@ -568,17 +619,18 @@ impl GeomRenderer for TextRenderer { let df = Self::apply_label_formatting(df, layer)?; // Analyze font columns to get RLE runs - let font_runs = Self::build_font_rle(&df)?; + let (font_runs_df, run_lengths) = Self::build_font_rle(&df)?; // Split data by font runs, tracking cumulative position let mut components: HashMap> = HashMap::new(); let mut position = 0; - for (run_idx, (_font_key, length)) in font_runs.iter().enumerate() { + for (run_idx, &length) in run_lengths.iter().enumerate() { + let suffix = format!("_font_{}", run_idx); // Slice the contiguous run from the DataFrame (more efficient than boolean masking) - let sliced = df.slice(position as i64, *length); + let sliced = df.slice(position as i64, length); let values = if binned_columns.is_empty() { dataframe_to_values(&sliced)? 
@@ -592,7 +644,7 @@ impl GeomRenderer for TextRenderer { Ok(PreparedData::Composite { components, - metadata: Box::new(font_runs), + metadata: Box::new((font_runs_df, run_lengths)), }) } @@ -632,12 +684,12 @@ impl GeomRenderer for TextRenderer { }; // Downcast metadata to font runs - let font_runs = metadata - .downcast_ref::>() + let (font_runs_df, run_lengths) = metadata + .downcast_ref::<(DataFrame, Vec)>() .ok_or_else(|| GgsqlError::InternalError("Failed to downcast font runs".to_string()))?; // Generate nested layers from font runs (works for single or multiple runs) - self.finalize_nested_layers(prototype, data_key, font_runs, layer) + self.finalize_nested_layers(prototype, data_key, font_runs_df, run_lengths, layer) } } diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs index 09773323..9ef976ee 100644 --- a/src/writer/vegalite/mod.rs +++ b/src/writer/vegalite/mod.rs @@ -1266,6 +1266,10 @@ mod tests { "y".to_string(), AestheticValue::standard_column("y".to_string()), ) + .with_aesthetic( + "label".to_string(), + AestheticValue::standard_column("label".to_string()), + ) .with_aesthetic( "fontsize".to_string(), AestheticValue::standard_column("value".to_string()), @@ -1285,6 +1289,7 @@ mod tests { let df = df! 
{ "x" => &[1, 2, 3], "y" => &[1, 2, 3], + "label" => &["A", "B", "C"], "value" => &[1.0, 2.0, 3.0], } .unwrap(); From bfbf943cf736d10996899f79cc068b338216aca6 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 25 Feb 2026 14:00:21 +0100 Subject: [PATCH 24/29] specify fontsize in pt --- src/plot/main.rs | 2 +- src/writer/vegalite/encoding.rs | 4 +-- src/writer/vegalite/layer.rs | 54 ++++++++++++++++----------------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/plot/main.rs b/src/plot/main.rs index 6772ff08..ce940c41 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -405,7 +405,7 @@ mod tests { let text = Geom::text().aesthetics(); assert!(text.is_supported("label")); assert!(text.is_supported("family")); - assert_eq!(text.required(), &["x", "y"]); + assert_eq!(text.required(), &["x", "y", "label"]); // Statistical geoms only require x assert_eq!(Geom::histogram().aesthetics().required(), &["x"]); diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index 15e2463f..435b1456 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -512,7 +512,7 @@ fn convert_range_element(elem: &crate::plot::ArrayElement, aesthetic: &str) -> V // Size: convert radius (points) to area (pixels²) "size" => json!(n * n * POINTS_TO_AREA), // Linewidth: convert points to pixels - "linewidth" => json!(n * POINTS_TO_PIXELS), + "linewidth" | "fontsize" => json!(n * POINTS_TO_PIXELS), // Other aesthetics: pass through unchanged _ => json!(n), } @@ -885,7 +885,7 @@ fn build_literal_encoding(aesthetic: &str, lit: &ParameterValue) -> Result json!(n * n * POINTS_TO_AREA), // Linewidth: points → pixels - "linewidth" => json!(n * POINTS_TO_PIXELS), + "linewidth" | "fontsize" => json!(n * POINTS_TO_PIXELS), _ => json!(n), } } diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index f4a9a26b..6292c6f4 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -296,20 
+296,30 @@ impl TextRenderer { } // Calculate run lengths - let run_lengths: Vec = change_indices.iter().enumerate().map(|(i, &start)| { - let end = change_indices.get(i + 1).copied().unwrap_or(nrows); - end - start - }).collect(); + let run_lengths: Vec = change_indices + .iter() + .enumerate() + .map(|(i, &start)| { + let end = change_indices.get(i + 1).copied().unwrap_or(nrows); + end - start + }) + .collect(); // Extract rows at change indices (only font columns) - let indices_ca = UInt32Chunked::from_vec("indices".into(), change_indices.iter().map(|&i| i as u32).collect()); + let indices_ca = UInt32Chunked::from_vec( + "indices".into(), + change_indices.iter().map(|&i| i as u32).collect(), + ); let font_aesthetics = ["family", "fontface", "hjust", "vjust", "angle"]; let mut result_cols = Vec::new(); for aesthetic in font_aesthetics { if let Some(col) = font_columns.get(aesthetic) { let taken = col.take(&indices_ca).map_err(|e| { - GgsqlError::InternalError(format!("Failed to take indices from {}: {}", aesthetic, e)) + GgsqlError::InternalError(format!( + "Failed to take indices from {}: {}", + aesthetic, e + )) })?; result_cols.push(taken); } @@ -426,10 +436,7 @@ impl TextRenderer { /// Convert angle to Vega-Lite angle value (degrees) /// Prefers literal over column value /// Normalizes angles to [0, 360) range - fn convert_angle( - literal: Option<&ParameterValue>, - column_value: Option, - ) -> Option { + fn convert_angle(literal: Option<&ParameterValue>, column_value: Option) -> Option { // First select which value to use (prefer literal) let value = if let Some(ParameterValue::Number(n)) = literal { *n @@ -488,10 +495,9 @@ impl TextRenderer { }; // Convert and apply font properties - if let Some(family_val) = Self::convert_family( - layer.get_literal("family"), - get_str("family").as_deref(), - ) { + if let Some(family_val) = + Self::convert_family(layer.get_literal("family"), get_str("family").as_deref()) + { mark_obj.insert("font".to_string(), 
family_val); } @@ -506,24 +512,19 @@ impl TextRenderer { mark_obj.insert("fontStyle".to_string(), style); } - if let Some(hjust_val) = Self::convert_hjust( - layer.get_literal("hjust"), - get_str("hjust").as_deref(), - ) { + if let Some(hjust_val) = + Self::convert_hjust(layer.get_literal("hjust"), get_str("hjust").as_deref()) + { mark_obj.insert("align".to_string(), hjust_val); } - if let Some(vjust_val) = Self::convert_vjust( - layer.get_literal("vjust"), - get_str("vjust").as_deref(), - ) { + if let Some(vjust_val) = + Self::convert_vjust(layer.get_literal("vjust"), get_str("vjust").as_deref()) + { mark_obj.insert("baseline".to_string(), vjust_val); } - if let Some(angle_val) = Self::convert_angle( - layer.get_literal("angle"), - get_f64("angle"), - ) { + if let Some(angle_val) = Self::convert_angle(layer.get_literal("angle"), get_f64("angle")) { mark_obj.insert("angle".to_string(), angle_val); } @@ -626,7 +627,6 @@ impl GeomRenderer for TextRenderer { let mut position = 0; for (run_idx, &length) in run_lengths.iter().enumerate() { - let suffix = format!("_font_{}", run_idx); // Slice the contiguous run from the DataFrame (more efficient than boolean masking) From 504f6318df101e40a7381b8ef84cf45e9d9beb83 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 25 Feb 2026 14:16:40 +0100 Subject: [PATCH 25/29] delenda est --- src/writer/vegalite/layer.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 6292c6f4..6a560df5 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -9,6 +9,7 @@ use crate::plot::layer::geom::GeomType; use crate::plot::ParameterValue; +use crate::writer::vegalite::POINTS_TO_PIXELS; use crate::{naming, AestheticValue, DataFrame, Geom, GgsqlError, Layer, Result}; use polars::prelude::ChunkCompareEq; use serde_json::{json, Map, Value}; @@ -568,10 +569,10 @@ impl TextRenderer { if let Some(mark_map) = base_mark.as_object_mut() 
{ // Extract nudge parameters (nudge_x → xOffset, nudge_y → yOffset) if let Some(ParameterValue::Number(x_offset)) = layer.parameters.get("nudge_x") { - mark_map.insert("xOffset".to_string(), json!(x_offset)); + mark_map.insert("xOffset".to_string(), json!(x_offset * POINTS_TO_PIXELS)); } if let Some(ParameterValue::Number(y_offset)) = layer.parameters.get("nudge_y") { - mark_map.insert("yOffset".to_string(), json!(y_offset)); + mark_map.insert("yOffset".to_string(), json!(-y_offset * POINTS_TO_PIXELS)); } } From 862b6baea483650002db5855629d8473a8bc1d6c Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 25 Feb 2026 14:47:14 +0100 Subject: [PATCH 26/29] docs --- doc/syntax/index.qmd | 1 + doc/syntax/layer/text.qmd | 132 +++++++++++++++++++++++++++++++++++ src/writer/vegalite/layer.rs | 8 +-- src/writer/vegalite/mod.rs | 6 +- 4 files changed, 140 insertions(+), 7 deletions(-) create mode 100644 doc/syntax/layer/text.qmd diff --git a/doc/syntax/index.qmd b/doc/syntax/index.qmd index cbcddd26..4414c6f1 100644 --- a/doc/syntax/index.qmd +++ b/doc/syntax/index.qmd @@ -21,6 +21,7 @@ There are many different layers to choose from when visualising your data. Some - [`area`](layer/area.qmd) is used to display series as an area chart. - [`ribbon`](layer/ribbon.qmd) is used to display series extrema. - [`polygon`](layer/polygon.qmd) is used to display arbitrary shapes as polygons. +- [`text`](layer/text.qmd) is used to render datapoints as text. 
- [`bar`](layer/bar.qmd) creates a bar chart, optionally calculating y from the number of records in each bar - [`density`](layer/density.qmd) creates univariate kernel density estimates, showing the distribution of a variable - [`violin`](layer/violin.qmd) displays a rotated kernel density estimate diff --git a/doc/syntax/layer/text.qmd b/doc/syntax/layer/text.qmd new file mode 100644 index 00000000..6158ad72 --- /dev/null +++ b/doc/syntax/layer/text.qmd @@ -0,0 +1,132 @@ +--- +title: "Text" +--- + +> Layers are declared with the [`DRAW` clause](../clause/draw.qmd). Read the documentation for this clause for a thorough description of how to use it. + +The text layer displays rows in the data as text. It can be used as a visualisation itself, or used to annotate a different layer. + +## Aesthetics +The following aesthetics are recognised by the text layer. + +### Required +* `x` Position on the x-axis. +* `y` Position on the y-axis. +* `label` The text to display. + +### Optional +* `stroke` The colour at the contour lines of glyphs. Typically kept blank. +* `fill` The colour of the glyphs. +* `colour` Shorthand for setting `stroke` and `fill` simultaneously. +* `opacity` The opacity of the fill colour. +* `family` The typeface to style the lettering. +* `fontsize` The size of the text in points. +* `fontface` Font style, can be one of `'bold'`, `'italic'` or `'bold.italic'`. +* `hjust` Horizontal justification. Can be a numeric value between 0-1 or one of `"left"`, `"right"` or `"centre"` (default). Interpretation of numeric values is writer-dependent. +* `vjust` Vertical justification. Can be a numeric value between 0-1 or one of `"top"`, `"bottom"` or `"middle"` (default). Interpretation of numeric values is writer-dependent. +* `angle` Rotation of the text in degrees. + +## Settings +* `nudge_x` Horizontal offset expressed in absolute points. +* `nudge_y` Vertical offset expressed in absolute points. +* `format` Formatting specifier, see explanation below.
+ +### Format + +The `format` setting can take a string that will be used in formatting the `label` aesthetic. +The basic syntax for this is that the `label` value will be inserted into any place where `{}` appears. +This means that e.g. `SETTING format => '{} species'` will result in the label "adelie species" for a row where the `label` value is "adelie". +Besides simply inserting the value as-is, it is also possible to apply a formatter to `label` before insertion by naming a formatter inside the curly braces prefixed with `:`. +Known formatters are: + +* `{:Title}` will title-case the value (make the first letter in each word upper case) before insertion, e.g. `SETTING format => '{:Title} species'` will become "Adelie species" for the "adelie" label. +* `{:UPPER}` will make the value upper-case, e.g. `SETTING format => '{:UPPER} species'` will become "ADELIE species" for the "adelie" label. +* `{:lower}` works much like `{:UPPER}` but changes the value to lower-case instead. +* `{:time ...}` will format a date/datetime/time value according to the format defined afterwards. The formatting follows strftime format using the Rust chrono library. You can see an overview of the supported syntax at the [chrono docs](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). The basic usage is `SETTING format => '{:time %B %Y}'` which would format a value at 2025-07-04 as "July 2025". +* `{:num ...}` will format a numeric value according to the format defined afterwards. The format follows the printf format using the Rust sprintf library. The syntax is `%[flags][width][.precision]type` with the following meaning: + - `flags`: One or more modifiers: + * `-`: Left-justify + * `+`: Force sign for positive numbers + * ` `: (space) Space before positive numbers + * `0`: Zero-pad + * `#`: Alternate form (`0x` prefix for hex, etc) + - `width`: The minimum width of characters to render.
Depending on the `flags` the string will be padded to be at least this width + - `precision`: The maximum precision of the number. For `%g`/`%G` it is the total number of digits whereas for the rest it is the number of digits to the right of the decimal point + - `type`: How to present the number. One of: + * `d`/`i`: Signed decimal integers + * `u`: Unsigned decimal integers + * `f`/`F`: Decimal floating point + * `e`/`E`: Scientific notation + * `g`/`G`: Shortest form of `e` and `f` + * `o`: Unsigned octal + * `x`/`X`: Unsigned hexadecimal + +## Data transformation +The text layer does not transform its data but passes it through unchanged. + +## Examples + +Standard drawing of data points as labels. + +```{ggsql} +VISUALISE bill_len AS x, bill_dep AS y FROM ggsql:penguins +DRAW text MAPPING island AS label +``` + +You can use the `format` setting to tweak the display of the label. + +```{ggsql} +VISUALISE bill_len AS x, bill_dep AS y FROM ggsql:penguins +DRAW text + MAPPING island AS label + SETTING format => '{:UPPER}' +``` + +Setting font properties. Colours are typically mapped to the fill. + +```{ggsql} +VISUALISE bill_len AS x, bill_dep AS y FROM ggsql:penguins +DRAW text + MAPPING + island AS label, + species AS fill, + flipper_len AS fontsize + SETTING + opacity => 0.8, + fontface => 'bold', + family => 'Times New Roman' + SCALE fontsize TO [6, 20] +``` + +The 'stroke' aesthetic is applied to the outline of the text. + +```{ggsql} +SELECT 1 as x, 1 as y +VISUALISE x, y, 'My Label' AS label +DRAW text + SETTING fontsize => 30, stroke => 'red' +``` + +Labelling precomputed bars with the data value. + +```{ggsql} +SELECT island, COUNT(*) AS n FROM ggsql:penguins GROUP BY island +VISUALISE island AS x, n AS y + DRAW bar + DRAW text + MAPPING n AS label + SETTING vjust => 'top', nudge_y => -11, fill => 'white' +``` + +If you label bars at the extreme end, you may need to expand the scale to accommodate the labels.
+ +```{ggsql} +SELECT island, COUNT(*) AS n FROM ggsql:penguins GROUP BY island +VISUALISE island AS x, n AS y + DRAW bar + DRAW text + MAPPING n AS label + SETTING vjust => 'bottom', nudge_y => 11 + SCALE y FROM [0, 200] +``` + diff --git a/src/writer/vegalite/layer.rs b/src/writer/vegalite/layer.rs index 6a560df5..bc6b5b18 100644 --- a/src/writer/vegalite/layer.rs +++ b/src/writer/vegalite/layer.rs @@ -1786,8 +1786,8 @@ mod tests { ); assert_eq!( mark["xOffset"].as_f64().unwrap(), - 5.0, - "xOffset should be 5" + 5.0 * POINTS_TO_PIXELS, + "xOffset should be 5 * POINTS_TO_PIXELS" ); assert!( @@ -1796,8 +1796,8 @@ mod tests { ); assert_eq!( mark["yOffset"].as_f64().unwrap(), - -10.0, - "yOffset should be -10" + 10.0 * POINTS_TO_PIXELS, + "yOffset should be 10 * POINTS_TO_PIXELS (negated from nudge_y = -10)" ); } } diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs index 9ef976ee..b4b1c674 100644 --- a/src/writer/vegalite/mod.rs +++ b/src/writer/vegalite/mod.rs @@ -1311,9 +1311,9 @@ mod tests { assert!(scale_range.is_array(), "Scale should have range array"); let range = scale_range.as_array().unwrap(); assert_eq!(range.len(), 2); - // Should be 10 and 20, NOT ~31 and ~126 (which would be area-converted) - assert_eq!(range[0].as_f64().unwrap(), 10.0); - assert_eq!(range[1].as_f64().unwrap(), 20.0); + // Should be 10 and 20 converted to pixels, NOT ~31 and ~126 (which would be area-converted) + assert_eq!(range[0].as_f64().unwrap(), 10.0 * POINTS_TO_PIXELS); + assert_eq!(range[1].as_f64().unwrap(), 20.0 * POINTS_TO_PIXELS); } #[test] From 05d6bab097df207f631e7452418b95859b926a9d Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Mar 2026 15:19:11 +0100 Subject: [PATCH 27/29] fix mismerged test --- src/plot/main.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/plot/main.rs b/src/plot/main.rs index 02147241..7adb1a2c 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -493,7 +493,6 @@ mod tests { let text = 
Geom::text().aesthetics(); assert!(text.is_supported("label")); assert!(text.is_supported("family")); - assert_eq!(text.required(), &["x", "y", "label"]); assert_eq!(text.required(), &["pos1", "pos2", "label"]); // Statistical geoms only require pos1 From f1fd9172ba0e38fc8f6d4154308a4331d0b4227d Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Mar 2026 15:56:39 +0100 Subject: [PATCH 28/29] fix another test expectation --- src/writer/vegalite/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs index 09187869..38493914 100644 --- a/src/writer/vegalite/mod.rs +++ b/src/writer/vegalite/mod.rs @@ -1353,11 +1353,11 @@ mod tests { let mut spec = Plot::new(); let layer = Layer::new(Geom::text()) .with_aesthetic( - "x".to_string(), + "pos1".to_string(), AestheticValue::standard_column("x".to_string()), ) .with_aesthetic( - "y".to_string(), + "pos2".to_string(), AestheticValue::standard_column("y".to_string()), ) .with_aesthetic( From e8be14e4ad9516cb3cbbeea725de3080fc586d8a Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 4 Mar 2026 15:58:35 +0100 Subject: [PATCH 29/29] finally do something about this darn test that keeps mucking up test results on my machine --- src/reader/duckdb.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index ca53d018..e8e2aebc 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -640,6 +640,7 @@ mod tests { } #[test] + #[cfg_attr(target_os = "windows", ignore = "DuckDB crashes on Windows with invalid SQL")] fn test_invalid_sql() { let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); let result = reader.execute_sql("INVALID SQL SYNTAX");