Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ tracing-subscriber = { workspace = true, optional = true }
pyo3 = { workspace = true, optional = true }

[dev-dependencies]
jsonschema = "0.44"
proptest.workspace = true
ureq = "3"

[features]
default = ["duckdb", "sqlite", "vegalite", "ipc", "builtin-data"]
Expand Down
4 changes: 2 additions & 2 deletions src/writer/vegalite/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -848,8 +848,8 @@ fn build_column_encoding(
"type": field_type,
});

// For binned scales, add bin: "binned" for proper axis tick placement
if is_binned {
// bin: "binned" is only valid for positional channels in VL v6
if is_binned && !is_binned_legend {
encoding["bin"] = json!("binned");
}

Expand Down
182 changes: 166 additions & 16 deletions src/writer/vegalite/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,26 +340,30 @@ fn apply_faceting(

vl_spec["facet"] = facet_def;

// Move layer into spec (data reference stays at top level)
// Move layer + width/height into inner spec (FacetSpec disallows
// top-level width/height in VL v6)
let mut spec_inner = json!({});
if let Some(layer) = vl_spec.get("layer") {
spec_inner["layer"] = layer.clone();
}
if let Some(w) = vl_spec.get("width").cloned() {
spec_inner["width"] = w;
}
if let Some(h) = vl_spec.get("height").cloned() {
spec_inner["height"] = h;
}

vl_spec["spec"] = spec_inner;
vl_spec.as_object_mut().unwrap().remove("layer");
let obj = vl_spec.as_object_mut().unwrap();
obj.remove("layer");
obj.remove("width");
obj.remove("height");

// Apply scale resolution
apply_facet_scale_resolution(vl_spec, &facet.properties, coord_kind);

// Apply additional properties (columns for wrap)
apply_facet_properties(vl_spec, &facet.properties, true);
}
FacetLayout::Grid { row: _, column: _ } => {
let mut facet_spec = serde_json::Map::new();

// Row facet: use internal aesthetic column "facet1"
// Vega-Lite requires "row" key in the facet object
let row_aes_col = naming::aesthetic_column("facet1");
if facet_df.column(&row_aes_col).is_ok() {
let row_scale = scales.iter().find(|s| s.aesthetic == "facet1");
Expand Down Expand Up @@ -388,19 +392,25 @@ fn apply_faceting(

vl_spec["facet"] = Value::Object(facet_spec);

// Move layer into spec (data reference stays at top level)
// Move layer + width/height into inner spec (same as wrap above)
let mut spec_inner = json!({});
if let Some(layer) = vl_spec.get("layer") {
spec_inner["layer"] = layer.clone();
}
if let Some(w) = vl_spec.get("width").cloned() {
spec_inner["width"] = w;
}
if let Some(h) = vl_spec.get("height").cloned() {
spec_inner["height"] = h;
}

vl_spec["spec"] = spec_inner;
vl_spec.as_object_mut().unwrap().remove("layer");
let obj = vl_spec.as_object_mut().unwrap();
obj.remove("layer");
obj.remove("width");
obj.remove("height");

// Apply scale resolution
apply_facet_scale_resolution(vl_spec, &facet.properties, coord_kind);

// Apply additional properties (not columns for grid)
apply_facet_properties(vl_spec, &facet.properties, false);
}
}
Expand Down Expand Up @@ -847,19 +857,21 @@ fn apply_facet_properties(
}
}

/// Vega-Lite schema URL. Update this, the vendored schema file, and
/// the `include_str!` path in `VL_SCHEMA` when bumping versions.
const VEGALITE_SCHEMA: &str = "https://vega.github.io/schema/vega-lite/v6.json";

/// Vega-Lite JSON writer
///
/// Generates Vega-Lite v6 specifications from ggsql specs and data.
pub struct VegaLiteWriter {
/// Vega-Lite schema version
schema: String,
}

impl VegaLiteWriter {
/// Create a new Vega-Lite writer with default settings
pub fn new() -> Self {
Self {
schema: "https://vega.github.io/schema/vega-lite/v6.json".to_string(),
schema: VEGALITE_SCHEMA.to_string(),
}
}

Expand Down Expand Up @@ -1060,13 +1072,101 @@ mod tests {
use crate::plot::{Labels, Layer, ParameterValue};
use crate::Geom;
use polars::prelude::*;
use serde_json::Value;
use std::collections::HashMap;
use std::sync::LazyLock;

// Re-export test functions from submodules
use super::data::find_bin_for_value;
use super::encoding::infer_field_type;
use super::layer::geom_to_mark;

fn sanitize_def_name(s: &str) -> String {
s.chars()
.map(|c| match c {
'<' | '>' | '(' | ')' | '|' | '[' | ']' | '"' | ' ' | '{' | '}' => '_',
_ => c,
})
.collect()
}

fn rewrite_refs(val: &mut Value) {
match val {
Value::Object(obj) => {
if let Some(Value::String(s)) = obj.get_mut("$ref") {
if let Some(defname) = s.strip_prefix("#/definitions/") {
let sanitized = sanitize_def_name(defname);
if sanitized != defname {
*s = format!("#/definitions/{}", sanitized);
}
}
}
for v in obj.values_mut() {
rewrite_refs(v);
}
}
Value::Array(arr) => arr.iter_mut().for_each(rewrite_refs),
_ => {}
}
}

/// The VL v6 schema uses TypeScript-style generics in definition names (e.g.,
/// `MarkPropDef<(Gradient|string|null)>`) which are invalid in `$ref` URIs.
/// Rename those definitions and rewrite their `$ref` references.
fn sanitize_vegalite_schema(schema: &mut Value) {
let defs = match schema
.get_mut("definitions")
.and_then(|d| d.as_object_mut())
{
Some(d) => d,
None => return,
};

let substitutions: Vec<(String, String)> = defs
.keys()
.filter(|k| sanitize_def_name(k) != **k)
.map(|k| (k.clone(), sanitize_def_name(k)))
.collect();

if substitutions.is_empty() {
return;
}

for (orig, sanitized) in &substitutions {
if let Some(val) = defs.remove(orig.as_str()) {
defs.insert(sanitized.clone(), val);
}
}

rewrite_refs(schema);
}

static VL_SCHEMA: LazyLock<jsonschema::Validator> = LazyLock::new(|| {
let mut schema: Value =
serde_json::from_str(include_str!("schema/v6.json")).expect("invalid schema JSON");
sanitize_vegalite_schema(&mut schema);
jsonschema::options()
.with_draft(jsonschema::Draft::Draft7)
.should_validate_formats(false)
.build(&schema)
.expect("invalid JSON Schema")
});

fn assert_valid_vegalite(json_str: &str) {
let spec: Value = serde_json::from_str(json_str).expect("invalid JSON");
let errors: Vec<String> = VL_SCHEMA
.iter_errors(&spec)
.map(|e| format!(" - {} (at {})", e, e.instance_path()))
.collect();
if !errors.is_empty() {
panic!(
"Vega-Lite schema validation failed ({} errors):\n{}",
errors.len(),
errors.join("\n")
);
}
}

/// Helper to wrap a DataFrame in a data map for testing (uses layer 0 key)
fn wrap_data(df: DataFrame) -> HashMap<String, DataFrame> {
wrap_data_for_layers(df, 1)
Expand Down Expand Up @@ -1287,6 +1387,7 @@ mod tests {
// Generate Vega-Lite JSON
transform_spec(&mut spec);
let json_str = writer.write(&spec, &wrap_data(df)).unwrap();
assert_valid_vegalite(&json_str);
let vl_spec: Value = serde_json::from_str(&json_str).unwrap();

// Verify structure (uses layer array with inline data)
Expand Down Expand Up @@ -1332,6 +1433,7 @@ mod tests {

transform_spec(&mut spec);
let json_str = writer.write(&spec, &wrap_data(df)).unwrap();
assert_valid_vegalite(&json_str);
let vl_spec: Value = serde_json::from_str(&json_str).unwrap();

assert_eq!(vl_spec["title"], "My Chart");
Expand Down Expand Up @@ -1367,6 +1469,7 @@ mod tests {

transform_spec(&mut spec);
let json_str = writer.write(&spec, &wrap_data(df)).unwrap();
assert_valid_vegalite(&json_str);
let vl_spec: Value = serde_json::from_str(&json_str).unwrap();

assert_eq!(vl_spec["layer"][0]["encoding"]["color"]["value"], "red");
Expand Down Expand Up @@ -1497,6 +1600,7 @@ mod tests {

transform_spec(&mut spec);
let json_str = writer.write(&spec, &wrap_data_for_layers(df, 2)).unwrap();
assert_valid_vegalite(&json_str);
let vl_spec: Value = serde_json::from_str(&json_str).unwrap();

// Should have layer array with 2 layers
Expand Down Expand Up @@ -1606,6 +1710,7 @@ mod tests {

transform_spec(&mut spec);
let json_str = writer.write(&spec, &wrap_data(df)).unwrap();
assert_valid_vegalite(&json_str);
let vl_spec: Value = serde_json::from_str(&json_str).unwrap();

// Check that labelExpr contains VL's range-style format
Expand Down Expand Up @@ -1701,6 +1806,7 @@ mod tests {
let result = writer.write(&spec, &wrap_data(simple_df()));
assert!(result.is_ok());
let json_str = result.unwrap();
assert_valid_vegalite(&json_str);
let json: Value = serde_json::from_str(&json_str).unwrap();

// Single-layer spec uses layer array structure
Expand Down Expand Up @@ -1729,6 +1835,7 @@ mod tests {
let result = writer.write(&spec, &wrap_data(simple_df()));
assert!(result.is_ok());
let json_str = result.unwrap();
assert_valid_vegalite(&json_str);
let json: Value = serde_json::from_str(&json_str).unwrap();

let encoding = &json["layer"][0]["encoding"];
Expand Down Expand Up @@ -1772,6 +1879,7 @@ mod tests {
let result = writer.write(&spec, &wrap_data(df));
assert!(result.is_ok());
let json_str = result.unwrap();
assert_valid_vegalite(&json_str);
let json: Value = serde_json::from_str(&json_str).unwrap();

let encoding = &json["layer"][0]["encoding"];
Expand Down Expand Up @@ -1811,6 +1919,7 @@ mod tests {
let result = writer.write(&spec, &wrap_data(df));
assert!(result.is_ok());
let json_str = result.unwrap();
assert_valid_vegalite(&json_str);
let json: Value = serde_json::from_str(&json_str).unwrap();

// Point has linetype => Null, should not appear in encoding
Expand All @@ -1835,6 +1944,7 @@ mod tests {
let result = writer.write(&spec, &wrap_data(simple_df()));
assert!(result.is_ok());
let json_str = result.unwrap();
assert_valid_vegalite(&json_str);
let json: Value = serde_json::from_str(&json_str).unwrap();

let encoding = &json["layer"][0]["encoding"];
Expand All @@ -1854,6 +1964,7 @@ mod tests {
let result = writer.write(&spec, &wrap_data(simple_df()));
assert!(result.is_ok());
let json_str = result.unwrap();
assert_valid_vegalite(&json_str);
let json: Value = serde_json::from_str(&json_str).unwrap();

let encoding = &json["layer"][0]["encoding"];
Expand Down Expand Up @@ -2163,6 +2274,7 @@ mod tests {

transform_spec(&mut spec);
let json_str = writer.write(&spec, &wrap_data(df)).unwrap();
assert_valid_vegalite(&json_str);
let vl_spec: Value = serde_json::from_str(&json_str).unwrap();

// Verify resolve.scale is set to independent for both axes
Expand Down Expand Up @@ -2246,6 +2358,7 @@ mod tests {

transform_spec(&mut spec);
let json_str = writer.write(&spec, &wrap_data(df)).unwrap();
assert_valid_vegalite(&json_str);
let vl_spec: Value = serde_json::from_str(&json_str).unwrap();

// Verify only y scale is independent
Expand Down Expand Up @@ -2326,6 +2439,7 @@ mod tests {

transform_spec(&mut spec);
let json_str = writer.write(&spec, &wrap_data(df)).unwrap();
assert_valid_vegalite(&json_str);
let vl_spec: Value = serde_json::from_str(&json_str).unwrap();

// Verify NO resolve.scale (fixed scales don't need it)
Expand All @@ -2345,4 +2459,40 @@ mod tests {
"x encoding SHOULD have domain when using fixed scales"
);
}

#[test]
fn test_schema_validation_catches_invalid_spec() {
let writer = VegaLiteWriter::new();
let mut spec = build_spec(Geom::point());
let df = simple_df();
transform_spec(&mut spec);
let json_str = writer.write(&spec, &wrap_data(df)).unwrap();
assert_valid_vegalite(&json_str);

let invalid = r#"{"$schema": "https://vega.github.io/schema/vega-lite/v6.json", "mark": "not_a_mark"}"#;
let result = std::panic::catch_unwind(|| assert_valid_vegalite(invalid));
assert!(result.is_err(), "invalid spec should fail validation");
}

#[test]
fn test_vendored_schema_matches_upstream() {
let response = match ureq::get(VEGALITE_SCHEMA).call() {
Ok(r) => r,
Err(_) => {
eprintln!(
"Skipping vendored schema check: could not reach {VEGALITE_SCHEMA}"
);
return;
}
};
let body = response.into_body().read_to_string().unwrap();
let upstream: Value = serde_json::from_str(&body).unwrap();
let vendored: Value =
serde_json::from_str(include_str!("schema/v6.json")).expect("invalid schema JSON");
assert_eq!(
upstream, vendored,
"Vendored schema does not match upstream at {VEGALITE_SCHEMA}. \
Re-download with: curl -sL '{VEGALITE_SCHEMA}' > src/writer/vegalite/schema/v6.json",
);
}
}
Loading
Loading