From 65d92d4238f96830d649cf79950bd72b863b0e05 Mon Sep 17 00:00:00 2001 From: hyperpolymath Date: Sun, 17 May 2026 06:19:47 +0100 Subject: [PATCH] feat(codegen): split sidecar DDL by dialect; reject json sidecar (#45) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit generate_sidecar_schema now takes a SqlDialect and dispatches to new overlay::sqlite / overlay::postgres modules. The portable table bodies are shared via assemble(); the only dialect-divergent fragment — the metadata upsert — is per-module: SQLite INSERT OR IGNORE vs PostgreSQL INSERT … ON CONFLICT DO NOTHING. The SQLite-only datetime('now') is replaced by portable CURRENT_TIMESTAMP. SqlDialect::from_storage maps [sidecar].storage: sqlite→Sqlite, postgres/postgresql→Postgres, and rejects "json" (previously it silently emitted SQLite DDL for a JSON store) with a pointer to the split-out tracking issue #112. main.rs derives the dialect from the manifest; SidecarConfig docs updated. 6 existing overlay tests retained (now dialect-explicit) + 5 new: sqlite seed/timestamp, postgres ON CONFLICT, shared bodies, empty schema, storage→dialect mapping. Suite: 112 lib + 9 integration green. Closes #45. Co-Authored-By: Claude Opus 4.7 --- src/codegen/overlay.rs | 291 ++++++++++++++++++++++++++++++++------ src/main.rs | 8 +- src/manifest/mod.rs | 6 +- tests/integration_test.rs | 4 +- 4 files changed, 263 insertions(+), 46 deletions(-) diff --git a/src/codegen/overlay.rs b/src/codegen/overlay.rs index b14d771..db0a83e 100644 --- a/src/codegen/overlay.rs +++ b/src/codegen/overlay.rs @@ -63,21 +63,62 @@ fn must_validate_identifier(name: &str) -> &str { } } +// --------------------------------------------------------------------------- +// SQL dialect (V-L2-F1, #45) +// --------------------------------------------------------------------------- + +/// The SQL dialect the sidecar DDL is emitted for. Selected from the +/// manifest's `[sidecar].storage`. The table bodies are written in the +/// portable subset both engines accept (`CREATE TABLE IF NOT EXISTS`, +/// `CHECK`, partial unique indexes, `CURRENT_TIMESTAMP`); the only +/// genuinely dialect-divergent fragment is the metadata upsert +/// (`INSERT OR IGNORE` vs `INSERT … ON CONFLICT DO NOTHING`), which lives +/// in the [`sqlite`] / [`postgres`] modules. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum SqlDialect { + Sqlite, + Postgres, +} + +impl SqlDialect { + /// Map a `[sidecar].storage` value to a dialect. `sqlite` → + /// [`SqlDialect::Sqlite`]; `postgres`/`postgresql` → + /// [`SqlDialect::Postgres`]. `json` and unknown values are rejected + /// (the previous behaviour silently emitted SQLite DDL regardless, + /// V-L2-F1). The JSON store is tracked separately by #112. + pub fn from_storage(storage: &str) -> anyhow::Result { + match storage.to_lowercase().as_str() { + "sqlite" => Ok(SqlDialect::Sqlite), + "postgres" | "postgresql" => Ok(SqlDialect::Postgres), + "json" => anyhow::bail!( + "[sidecar].storage = \"json\" is not implemented (it previously \ + emitted SQLite DDL silently). Use \"sqlite\". The JSON sidecar \ + store is tracked by hyperpolymath/verisimiser#112." + ), + other => anyhow::bail!( + "unknown [sidecar].storage {other:?}; supported: \"sqlite\" \ + (\"postgres\" for a PostgreSQL sidecar; \"json\" is #112)." + ), + } + } +} + // --------------------------------------------------------------------------- // Overlay generation // --------------------------------------------------------------------------- -/// Generate the complete sidecar schema DDL for all enabled octad dimensions. +/// Generate the complete sidecar schema DDL for all enabled octad +/// dimensions, in the requested SQL `dialect`. /// -/// The generated SQL is SQLite-compatible (the default sidecar backend). /// Each table is created with `IF NOT EXISTS` so the schema can be applied -/// idempotently. +/// idempotently. Dispatches to [`sqlite::generate`] / [`postgres::generate`]. /// /// # Arguments /// * `schema` - The parsed schema of the target database (used to reference /// table names in provenance and temporal tracking). /// * `octad` - The octad configuration from the manifest, controlling which /// dimension tables to generate. +/// * `dialect` - The sidecar SQL dialect (see [`SqlDialect::from_storage`]). /// /// # Returns /// `Ok(String)` with the complete DDL on success. `Err` if any table or @@ -87,6 +128,21 @@ fn must_validate_identifier(name: &str) -> &str { pub fn generate_sidecar_schema( schema: &ParsedSchema, octad: &OctadConfig, + dialect: SqlDialect, +) -> anyhow::Result { + match dialect { + SqlDialect::Sqlite => sqlite::generate(schema, octad), + SqlDialect::Postgres => postgres::generate(schema, octad), + } +} + +/// Shared schema body: validate identifiers, then assemble every enabled +/// octad table. The metadata *seed* (the only dialect-divergent fragment) +/// is supplied by the caller via `seed`. +fn assemble( + schema: &ParsedSchema, + octad: &OctadConfig, + seed: impl Fn(&ParsedSchema) -> String, ) -> anyhow::Result { use crate::codegen::ident::validate_identifier; @@ -105,7 +161,9 @@ pub fn generate_sidecar_schema( ddl.push_str("-- Do not edit manually; regenerate with `verisimiser init`.\n\n"); // Metadata table: tracks which target tables are being augmented. - ddl.push_str(&generate_metadata_table(schema)); + // The CREATE is portable; the seed upsert is dialect-specific. + ddl.push_str(&metadata_table_ddl()); + ddl.push_str(&seed(schema)); if octad.enable_provenance { ddl.push_str(&generate_provenance_table()); @@ -130,52 +188,103 @@ pub fn generate_sidecar_schema( Ok(ddl) } -/// Generate the metadata table that tracks which target tables are augmented. +/// The metadata table CREATE — portable across SQLite and PostgreSQL. /// /// This table is always created regardless of octad configuration, because -/// the Data and Metadata dimensions are always active. -fn generate_metadata_table(schema: &ParsedSchema) -> String { - let mut ddl = String::new(); +/// the Data and Metadata dimensions are always active. The per-table seed +/// rows are dialect-specific and emitted by [`sqlite`] / [`postgres`]. +fn metadata_table_ddl() -> String { + "-- Metadata: tracks augmented target tables\n\ + CREATE TABLE IF NOT EXISTS verisimdb_metadata (\n\ + \x20 table_name TEXT PRIMARY KEY,\n\ + \x20 column_count INTEGER NOT NULL,\n\ + \x20 pk_columns TEXT NOT NULL, -- comma-separated list of PK column names\n\ + \x20 discovered_at TEXT NOT NULL -- ISO 8601 timestamp\n\ + );\n\n" + .to_string() +} - ddl.push_str("-- Metadata: tracks augmented target tables\n"); - ddl.push_str( - "CREATE TABLE IF NOT EXISTS verisimdb_metadata (\n\ - \x20 table_name TEXT PRIMARY KEY,\n\ - \x20 column_count INTEGER NOT NULL,\n\ - \x20 pk_columns TEXT NOT NULL, -- comma-separated list of PK column names\n\ - \x20 discovered_at TEXT NOT NULL -- ISO 8601 timestamp\n\ - );\n\n", - ); - - // Generate INSERT statements for each discovered table. - // - // V-L2-G1: every identifier flowing into the SQL string here is - // validated. Anything that wouldn't match `^[A-Za-z_][A-Za-z0-9_]*$` - // is rejected at codegen time rather than allowed to land in DDL - // (where it would be an injection vector). - if !schema.tables.is_empty() { - ddl.push_str("-- Seed metadata from parsed schema\n"); - for table in &schema.tables { - let table_name = must_validate_identifier(&table.name); - let pk_cols: Vec<&str> = table +/// Per-table seed values, shared by both dialects. Returns +/// `(validated_table_name, column_count, validated_pk_csv)`. +/// +/// V-L2-G1: every identifier flowing into the SQL string here is +/// validated. Anything that wouldn't match `^[A-Za-z_][A-Za-z0-9_]*$` +/// is rejected at codegen time rather than allowed to land in DDL +/// (where it would be an injection vector). +fn metadata_rows(schema: &ParsedSchema) -> Vec<(&str, usize, String)> { + schema + .tables + .iter() + .map(|table| { + let name = must_validate_identifier(&table.name); + let pk_csv = table .columns .iter() .filter(|c| c.is_primary_key) .map(|c| must_validate_identifier(c.name.as_str())) - .collect(); - let pk_str = pk_cols.join(","); + .collect::>() + .join(","); + (name, table.columns.len(), pk_csv) + }) + .collect() +} + +/// SQLite-specific sidecar DDL emission (V-L2-F1, #45). +pub mod sqlite { + use super::*; + + /// SQLite metadata seed: `INSERT OR IGNORE` + portable + /// `CURRENT_TIMESTAMP` (was the SQLite-only `datetime('now')`). + pub(super) fn metadata_seed(schema: &ParsedSchema) -> String { + if schema.tables.is_empty() { + return String::new(); + } + let mut ddl = String::from("-- Seed metadata from parsed schema (SQLite)\n"); + for (name, ncols, pk_csv) in metadata_rows(schema) { ddl.push_str(&format!( "INSERT OR IGNORE INTO verisimdb_metadata (table_name, column_count, pk_columns, discovered_at)\n\ - \x20 VALUES ('{}', {}, '{}', datetime('now'));\n", - table_name, - table.columns.len(), - pk_str, + \x20 VALUES ('{}', {}, '{}', CURRENT_TIMESTAMP);\n", + name, ncols, pk_csv, + )); + } + ddl.push('\n'); + ddl + } + + /// Generate the full SQLite sidecar schema. + pub fn generate(schema: &ParsedSchema, octad: &OctadConfig) -> anyhow::Result { + assemble(schema, octad, metadata_seed) + } +} + +/// PostgreSQL-specific sidecar DDL emission (V-L2-F1, #45). +pub mod postgres { + use super::*; + + /// PostgreSQL metadata seed: `INSERT … ON CONFLICT DO NOTHING` + /// (SQLite's `INSERT OR IGNORE` is not valid PostgreSQL) + portable + /// `CURRENT_TIMESTAMP`. + pub(super) fn metadata_seed(schema: &ParsedSchema) -> String { + if schema.tables.is_empty() { + return String::new(); + } + let mut ddl = String::from("-- Seed metadata from parsed schema (PostgreSQL)\n"); + for (name, ncols, pk_csv) in metadata_rows(schema) { + ddl.push_str(&format!( + "INSERT INTO verisimdb_metadata (table_name, column_count, pk_columns, discovered_at)\n\ + \x20 VALUES ('{}', {}, '{}', CURRENT_TIMESTAMP)\n\ + \x20 ON CONFLICT (table_name) DO NOTHING;\n", + name, ncols, pk_csv, )); } ddl.push('\n'); + ddl } - ddl + /// Generate the full PostgreSQL sidecar schema. + pub fn generate(schema: &ParsedSchema, octad: &OctadConfig) -> anyhow::Result { + assemble(schema, octad, metadata_seed) + } } /// Generate the provenance log table for the Provenance dimension. @@ -380,7 +489,8 @@ mod tests { enable_constraints: true, enable_simulation: true, }; - let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate"); + let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite) + .expect("test schema must validate"); assert!(ddl.contains("verisimdb_metadata")); assert!(ddl.contains("verisimdb_provenance_log")); @@ -404,7 +514,8 @@ mod tests { enable_constraints: true, enable_simulation: true, }; - let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate"); + let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite) + .expect("test schema must validate"); // Self-referencing FK on parent_branch. assert!( @@ -447,7 +558,8 @@ mod tests { enable_constraints: false, enable_simulation: false, }; - let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate"); + let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite) + .expect("test schema must validate"); assert!(ddl.contains("verisimdb_temporal_versions")); assert!( ddl.contains("CREATE UNIQUE INDEX IF NOT EXISTS ux_temporal_current"), @@ -478,7 +590,8 @@ mod tests { enable_constraints: false, enable_simulation: false, }; - let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate"); + let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite) + .expect("test schema must validate"); assert!(ddl.contains("verisimdb_lineage_graph")); // The exact CHECK clause must be present in the emitted DDL. assert!( @@ -500,7 +613,8 @@ mod tests { enable_constraints: false, enable_simulation: false, }; - let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate"); + let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite) + .expect("test schema must validate"); // Metadata is always generated. assert!(ddl.contains("verisimdb_metadata")); @@ -516,7 +630,8 @@ mod tests { fn test_metadata_seeds_table_info() { let schema = test_schema(); let octad = OctadConfig::default(); - let ddl = generate_sidecar_schema(&schema, &octad).expect("test schema must validate"); + let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite) + .expect("test schema must validate"); assert!(ddl.contains("INSERT OR IGNORE INTO verisimdb_metadata")); assert!(ddl.contains("'posts'")); @@ -642,4 +757,96 @@ mod tests { ); } } + + // --- #45 acceptance: per-dialect DDL + storage mapping --- + + #[test] + fn test_sqlite_dialect_seed_and_portable_timestamp() { + let schema = test_schema(); + let octad = OctadConfig::default(); + let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite) + .expect("sqlite ddl"); + assert!(ddl.contains("INSERT OR IGNORE INTO verisimdb_metadata")); + assert!( + ddl.contains("CURRENT_TIMESTAMP"), + "portable timestamp must replace datetime(now)" + ); + assert!( + !ddl.contains("datetime('now')"), + "the SQLite-only datetime('now') must be gone (V-L2-F1)" + ); + assert!(ddl.contains("'posts'") && ddl.contains("verisimdb_provenance_log")); + } + + #[test] + fn test_postgres_dialect_uses_on_conflict_not_or_ignore() { + let schema = test_schema(); + let octad = OctadConfig::default(); + let ddl = generate_sidecar_schema(&schema, &octad, SqlDialect::Postgres) + .expect("postgres ddl"); + assert!( + ddl.contains("ON CONFLICT (table_name) DO NOTHING"), + "postgres metadata upsert must use ON CONFLICT" + ); + assert!( + !ddl.contains("INSERT OR IGNORE"), + "INSERT OR IGNORE is not valid PostgreSQL" + ); + assert!(ddl.contains("CURRENT_TIMESTAMP") && !ddl.contains("datetime('now')")); + assert!(ddl.contains("verisimdb_metadata") && ddl.contains("'posts'")); + } + + #[test] + fn test_both_dialects_share_the_octad_table_bodies() { + let schema = test_schema(); + let octad = OctadConfig::default(); + let s = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite).unwrap(); + let p = generate_sidecar_schema(&schema, &octad, SqlDialect::Postgres).unwrap(); + for table in [ + "verisimdb_provenance_log", + "verisimdb_lineage_graph", + "verisimdb_temporal_versions", + "verisimdb_access_policies", + ] { + assert!(s.contains(table), "sqlite missing {table}"); + assert!(p.contains(table), "postgres missing {table}"); + } + } + + #[test] + fn test_empty_schema_emits_no_seed_in_either_dialect() { + let schema = ParsedSchema { + tables: vec![], + source: None, + }; + let octad = OctadConfig::default(); + let s = generate_sidecar_schema(&schema, &octad, SqlDialect::Sqlite).unwrap(); + let p = generate_sidecar_schema(&schema, &octad, SqlDialect::Postgres).unwrap(); + assert!(!s.contains("INSERT OR IGNORE") && s.contains("verisimdb_metadata")); + assert!(!s.contains("Seed metadata from parsed schema")); + assert!(!p.contains("ON CONFLICT") && p.contains("verisimdb_metadata")); + assert!(!p.contains("Seed metadata from parsed schema")); + } + + #[test] + fn test_storage_to_dialect_mapping() { + assert_eq!( + SqlDialect::from_storage("sqlite").unwrap(), + SqlDialect::Sqlite + ); + assert_eq!( + SqlDialect::from_storage("postgres").unwrap(), + SqlDialect::Postgres + ); + assert_eq!( + SqlDialect::from_storage("PostgreSQL").unwrap(), + SqlDialect::Postgres + ); + let json_err = SqlDialect::from_storage("json").unwrap_err().to_string(); + assert!( + json_err.contains("not implemented") && json_err.contains("#112"), + "json must be rejected with the #112 pointer, got: {json_err}" + ); + assert!(SqlDialect::from_storage("mariadb").is_err()); + } } diff --git a/src/main.rs b/src/main.rs index 9733ca8..8d3bd99 100644 --- a/src/main.rs +++ b/src/main.rs @@ -170,10 +170,16 @@ fn main() -> Result<()> { // Create output directory. std::fs::create_dir_all(&output)?; + // The sidecar DDL dialect follows [sidecar].storage. This + // rejects `json` (tracked by #112) instead of silently + // emitting SQLite DDL for a non-SQLite store (V-L2-F1). + let dialect = codegen::overlay::SqlDialect::from_storage(&m.sidecar.storage)?; + // Generate sidecar overlay schema. Errors here surface invalid // table/column identifiers in the parsed schema before they // reach disk. - let overlay_ddl = codegen::overlay::generate_sidecar_schema(&schema, &m.octad)?; + let overlay_ddl = + codegen::overlay::generate_sidecar_schema(&schema, &m.octad, dialect)?; let overlay_path = format!("{}/sidecar_schema.sql", output); std::fs::write(&overlay_path, &overlay_ddl)?; println!("Generated sidecar schema: {}", overlay_path); diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index 444ae1c..9b286bc 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -279,7 +279,11 @@ mod octad_tests { /// temporal versions, and access policies. It never writes to your target database. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SidecarConfig { - /// Storage backend for the sidecar: "sqlite" (default) or "json". + /// Storage backend for the sidecar. `"sqlite"` (default) is the only + /// implemented store; `"postgres"`/`"postgresql"` selects the + /// PostgreSQL DDL dialect. `"json"` is **not implemented** and is + /// rejected at `generate` time — tracked by #112 (V-L2-F2). The + /// dialect mapping lives in `codegen::overlay::SqlDialect`. #[serde(default = "default_sidecar_storage")] pub storage: String, diff --git a/tests/integration_test.rs b/tests/integration_test.rs index d387f95..706f840 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -78,7 +78,7 @@ fn test_full_pipeline_blog_schema() { enable_constraints: true, enable_simulation: false, }; - let overlay_ddl = overlay::generate_sidecar_schema(&schema, &octad).expect("schema is valid"); + let overlay_ddl = overlay::generate_sidecar_schema(&schema, &octad, overlay::SqlDialect::Sqlite).expect("schema is valid"); // Verify all expected sidecar tables are present. assert!( @@ -487,7 +487,7 @@ path = ".verisim/test.db" // Generate overlay. let overlay_ddl = - overlay::generate_sidecar_schema(&schema, &manifest.octad).expect("schema is valid"); + overlay::generate_sidecar_schema(&schema, &manifest.octad, overlay::SqlDialect::Sqlite).expect("schema is valid"); assert!(overlay_ddl.contains("verisimdb_provenance_log")); assert!(overlay_ddl.contains("verisimdb_temporal_versions")); assert!(