From 6255027648c38451393ddb18cbff5cc76f57f21c Mon Sep 17 00:00:00 2001 From: Guillaume Date: Sat, 23 May 2026 22:04:28 +0200 Subject: [PATCH] Add `dump --format=json` for IR output `pgschema dump` previously only emitted SQL. The IR it builds internally is already fully JSON-tagged on the public fields, so this exposes it directly via a new `--format` flag (default `sql`, new `json`). Motivation: downstream tooling (LSPs, diff visualizers, lint engines, CI gates) wants to consume the model without re-parsing the SQL dump. The internal IR was already the natural source of truth; this is just publishing it. The JSON output is pretty-printed (2-space indent) and stable under round-trip marshal/unmarshal because encoding/json writes map keys in sorted order and ignores the IR's unexported sync.RWMutex fields. `--multi-file` is rejected with `--format=json` since the IR is a single document; unknown formats produce a clear error. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/dump/dump.go | 29 ++++++++++ cmd/dump/dump_integration_test.go | 91 +++++++++++++++++++++++++++++++ cmd/dump/dump_test.go | 77 ++++++++++++++++++++++++++ 3 files changed, 197 insertions(+) diff --git a/cmd/dump/dump.go b/cmd/dump/dump.go index 59fc167e..46ce9d98 100644 --- a/cmd/dump/dump.go +++ b/cmd/dump/dump.go @@ -1,6 +1,7 @@ package dump import ( + "encoding/json" "fmt" "os" @@ -11,6 +12,12 @@ import ( "github.com/spf13/cobra" ) +// Output format constants for the --format flag. 
+const ( + FormatSQL = "sql" + FormatJSON = "json" +) + var ( host string port int @@ -22,6 +29,7 @@ var ( file string noComments bool sslmode string + format string ) // DumpConfig holds configuration for dump execution @@ -36,6 +44,7 @@ type DumpConfig struct { File string NoComments bool SSLMode string + Format string // "sql" (default) or "json" } var DumpCmd = &cobra.Command{ @@ -58,6 +67,7 @@ func init() { DumpCmd.Flags().StringVar(&file, "file", "", "Output file path (required when --multi-file is used)") DumpCmd.Flags().BoolVar(&noComments, "no-comments", false, "Do not output object comment headers") DumpCmd.Flags().StringVar(&sslmode, "sslmode", "prefer", "SSL mode for database connection (disable, allow, prefer, require, verify-ca, verify-full) (env: PGSSLMODE)") + DumpCmd.Flags().StringVar(&format, "format", FormatSQL, "Output format: sql (default, schema dump as SQL) or json (IR as JSON for downstream tooling)") } // ExecuteDump executes the dump operation with the given configuration @@ -69,6 +79,16 @@ func ExecuteDump(config *DumpConfig) (string, error) { config.MultiFile = false } + switch config.Format { + case "", FormatSQL, FormatJSON: + default: + return "", fmt.Errorf("unsupported --format %q (expected %q or %q)", config.Format, FormatSQL, FormatJSON) + } + + if config.Format == FormatJSON && config.MultiFile { + return "", fmt.Errorf("--format=json is incompatible with --multi-file (the IR is a single document)") + } + // Load ignore configuration ignoreConfig, err := util.LoadIgnoreFileWithStructure() if err != nil { @@ -81,6 +101,14 @@ func ExecuteDump(config *DumpConfig) (string, error) { return "", fmt.Errorf("failed to get database schema: %w", err) } + if config.Format == FormatJSON { + out, err := json.MarshalIndent(schemaIR, "", " ") + if err != nil { + return "", fmt.Errorf("failed to marshal IR to JSON: %w", err) + } + return string(out) + "\n", nil + } + // Create an empty schema for comparison to generate a dump diff emptyIR := 
ir.NewIR() @@ -138,6 +166,7 @@ func runDump(cmd *cobra.Command, args []string) error { File: file, NoComments: noComments, SSLMode: finalSSLMode, + Format: format, } // Execute dump diff --git a/cmd/dump/dump_integration_test.go b/cmd/dump/dump_integration_test.go index 7a79041b..dffb5750 100644 --- a/cmd/dump/dump_integration_test.go +++ b/cmd/dump/dump_integration_test.go @@ -9,6 +9,7 @@ package dump import ( "context" + "encoding/json" "fmt" "os" "strings" @@ -610,3 +611,93 @@ func compareSchemaOutputs(t *testing.T, actualOutput, expectedOutput string, tes } } } + +// TestDumpCommand_FormatJSON exercises `dump --format=json` end-to-end: +// schema goes into a real database, the dump command serializes the IR +// to JSON, and that JSON is unmarshaled and asserted against the source. +func TestDumpCommand_FormatJSON(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + embeddedPG := testutil.SetupPostgres(t) + defer embeddedPG.Stop() + + conn, host, port, dbname, user, password := testutil.ConnectToPostgres(t, embeddedPG) + defer conn.Close() + + setupSQL := ` +CREATE TABLE users ( + id SERIAL PRIMARY KEY, + email TEXT NOT NULL UNIQUE, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE INDEX users_email_lower_idx ON users (lower(email)); +` + if _, err := conn.ExecContext(context.Background(), setupSQL); err != nil { + t.Fatalf("setup failed: %v", err) + } + + config := &DumpConfig{ + Host: host, + Port: port, + DB: dbname, + User: user, + Password: password, + Schema: "public", + Format: FormatJSON, + } + + output, err := ExecuteDump(config) + if err != nil { + t.Fatalf("ExecuteDump(json) failed: %v", err) + } + + trimmed := strings.TrimLeft(output, " \t\n") + if !strings.HasPrefix(trimmed, "{") { + preview := output + if len(preview) > 200 { + preview = preview[:200] + } + t.Fatalf("expected JSON object, got: %s", preview) + } + + var parsed ir.IR + if err := json.Unmarshal([]byte(output), &parsed); err != 
nil { + t.Fatalf("output is not valid JSON: %v\n%s", err, output) + } + + publicSchema, ok := parsed.Schemas["public"] + if !ok { + t.Fatalf("public schema missing from JSON output; got schemas: %v", keysOf(parsed.Schemas)) + } + usersTable, ok := publicSchema.Tables["users"] + if !ok { + t.Fatalf("users table missing from JSON output; got tables: %v", keysOf(publicSchema.Tables)) + } + if len(usersTable.Columns) != 3 { + t.Errorf("expected 3 columns on users, got %d: %+v", len(usersTable.Columns), usersTable.Columns) + } + if _, exists := usersTable.Indexes["users_email_lower_idx"]; !exists { + t.Errorf("expected expression index users_email_lower_idx in JSON output, got indexes: %v", keysOf(usersTable.Indexes)) + } + + // Round-trip: re-marshal the parsed IR and verify the second pass matches the first. + second, err := json.MarshalIndent(&parsed, "", " ") + if err != nil { + t.Fatalf("re-marshal failed: %v", err) + } + if strings.TrimRight(output, "\n") != strings.TrimRight(string(second), "\n") { + t.Error("JSON output is not stable under round-trip marshal/unmarshal") + } +} + +// keysOf returns the keys of a string-keyed map for diagnostic output. +func keysOf[V any](m map[string]V) []string { + ks := make([]string, 0, len(m)) + for k := range m { + ks = append(ks, k) + } + return ks +} diff --git a/cmd/dump/dump_test.go b/cmd/dump/dump_test.go index 8a1f1807..663da9f2 100644 --- a/cmd/dump/dump_test.go +++ b/cmd/dump/dump_test.go @@ -1,6 +1,7 @@ package dump import ( + "encoding/json" "os" "strings" "testing" @@ -12,6 +13,82 @@ import ( "github.com/spf13/cobra" ) +// TestIRJSONRoundTrip verifies that an in-memory IR survives a JSON +// marshal → unmarshal → marshal cycle byte-for-byte. This is the contract +// downstream consumers of `dump --format=json` depend on. 
+func TestIRJSONRoundTrip(t *testing.T) { + src := ir.NewIR() + src.Metadata.DatabaseVersion = "17.4" + + public := src.CreateSchema("public") + public.Owner = "postgres" + public.Tables["users"] = &ir.Table{ + Schema: "public", + Name: "users", + Type: ir.TableTypeBase, + Columns: []*ir.Column{ + {Name: "id", Position: 1, DataType: "integer", IsNullable: false}, + {Name: "email", Position: 2, DataType: "text", IsNullable: false}, + }, + Constraints: map[string]*ir.Constraint{}, + Indexes: map[string]*ir.Index{}, + Triggers: map[string]*ir.Trigger{}, + Policies: map[string]*ir.RLSPolicy{}, + } + + firstPass, err := json.MarshalIndent(src, "", " ") + if err != nil { + t.Fatalf("first marshal failed: %v", err) + } + + var rehydrated ir.IR + if err := json.Unmarshal(firstPass, &rehydrated); err != nil { + t.Fatalf("unmarshal failed: %v\nbytes:\n%s", err, firstPass) + } + + secondPass, err := json.MarshalIndent(&rehydrated, "", " ") + if err != nil { + t.Fatalf("second marshal failed: %v", err) + } + + if string(firstPass) != string(secondPass) { + t.Errorf("round-trip not stable.\nfirst:\n%s\n\nsecond:\n%s", firstPass, secondPass) + } +} + +// TestExecuteDump_FormatValidation verifies the new --format flag's input checks. 
+func TestExecuteDump_FormatValidation(t *testing.T) { + t.Run("unknown format rejected", func(t *testing.T) { + _, err := ExecuteDump(&DumpConfig{ + Host: "localhost", + Port: 5432, + DB: "irrelevant", + User: "irrelevant", + Schema: "public", + Format: "yaml", + }) + if err == nil || !strings.Contains(err.Error(), "unsupported --format") { + t.Errorf("expected unsupported-format error, got: %v", err) + } + }) + + t.Run("json + multi-file rejected", func(t *testing.T) { + _, err := ExecuteDump(&DumpConfig{ + Host: "localhost", + Port: 5432, + DB: "irrelevant", + User: "irrelevant", + Schema: "public", + Format: FormatJSON, + MultiFile: true, + File: "out.sql", + }) + if err == nil || !strings.Contains(err.Error(), "--multi-file") { + t.Errorf("expected json+multi-file incompatibility error, got: %v", err) + } + }) +} + func TestDumpCommand(t *testing.T) { // Test that the command is properly configured if DumpCmd.Use != "dump" {