From bb8f58cfbd0c0d5481face9a8ff748aa288cb40b Mon Sep 17 00:00:00 2001 From: "vytautas.karpavicius" Date: Mon, 18 May 2026 11:08:12 +0300 Subject: [PATCH] fix: preserve length modifier in varchar(n)[] array columns (#420) --- cmd/dump/dump_integration_test.go | 7 ++++ ir/normalize.go | 11 +++++-- ir/queries/queries.sql | 18 +++++----- ir/queries/queries.sql.go | 18 +++++----- .../manifest.json | 10 ++++++ .../pgdump.sql | 33 +++++++++++++++++++ .../pgschema.sql | 20 +++++++++++ .../raw.sql | 13 ++++++++ 8 files changed, 111 insertions(+), 19 deletions(-) create mode 100644 testdata/dump/issue_420_varchar_array_length_modifier/manifest.json create mode 100644 testdata/dump/issue_420_varchar_array_length_modifier/pgdump.sql create mode 100644 testdata/dump/issue_420_varchar_array_length_modifier/pgschema.sql create mode 100644 testdata/dump/issue_420_varchar_array_length_modifier/raw.sql diff --git a/cmd/dump/dump_integration_test.go b/cmd/dump/dump_integration_test.go index 7a79041b..a13c9b69 100644 --- a/cmd/dump/dump_integration_test.go +++ b/cmd/dump/dump_integration_test.go @@ -151,6 +151,13 @@ func TestDumpCommand_Issue191FunctionProcedureOverload(t *testing.T) { runExactMatchTest(t, "issue_191_function_procedure_overload") } +func TestDumpCommand_Issue420VarcharArrayLengthModifier(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + runExactMatchTest(t, "issue_420_varchar_array_length_modifier") +} + // Reproduces a bug where a column declared as `name` is dumped as `char[]`. // The inspector classifies any base type with pg_type.typelem <> 0 as an array, // but the `name` type has typelem = 18 (the OID of "char") despite not being an array. diff --git a/ir/normalize.go b/ir/normalize.go index 21c98e27..8afb20e2 100644 --- a/ir/normalize.go +++ b/ir/normalize.go @@ -1135,9 +1135,15 @@ func normalizePostgreSQLType(input string) string { // Handle direct type names typeName := input + // Decompose into base name and optional modifier+suffix (e.g., "bpchar(10)[]" -> "bpchar" + "(10)[]"). + // When there's no modifier, rest is empty and this is equivalent to a plain exact-match lookup. + baseName, rest := typeName, "" + if idx := strings.Index(typeName, "("); idx != -1 { + baseName, rest = typeName[:idx], typeName[idx:] + } // Check if we have a direct mapping - if normalized, exists := postgresTypeNormalization[typeName]; exists { - return normalized + if normalized, exists := postgresTypeNormalization[baseName]; exists { + return normalized + rest } // Remove pg_catalog prefix for unmapped types @@ -1480,4 +1486,3 @@ func findArrayClose(expr string, startIdx int) int { return -1 // Not found } - diff --git a/ir/queries/queries.sql b/ir/queries/queries.sql index fab04e86..062bc135 100644 --- a/ir/queries/queries.sql +++ b/ir/queries/queries.sql @@ -85,11 +85,12 @@ WITH column_base AS ( -- Array types: apply same schema qualification logic to element type -- Use typcategory = 'A' rather than typelem <> 0; the latter is true -- for non-array fixed-length types like name (typelem points to char). + -- Use format_type to preserve typmod for element types (e.g., varchar(128)[] for character varying(128)[]) CASE - WHEN en.nspname = 'pg_catalog' THEN et.typname || '[]' - WHEN en.nspname = c.table_schema THEN et.typname || '[]' - ELSE en.nspname || '.' || et.typname || '[]' - END + WHEN en.nspname = 'pg_catalog' THEN et.typname + WHEN en.nspname = c.table_schema THEN et.typname + ELSE en.nspname || '.' || et.typname + END || COALESCE(substring(format_type(a.atttypid, a.atttypmod) FROM '\([^)]*\)'), '') || '[]' WHEN dt.typtype = 'b' THEN -- Non-array base types: qualify if not in pg_catalog or table's schema -- Use format_type to preserve typmod for extension types (e.g., vector(384) for pgvector) @@ -203,11 +204,12 @@ WITH column_base AS ( -- Array types: apply same schema qualification logic to element type -- Use typcategory = 'A' rather than typelem <> 0; the latter is true -- for non-array fixed-length types like name (typelem points to char). + -- Use format_type to preserve typmod for element types (e.g., varchar(128)[] for character varying(128)[]) CASE - WHEN en.nspname = 'pg_catalog' THEN et.typname || '[]' - WHEN en.nspname = c.table_schema THEN et.typname || '[]' - ELSE en.nspname || '.' || et.typname || '[]' - END + WHEN en.nspname = 'pg_catalog' THEN et.typname + WHEN en.nspname = c.table_schema THEN et.typname + ELSE en.nspname || '.' || et.typname + END || COALESCE(substring(format_type(a.atttypid, a.atttypmod) FROM '\([^)]*\)'), '') || '[]' WHEN dt.typtype = 'b' THEN -- Non-array base types: qualify if not in pg_catalog or table's schema -- Use format_type to preserve typmod for extension types (e.g., vector(384) for pgvector) diff --git a/ir/queries/queries.sql.go b/ir/queries/queries.sql.go index cbca828f..0a5e4848 100644 --- a/ir/queries/queries.sql.go +++ b/ir/queries/queries.sql.go @@ -282,11 +282,12 @@ WITH column_base AS ( -- Array types: apply same schema qualification logic to element type -- Use typcategory = 'A' rather than typelem <> 0; the latter is true -- for non-array fixed-length types like name (typelem points to char). + -- Use format_type to preserve typmod for element types (e.g., varchar(128)[] for character varying(128)[]) CASE - WHEN en.nspname = 'pg_catalog' THEN et.typname || '[]' - WHEN en.nspname = c.table_schema THEN et.typname || '[]' - ELSE en.nspname || '.' || et.typname || '[]' - END + WHEN en.nspname = 'pg_catalog' THEN et.typname + WHEN en.nspname = c.table_schema THEN et.typname + ELSE en.nspname || '.' || et.typname + END || COALESCE(substring(format_type(a.atttypid, a.atttypmod) FROM '\([^)]*\)'), '') || '[]' WHEN dt.typtype = 'b' THEN -- Non-array base types: qualify if not in pg_catalog or table's schema -- Use format_type to preserve typmod for extension types (e.g., vector(384) for pgvector) @@ -472,11 +473,12 @@ WITH column_base AS ( -- Array types: apply same schema qualification logic to element type -- Use typcategory = 'A' rather than typelem <> 0; the latter is true -- for non-array fixed-length types like name (typelem points to char). + -- Use format_type to preserve typmod for element types (e.g., varchar(128)[] for character varying(128)[]) CASE - WHEN en.nspname = 'pg_catalog' THEN et.typname || '[]' - WHEN en.nspname = c.table_schema THEN et.typname || '[]' - ELSE en.nspname || '.' || et.typname || '[]' - END + WHEN en.nspname = 'pg_catalog' THEN et.typname + WHEN en.nspname = c.table_schema THEN et.typname + ELSE en.nspname || '.' || et.typname + END || COALESCE(substring(format_type(a.atttypid, a.atttypmod) FROM '\([^)]*\)'), '') || '[]' WHEN dt.typtype = 'b' THEN -- Non-array base types: qualify if not in pg_catalog or table's schema -- Use format_type to preserve typmod for extension types (e.g., vector(384) for pgvector) diff --git a/testdata/dump/issue_420_varchar_array_length_modifier/manifest.json b/testdata/dump/issue_420_varchar_array_length_modifier/manifest.json new file mode 100644 index 00000000..3d70e3d2 --- /dev/null +++ b/testdata/dump/issue_420_varchar_array_length_modifier/manifest.json @@ -0,0 +1,10 @@ +{ + "name": "issue_420_varchar_array_length_modifier", + "description": "pgschema dump silently drops the length modifier from varchar(n)[] array columns, emitting varchar[] instead of varchar(128)[]", + "source": "https://github.com/pgplex/pgschema/issues/420", + "notes": [ + "The SQL query for array types used et.typname || '[]' which discards the atttypmod", + "The fix uses format_type(a.atttypid, a.atttypmod) to extract the modifier (e.g., (128))", + "Also covers character(n)[] which had the same bug (bpchar typname)" + ] +} diff --git a/testdata/dump/issue_420_varchar_array_length_modifier/pgdump.sql b/testdata/dump/issue_420_varchar_array_length_modifier/pgdump.sql new file mode 100644 index 00000000..238babdd --- /dev/null +++ b/testdata/dump/issue_420_varchar_array_length_modifier/pgdump.sql @@ -0,0 +1,33 @@ +-- +-- PostgreSQL database dump +-- + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SET check_function_bodies = false; +SET client_min_messages = warning; +SET row_security = off; + +-- +-- Name: items; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.items ( + id integer NOT NULL, + name character varying(64), + tags character varying(128)[], + codes character(10)[] +); + +-- +-- Name: items items_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.items + ADD CONSTRAINT items_pkey PRIMARY KEY (id); + +-- +-- PostgreSQL database dump complete +-- diff --git a/testdata/dump/issue_420_varchar_array_length_modifier/pgschema.sql b/testdata/dump/issue_420_varchar_array_length_modifier/pgschema.sql new file mode 100644 index 00000000..bcd93306 --- /dev/null +++ b/testdata/dump/issue_420_varchar_array_length_modifier/pgschema.sql @@ -0,0 +1,20 @@ +-- +-- pgschema database dump +-- + +-- Dumped from database version PostgreSQL 18.0 +-- Dumped by pgschema version 1.9.0 + + +-- +-- Name: items; Type: TABLE; Schema: -; Owner: - +-- + +CREATE TABLE IF NOT EXISTS items ( + id integer, + name varchar(64), + tags varchar(128)[], + codes character(10)[], + CONSTRAINT items_pkey PRIMARY KEY (id) +); + diff --git a/testdata/dump/issue_420_varchar_array_length_modifier/raw.sql b/testdata/dump/issue_420_varchar_array_length_modifier/raw.sql new file mode 100644 index 00000000..5916bf61 --- /dev/null +++ b/testdata/dump/issue_420_varchar_array_length_modifier/raw.sql @@ -0,0 +1,13 @@ +-- +-- Test case for GitHub issue #420: varchar(n)[] length modifier silently dropped in dump +-- +-- The length modifier is lost when dumping array columns of character types +-- with a length constraint: varchar(128)[] is emitted as varchar[]. +-- + +CREATE TABLE items ( + id integer PRIMARY KEY, + name varchar(64), + tags varchar(128)[], + codes character(10)[] +);