Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,12 @@ pg_query-*.tar

# compiled nif
priv/*.so

# Fuzz build outputs (compiled binaries)
fuzz/_build/

# Generated deparse corpus - rebuild with: make fuzz-corpus
fuzz/corpus/deparse/

# libFuzzer-discovered corpus entries (hash-named files added during fuzzing runs)
fuzz/corpus/parse/[0-9a-f][0-9a-f][0-9a-f][0-9a-f]*
75 changes: 70 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,75 @@ ifeq ($(shell uname -s),Darwin)
LDFLAGS += -undefined dynamic_lookup
endif

.PHONY: all libpg_query_ex clean update-libpg_query
# Fuzzing toolchain. The fuzz rules use their own compiler variable instead of
# $(CC) because -fsanitize=fuzzer needs clang with libFuzzer.
FUZZ_CC = clang
# Compile flags for every fuzz harness: debug info, -O1, libFuzzer plus
# AddressSanitizer instrumentation, and $(LIBPG_QUERY_PATH) on the include path.
FUZZ_CFLAGS = -g -O1 -fsanitize=fuzzer,address -I$(LIBPG_QUERY_PATH)
# Link flags: the same sanitizer set is passed again at link time, plus pthread.
FUZZ_LDFLAGS = -fsanitize=fuzzer,address -lpthread
# One libFuzzer harness per source file listed here.
FUZZ_SRCS = fuzz/fuzz_parse_protobuf.c \
fuzz/fuzz_scan.c \
fuzz/fuzz_deparse.c \
fuzz/fuzz_roundtrip.c
# Each fuzz/<name>.c maps to the binary fuzz/_build/<name>.
FUZZ_BINS = $(patsubst fuzz/%.c,fuzz/_build/%,$(FUZZ_SRCS))

.PHONY: all libpg_query_ex clean fuzz fuzz-corpus fuzz-clean patch update-libpg_query

all: priv/libpg_query_ex.so

priv:
mkdir -p priv

# Output directory for the compiled fuzz binaries.
fuzz/_build:
	mkdir -p $@

# Build the libpg_query static library through its own Makefile.
# -B tells the sub-make to treat all of its targets as out of date.
$(LIBPG_QUERY_PATH)/libpg_query.a:
	$(MAKE) -B -C $(LIBPG_QUERY_PATH) $(@F)

priv/libpg_query_ex.so: priv $(LIBPG_QUERY_PATH)/libpg_query.a c_src/libpg_query_ex.c
priv/libpg_query_ex.so: $(LIBPG_QUERY_PATH)/libpg_query.a c_src/libpg_query_ex.c
mkdir -p priv
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ c_src/libpg_query_ex.c $(LIBPG_QUERY_PATH)/libpg_query.a

# Build all fuzz targets (requires clang with libFuzzer).
# Prerequisites: the output directory, the libpg_query static library, and one
# binary per entry in FUZZ_SRCS. The recipe itself only prints usage hints.
fuzz: fuzz/_build $(LIBPG_QUERY_PATH)/libpg_query.a $(FUZZ_BINS)
@echo ""
@echo "Fuzz targets built in fuzz/_build/. Next steps:"
@echo " 1. Populate the deparse corpus: make fuzz-corpus"
@echo " 2. Run a fuzzer, e.g.:"
@echo " fuzz/_build/fuzz_parse_protobuf -max_len=65536 fuzz/corpus/parse/"
@echo " fuzz/_build/fuzz_deparse -max_len=65536 fuzz/corpus/deparse/"

# Pattern rule: compile and link one libFuzzer harness,
# fuzz/<name>.c -> fuzz/_build/<name>.
# fuzz/_build is an order-only prerequisite so the output directory exists even
# when a binary is requested directly or built under `make -j`. Being
# order-only, the directory's changing mtime never forces a relink.
fuzz/_build/%: fuzz/%.c $(LIBPG_QUERY_PATH)/libpg_query.a | fuzz/_build
	$(FUZZ_CC) $(FUZZ_CFLAGS) -o $@ $< $(LIBPG_QUERY_PATH)/libpg_query.a $(FUZZ_LDFLAGS)

# Corpus generator: an ordinary program, not a fuzz target, so it is built with
# AddressSanitizer only and without -fsanitize=fuzzer. This explicit rule takes
# precedence over the fuzz/_build/% pattern rule for this one target.
# fuzz/_build is an order-only prerequisite so `make fuzz-corpus` works on a
# fresh checkout, where the output directory has not been created yet.
fuzz/_build/gen_deparse_corpus: fuzz/gen_deparse_corpus.c $(LIBPG_QUERY_PATH)/libpg_query.a | fuzz/_build
	$(FUZZ_CC) -g -O1 -fsanitize=address -I$(LIBPG_QUERY_PATH) -o $@ $< \
		$(LIBPG_QUERY_PATH)/libpg_query.a -fsanitize=address -lpthread

# Generate binary protobuf seeds for fuzz_deparse from the SQL corpus
fuzz-corpus: fuzz/_build/gen_deparse_corpus
@mkdir -p fuzz/corpus/deparse
fuzz/_build/gen_deparse_corpus fuzz/corpus/deparse \
"SELECT 1" \
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this autogenerated from fuzz/corpus/parse? Is there a script to regenerate the Makefile after we update the corpus?

Would be useful to have this meta documented for posterity.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's just inline, not autogenerated.

"SELECT id, name FROM users WHERE active = true" \
"SELECT u.id, o.total FROM users u JOIN orders o ON u.id = o.user_id" \
"INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com')" \
"UPDATE users SET active = false WHERE last_login < NOW() - INTERVAL '90 days'" \
"DELETE FROM sessions WHERE expires_at < NOW()" \
"WITH recent AS (SELECT * FROM events WHERE created_at > NOW() - INTERVAL '1 hour') SELECT count(*) FROM recent" \
"SELECT * FROM users WHERE id IN (SELECT user_id FROM orders WHERE total > 100)" \
"SELECT department, count(*), avg(salary) FROM employees GROUP BY department HAVING count(*) > 5" \
"SELECT id, salary, rank() OVER (PARTITION BY department ORDER BY salary DESC) FROM employees" \
"SELECT id::text, created_at::date, price::numeric(10,2) FROM products" \
"SELECT CASE WHEN status = 1 THEN 'active' WHEN status = 2 THEN 'pending' ELSE 'unknown' END FROM orders" \
"SELECT ARRAY[1,2,3], unnest(tags) FROM posts" \
"SELECT data->>'name', data->'address'->>'city' FROM profiles WHERE data @> '{\"active\":true}'" \
"SELECT id FROM users WHERE EXISTS (SELECT 1 FROM orders WHERE orders.user_id = users.id)" \
"SELECT * FROM events WHERE created_at BETWEEN '2024-01-01' AND '2024-12-31'" \
"SELECT * FROM users WHERE email ILIKE '%@example.com' AND name NOT LIKE 'test%'" \
"SELECT * FROM users WHERE deleted_at IS NULL AND parent_id IS NOT NULL" \
"SELECT * FROM users WHERE id = \$$1 AND status = \$$2" \
"SELECT id FROM admins UNION ALL SELECT id FROM moderators EXCEPT SELECT id FROM banned_users"

# Remove everything the fuzz workflow generates: compiled binaries, the
# generated deparse seeds, and crash artifacts.
fuzz-clean:
	$(RM) -r \
		fuzz/_build \
		fuzz/corpus/deparse \
		fuzz/crashes

protobuf:
MIX_ENV=prod mix protox.generate --output-path=lib/pg_query/proto --multiple-files c_src/libpg_query/protobuf/pg_query.proto

Expand All @@ -32,6 +88,15 @@ clean:
$(MAKE) -C $(LIBPG_QUERY_PATH) clean
$(RM) priv/libpg_query_ex.so

# Apply all local patches to the vendored libpg_query source.
# Patches are applied in the shell's glob order and the first failure aborts
# the target. If patches/ holds no *.patch files the glob stays unexpanded, so
# the existence test skips that literal pattern instead of passing it to
# `git apply`, which would fail the target.
patch:
	@for p in patches/*.patch; do \
		[ -e "$$p" ] || continue; \
		echo "Applying $$p ..."; \
		git apply "$$p" || { echo "FAILED: $$p"; exit 1; }; \
	done
	@echo "All patches applied."

update-libpg_query:
git subtree pull -P "c_src/libpg_query" --squash https://github.com/pganalyze/libpg_query.git 15-latest
git subtree pull -P "c_src/libpg_query" --squash https://github.com/pganalyze/libpg_query.git 17-latest
$(MAKE) patch

40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,46 @@ def deps do
end
```

## Fuzzing

The `fuzz/` directory contains [libFuzzer](https://llvm.org/docs/LibFuzzer.html) harnesses that exercise the NIF boundary. You need clang with libFuzzer support (standard in most LLVM distributions).

Build all four targets:

```bash
make fuzz
```

Optionally seed the deparse corpus from known-good SQL strings:

```bash
make fuzz-corpus
```

Then run a target, for example:

```bash
# Fuzz the parse → deparse round-trip (highest-value target)
fuzz/_build/fuzz_roundtrip -max_len=4096 -artifact_prefix=fuzz/crashes/ fuzz/corpus/parse/

# Fuzz raw protobuf bytes fed directly into the deparser
fuzz/_build/fuzz_deparse -max_len=65536 -artifact_prefix=fuzz/crashes/ fuzz/corpus/deparse/

# Fuzz the SQL parser
fuzz/_build/fuzz_parse_protobuf -max_len=4096 -artifact_prefix=fuzz/crashes/ fuzz/corpus/parse/

# Fuzz the SQL scanner
fuzz/_build/fuzz_scan -max_len=4096 -artifact_prefix=fuzz/crashes/ fuzz/corpus/parse/
```

Crash artifacts are written to `fuzz/crashes/` (the `-artifact_prefix` flag controls this; without it libFuzzer writes to the current directory). Replay a crash by passing the file as a positional argument:

```bash
fuzz/_build/fuzz_roundtrip fuzz/crashes/<file>
```

Clean up build artifacts and the generated corpus with `make fuzz-clean`.

## License

This Elixir interface is distributed under the terms of the [Apache 2.0 license](./LICENSE).
Expand Down
3 changes: 2 additions & 1 deletion c_src/libpg_query/.gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
*.sql binary
*.sql binary
*.psql binary
9 changes: 7 additions & 2 deletions c_src/libpg_query/.github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04]
os: [ubuntu-24.04]
compiler: [clang, gcc]
protobuf_lib: [protobuf-c, protobuf-cpp]
valgrind: [valgrind,no-valgrind]
exclude:
# this combination hits linking errors: see https://github.com/pganalyze/libpg_query/pull/289
- compiler: clang
protobuf_lib: protobuf-cpp
valgrind: valgrind
steps:
- name: Check out code
uses: actions/checkout@v4
Expand All @@ -21,7 +26,7 @@ jobs:
uses: actions/cache@v4
with:
path: protobuf-25.1
key: ${{ runner.os }}-protobuf-25.1
key: ${{ runner.os }}-${{ matrix.compiler }}-protobuf-25.1
- name: Build protobuf library
if: matrix.protobuf_lib == 'protobuf-cpp' && steps.cache-protobuf.outputs.cache-hit != 'true'
run: |
Expand Down
44 changes: 44 additions & 0 deletions c_src/libpg_query/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,50 @@

All versions are tagged by the major Postgres version, plus an individual semver for this library itself.

## 17-6.2.2 2026-01-26

* pg_query_normalize: Fix handling of special strings in DefElem [#325](https://github.com/pganalyze/libpg_query/pull/325)
- This avoids a crash when running the normalize function on certain utility statements
* pg_query_deparse_comments_for_query: Add missing initialization [#324](https://github.com/pganalyze/libpg_query/pull/324)
- This avoids a crash for callers that read the error field of the result when there is no error

## 17-6.2.1 2026-01-14

* Add pg_query_is_utility_stmt function to determine if query text contains utility statements [#313](https://github.com/pganalyze/libpg_query/pull/313)
* This is a fast check for callers that don't actually need the parse tree itself
* Add missing top-level postgres_deparse.h in Makefile install step
- This was an oversight from the previous 6.2.0 release
* Improve pg_query_summary function:
- Speed up summary truncate replacement logic
- Correctly handle `GRANT .. ON ALL TABLES IN SCHEMA` statements
- Correctly handle schema qualified filter columns

## 17-6.2.0 2025-12-10

* Add fast summary information function (pg_query_summary)
- This allows gathering certain information, for example which tables are referenced in a
statement, without requiring a Protobuf serialization step in a higher level library
- Additionally this can also be used to perform "smart truncation" of a query by
omitting deeply nested information (e.g. a CTE definition, or a target list) whilst
preserving more essential parts like the FROM clause
* Deparser:
- Introduce pretty printing / formatting
- Introduces a new optional pretty print mode that emits a human readable
output. A detailed explanation of the mechanism can be found at the start
of the deparser file.
- Rework handling of expressions inside typecasts
- Prefer (..)::type syntax, unless we are already in a function call.
- Use lowercase keywords in xmlroot functions
- This matches other XML functions as well as the Postgres documentation,
since these are closer to function argument names than regular keywords.
- Fix deparse of ALTER TABLE a ALTER b SET STATISTICS DEFAULT
- Fix the null pointer dereference when handling identity columns
* Allow alternate definitions of NAMEDATALEN identifier limit
- This allows building libpg_query with an override of the build-time limit of
Postgres identifiers (typically 63 characters)
* Normalization: Add support for CALL statements
* Bump Postgres to 17.7 and switch back to release tarballs

## 17-6.1.0 2025-04-02

* Update to Postgres 17.4, and add recent patches scheduled for Postgres 17.5 (not yet released)
Expand Down
41 changes: 28 additions & 13 deletions c_src/libpg_query/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@ PGDIR = $(root_dir)/tmp/postgres
PGDIRBZ2 = $(root_dir)/tmp/postgres.tar.bz2
PGDIRZIP = $(root_dir)/tmp/postgres.zip

PG_VERSION = 17.4
PG_VERSION = 17.7
PG_VERSION_MAJOR = $(call word-dot,$(PG_VERSION),1)
PG_VERSION_NUM = 170004
PG_BRANCH = REL_17_STABLE
PG_VERSION_NUM = 170007
PROTOC_VERSION = 25.1

VERSION = 6.1.0
VERSION = 6.2.2
VERSION_MAJOR = $(call word-dot,$(VERSION),1)
VERSION_MINOR = $(call word-dot,$(VERSION),2)
VERSION_PATCH = $(call word-dot,$(VERSION),3)
Expand All @@ -39,6 +38,7 @@ override CFLAGS += -g -I. -I./vendor -I./src/include -I./src/postgres/include -W

ifeq ($(OS),Windows_NT)
override CFLAGS += -I./src/postgres/include/port/win32
override TEST_CFLAGS += -I./src/postgres/include/port/win32
endif

override PG_CONFIGURE_FLAGS += -q --without-readline --without-zlib --without-icu
Expand Down Expand Up @@ -116,14 +116,9 @@ clean:
.PHONY: all clean build build_shared extract_source examples test install

$(PGDIR):
# We temporarily build off REL_17_STABLE to pull in https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=6da2ba1d8a031984eb016fed6741bb2ac945f19d
# TODO: Go back to upstream tarball once 17.5 is released
# tar -xjf $(PGDIRBZ2)
# curl -o $(PGDIRBZ2) https://ftp.postgresql.org/pub/source/v$(PG_VERSION)/postgresql-$(PG_VERSION).tar.bz2
# mv $(root_dir)/postgresql-$(PG_VERSION) $(PGDIR)
curl -L -o $(PGDIRZIP) https://github.com/postgres/postgres/archive/refs/heads/$(PG_BRANCH).zip
unzip $(PGDIRZIP)
mv $(root_dir)/postgres-$(PG_BRANCH) $(PGDIR)
curl -o $(PGDIRBZ2) https://ftp.postgresql.org/pub/source/v$(PG_VERSION)/postgresql-$(PG_VERSION).tar.bz2
tar -xjf $(PGDIRBZ2)
mv $(root_dir)/postgresql-$(PG_VERSION) $(PGDIR)
cd $(PGDIR); patch -p1 < $(root_dir)/patches/01_parser_additional_param_ref_support.patch
cd $(PGDIR); patch -p1 < $(root_dir)/patches/03_lexer_track_yyllocend.patch
cd $(PGDIR); patch -p1 < $(root_dir)/patches/04_lexer_comments_as_tokens.patch
Expand All @@ -133,6 +128,7 @@ $(PGDIR):
cd $(PGDIR); patch -p1 < $(root_dir)/patches/08_avoid_zero_length_delimiter_in_regression_tests.patch
cd $(PGDIR); patch -p1 < $(root_dir)/patches/09_allow_param_junk.patch
cd $(PGDIR); patch -p1 < $(root_dir)/patches/10_avoid_namespace_hashtab_impl_gen.patch
cd $(PGDIR); patch -p1 < $(root_dir)/patches/11_ifndef_namedatalen.patch
cd $(PGDIR); ./configure $(PG_CONFIGURE_FLAGS)
cd $(PGDIR); make -C src/pl/plpgsql/src pl_gram.h plerrcodes.h pl_reserved_kwlist_d.h pl_unreserved_kwlist_d.h
cd $(PGDIR); make -C src/port pg_config_paths.h
Expand Down Expand Up @@ -241,14 +237,15 @@ examples/normalize_error: examples/normalize_error.c $(ARLIB)
examples/simple_plpgsql: examples/simple_plpgsql.c $(ARLIB)
$(CC) $(TEST_CFLAGS) -o $@ -g examples/simple_plpgsql.c $(ARLIB) $(TEST_LDFLAGS)

TESTS = test/complex test/concurrency test/deparse test/fingerprint test/fingerprint_opts test/normalize test/normalize_utility test/parse test/parse_opts test/parse_protobuf test/parse_protobuf_opts test/parse_plpgsql test/scan test/split
TESTS = test/complex test/concurrency test/deparse test/fingerprint test/fingerprint_opts test/is_utility_stmt test/normalize test/normalize_utility test/parse test/parse_opts test/parse_protobuf test/parse_protobuf_opts test/parse_plpgsql test/scan test/split test/summary test/summary_truncate
test: $(TESTS)
ifeq ($(VALGRIND),1)
$(VALGRIND_MEMCHECK) test/complex || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/concurrency || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/deparse || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/fingerprint || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/fingerprint_opts || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/is_utility_stmt || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/normalize || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/normalize_utility || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/parse || (cat test/valgrind.log && false)
Expand All @@ -257,6 +254,8 @@ ifeq ($(VALGRIND),1)
$(VALGRIND_MEMCHECK) test/parse_protobuf_opts || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/scan || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/split || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/summary || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/summary_truncate || (cat test/valgrind.log && false)
# Output-based tests
$(VALGRIND_MEMCHECK) test/parse_plpgsql || (cat test/valgrind.log && false)
diff -Naur test/plpgsql_samples.expected.json test/plpgsql_samples.actual.json
Expand All @@ -266,6 +265,7 @@ else
test/deparse
test/fingerprint
test/fingerprint_opts
test/is_utility_stmt
test/normalize
test/normalize_utility
test/parse
Expand All @@ -274,6 +274,8 @@ else
test/parse_protobuf_opts
test/scan
test/split
test/summary
test/summary_truncate
# Output-based tests
test/parse_plpgsql
diff -Naur test/plpgsql_samples.expected.json test/plpgsql_samples.actual.json
Expand All @@ -297,6 +299,10 @@ test/fingerprint_opts: test/fingerprint_opts.c test/fingerprint_opts_tests.c $(A
# We have "-Isrc/" because this test uses pg_query_fingerprint_with_opts
$(CC) $(TEST_CFLAGS) -o $@ -Isrc/ test/fingerprint_opts.c $(ARLIB) $(TEST_LDFLAGS)

test/is_utility_stmt: test/framework/main.c test/is_utility_stmt.c $(ARLIB)
# We have "-Isrc/postgres/include" because this test uses pg_query_summary_direct
$(CC) $(TEST_CFLAGS) -o $@ -Isrc/postgres/include test/framework/main.c test/is_utility_stmt.c $(ARLIB) $(TEST_LDFLAGS)

test/normalize: test/normalize.c test/normalize_tests.c $(ARLIB)
$(CC) $(TEST_CFLAGS) -o $@ test/normalize.c $(ARLIB) $(TEST_LDFLAGS)

Expand All @@ -306,6 +312,14 @@ test/normalize_utility: test/normalize_utility.c test/normalize_utility_tests.c
test/parse: test/parse.c test/parse_tests.c $(ARLIB)
$(CC) $(TEST_CFLAGS) -o $@ test/parse.c $(ARLIB) $(TEST_LDFLAGS)

test/summary: test/framework/main.c test/summary.c test/summary_tests.c test/summary_tests_list.c $(ARLIB)
# We have "-Isrc/postgres/include" because this test uses pg_query_summary_direct
$(CC) $(TEST_CFLAGS) -o $@ -Isrc/postgres/include test/framework/main.c test/summary.c $(ARLIB) $(TEST_LDFLAGS)

test/summary_truncate: test/framework/main.c test/summary_truncate.c $(ARLIB)
# We have "-Isrc/postgres/include" because this test uses pg_query_summary_direct
$(CC) $(TEST_CFLAGS) -o $@ -Isrc/postgres/include test/framework/main.c test/summary_truncate.c $(ARLIB) $(TEST_LDFLAGS)

test/parse_opts: test/parse_opts.c test/parse_opts_tests.c $(ARLIB)
$(CC) $(TEST_CFLAGS) -o $@ test/parse_opts.c $(ARLIB) $(TEST_LDFLAGS)

Expand Down Expand Up @@ -336,4 +350,5 @@ install: $(ARLIB) $(SOLIB)
$(LN_S) $(SOLIBVER) "$(DESTDIR)"$(libdir)/$(SOLIB)
$(INSTALL) -d "$(DESTDIR)"$(includedir)/$(TARGET)
$(INSTALL) -m 644 pg_query.h "$(DESTDIR)"$(includedir)/pg_query.h
$(INSTALL) -m 644 postgres_deparse.h "$(DESTDIR)"$(includedir)/postgres_deparse.h
$(INSTALL) -m 644 protobuf/pg_query.proto "$(DESTDIR)"$(includedir)/$(TARGET)/pg_query.proto
Loading
Loading